[SCSI] zfcp: Fix hang when offlining device with offline chpid
authorChristof Schmitt <christof.schmitt@de.ibm.com>
Thu, 24 Sep 2009 08:23:25 +0000 (10:23 +0200)
committerJames Bottomley <James.Bottomley@suse.de>
Fri, 2 Oct 2009 14:50:21 +0000 (09:50 -0500)
Running chchp --vary 0 and chccwdev -d on a FCP device with scsi
devices attached can lead to this thread hanging:

================================================================
STACK TRACE FOR TASK: 0x2fbfcc00 (kslowcrw)

 STACK:
 0 schedule+1136 [0x45f99c]
 1 schedule_timeout+534 [0x46054e]
 2 wait_for_common+374 [0x45f442]
 3 blk_execute_rq+160 [0x217a2c]
 4 scsi_execute+278 [0x26daf2]
 5 scsi_execute_req+150 [0x26dc86]
 6 sd_sync_cache+138 [0x28460a]
 7 sd_shutdown+130 [0x28486a]
 8 sd_remove+104 [0x284c84]
 9 __device_release_driver+152 [0x257430]
10 device_release_driver+56 [0x2575c8]
11 bus_remove_device+214 [0x25672a]
12 device_del+352 [0x25456c]
13 __scsi_remove_device+108 [0x272630]
14 scsi_remove_device+66 [0x2726ba]
15 zfcp_ccw_remove+824 [0x335558]
16 ccw_device_remove+62 [0x2b3f2a]
17 __device_release_driver+152 [0x257430]
18 device_release_driver+56 [0x2575c8]
19 bus_remove_device+214 [0x25672a]
20 device_del+352 [0x25456c]
21 ccw_device_unregister+92 [0x2b48c4]
22 io_subchannel_remove+108 [0x2b4950]
23 css_remove+62 [0x2af7ee]
24 __device_release_driver+152 [0x257430]
25 device_release_driver+56 [0x2575c8]
26 bus_remove_device+214 [0x25672a]
27 device_del+352 [0x25456c]
28 device_unregister+38 [0x25464a]
29 css_sch_device_unregister+68 [0x2af97c]
30 ccw_device_call_sch_unregister+78 [0x2b581e]
31 worker_thread+604 [0x69eb0]
32 kthread+154 [0x6ff42]
33 kernel_thread_starter+6 [0x1c952]
================================================================

The problem is that the chchp --vary 0 leads to zfcp first calling
fc_remote_port_delete which blocks all scsi devices on the remote
port. Calling scsi_remove_device later lets the sd driver issue a
SYNCHRONIZE_CACHE command. This command stays on the "stopped" request
requeue because the SCSI device is blocked. Fix this by first removing
the scsi and fc hosts which removes all scsi devices and do not use
scsi_remove_device.

Reviewed-by: Felix Beck <felix.beck@de.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/s390/scsi/zfcp_aux.c
drivers/s390/scsi/zfcp_ccw.c

index 5dcac24..0f79f3a 100644 (file)
@@ -604,7 +604,6 @@ void zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
 
        cancel_work_sync(&adapter->stat_work);
        zfcp_fc_wka_ports_force_offline(adapter->gs);
-       zfcp_adapter_scsi_unregister(adapter);
        sysfs_remove_group(&adapter->ccw_device->dev.kobj,
                           &zfcp_sysfs_adapter_attrs);
        dev_set_drvdata(&adapter->ccw_device->dev, NULL);
index 9fe32f7..e083394 100644 (file)
@@ -107,6 +107,10 @@ static void zfcp_ccw_remove(struct ccw_device *ccw_device)
        cancel_work_sync(&adapter->scan_work);
 
        mutex_lock(&zfcp_data.config_mutex);
+
+       /* this also removes the scsi devices, so call it first */
+       zfcp_adapter_scsi_unregister(adapter);
+
        write_lock_irq(&zfcp_data.config_lock);
        list_for_each_entry_safe(port, p, &adapter->port_list_head, list) {
                list_for_each_entry_safe(unit, u, &port->unit_list_head, list) {
@@ -121,11 +125,8 @@ static void zfcp_ccw_remove(struct ccw_device *ccw_device)
        write_unlock_irq(&zfcp_data.config_lock);
 
        list_for_each_entry_safe(port, p, &port_remove_lh, list) {
-               list_for_each_entry_safe(unit, u, &unit_remove_lh, list) {
-                       if (unit->device)
-                               scsi_remove_device(unit->device);
+               list_for_each_entry_safe(unit, u, &unit_remove_lh, list)
                        zfcp_unit_dequeue(unit);
-               }
                zfcp_port_dequeue(port);
        }
        wait_event(adapter->remove_wq, atomic_read(&adapter->refcount) == 0);