X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fata%2Flibata-eh.c;h=d6e67488174c01d9f36af72771b78aade75aacf1;hb=c429137a67b82788d24682153bb9c96501a9ef34;hp=2eaa39fc65d031d5f44519e3ec43e9a80d3cb34b;hpb=2855568b1ee4f58ef2c0a13ddfceb4b0b216b7ed;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2eaa39f..d6e6748 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -33,12 +33,14 @@ */ #include +#include #include #include #include #include #include #include +#include #include "../scsi/scsi_transport_api.h" #include @@ -46,18 +48,45 @@ #include "libata.h" enum { + /* speed down verdicts */ ATA_EH_SPDN_NCQ_OFF = (1 << 0), ATA_EH_SPDN_SPEED_DOWN = (1 << 1), ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), -}; + ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), + + /* error flags */ + ATA_EFLAG_IS_IO = (1 << 0), + ATA_EFLAG_DUBIOUS_XFER = (1 << 1), + + /* error categories */ + ATA_ECAT_NONE = 0, + ATA_ECAT_ATA_BUS = 1, + ATA_ECAT_TOUT_HSM = 2, + ATA_ECAT_UNK_DEV = 3, + ATA_ECAT_DUBIOUS_NONE = 4, + ATA_ECAT_DUBIOUS_ATA_BUS = 5, + ATA_ECAT_DUBIOUS_TOUT_HSM = 6, + ATA_ECAT_DUBIOUS_UNK_DEV = 7, + ATA_ECAT_NR = 8, + + ATA_EH_CMD_DFL_TIMEOUT = 5000, + + /* always put at least this amount of time between resets */ + ATA_EH_RESET_COOL_DOWN = 5000, + + /* Waiting in ->prereset can never be reliable. It's + * sometimes nice to wait there but it can't be depended upon; + * otherwise, we wouldn't be resetting. Just give it enough + * time for most drives to spin up. + */ + ATA_EH_PRERESET_TIMEOUT = 10000, + ATA_EH_FASTDRAIN_INTERVAL = 3000, -/* Waiting in ->prereset can never be reliable. It's sometimes nice - * to wait there but it can't be depended upon; otherwise, we wouldn't - * be resetting. Just give it enough time for most drives to spin up. - */ -enum { - ATA_EH_PRERESET_TIMEOUT = 10 * HZ, - ATA_EH_FASTDRAIN_INTERVAL = 3 * HZ, + ATA_EH_UA_TRIES = 5, + + /* probe speed down parameters, see ata_eh_schedule_probe() */ + ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ + ATA_EH_PROBE_TRIALS = 2, }; /* The following table determines how we sequence resets. Each entry @@ -67,12 +96,68 @@ enum { * are mostly for error handling, hotplug and retarded devices. */ static const unsigned long ata_eh_reset_timeouts[] = { - 10 * HZ, /* most drives spin up by 10sec */ - 10 * HZ, /* > 99% working drives spin up before 20sec */ - 35 * HZ, /* give > 30 secs of idleness for retarded devices */ - 5 * HZ, /* and sweet one last chance */ - /* > 1 min has elapsed, give up */ + 10000, /* most drives spin up by 10sec */ + 10000, /* > 99% working drives spin up before 20sec */ + 35000, /* give > 30 secs of idleness for retarded devices */ + 5000, /* and sweet one last chance */ + ULONG_MAX, /* > 1 min has elapsed, give up */ +}; + +static const unsigned long ata_eh_identify_timeouts[] = { + 5000, /* covers > 99% of successes and not too boring on failures */ + 10000, /* combined time till here is enough even for media access */ + 30000, /* for true idiots */ + ULONG_MAX, +}; + +static const unsigned long ata_eh_flush_timeouts[] = { + 15000, /* be generous with flush */ + 15000, /* ditto */ + 30000, /* and even more generous */ + ULONG_MAX, +}; + +static const unsigned long ata_eh_other_timeouts[] = { + 5000, /* same rationale as identify timeout */ + 10000, /* ditto */ + /* but no merciful 30sec for other commands, it just isn't worth it */ + ULONG_MAX, +}; + +struct ata_eh_cmd_timeout_ent { + const u8 *commands; + const unsigned long *timeouts; +}; + +/* The following table determines timeouts to use for EH internal + * commands. Each table entry is a command class and matches the + * commands the entry applies to and the timeout table to use. + * + * On the retry after a command timed out, the next timeout value from + * the table is used. If the table doesn't contain further entries, + * the last value is used. + * + * ehc->cmd_timeout_idx keeps track of which timeout to use per + * command class, so if SET_FEATURES times out on the first try, the + * next try will use the second timeout value only for that class. + */ +#define CMDS(cmds...) (const u8 []){ cmds, 0 } +static const struct ata_eh_cmd_timeout_ent +ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { + { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), + .timeouts = ata_eh_identify_timeouts, }, + { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), + .timeouts = ata_eh_other_timeouts, }, + { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), + .timeouts = ata_eh_other_timeouts, }, + { .commands = CMDS(ATA_CMD_SET_FEATURES), + .timeouts = ata_eh_other_timeouts, }, + { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), + .timeouts = ata_eh_other_timeouts, }, + { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), + .timeouts = ata_eh_flush_timeouts }, }; +#undef CMDS static void __ata_port_freeze(struct ata_port *ap); #ifdef CONFIG_PM @@ -213,12 +298,80 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, if (offset < 0) ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); else - ata_port_desc(ap, "%s 0x%llx", name, start + offset); + ata_port_desc(ap, "%s 0x%llx", name, + start + (unsigned long long)offset); } #endif /* CONFIG_PCI */ -static void ata_ering_record(struct ata_ering *ering, int is_io, +static int ata_lookup_timeout_table(u8 cmd) +{ + int i; + + for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { + const u8 *cur; + + for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) + if (*cur == cmd) + return i; + } + + return -1; +} + +/** + * ata_internal_cmd_timeout - determine timeout for an internal command + * @dev: target device + * @cmd: internal command to be issued + * + * Determine timeout for internal command @cmd for @dev. + * + * LOCKING: + * EH context. + * + * RETURNS: + * Determined timeout. + */ +unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) +{ + struct ata_eh_context *ehc = &dev->link->eh_context; + int ent = ata_lookup_timeout_table(cmd); + int idx; + + if (ent < 0) + return ATA_EH_CMD_DFL_TIMEOUT; + + idx = ehc->cmd_timeout_idx[dev->devno][ent]; + return ata_eh_cmd_timeout_table[ent].timeouts[idx]; +} + +/** + * ata_internal_cmd_timed_out - notification for internal command timeout + * @dev: target device + * @cmd: internal command which timed out + * + * Notify EH that internal command @cmd for @dev timed out. This + * function should be called only for commands whose timeouts are + * determined using ata_internal_cmd_timeout(). + * + * LOCKING: + * EH context. + */ +void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) +{ + struct ata_eh_context *ehc = &dev->link->eh_context; + int ent = ata_lookup_timeout_table(cmd); + int idx; + + if (ent < 0) + return; + + idx = ehc->cmd_timeout_idx[dev->devno][ent]; + if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) + ehc->cmd_timeout_idx[dev->devno][ent]++; +} + +static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, unsigned int err_mask) { struct ata_ering_entry *ent; @@ -229,11 +382,20 @@ static void ata_ering_record(struct ata_ering *ering, int is_io, ering->cursor %= ATA_ERING_SIZE; ent = &ering->ring[ering->cursor]; - ent->is_io = is_io; + ent->eflags = eflags; ent->err_mask = err_mask; ent->timestamp = get_jiffies_64(); } +static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) +{ + struct ata_ering_entry *ent = &ering->ring[ering->cursor]; + + if (ent->err_mask) + return ent; + return NULL; +} + static void ata_ering_clear(struct ata_ering *ering) { memset(ering, 0, sizeof(*ering)); @@ -274,7 +436,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, if (!dev) { ehi->action &= ~action; - ata_link_for_each_dev(tdev, link) + ata_for_each_dev(tdev, link, ALL) ehi->dev_action[tdev->devno] &= ~action; } else { /* doesn't make sense for port-wide EH actions */ @@ -282,7 +444,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, /* break ehi->action into ehi->dev_action */ if (ehi->action & action) { - ata_link_for_each_dev(tdev, link) + ata_for_each_dev(tdev, link, ALL) ehi->dev_action[tdev->devno] |= ehi->action & action; ehi->action &= ~action; @@ -312,29 +474,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, * RETURNS: * EH_HANDLED or EH_NOT_HANDLED */ -enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; struct ata_port *ap = ata_shost_to_port(host); unsigned long flags; struct ata_queued_cmd *qc; - enum scsi_eh_timer_return ret; + enum blk_eh_timer_return ret; DPRINTK("ENTER\n"); if (ap->ops->error_handler) { - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; goto out; } - ret = EH_HANDLED; + ret = BLK_EH_HANDLED; spin_lock_irqsave(ap->lock, flags); qc = ata_qc_from_tag(ap, ap->link.active_tag); if (qc) { WARN_ON(qc->scsicmd != cmd); qc->flags |= ATA_QCFLAG_EH_SCHEDULED; qc->err_mask |= AC_ERR_TIMEOUT; - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; } spin_unlock_irqrestore(ap->lock, flags); @@ -343,6 +505,31 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) return ret; } +static void ata_eh_unload(struct ata_port *ap) +{ + struct ata_link *link; + struct ata_device *dev; + unsigned long flags; + + /* Restore SControl IPM and SPD for the next driver and + * disable attached devices. + */ + ata_for_each_link(link, ap, PMP_FIRST) { + sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); + ata_for_each_dev(dev, link, ALL) + ata_dev_disable(dev); + } + + /* freeze and set UNLOADED */ + spin_lock_irqsave(ap->lock, flags); + + ata_port_freeze(ap); /* won't be thawed */ + ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ + ap->pflags |= ATA_PFLAG_UNLOADED; + + spin_unlock_irqrestore(ap->lock, flags); +} + /** * ata_scsi_error - SCSI layer error handler callback * @host: SCSI host on which error occurred @@ -363,14 +550,14 @@ void ata_scsi_error(struct Scsi_Host *host) DPRINTK("ENTER\n"); - /* synchronize with port task */ - ata_port_flush_task(ap); + /* make sure sff pio task is not running */ + ata_sff_flush_pio_task(ap); /* synchronize with host lock and sort out timeouts */ /* For new EH, all qcs are finished in one of three ways - * normal completion, error completion, and SCSI timeout. - * Both cmpletions can race against SCSI timeout. When normal + * Both completions can race against SCSI timeout. When normal * completion wins, the qc never reaches EH. When error * completion wins, the qc has ATA_QCFLAG_FAILED set. * @@ -385,7 +572,19 @@ void ata_scsi_error(struct Scsi_Host *host) int nr_timedout = 0; spin_lock_irqsave(ap->lock, flags); - + + /* This must occur under the ap->lock as we don't want + a polled recovery to race the real interrupt handler + + The lost_interrupt handler checks for any completed but + non-notified command and completes much like an IRQ handler. + + We then fall into the error recovery code which will treat + this as if normal completion won the race */ + + if (ap->ops->lost_interrupt) + ap->ops->lost_interrupt(ap); + list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { struct ata_queued_cmd *qc; @@ -429,6 +628,9 @@ void ata_scsi_error(struct Scsi_Host *host) ap->eh_tries = ATA_EH_MAX_TRIES; } else spin_unlock_wait(ap->lock); + + /* If we timed raced normal completion and there is nothing to + recover nr_timedout == 0 why exactly are we doing error recovery ? */ repeat: /* invoke error handler */ @@ -444,10 +646,21 @@ void ata_scsi_error(struct Scsi_Host *host) /* fetch & clear EH info */ spin_lock_irqsave(ap->lock, flags); - __ata_port_for_each_link(link, ap) { + ata_for_each_link(link, ap, HOST_FIRST) { + struct ata_eh_context *ehc = &link->eh_context; + struct ata_device *dev; + memset(&link->eh_context, 0, sizeof(link->eh_context)); link->eh_context.i = link->eh_info; memset(&link->eh_info, 0, sizeof(link->eh_info)); + + ata_for_each_dev(dev, link, ENABLED) { + int devno = dev->devno; + + ehc->saved_xfer_mode[devno] = dev->xfer_mode; + if (ata_ncq_enabled(dev)) + ehc->saved_ncq_enabled |= 1 << devno; + } } ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; @@ -459,8 +672,13 @@ void ata_scsi_error(struct Scsi_Host *host) /* invoke EH, skip if unloading or suspended */ if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) ap->ops->error_handler(ap); - else + else { + /* if unloading, commence suicide */ + if ((ap->pflags & ATA_PFLAG_UNLOADING) && + !(ap->pflags & ATA_PFLAG_UNLOADED)) + ata_eh_unload(ap); ata_eh_finish(ap); + } /* process port suspend request */ ata_eh_handle_port_suspend(ap); @@ -482,7 +700,7 @@ void ata_scsi_error(struct Scsi_Host *host) } /* this run is complete, make sure EH info is clear */ - __ata_port_for_each_link(link, ap) + ata_for_each_link(link, ap, HOST_FIRST) memset(&link->eh_info, 0, sizeof(link->eh_info)); /* Clear host_eh_scheduled while holding ap->lock such @@ -559,101 +777,6 @@ void ata_port_wait_eh(struct ata_port *ap) } } -/** - * ata_qc_timeout - Handle timeout of queued command - * @qc: Command that timed out - * - * Some part of the kernel (currently, only the SCSI layer) - * has noticed that the active command on port @ap has not - * completed after a specified length of time. Handle this - * condition by disabling DMA (if necessary) and completing - * transactions, with error if necessary. - * - * This also handles the case of the "lost interrupt", where - * for some reason (possibly hardware bug, possibly driver bug) - * an interrupt was not delivered to the driver, even though the - * transaction completed successfully. - * - * TODO: kill this function once old EH is gone. - * - * LOCKING: - * Inherited from SCSI layer (none, can sleep) - */ -static void ata_qc_timeout(struct ata_queued_cmd *qc) -{ - struct ata_port *ap = qc->ap; - u8 host_stat = 0, drv_stat; - unsigned long flags; - - DPRINTK("ENTER\n"); - - ap->hsm_task_state = HSM_ST_IDLE; - - spin_lock_irqsave(ap->lock, flags); - - switch (qc->tf.protocol) { - - case ATA_PROT_DMA: - case ATA_PROT_ATAPI_DMA: - host_stat = ap->ops->bmdma_status(ap); - - /* before we do anything else, clear DMA-Start bit */ - ap->ops->bmdma_stop(qc); - - /* fall through */ - - default: - ata_altstatus(ap); - drv_stat = ata_chk_status(ap); - - /* ack bmdma irq events */ - ap->ops->irq_clear(ap); - - ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, " - "stat 0x%x host_stat 0x%x\n", - qc->tf.command, drv_stat, host_stat); - - /* complete taskfile transaction */ - qc->err_mask |= AC_ERR_TIMEOUT; - break; - } - - spin_unlock_irqrestore(ap->lock, flags); - - ata_eh_qc_complete(qc); - - DPRINTK("EXIT\n"); -} - -/** - * ata_eng_timeout - Handle timeout of queued command - * @ap: Port on which timed-out command is active - * - * Some part of the kernel (currently, only the SCSI layer) - * has noticed that the active command on port @ap has not - * completed after a specified length of time. Handle this - * condition by disabling DMA (if necessary) and completing - * transactions, with error if necessary. - * - * This also handles the case of the "lost interrupt", where - * for some reason (possibly hardware bug, possibly driver bug) - * an interrupt was not delivered to the driver, even though the - * transaction completed successfully. - * - * TODO: kill this function once old EH is gone. - * - * LOCKING: - * Inherited from SCSI layer (none, can sleep) - */ -void ata_eng_timeout(struct ata_port *ap) -{ - DPRINTK("ENTER\n"); - - ata_qc_timeout(ata_qc_from_tag(ap, ap->link.active_tag)); - - DPRINTK("EXIT\n"); -} - static int ata_eh_nr_in_flight(struct ata_port *ap) { unsigned int tag; @@ -698,7 +821,7 @@ void ata_eh_fastdrain_timerfn(unsigned long arg) /* some qcs have finished, give it another chance */ ap->fastdrain_cnt = cnt; ap->fastdrain_timer.expires = - jiffies + ATA_EH_FASTDRAIN_INTERVAL; + ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); add_timer(&ap->fastdrain_timer); } @@ -738,7 +861,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) /* activate fast drain */ ap->fastdrain_cnt = cnt; - ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL; + ap->fastdrain_timer.expires = + ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); add_timer(&ap->fastdrain_timer); } @@ -755,6 +879,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct request_queue *q = qc->scsicmd->device->request_queue; + unsigned long flags; WARN_ON(!ap->ops->error_handler); @@ -766,7 +892,9 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - scsi_req_abort_cmd(qc->scsicmd); + spin_lock_irqsave(q->queue_lock, flags); + blk_abort_request(qc->scsicmd->request); + spin_unlock_irqrestore(q->queue_lock, flags); } /** @@ -885,7 +1013,9 @@ static void __ata_port_freeze(struct ata_port *ap) * ata_port_freeze - abort & freeze port * @ap: ATA port to freeze * - * Abort and freeze @ap. + * Abort and freeze @ap. The freeze operation must be called + * first, because some hardware requires special operations + * before the taskfile registers are accessible. * * LOCKING: * spin_lock_irqsave(host lock) @@ -899,8 +1029,8 @@ int ata_port_freeze(struct ata_port *ap) WARN_ON(!ap->ops->error_handler); - nr_aborted = ata_port_abort(ap); __ata_port_freeze(ap); + nr_aborted = ata_port_abort(ap); return nr_aborted; } @@ -930,9 +1060,9 @@ int sata_async_notification(struct ata_port *ap) if (rc == 0) sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); - if (!ap->nr_pmp_links || rc) { + if (!sata_pmp_attached(ap) || rc) { /* PMP is not attached or SNTF is not available */ - if (!ap->nr_pmp_links) { + if (!sata_pmp_attached(ap)) { /* PMP is not attached. Check whether ATAPI * AN is configured. If so, notify media * change. @@ -957,7 +1087,7 @@ int sata_async_notification(struct ata_port *ap) struct ata_link *link; /* check and notify ATAPI AN */ - ata_port_for_each_link(link, ap) { + ata_for_each_link(link, ap, EDGE) { if (!(sntf & (1 << link->pmp))) continue; @@ -1081,6 +1211,32 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc) } /** + * ata_dev_disable - disable ATA device + * @dev: ATA device to disable + * + * Disable @dev. + * + * Locking: + * EH context. + */ +void ata_dev_disable(struct ata_device *dev) +{ + if (!ata_dev_enabled(dev)) + return; + + if (ata_msg_drv(dev->link->ap)) + ata_dev_printk(dev, KERN_WARNING, "disabled\n"); + ata_acpi_on_disable(dev); + ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); + dev->class++; + + /* From now till the next successful probe, ering is used to + * track probe failures. Clear accumulated device error info. + */ + ata_ering_clear(&dev->ering); +} + +/** * ata_eh_detach_dev - detach ATA device * @dev: ATA device to detach * @@ -1093,6 +1249,7 @@ void ata_eh_detach_dev(struct ata_device *dev) { struct ata_link *link = dev->link; struct ata_port *ap = link->ap; + struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; ata_dev_disable(dev); @@ -1106,9 +1263,11 @@ void ata_eh_detach_dev(struct ata_device *dev) ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; } - /* clear per-dev EH actions */ + /* clear per-dev EH info */ ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); + ehc->saved_xfer_mode[dev->devno] = 0; + ehc->saved_ncq_enabled &= ~(1 << dev->devno); spin_unlock_irqrestore(ap->lock, flags); } @@ -1136,22 +1295,12 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, spin_lock_irqsave(ap->lock, flags); - /* Reset is represented by combination of actions and EHI - * flags. Suck in all related bits before clearing eh_info to - * avoid losing requested action. - */ - if (action & ATA_EH_RESET_MASK) { - ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; - ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; - - /* make sure all reset actions are cleared & clear EHI flags */ - action |= ATA_EH_RESET_MASK; - ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; - } - ata_eh_clear_action(link, dev, ehi, action); - if (!(ehc->i.flags & ATA_EHI_QUIET)) + /* About to take EH action, set RECOVERED. Ignore actions on + * slave links as master will do them again. + */ + if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) ap->pflags |= ATA_PFLAG_RECOVERED; spin_unlock_irqrestore(ap->lock, flags); @@ -1174,12 +1323,6 @@ void ata_eh_done(struct ata_link *link, struct ata_device *dev, { struct ata_eh_context *ehc = &link->eh_context; - /* if reset is complete, clear all reset actions & reset modifier */ - if (action & ATA_EH_RESET_MASK) { - action |= ATA_EH_RESET_MASK; - ehc->i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; - } - ata_eh_clear_action(link, dev, &ehc->i, action); } @@ -1197,7 +1340,7 @@ void ata_eh_done(struct ata_link *link, struct ata_device *dev, * RETURNS: * Descriptive string for @err_mask */ -static const char * ata_err_string(unsigned int err_mask) +static const char *ata_err_string(unsigned int err_mask) { if (err_mask & AC_ERR_HOST_BUS) return "host bus error"; @@ -1311,9 +1454,41 @@ static int ata_eh_read_log_10h(struct ata_device *dev, } /** + * atapi_eh_tur - perform ATAPI TEST_UNIT_READY + * @dev: target ATAPI device + * @r_sense_key: out parameter for sense_key + * + * Perform ATAPI TEST_UNIT_READY. + * + * LOCKING: + * EH context (may sleep). + * + * RETURNS: + * 0 on success, AC_ERR_* mask on failure. + */ +static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) +{ + u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + + tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf.command = ATA_CMD_PACKET; + tf.protocol = ATAPI_PROT_NODATA; + + err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); + if (err_mask == AC_ERR_DEV) + *r_sense_key = tf.feature >> 4; + return err_mask; +} + +/** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) + * @dfl_sense_key: default sense key to use * * Perform ATAPI REQUEST_SENSE after the device reported CHECK * SENSE. This function is EH helper. @@ -1324,13 +1499,13 @@ static int ata_eh_read_log_10h(struct ata_device *dev, * RETURNS: * 0 on success, AC_ERR_* mask on failure */ -static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) +static unsigned int atapi_eh_request_sense(struct ata_device *dev, + u8 *sense_buf, u8 dfl_sense_key) { - struct ata_device *dev = qc->dev; - unsigned char *sense_buf = qc->scsicmd->sense_buffer; + u8 cdb[ATAPI_CDB_LEN] = + { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; struct ata_port *ap = dev->link->ap; struct ata_taskfile tf; - u8 cdb[ATAPI_CDB_LEN]; DPRINTK("ATAPI request sense\n"); @@ -1341,26 +1516,22 @@ static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) * for the case where they are -not- overwritten */ sense_buf[0] = 0x70; - sense_buf[2] = qc->result_tf.feature >> 4; + sense_buf[2] = dfl_sense_key; /* some devices time out if garbage left in tf */ ata_tf_init(dev, &tf); - memset(cdb, 0, ATAPI_CDB_LEN); - cdb[0] = REQUEST_SENSE; - cdb[4] = SCSI_SENSE_BUFFERSIZE; - tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; /* is it pointless to prefer PIO for "safety reasons"? */ if (ap->flags & ATA_FLAG_PIO_DMA) { - tf.protocol = ATA_PROT_ATAPI_DMA; + tf.protocol = ATAPI_PROT_DMA; tf.feature |= ATAPI_PKT_DMA; } else { - tf.protocol = ATA_PROT_ATAPI; - tf.lbam = (8 * 1024) & 0xff; - tf.lbah = (8 * 1024) >> 8; + tf.protocol = ATAPI_PROT_PIO; + tf.lbam = SCSI_SENSE_BUFFERSIZE; + tf.lbah = 0; } return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, @@ -1384,22 +1555,17 @@ static void ata_eh_analyze_serror(struct ata_link *link) unsigned int err_mask = 0, action = 0; u32 hotplug_mask; - if (serror & SERR_PERSISTENT) { - err_mask |= AC_ERR_ATA_BUS; - action |= ATA_EH_HARDRESET; - } - if (serror & - (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { + if (serror & (SERR_PERSISTENT | SERR_DATA)) { err_mask |= AC_ERR_ATA_BUS; - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_RESET; } if (serror & SERR_PROTOCOL) { err_mask |= AC_ERR_HSM; - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_RESET; } if (serror & SERR_INTERNAL) { err_mask |= AC_ERR_SYSTEM; - action |= ATA_EH_HARDRESET; + action |= ATA_EH_RESET; } /* Determine whether a hotplug event has occurred. Both @@ -1433,7 +1599,7 @@ static void ata_eh_analyze_serror(struct ata_link *link) * LOCKING: * Kernel thread context (may sleep). */ -static void ata_eh_analyze_ncq_error(struct ata_link *link) +void ata_eh_analyze_ncq_error(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; @@ -1462,6 +1628,7 @@ static void ata_eh_analyze_ncq_error(struct ata_link *link) } /* okay, this error is ours */ + memset(&tf, 0, sizeof(tf)); rc = ata_eh_read_log_10h(dev, &tag, &tf); if (rc) { ata_link_printk(link, KERN_ERR, "failed to read log page 10h " @@ -1478,6 +1645,7 @@ static void ata_eh_analyze_ncq_error(struct ata_link *link) /* we've got the perpetrator, condemn it */ qc = __ata_qc_from_tag(ap, tag); memcpy(&qc->result_tf, &tf, sizeof(tf)); + qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; ehc->i.err_mask &= ~AC_ERR_DEV; } @@ -1505,7 +1673,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { qc->err_mask |= AC_ERR_HSM; - return ATA_EH_SOFTRESET; + return ATA_EH_RESET; } if (stat & (ATA_ERR | ATA_DF)) @@ -1525,7 +1693,9 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, case ATA_DEV_ATAPI: if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = atapi_eh_request_sense(qc); + tmp = atapi_eh_request_sense(qc->dev, + qc->scsicmd->sense_buffer, + qc->result_tf.feature >> 4); if (!tmp) { /* ATA_QCFLAG_SENSE_VALID is used to * tell atapi_qc_complete() that sense @@ -1541,25 +1711,34 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, } if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_RESET; return action; } -static int ata_eh_categorize_error(int is_io, unsigned int err_mask) +static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, + int *xfer_ok) { + int base = 0; + + if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) + *xfer_ok = 1; + + if (!*xfer_ok) + base = ATA_ECAT_DUBIOUS_NONE; + if (err_mask & AC_ERR_ATA_BUS) - return 1; + return base + ATA_ECAT_ATA_BUS; if (err_mask & AC_ERR_TIMEOUT) - return 2; + return base + ATA_ECAT_TOUT_HSM; - if (is_io) { + if (eflags & ATA_EFLAG_IS_IO) { if (err_mask & AC_ERR_HSM) - return 2; + return base + ATA_ECAT_TOUT_HSM; if ((err_mask & (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) - return 3; + return base + ATA_ECAT_UNK_DEV; } return 0; @@ -1567,18 +1746,22 @@ static int ata_eh_categorize_error(int is_io, unsigned int err_mask) struct speed_down_verdict_arg { u64 since; - int nr_errors[4]; + int xfer_ok; + int nr_errors[ATA_ECAT_NR]; }; static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) { struct speed_down_verdict_arg *arg = void_arg; - int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); + int cat; if (ent->timestamp < arg->since) return -1; + cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, + &arg->xfer_ok); arg->nr_errors[cat]++; + return 0; } @@ -1590,22 +1773,48 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) * whether NCQ needs to be turned off, transfer speed should be * stepped down, or falling back to PIO is necessary. * - * Cat-1 is ATA_BUS error for any command. + * ECAT_ATA_BUS : ATA_BUS error for any command + * + * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for + * IO commands + * + * ECAT_UNK_DEV : Unknown DEV error for IO commands + * + * ECAT_DUBIOUS_* : Identical to above three but occurred while + * data transfer hasn't been verified. * - * Cat-2 is TIMEOUT for any command or HSM violation for known - * supported commands. + * Verdicts are * - * Cat-3 is is unclassified DEV error for known supported - * command. + * NCQ_OFF : Turn off NCQ. * - * NCQ needs to be turned off if there have been more than 3 - * Cat-2 + Cat-3 errors during last 10 minutes. + * SPEED_DOWN : Speed down transfer speed but don't fall back + * to PIO. * - * Speed down is necessary if there have been more than 3 Cat-1 + - * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. + * FALLBACK_TO_PIO : Fall back to PIO. * - * Falling back to PIO mode is necessary if there have been more - * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. + * Even if multiple verdicts are returned, only one action is + * taken per error. An action triggered by non-DUBIOUS errors + * clears ering, while one triggered by DUBIOUS_* errors doesn't. + * This is to expedite speed down decisions right after device is + * initially configured. + * + * The followings are speed down rules. #1 and #2 deal with + * DUBIOUS errors. + * + * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors + * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO. + * + * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors + * occurred during last 5 mins, NCQ_OFF. + * + * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors + * ocurred during last 5 mins, FALLBACK_TO_PIO + * + * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred + * during last 10 mins, NCQ_OFF. + * + * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 + * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. * * LOCKING: * Inherited from caller. @@ -1620,23 +1829,38 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) struct speed_down_verdict_arg arg; unsigned int verdict = 0; - /* scan past 10 mins of error history */ + /* scan past 5 mins of error history */ memset(&arg, 0, sizeof(arg)); - arg.since = j64 - min(j64, j10mins); + arg.since = j64 - min(j64, j5mins); ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - if (arg.nr_errors[2] + arg.nr_errors[3] > 3) - verdict |= ATA_EH_SPDN_NCQ_OFF; - if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) - verdict |= ATA_EH_SPDN_SPEED_DOWN; + if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + + arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) + verdict |= ATA_EH_SPDN_SPEED_DOWN | + ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; + + if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + + arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) + verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; - /* scan past 3 mins of error history */ + if (arg.nr_errors[ATA_ECAT_ATA_BUS] + + arg.nr_errors[ATA_ECAT_TOUT_HSM] + + arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) + verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; + + /* scan past 10 mins of error history */ memset(&arg, 0, sizeof(arg)); - arg.since = j64 - min(j64, j5mins); + arg.since = j64 - min(j64, j10mins); ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) - verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; + if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + + arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) + verdict |= ATA_EH_SPDN_NCQ_OFF; + + if (arg.nr_errors[ATA_ECAT_ATA_BUS] + + arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || + arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) + verdict |= ATA_EH_SPDN_SPEED_DOWN; return verdict; } @@ -1644,7 +1868,7 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) /** * ata_eh_speed_down - record error and speed down if necessary * @dev: Failed device - * @is_io: Did the device fail during normal IO? + * @eflags: mask of ATA_EFLAG_* flags * @err_mask: err_mask of the error * * Record error and examine error history to determine whether @@ -1658,18 +1882,20 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) * RETURNS: * Determined recovery action. */ -static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, - unsigned int err_mask) +static unsigned int ata_eh_speed_down(struct ata_device *dev, + unsigned int eflags, unsigned int err_mask) { + struct ata_link *link = ata_dev_phys_link(dev); + int xfer_ok = 0; unsigned int verdict; unsigned int action = 0; /* don't bother if Cat-0 error */ - if (ata_eh_categorize_error(is_io, err_mask) == 0) + if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) return 0; /* record error and determine whether speed down is necessary */ - ata_ering_record(&dev->ering, is_io, err_mask); + ata_ering_record(&dev->ering, eflags, err_mask); verdict = ata_eh_speed_down_verdict(dev); /* turn off NCQ? */ @@ -1685,8 +1911,8 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, /* speed down? */ if (verdict & ATA_EH_SPDN_SPEED_DOWN) { /* speed down SATA link speed if possible */ - if (sata_down_spd_limit(dev->link) == 0) { - action |= ATA_EH_HARDRESET; + if (sata_down_spd_limit(link, 0) == 0) { + action |= ATA_EH_RESET; goto done; } @@ -1706,21 +1932,21 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, dev->spdn_cnt++; if (ata_down_xfermask_limit(dev, sel) == 0) { - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_RESET; goto done; } } } /* Fall back to PIO? Slowing down to PIO is meaningless for - * SATA. Consider it only for PATA. + * SATA ATA devices. Consider it only for PATA and SATAPI. */ if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && - (dev->link->ap->cbl != ATA_CBL_SATA) && + (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && (dev->xfer_shift != ATA_SHIFT_PIO)) { if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { dev->spdn_cnt = 0; - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_RESET; goto done; } } @@ -1728,7 +1954,8 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, return 0; done: /* device has been slowed down, blow error history */ - ata_ering_clear(&dev->ering); + if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) + ata_ering_clear(&dev->ering); return action; } @@ -1747,8 +1974,9 @@ static void ata_eh_link_autopsy(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; - unsigned int all_err_mask = 0; - int tag, is_io = 0; + struct ata_device *dev; + unsigned int all_err_mask = 0, eflags = 0; + int tag; u32 serror; int rc; @@ -1763,9 +1991,9 @@ static void ata_eh_link_autopsy(struct ata_link *link) ehc->i.serror |= serror; ata_eh_analyze_serror(link); } else if (rc != -EOPNOTSUPP) { - /* SError read failed, force hardreset and probing */ - ata_ehi_schedule_probe(&ehc->i); - ehc->i.action |= ATA_EH_HARDRESET; + /* SError read failed, force reset and probing */ + ehc->i.probe_mask |= ATA_ALL_DEVICES; + ehc->i.action |= ATA_EH_RESET; ehc->i.err_mask |= AC_ERR_OTHER; } @@ -1781,7 +2009,8 @@ static void ata_eh_link_autopsy(struct ata_link *link) for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) + if (!(qc->flags & ATA_QCFLAG_FAILED) || + ata_dev_phys_link(qc->dev) != link) continue; /* inherit upper level err_mask */ @@ -1800,37 +2029,55 @@ static void ata_eh_link_autopsy(struct ata_link *link) qc->err_mask &= ~AC_ERR_OTHER; /* SENSE_VALID trumps dev/unknown error and revalidation */ - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { + if (qc->flags & ATA_QCFLAG_SENSE_VALID) qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); - ehc->i.action &= ~ATA_EH_REVALIDATE; - } + + /* determine whether the command is worth retrying */ + if (qc->flags & ATA_QCFLAG_IO || + (!(qc->err_mask & AC_ERR_INVALID) && + qc->err_mask != AC_ERR_DEV)) + qc->flags |= ATA_QCFLAG_RETRY; /* accumulate error info */ ehc->i.dev = qc->dev; all_err_mask |= qc->err_mask; if (qc->flags & ATA_QCFLAG_IO) - is_io = 1; + eflags |= ATA_EFLAG_IS_IO; } /* enforce default EH actions */ if (ap->pflags & ATA_PFLAG_FROZEN || all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) - ehc->i.action |= ATA_EH_SOFTRESET; - else if (all_err_mask) + ehc->i.action |= ATA_EH_RESET; + else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || + (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) ehc->i.action |= ATA_EH_REVALIDATE; - /* if we have offending qcs and the associated failed device */ + /* If we have offending qcs and the associated failed device, + * perform per-dev EH action only on the offending device. + */ if (ehc->i.dev) { - /* speed down */ - ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io, - all_err_mask); - - /* perform per-dev EH action only on the offending device */ ehc->i.dev_action[ehc->i.dev->devno] |= ehc->i.action & ATA_EH_PERDEV_MASK; ehc->i.action &= ~ATA_EH_PERDEV_MASK; } + /* propagate timeout to host link */ + if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) + ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; + + /* record error and consider speeding down */ + dev = ehc->i.dev; + if (!dev && ((ata_link_max_devices(link) == 1 && + ata_dev_enabled(link->device)))) + dev = link->device; + + if (dev) { + if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) + eflags |= ATA_EFLAG_DUBIOUS_XFER; + ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); + } + DPRINTK("EXIT\n"); } @@ -1848,8 +2095,146 @@ void ata_eh_autopsy(struct ata_port *ap) { struct ata_link *link; - __ata_port_for_each_link(link, ap) + ata_for_each_link(link, ap, EDGE) ata_eh_link_autopsy(link); + + /* Handle the frigging slave link. Autopsy is done similarly + * but actions and flags are transferred over to the master + * link and handled from there. + */ + if (ap->slave_link) { + struct ata_eh_context *mehc = &ap->link.eh_context; + struct ata_eh_context *sehc = &ap->slave_link->eh_context; + + /* transfer control flags from master to slave */ + sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; + + /* perform autopsy on the slave link */ + ata_eh_link_autopsy(ap->slave_link); + + /* transfer actions from slave to master and clear slave */ + ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); + mehc->i.action |= sehc->i.action; + mehc->i.dev_action[1] |= sehc->i.dev_action[1]; + mehc->i.flags |= sehc->i.flags; + ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); + } + + /* Autopsy of fanout ports can affect host link autopsy. + * Perform host link autopsy last. + */ + if (sata_pmp_attached(ap)) + ata_eh_link_autopsy(&ap->link); +} + +/** + * ata_get_cmd_descript - get description for ATA command + * @command: ATA command code to get description for + * + * Return a textual description of the given command, or NULL if the + * command is not known. + * + * LOCKING: + * None + */ +const char *ata_get_cmd_descript(u8 command) +{ +#ifdef CONFIG_ATA_VERBOSE_ERROR + static const struct + { + u8 command; + const char *text; + } cmd_descr[] = { + { ATA_CMD_DEV_RESET, "DEVICE RESET" }, + { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, + { ATA_CMD_STANDBY, "STANDBY" }, + { ATA_CMD_IDLE, "IDLE" }, + { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, + { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, + { ATA_CMD_NOP, "NOP" }, + { ATA_CMD_FLUSH, "FLUSH CACHE" }, + { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, + { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, + { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, + { ATA_CMD_SERVICE, "SERVICE" }, + { ATA_CMD_READ, "READ DMA" }, + { ATA_CMD_READ_EXT, "READ DMA EXT" }, + { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, + { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, + { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, + { ATA_CMD_WRITE, "WRITE DMA" }, + { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, + { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, + { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, + { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, + { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, + { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, + { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, + { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, + { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, + { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, + { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, + { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, + { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, + { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, + { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, + { ATA_CMD_SET_FEATURES, "SET FEATURES" }, + { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, + { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, + { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, + { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, + { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, + { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, + { ATA_CMD_SLEEP, "SLEEP" }, + { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, + { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, + { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, + { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, + { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, + { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, + { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, + { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, + { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, + { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, + { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, + { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, + { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, + { ATA_CMD_PMP_READ, "READ BUFFER" }, + { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, + { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, + { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, + { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, + { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, + { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, + { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, + { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, + { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, + { ATA_CMD_SMART, "SMART" }, + { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, + { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, + { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, + { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, + { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, + { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, + { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, + { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, + { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, + { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, + { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, + { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, + { ATA_CMD_RESTORE, "RECALIBRATE" }, + { 0, NULL } /* terminate list */ + }; + + unsigned int i; + for (i = 0; cmd_descr[i].text; i++) + if (cmd_descr[i].command == command) + return cmd_descr[i].text; +#endif + + return NULL; } /** @@ -1879,7 +2264,10 @@ static void ata_eh_link_report(struct ata_link *link) for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) + if (!(qc->flags & ATA_QCFLAG_FAILED) || + ata_dev_phys_link(qc->dev) != link || + ((qc->flags & ATA_QCFLAG_QUIET) && + qc->err_mask == AC_ERR_DEV)) continue; if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) continue; @@ -1915,8 +2303,9 @@ static void ata_eh_link_report(struct ata_link *link) ata_link_printk(link, KERN_ERR, "%s\n", desc); } +#ifdef CONFIG_ATA_VERBOSE_ERROR if (ehc->i.serror) - ata_port_printk(ap, KERN_ERR, + ata_link_printk(link, KERN_ERR, "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", @@ -1934,33 +2323,67 @@ static void ata_eh_link_report(struct ata_link *link) ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", - ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "" ); + ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); +#endif for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - static const char *dma_str[] = { - [DMA_BIDIRECTIONAL] = "bidi", - [DMA_TO_DEVICE] = "out", - [DMA_FROM_DEVICE] = "in", - [DMA_NONE] = "", - }; struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; + const u8 *cdb = qc->cdb; + char data_buf[20] = ""; + char cdb_buf[70] = ""; if (!(qc->flags & ATA_QCFLAG_FAILED) || - qc->dev->link != link || !qc->err_mask) + ata_dev_phys_link(qc->dev) != link || !qc->err_mask) continue; + if (qc->dma_dir != DMA_NONE) { + static const char *dma_str[] = { + [DMA_BIDIRECTIONAL] = "bidi", + [DMA_TO_DEVICE] = "out", + [DMA_FROM_DEVICE] = "in", + }; + static const char *prot_str[] = { + [ATA_PROT_PIO] = "pio", + [ATA_PROT_DMA] = "dma", + [ATA_PROT_NCQ] = "ncq", + [ATAPI_PROT_PIO] = "pio", + [ATAPI_PROT_DMA] = "dma", + }; + + snprintf(data_buf, sizeof(data_buf), " %s %u %s", + prot_str[qc->tf.protocol], qc->nbytes, + dma_str[qc->dma_dir]); + } + + if (ata_is_atapi(qc->tf.protocol)) { + if (qc->scsicmd) + scsi_print_command(qc->scsicmd); + else + snprintf(cdb_buf, sizeof(cdb_buf), + "cdb %02x %02x %02x %02x %02x %02x %02x %02x " + "%02x %02x %02x %02x %02x %02x %02x %02x\n ", + cdb[0], cdb[1], cdb[2], cdb[3], + cdb[4], cdb[5], cdb[6], cdb[7], + cdb[8], cdb[9], cdb[10], cdb[11], + cdb[12], cdb[13], cdb[14], cdb[15]); + } else { + const char *descr = ata_get_cmd_descript(cmd->command); + if (descr) + ata_dev_printk(qc->dev, KERN_ERR, + "failed command: %s\n", descr); + } + ata_dev_printk(qc->dev, KERN_ERR, "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " - "tag %d cdb 0x%x data %u %s\n " + "tag %d%s\n %s" "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " "Emask 0x%x (%s)%s\n", cmd->command, cmd->feature, cmd->nsect, cmd->lbal, cmd->lbam, cmd->lbah, cmd->hob_feature, cmd->hob_nsect, cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, - cmd->device, qc->tag, qc->cdb[0], qc->nbytes, - dma_str[qc->dma_dir], + cmd->device, qc->tag, data_buf, cdb_buf, res->command, res->feature, res->nsect, res->lbal, res->lbam, res->lbah, res->hob_feature, res->hob_nsect, @@ -1968,18 +2391,19 @@ static void ata_eh_link_report(struct ata_link *link) res->device, qc->err_mask, ata_err_string(qc->err_mask), qc->err_mask & AC_ERR_NCQ ? " " : ""); +#ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_ERR) ) { + ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_printk(qc->dev, KERN_ERR, - "status: { Busy }\n" ); + "status: { Busy }\n"); else ata_dev_printk(qc->dev, KERN_ERR, "status: { %s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", - res->command & ATA_ERR ? "ERR " : "" ); + res->command & ATA_ERR ? "ERR " : ""); } if (cmd->command != ATA_CMD_PACKET && @@ -1990,7 +2414,8 @@ static void ata_eh_link_report(struct ata_link *link) res->feature & ATA_ICRC ? "ICRC " : "", res->feature & ATA_UNC ? "UNC " : "", res->feature & ATA_IDNF ? "IDNF " : "", - res->feature & ATA_ABORTED ? "ABRT " : "" ); + res->feature & ATA_ABORTED ? "ABRT " : ""); +#endif } } @@ -2007,55 +2432,31 @@ void ata_eh_report(struct ata_port *ap) { struct ata_link *link; - __ata_port_for_each_link(link, ap) + ata_for_each_link(link, ap, HOST_FIRST) ata_eh_link_report(link); } static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, - unsigned int *classes, unsigned long deadline) + unsigned int *classes, unsigned long deadline, + bool clear_classes) { struct ata_device *dev; - int rc; - - ata_link_for_each_dev(dev, link) - classes[dev->devno] = ATA_DEV_UNKNOWN; - - rc = reset(link, classes, deadline); - if (rc) - return rc; - /* If any class isn't ATA_DEV_UNKNOWN, consider classification - * is complete and convert all ATA_DEV_UNKNOWN to - * ATA_DEV_NONE. - */ - ata_link_for_each_dev(dev, link) - if (classes[dev->devno] != ATA_DEV_UNKNOWN) - break; - - if (dev) { - ata_link_for_each_dev(dev, link) { - if (classes[dev->devno] == ATA_DEV_UNKNOWN) - classes[dev->devno] = ATA_DEV_NONE; - } - } + if (clear_classes) + ata_for_each_dev(dev, link, ALL) + classes[dev->devno] = ATA_DEV_UNKNOWN; - return 0; + return reset(link, classes, deadline); } static int ata_eh_followup_srst_needed(struct ata_link *link, - int rc, int classify, - const unsigned int *classes) + int rc, const unsigned int *classes) { - if (link->flags & ATA_LFLAG_NO_SRST) + if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) return 0; if (rc == -EAGAIN) return 1; - if (rc != 0) - return 0; - if ((link->ap->flags & ATA_FLAG_PMP) && ata_is_host_link(link)) - return 1; - if (classify && !(link->flags & ATA_LFLAG_ASSUME_CLASS) && - classes[0] == ATA_DEV_UNKNOWN) + if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) return 1; return 0; } @@ -2065,45 +2466,109 @@ int ata_eh_reset(struct ata_link *link, int classify, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { struct ata_port *ap = link->ap; + struct ata_link *slave = ap->slave_link; struct ata_eh_context *ehc = &link->eh_context; + struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; unsigned int *classes = ehc->classes; + unsigned int lflags = link->flags; int verbose = !(ehc->i.flags & ATA_EHI_QUIET); - int try = 0; + int max_tries = 0, try = 0; + struct ata_link *failed_link; struct ata_device *dev; - unsigned long deadline; - unsigned int action; + unsigned long deadline, now; ata_reset_fn_t reset; unsigned long flags; - int rc; + u32 sstatus; + int nr_unknown, rc; + + /* + * Prepare to reset + */ + while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) + max_tries++; + if (link->flags & ATA_LFLAG_NO_HRST) + hardreset = NULL; + if (link->flags & ATA_LFLAG_NO_SRST) + softreset = NULL; + + /* make sure each reset attemp is at least COOL_DOWN apart */ + if (ehc->i.flags & ATA_EHI_DID_RESET) { + now = jiffies; + WARN_ON(time_after(ehc->last_reset, now)); + deadline = ata_deadline(ehc->last_reset, + ATA_EH_RESET_COOL_DOWN); + if (time_before(now, deadline)) + schedule_timeout_uninterruptible(deadline - now); + } - /* about to reset */ spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_RESETTING; spin_unlock_irqrestore(ap->lock, flags); - ata_eh_about_to_do(link, NULL, ehc->i.action & ATA_EH_RESET_MASK); + ata_eh_about_to_do(link, NULL, ATA_EH_RESET); - /* Determine which reset to use and record in ehc->i.action. - * prereset() may examine and modify it. - */ - action = ehc->i.action; - ehc->i.action &= ~ATA_EH_RESET_MASK; - if (softreset && (!hardreset || (!(link->flags & ATA_LFLAG_NO_SRST) && - !sata_set_spd_needed(link) && - !(action & ATA_EH_HARDRESET)))) - ehc->i.action |= ATA_EH_SOFTRESET; - else + ata_for_each_dev(dev, link, ALL) { + /* If we issue an SRST then an ATA drive (not ATAPI) + * may change configuration and be in PIO0 timing. If + * we do a hard reset (or are coming from power on) + * this is true for ATA or ATAPI. Until we've set a + * suitable controller mode we should not touch the + * bus as we may be talking too fast. + */ + dev->pio_mode = XFER_PIO_0; + + /* If the controller has a pio mode setup function + * then use it to set the chipset to rights. Don't + * touch the DMA setup as that will be dealt with when + * configuring devices. + */ + if (ap->ops->set_piomode) + ap->ops->set_piomode(ap, dev); + } + + /* prefer hardreset */ + reset = NULL; + ehc->i.action &= ~ATA_EH_RESET; + if (hardreset) { + reset = hardreset; ehc->i.action |= ATA_EH_HARDRESET; + } else if (softreset) { + reset = softreset; + ehc->i.action |= ATA_EH_SOFTRESET; + } if (prereset) { - rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT); + unsigned long deadline = ata_deadline(jiffies, + ATA_EH_PRERESET_TIMEOUT); + + if (slave) { + sehc->i.action &= ~ATA_EH_RESET; + sehc->i.action |= ehc->i.action; + } + + rc = prereset(link, deadline); + + /* If present, do prereset on slave link too. Reset + * is skipped iff both master and slave links report + * -ENOENT or clear ATA_EH_RESET. + */ + if (slave && (rc == 0 || rc == -ENOENT)) { + int tmp; + + tmp = prereset(slave, deadline); + if (tmp != -ENOENT) + rc = tmp; + + ehc->i.action |= sehc->i.action; + } + if (rc) { if (rc == -ENOENT) { ata_link_printk(link, KERN_DEBUG, "port disabled. ignoring.\n"); - ehc->i.action &= ~ATA_EH_RESET_MASK; + ehc->i.action &= ~ATA_EH_RESET; - ata_link_for_each_dev(dev, link) + ata_for_each_dev(dev, link, ALL) classes[dev->devno] = ATA_DEV_NONE; rc = 0; @@ -2112,138 +2577,326 @@ int ata_eh_reset(struct ata_link *link, int classify, "prereset failed (errno=%d)\n", rc); goto out; } - } - /* prereset() might have modified ehc->i.action */ - if (ehc->i.action & ATA_EH_HARDRESET) - reset = hardreset; - else if (ehc->i.action & ATA_EH_SOFTRESET) - reset = softreset; - else { - /* prereset told us not to reset, bang classes and return */ - ata_link_for_each_dev(dev, link) - classes[dev->devno] = ATA_DEV_NONE; - rc = 0; - goto out; - } - - /* did prereset() screw up? if so, fix up to avoid oopsing */ - if (!reset) { - if (softreset) - reset = softreset; - else - reset = hardreset; + /* prereset() might have cleared ATA_EH_RESET. If so, + * bang classes, thaw and return. + */ + if (reset && !(ehc->i.action & ATA_EH_RESET)) { + ata_for_each_dev(dev, link, ALL) + classes[dev->devno] = ATA_DEV_NONE; + if ((ap->pflags & ATA_PFLAG_FROZEN) && + ata_is_host_link(link)) + ata_eh_thaw_port(ap); + rc = 0; + goto out; + } } retry: - deadline = jiffies + ata_eh_reset_timeouts[try++]; - - /* shut up during boot probing */ - if (verbose) - ata_link_printk(link, KERN_INFO, "%s resetting link\n", - reset == softreset ? "soft" : "hard"); - - /* mark that this EH session started with reset */ - if (reset == hardreset) - ehc->i.flags |= ATA_EHI_DID_HARDRESET; - else - ehc->i.flags |= ATA_EHI_DID_SOFTRESET; + /* + * Perform reset + */ + if (ata_is_host_link(link)) + ata_eh_freeze_port(ap); - rc = ata_do_reset(link, reset, classes, deadline); + deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); - if (reset == hardreset && - ata_eh_followup_srst_needed(link, rc, classify, classes)) { - /* okay, let's do follow-up softreset */ - reset = softreset; + if (reset) { + if (verbose) + ata_link_printk(link, KERN_INFO, "%s resetting link\n", + reset == softreset ? "soft" : "hard"); + + /* mark that this EH session started with reset */ + ehc->last_reset = jiffies; + if (reset == hardreset) + ehc->i.flags |= ATA_EHI_DID_HARDRESET; + else + ehc->i.flags |= ATA_EHI_DID_SOFTRESET; - if (!reset) { - ata_link_printk(link, KERN_ERR, - "follow-up softreset required " - "but no softreset avaliable\n"); - rc = -EINVAL; - goto out; + rc = ata_do_reset(link, reset, classes, deadline, true); + if (rc && rc != -EAGAIN) { + failed_link = link; + goto fail; } - ata_eh_about_to_do(link, NULL, ATA_EH_RESET_MASK); - rc = ata_do_reset(link, reset, classes, deadline); + /* hardreset slave link if existent */ + if (slave && reset == hardreset) { + int tmp; - if (rc == 0 && classify && classes[0] == ATA_DEV_UNKNOWN && - !(link->flags & ATA_LFLAG_ASSUME_CLASS)) { - ata_link_printk(link, KERN_ERR, - "classification failed\n"); - rc = -EINVAL; - goto out; - } - } - - /* if we skipped follow-up srst, clear rc */ - if (rc == -EAGAIN) - rc = 0; + if (verbose) + ata_link_printk(slave, KERN_INFO, + "hard resetting link\n"); - if (rc && rc != -ERESTART && try < ARRAY_SIZE(ata_eh_reset_timeouts)) { - unsigned long now = jiffies; + ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); + tmp = ata_do_reset(slave, reset, classes, deadline, + false); + switch (tmp) { + case -EAGAIN: + rc = -EAGAIN; + case 0: + break; + default: + failed_link = slave; + rc = tmp; + goto fail; + } + } - if (time_before(now, deadline)) { - unsigned long delta = deadline - jiffies; + /* perform follow-up SRST if necessary */ + if (reset == hardreset && + ata_eh_followup_srst_needed(link, rc, classes)) { + reset = softreset; - ata_link_printk(link, KERN_WARNING, "reset failed " - "(errno=%d), retrying in %u secs\n", - rc, (jiffies_to_msecs(delta) + 999) / 1000); + if (!reset) { + ata_link_printk(link, KERN_ERR, + "follow-up softreset required " + "but no softreset avaliable\n"); + failed_link = link; + rc = -EINVAL; + goto fail; + } - while (delta) - delta = schedule_timeout_uninterruptible(delta); + ata_eh_about_to_do(link, NULL, ATA_EH_RESET); + rc = ata_do_reset(link, reset, classes, deadline, true); + if (rc) { + failed_link = link; + goto fail; + } } - - if (rc == -EPIPE || - try == ARRAY_SIZE(ata_eh_reset_timeouts) - 1) - sata_down_spd_limit(link); - if (hardreset) - reset = hardreset; - goto retry; + } else { + if (verbose) + ata_link_printk(link, KERN_INFO, "no reset method " + "available, skipping reset\n"); + if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) + lflags |= ATA_LFLAG_ASSUME_ATA; } - if (rc == 0) { - u32 sstatus; + /* + * Post-reset processing + */ + ata_for_each_dev(dev, link, ALL) { + /* After the reset, the device state is PIO 0 and the + * controller state is undefined. Reset also wakes up + * drives from sleeping mode. + */ + dev->pio_mode = XFER_PIO_0; + dev->flags &= ~ATA_DFLAG_SLEEPING; - ata_link_for_each_dev(dev, link) { - /* After the reset, the device state is PIO 0 - * and the controller state is undefined. - * Record the mode. - */ - dev->pio_mode = XFER_PIO_0; + if (ata_phys_link_offline(ata_dev_phys_link(dev))) + continue; - if (ata_link_offline(link)) - continue; + /* apply class override */ + if (lflags & ATA_LFLAG_ASSUME_ATA) + classes[dev->devno] = ATA_DEV_ATA; + else if (lflags & ATA_LFLAG_ASSUME_SEMB) + classes[dev->devno] = ATA_DEV_SEMB_UNSUP; + } + + /* record current link speed */ + if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) + link->sata_spd = (sstatus >> 4) & 0xf; + if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) + slave->sata_spd = (sstatus >> 4) & 0xf; + + /* thaw the port */ + if (ata_is_host_link(link)) + ata_eh_thaw_port(ap); + + /* postreset() should clear hardware SError. Although SError + * is cleared during link resume, clearing SError here is + * necessary as some PHYs raise hotplug events after SRST. + * This introduces race condition where hotplug occurs between + * reset and here. This race is mediated by cross checking + * link onlineness and classification result later. + */ + if (postreset) { + postreset(link, classes); + if (slave) + postreset(slave, classes); + } - /* apply class override and convert UNKNOWN to NONE */ - if (link->flags & ATA_LFLAG_ASSUME_ATA) - classes[dev->devno] = ATA_DEV_ATA; - else if (link->flags & ATA_LFLAG_ASSUME_SEMB) - classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */ - else if (classes[dev->devno] == ATA_DEV_UNKNOWN) + /* + * Some controllers can't be frozen very well and may set + * spuruious error conditions during reset. Clear accumulated + * error information. As reset is the final recovery action, + * nothing is lost by doing this. + */ + spin_lock_irqsave(link->ap->lock, flags); + memset(&link->eh_info, 0, sizeof(link->eh_info)); + if (slave) + memset(&slave->eh_info, 0, sizeof(link->eh_info)); + ap->pflags &= ~ATA_PFLAG_EH_PENDING; + spin_unlock_irqrestore(link->ap->lock, flags); + + /* + * Make sure onlineness and classification result correspond. + * Hotplug could have happened during reset and some + * controllers fail to wait while a drive is spinning up after + * being hotplugged causing misdetection. By cross checking + * link on/offlineness and classification result, those + * conditions can be reliably detected and retried. + */ + nr_unknown = 0; + ata_for_each_dev(dev, link, ALL) { + if (ata_phys_link_online(ata_dev_phys_link(dev))) { + if (classes[dev->devno] == ATA_DEV_UNKNOWN) { + ata_dev_printk(dev, KERN_DEBUG, "link online " + "but device misclassifed\n"); classes[dev->devno] = ATA_DEV_NONE; + nr_unknown++; + } + } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { + if (ata_class_enabled(classes[dev->devno])) + ata_dev_printk(dev, KERN_DEBUG, "link offline, " + "clearing class %d to NONE\n", + classes[dev->devno]); + classes[dev->devno] = ATA_DEV_NONE; + } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { + ata_dev_printk(dev, KERN_DEBUG, "link status unknown, " + "clearing UNKNOWN to NONE\n"); + classes[dev->devno] = ATA_DEV_NONE; } + } - /* record current link speed */ - if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) - link->sata_spd = (sstatus >> 4) & 0xf; + if (classify && nr_unknown) { + if (try < max_tries) { + ata_link_printk(link, KERN_WARNING, "link online but " + "%d devices misclassified, retrying\n", + nr_unknown); + failed_link = link; + rc = -EAGAIN; + goto fail; + } + ata_link_printk(link, KERN_WARNING, + "link online but %d devices misclassified, " + "device detection might fail\n", nr_unknown); + } - if (postreset) - postreset(link, classes); + /* reset successful, schedule revalidation */ + ata_eh_done(link, NULL, ATA_EH_RESET); + if (slave) + ata_eh_done(slave, NULL, ATA_EH_RESET); + ehc->last_reset = jiffies; /* update to completion time */ + ehc->i.action |= ATA_EH_REVALIDATE; - /* reset successful, schedule revalidation */ - ata_eh_done(link, NULL, ehc->i.action & ATA_EH_RESET_MASK); - ehc->i.action |= ATA_EH_REVALIDATE; - } + rc = 0; out: /* clear hotplug flag */ ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; + if (slave) + sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_RESETTING; spin_unlock_irqrestore(ap->lock, flags); return rc; + + fail: + /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ + if (!ata_is_host_link(link) && + sata_scr_read(link, SCR_STATUS, &sstatus)) + rc = -ERESTART; + + if (rc == -ERESTART || try >= max_tries) + goto out; + + now = jiffies; + if (time_before(now, deadline)) { + unsigned long delta = deadline - now; + + ata_link_printk(failed_link, KERN_WARNING, + "reset failed (errno=%d), retrying in %u secs\n", + rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); + + while (delta) + delta = schedule_timeout_uninterruptible(delta); + } + + if (try == max_tries - 1) { + sata_down_spd_limit(link, 0); + if (slave) + sata_down_spd_limit(slave, 0); + } else if (rc == -EPIPE) + sata_down_spd_limit(failed_link, 0); + + if (hardreset) + reset = hardreset; + goto retry; +} + +static inline void ata_eh_pull_park_action(struct ata_port *ap) +{ + struct ata_link *link; + struct ata_device *dev; + unsigned long flags; + + /* + * This function can be thought of as an extended version of + * ata_eh_about_to_do() specially crafted to accommodate the + * requirements of ATA_EH_PARK handling. Since the EH thread + * does not leave the do {} while () loop in ata_eh_recover as + * long as the timeout for a park request to *one* device on + * the port has not expired, and since we still want to pick + * up park requests to other devices on the same port or + * timeout updates for the same device, we have to pull + * ATA_EH_PARK actions from eh_info into eh_context.i + * ourselves at the beginning of each pass over the loop. + * + * Additionally, all write accesses to &ap->park_req_pending + * through INIT_COMPLETION() (see below) or complete_all() + * (see ata_scsi_park_store()) are protected by the host lock. + * As a result we have that park_req_pending.done is zero on + * exit from this function, i.e. when ATA_EH_PARK actions for + * *all* devices on port ap have been pulled into the + * respective eh_context structs. If, and only if, + * park_req_pending.done is non-zero by the time we reach + * wait_for_completion_timeout(), another ATA_EH_PARK action + * has been scheduled for at least one of the devices on port + * ap and we have to cycle over the do {} while () loop in + * ata_eh_recover() again. + */ + + spin_lock_irqsave(ap->lock, flags); + INIT_COMPLETION(ap->park_req_pending); + ata_for_each_link(link, ap, EDGE) { + ata_for_each_dev(dev, link, ALL) { + struct ata_eh_info *ehi = &link->eh_info; + + link->eh_context.i.dev_action[dev->devno] |= + ehi->dev_action[dev->devno] & ATA_EH_PARK; + ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); + } + } + spin_unlock_irqrestore(ap->lock, flags); +} + +static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) +{ + struct ata_eh_context *ehc = &dev->link->eh_context; + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + if (park) { + ehc->unloaded_mask |= 1 << dev->devno; + tf.command = ATA_CMD_IDLEIMMEDIATE; + tf.feature = 0x44; + tf.lbal = 0x4c; + tf.lbam = 0x4e; + tf.lbah = 0x55; + } else { + ehc->unloaded_mask &= ~(1 << dev->devno); + tf.command = ATA_CMD_CHK_POWER; + } + + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol |= ATA_PROT_NODATA; + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (park && (err_mask || tf.lbal != 0xc4)) { + ata_dev_printk(dev, KERN_ERR, "head unload failed!\n"); + ehc->unloaded_mask &= ~(1 << dev->devno); + } } static int ata_eh_revalidate_and_attach(struct ata_link *link, @@ -2262,7 +2915,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, * be done backwards such that PDIAG- is released by the slave * device before the master device is identified. */ - ata_link_for_each_dev_reverse(dev, link) { + ata_for_each_dev(dev, link, ALL_REVERSE) { unsigned int action = ata_eh_dev_action(dev); unsigned int readid_flags = 0; @@ -2272,7 +2925,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { WARN_ON(dev->class == ATA_DEV_PMP); - if (ata_link_offline(link)) { + if (ata_phys_link_offline(ata_dev_phys_link(dev))) { rc = -EIO; goto err; } @@ -2295,6 +2948,12 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, } else if (dev->class == ATA_DEV_UNKNOWN && ehc->tries[dev->devno] && ata_class_enabled(ehc->classes[dev->devno])) { + /* Temporarily set dev->class, it will be + * permanently set once all configurations are + * complete. This is necessary because new + * device configuration is done in two + * separate loops. + */ dev->class = ehc->classes[dev->devno]; if (dev->class == ATA_DEV_PMP) @@ -2302,43 +2961,56 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, else rc = ata_dev_read_id(dev, &dev->class, readid_flags, dev->id); + + /* read_id might have changed class, store and reset */ + ehc->classes[dev->devno] = dev->class; + dev->class = ATA_DEV_UNKNOWN; + switch (rc) { case 0: + /* clear error info accumulated during probe */ + ata_ering_clear(&dev->ering); new_mask |= 1 << dev->devno; break; case -ENOENT: /* IDENTIFY was issued to non-existent * device. No need to reset. Just - * thaw and kill the device. + * thaw and ignore the device. */ ata_eh_thaw_port(ap); - dev->class = ATA_DEV_UNKNOWN; break; default: - dev->class = ATA_DEV_UNKNOWN; goto err; } } } /* PDIAG- should have been released, ask cable type if post-reset */ - if (ata_is_host_link(link) && ap->ops->cable_detect && - (ehc->i.flags & ATA_EHI_DID_RESET)) - ap->cbl = ap->ops->cable_detect(ap); + if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { + if (ap->ops->cable_detect) + ap->cbl = ap->ops->cable_detect(ap); + ata_force_cbl(ap); + } /* Configure new devices forward such that user doesn't see * device detection messages backwards. */ - ata_link_for_each_dev(dev, link) { - if (!(new_mask & (1 << dev->devno)) || - dev->class == ATA_DEV_PMP) + ata_for_each_dev(dev, link, ALL) { + if (!(new_mask & (1 << dev->devno))) + continue; + + dev->class = ehc->classes[dev->devno]; + + if (dev->class == ATA_DEV_PMP) continue; ehc->i.flags |= ATA_EHI_PRINTINFO; rc = ata_dev_configure(dev); ehc->i.flags &= ~ATA_EHI_PRINTINFO; - if (rc) + if (rc) { + dev->class = ATA_DEV_UNKNOWN; goto err; + } spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; @@ -2356,14 +3028,188 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return rc; } +/** + * ata_set_mode - Program timings and issue SET FEATURES - XFER + * @link: link on which timings will be programmed + * @r_failed_dev: out parameter for failed device + * + * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If + * ata_set_mode() fails, pointer to the failing device is + * returned in @r_failed_dev. + * + * LOCKING: + * PCI/etc. bus probe sem. + * + * RETURNS: + * 0 on success, negative errno otherwise + */ +int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) +{ + struct ata_port *ap = link->ap; + struct ata_device *dev; + int rc; + + /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ + ata_for_each_dev(dev, link, ENABLED) { + if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { + struct ata_ering_entry *ent; + + ent = ata_ering_top(&dev->ering); + if (ent) + ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; + } + } + + /* has private set_mode? */ + if (ap->ops->set_mode) + rc = ap->ops->set_mode(link, r_failed_dev); + else + rc = ata_do_set_mode(link, r_failed_dev); + + /* if transfer mode has changed, set DUBIOUS_XFER on device */ + ata_for_each_dev(dev, link, ENABLED) { + struct ata_eh_context *ehc = &link->eh_context; + u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; + u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); + + if (dev->xfer_mode != saved_xfer_mode || + ata_ncq_enabled(dev) != saved_ncq) + dev->flags |= ATA_DFLAG_DUBIOUS_XFER; + } + + return rc; +} + +/** + * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset + * @dev: ATAPI device to clear UA for + * + * Resets and other operations can make an ATAPI device raise + * UNIT ATTENTION which causes the next operation to fail. This + * function clears UA. + * + * LOCKING: + * EH context (may sleep). + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int atapi_eh_clear_ua(struct ata_device *dev) +{ + int i; + + for (i = 0; i < ATA_EH_UA_TRIES; i++) { + u8 *sense_buffer = dev->link->ap->sector_buf; + u8 sense_key = 0; + unsigned int err_mask; + + err_mask = atapi_eh_tur(dev, &sense_key); + if (err_mask != 0 && err_mask != AC_ERR_DEV) { + ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY " + "failed (err_mask=0x%x)\n", err_mask); + return -EIO; + } + + if (!err_mask || sense_key != UNIT_ATTENTION) + return 0; + + err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); + if (err_mask) { + ata_dev_printk(dev, KERN_WARNING, "failed to clear " + "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); + return -EIO; + } + } + + ata_dev_printk(dev, KERN_WARNING, + "UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES); + + return 0; +} + +/** + * ata_eh_maybe_retry_flush - Retry FLUSH if necessary + * @dev: ATA device which may need FLUSH retry + * + * If @dev failed FLUSH, it needs to be reported upper layer + * immediately as it means that @dev failed to remap and already + * lost at least a sector and further FLUSH retrials won't make + * any difference to the lost sector. However, if FLUSH failed + * for other reasons, for example transmission error, FLUSH needs + * to be retried. + * + * This function determines whether FLUSH failure retry is + * necessary and performs it if so. + * + * RETURNS: + * 0 if EH can continue, -errno if EH needs to be repeated. + */ +static int ata_eh_maybe_retry_flush(struct ata_device *dev) +{ + struct ata_link *link = dev->link; + struct ata_port *ap = link->ap; + struct ata_queued_cmd *qc; + struct ata_taskfile tf; + unsigned int err_mask; + int rc = 0; + + /* did flush fail for this device? */ + if (!ata_tag_valid(link->active_tag)) + return 0; + + qc = __ata_qc_from_tag(ap, link->active_tag); + if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && + qc->tf.command != ATA_CMD_FLUSH)) + return 0; + + /* if the device failed it, it should be reported to upper layers */ + if (qc->err_mask & AC_ERR_DEV) + return 0; + + /* flush failed for some other reason, give it another shot */ + ata_tf_init(dev, &tf); + + tf.command = qc->tf.command; + tf.flags |= ATA_TFLAG_DEVICE; + tf.protocol = ATA_PROT_NODATA; + + ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n", + tf.command, qc->err_mask); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (!err_mask) { + /* + * FLUSH is complete but there's no way to + * successfully complete a failed command from EH. + * Making sure retry is allowed at least once and + * retrying it should do the trick - whatever was in + * the cache is already on the platter and this won't + * cause infinite loop. + */ + qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); + } else { + ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n", + err_mask); + rc = -EIO; + + /* if device failed it, report it to upper layers */ + if (err_mask & AC_ERR_DEV) { + qc->err_mask |= AC_ERR_DEV; + qc->result_tf = tf; + if (!(ap->pflags & ATA_PFLAG_FROZEN)) + rc = 0; + } + } + return rc; +} + static int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; int cnt = 0; - ata_link_for_each_dev(dev, link) - if (ata_dev_enabled(dev)) - cnt++; + ata_for_each_dev(dev, link, ENABLED) + cnt++; return cnt; } @@ -2372,7 +3218,7 @@ static int ata_link_nr_vacant(struct ata_link *link) struct ata_device *dev; int cnt = 0; - ata_link_for_each_dev(dev, link) + ata_for_each_dev(dev, link, ALL) if (dev->class == ATA_DEV_UNKNOWN) cnt++; return cnt; @@ -2380,6 +3226,7 @@ static int ata_link_nr_vacant(struct ata_link *link) static int ata_eh_skip_recovery(struct ata_link *link) { + struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev; @@ -2387,13 +3234,17 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; - /* thaw frozen port, resume link and recover failed devices */ - if ((link->ap->pflags & ATA_PFLAG_FROZEN) || - (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_link_nr_enabled(link)) + /* thaw frozen port and recover failed devices */ + if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) + return 0; + + /* reset at least once if reset is requested */ + if ((ehc->i.action & ATA_EH_RESET) && + !(ehc->i.flags & ATA_EHI_DID_RESET)) return 0; /* skip if class codes for all vacant slots are ATA_DEV_NONE */ - ata_link_for_each_dev(dev, link) { + ata_for_each_dev(dev, link, ALL) { if (dev->class == ATA_DEV_UNKNOWN && ehc->classes[dev->devno] != ATA_DEV_NONE) return 0; @@ -2402,11 +3253,67 @@ static int ata_eh_skip_recovery(struct ata_link *link) return 1; } +static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) +{ + u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); + u64 now = get_jiffies_64(); + int *trials = void_arg; + + if (ent->timestamp < now - min(now, interval)) + return -1; + + (*trials)++; + return 0; +} + +static int ata_eh_schedule_probe(struct ata_device *dev) +{ + struct ata_eh_context *ehc = &dev->link->eh_context; + struct ata_link *link = ata_dev_phys_link(dev); + int trials = 0; + + if (!(ehc->i.probe_mask & (1 << dev->devno)) || + (ehc->did_probe_mask & (1 << dev->devno))) + return 0; + + ata_eh_detach_dev(dev); + ata_dev_init(dev); + ehc->did_probe_mask |= (1 << dev->devno); + ehc->i.action |= ATA_EH_RESET; + ehc->saved_xfer_mode[dev->devno] = 0; + ehc->saved_ncq_enabled &= ~(1 << dev->devno); + + /* Record and count probe trials on the ering. The specific + * error mask used is irrelevant. Because a successful device + * detection clears the ering, this count accumulates only if + * there are consecutive failed probes. + * + * If the count is equal to or higher than ATA_EH_PROBE_TRIALS + * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is + * forced to 1.5Gbps. + * + * This is to work around cases where failed link speed + * negotiation results in device misdetection leading to + * infinite DEVXCHG or PHRDY CHG events. + */ + ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); + ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); + + if (trials > ATA_EH_PROBE_TRIALS) + sata_down_spd_limit(link, 1); + + return 1; +} + static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) { struct ata_eh_context *ehc = &dev->link->eh_context; - ehc->tries[dev->devno]--; + /* -EAGAIN from EH routine indicates retry without prejudice. + * The requester is responsible for ensuring forward progress. + */ + if (err != -EAGAIN) + ehc->tries[dev->devno]--; switch (err) { case -ENODEV: @@ -2420,8 +3327,9 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) /* This is the last chance, better to slow * down than lose it. */ - sata_down_spd_limit(dev->link); - ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); + sata_down_spd_limit(ata_dev_phys_link(dev), 0); + if (dev->pio_mode > XFER_PIO_0) + ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); } } @@ -2430,28 +3338,19 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) ata_dev_disable(dev); /* detach if offline */ - if (ata_link_offline(dev->link)) - ata_eh_detach_dev(dev); - - /* probe if requested */ - if ((ehc->i.probe_mask & (1 << dev->devno)) && - !(ehc->did_probe_mask & (1 << dev->devno))) { + if (ata_phys_link_offline(ata_dev_phys_link(dev))) ata_eh_detach_dev(dev); - ata_dev_init(dev); + /* schedule probe if necessary */ + if (ata_eh_schedule_probe(dev)) { ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; - ehc->did_probe_mask |= (1 << dev->devno); - ehc->i.action |= ATA_EH_SOFTRESET; + memset(ehc->cmd_timeout_idx[dev->devno], 0, + sizeof(ehc->cmd_timeout_idx[dev->devno])); } return 1; } else { - /* soft didn't work? be haaaaard */ - if (ehc->i.flags & ATA_EHI_DID_RESET) - ehc->i.action |= ATA_EH_HARDRESET; - else - ehc->i.action |= ATA_EH_SOFTRESET; - + ehc->i.action |= ATA_EH_RESET; return 0; } } @@ -2485,14 +3384,14 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, { struct ata_link *link; struct ata_device *dev; - int nr_failed_devs, nr_disabled_devs; - int reset, rc; - unsigned long flags; + int nr_failed_devs; + int rc; + unsigned long flags, deadline; DPRINTK("ENTER\n"); /* prep for recovery */ - ata_port_for_each_link(link, ap) { + ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; /* re-enable link? */ @@ -2504,7 +3403,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); } - ata_link_for_each_dev(dev, link) { + ata_for_each_dev(dev, link, ALL) { if (link->flags & ATA_LFLAG_NO_RETRY) ehc->tries[dev->devno] = 1; else @@ -2519,73 +3418,100 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (dev->flags & ATA_DFLAG_DETACH) ata_eh_detach_dev(dev); - if (!ata_dev_enabled(dev) && - ((ehc->i.probe_mask & (1 << dev->devno)) && - !(ehc->did_probe_mask & (1 << dev->devno)))) { - ata_eh_detach_dev(dev); - ata_dev_init(dev); - ehc->did_probe_mask |= (1 << dev->devno); - ehc->i.action |= ATA_EH_SOFTRESET; - } + /* schedule probe if necessary */ + if (!ata_dev_enabled(dev)) + ata_eh_schedule_probe(dev); } } retry: rc = 0; nr_failed_devs = 0; - nr_disabled_devs = 0; - reset = 0; /* if UNLOADING, finish immediately */ if (ap->pflags & ATA_PFLAG_UNLOADING) goto out; /* prep for EH */ - ata_port_for_each_link(link, ap) { + ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; /* skip EH if possible. */ if (ata_eh_skip_recovery(link)) ehc->i.action = 0; - /* do we need to reset? */ - if (ehc->i.action & ATA_EH_RESET_MASK) - reset = 1; - - ata_link_for_each_dev(dev, link) + ata_for_each_dev(dev, link, ALL) ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; } /* reset */ - if (reset) { - /* if PMP is attached, this function only deals with - * downstream links, port should stay thawed. - */ - if (!ap->nr_pmp_links) - ata_eh_freeze_port(ap); + ata_for_each_link(link, ap, EDGE) { + struct ata_eh_context *ehc = &link->eh_context; - ata_port_for_each_link(link, ap) { - struct ata_eh_context *ehc = &link->eh_context; + if (!(ehc->i.action & ATA_EH_RESET)) + continue; - if (!(ehc->i.action & ATA_EH_RESET_MASK)) - continue; + rc = ata_eh_reset(link, ata_link_nr_vacant(link), + prereset, softreset, hardreset, postreset); + if (rc) { + ata_link_printk(link, KERN_ERR, + "reset failed, giving up\n"); + goto out; + } + } - rc = ata_eh_reset(link, ata_link_nr_vacant(link), - prereset, softreset, hardreset, - postreset); - if (rc) { - ata_link_printk(link, KERN_ERR, - "reset failed, giving up\n"); - goto out; + do { + unsigned long now; + + /* + * clears ATA_EH_PARK in eh_info and resets + * ap->park_req_pending + */ + ata_eh_pull_park_action(ap); + + deadline = jiffies; + ata_for_each_link(link, ap, EDGE) { + ata_for_each_dev(dev, link, ALL) { + struct ata_eh_context *ehc = &link->eh_context; + unsigned long tmp; + + if (dev->class != ATA_DEV_ATA) + continue; + if (!(ehc->i.dev_action[dev->devno] & + ATA_EH_PARK)) + continue; + tmp = dev->unpark_deadline; + if (time_before(deadline, tmp)) + deadline = tmp; + else if (time_before_eq(tmp, jiffies)) + continue; + if (ehc->unloaded_mask & (1 << dev->devno)) + continue; + + ata_eh_park_issue_cmd(dev, 1); } } - if (!ap->nr_pmp_links) - ata_eh_thaw_port(ap); + now = jiffies; + if (time_before_eq(deadline, now)) + break; + + deadline = wait_for_completion_timeout(&ap->park_req_pending, + deadline - now); + } while (deadline); + ata_for_each_link(link, ap, EDGE) { + ata_for_each_dev(dev, link, ALL) { + if (!(link->eh_context.unloaded_mask & + (1 << dev->devno))) + continue; + + ata_eh_park_issue_cmd(dev, 0); + ata_eh_done(link, dev, ATA_EH_PARK); + } } /* the rest */ - ata_port_for_each_link(link, ap) { + ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; /* revalidate existing devices and attach new ones */ @@ -2607,37 +3533,53 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, ehc->i.flags &= ~ATA_EHI_SETMODE; } + /* If reset has been issued, clear UA to avoid + * disrupting the current users of the device. + */ + if (ehc->i.flags & ATA_EHI_DID_RESET) { + ata_for_each_dev(dev, link, ALL) { + if (dev->class != ATA_DEV_ATAPI) + continue; + rc = atapi_eh_clear_ua(dev); + if (rc) + goto dev_fail; + } + } + + /* retry flush if necessary */ + ata_for_each_dev(dev, link, ALL) { + if (dev->class != ATA_DEV_ATA) + continue; + rc = ata_eh_maybe_retry_flush(dev); + if (rc) + goto dev_fail; + } + + /* configure link power saving */ + if (ehc->i.action & ATA_EH_LPM) + ata_for_each_dev(dev, link, ALL) + ata_dev_enable_pm(dev, ap->pm_policy); + /* this link is okay now */ ehc->i.flags = 0; continue; - dev_fail: +dev_fail: nr_failed_devs++; - if (ata_eh_handle_dev_fail(dev, rc)) - nr_disabled_devs++; + ata_eh_handle_dev_fail(dev, rc); if (ap->pflags & ATA_PFLAG_FROZEN) { /* PMP reset requires working host port. * Can't retry if it's frozen. */ - if (ap->nr_pmp_links) + if (sata_pmp_attached(ap)) goto out; break; } } - if (nr_failed_devs) { - if (nr_failed_devs != nr_disabled_devs) { - ata_port_printk(ap, KERN_WARNING, "failed to recover " - "some devices, retrying in 5 secs\n"); - ssleep(5); - } else { - /* no device left to recover, repeat fast */ - msleep(500); - } - + if (nr_failed_devs) goto retry; - } out: if (rc && r_failed_link) @@ -2673,10 +3615,10 @@ void ata_eh_finish(struct ata_port *ap) * generate sense data in this function, * considering both err_mask and tf. */ - if (qc->err_mask & AC_ERR_INVALID) - ata_eh_qc_complete(qc); - else + if (qc->flags & ATA_QCFLAG_RETRY) ata_eh_qc_retry(qc); + else + ata_eh_qc_complete(qc); } else { if (qc->flags & ATA_QCFLAG_SENSE_VALID) { ata_eh_qc_complete(qc); @@ -2696,6 +3638,7 @@ void ata_eh_finish(struct ata_port *ap) /** * ata_do_eh - do standard error handling * @ap: host port to handle error for + * * @prereset: prereset method (can be NULL) * @softreset: softreset method (can be NULL) * @hardreset: hardreset method (can be NULL) @@ -2719,13 +3662,34 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, NULL); if (rc) { - ata_link_for_each_dev(dev, &ap->link) + ata_for_each_dev(dev, &ap->link, ALL) ata_dev_disable(dev); } ata_eh_finish(ap); } +/** + * ata_std_error_handler - standard error handler + * @ap: host port to handle error for + * + * Standard error handler + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_std_error_handler(struct ata_port *ap) +{ + struct ata_port_operations *ops = ap->ops; + ata_reset_fn_t hardreset = ops->hardreset; + + /* ignore built-in hardreset if SCR access is not available */ + if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) + hardreset = NULL; + + ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); +} + #ifdef CONFIG_PM /** * ata_eh_handle_port_suspend - perform port suspend operation @@ -2763,6 +3727,7 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) if (ap->ops->port_suspend) rc = ap->ops->port_suspend(ap, ap->pm_mesg); + ata_acpi_set_state(ap, PMSG_SUSPEND); out: /* report result */ spin_lock_irqsave(ap->lock, flags); @@ -2794,6 +3759,8 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) */ static void ata_eh_handle_port_resume(struct ata_port *ap) { + struct ata_link *link; + struct ata_device *dev; unsigned long flags; int rc = 0; @@ -2808,6 +3775,19 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); + /* + * Error timestamps are in jiffies which doesn't run while + * suspended and PHY events during resume isn't too uncommon. + * When the two are combined, it can lead to unnecessary speed + * downs if the machine is suspended and resumed repeatedly. + * Clear error history. + */ + ata_for_each_link(link, ap, HOST_FIRST) + ata_for_each_dev(dev, link, ALL) + ata_ering_clear(&dev->ering); + + ata_acpi_set_state(ap, PMSG_ON); + if (ap->ops->port_resume) rc = ap->ops->port_resume(ap);