USB: EHCI: add native scatter-gather support
[safe/jmp/linux-2.6] / drivers / usb / host / ehci-q.c
index 2284028..a427d3b 100644 (file)
@@ -87,31 +87,33 @@ qtd_fill(struct ehci_hcd *ehci, struct ehci_qtd *qtd, dma_addr_t buf,
 static inline void
 qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd)
 {
+       struct ehci_qh_hw *hw = qh->hw;
+
        /* writes to an active overlay are unsafe */
        BUG_ON(qh->qh_state != QH_STATE_IDLE);
 
-       qh->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
-       qh->hw_alt_next = EHCI_LIST_END(ehci);
+       hw->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
+       hw->hw_alt_next = EHCI_LIST_END(ehci);
 
        /* Except for control endpoints, we make hardware maintain data
         * toggle (like OHCI) ... here (re)initialize the toggle in the QH,
         * and set the pseudo-toggle in udev. Only usb_clear_halt() will
         * ever clear it.
         */
-       if (!(qh->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
+       if (!(hw->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
                unsigned        is_out, epnum;
 
                is_out = !(qtd->hw_token & cpu_to_hc32(ehci, 1 << 8));
-               epnum = (hc32_to_cpup(ehci, &qh->hw_info1) >> 8) & 0x0f;
+               epnum = (hc32_to_cpup(ehci, &hw->hw_info1) >> 8) & 0x0f;
                if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
-                       qh->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
+                       hw->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
                        usb_settoggle (qh->dev, epnum, is_out, 1);
                }
        }
 
        /* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
        wmb ();
-       qh->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
+       hw->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
 }
 
 /* if it weren't for a common silicon quirk (writing the dummy into the qh
@@ -129,7 +131,7 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
                qtd = list_entry (qh->qtd_list.next,
                                struct ehci_qtd, qtd_list);
                /* first qtd may already be partially processed */
-               if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw_current)
+               if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current)
                        qtd = NULL;
        }
 
@@ -139,83 +141,120 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 /*-------------------------------------------------------------------------*/
 
-static void qtd_copy_status (
+static void qh_link_async(struct ehci_hcd *ehci, struct ehci_qh *qh);
+
+static void ehci_clear_tt_buffer_complete(struct usb_hcd *hcd,
+               struct usb_host_endpoint *ep)
+{
+       struct ehci_hcd         *ehci = hcd_to_ehci(hcd);
+       struct ehci_qh          *qh = ep->hcpriv;
+       unsigned long           flags;
+
+       spin_lock_irqsave(&ehci->lock, flags);
+       qh->clearing_tt = 0;
+       if (qh->qh_state == QH_STATE_IDLE && !list_empty(&qh->qtd_list)
+                       && HC_IS_RUNNING(hcd->state))
+               qh_link_async(ehci, qh);
+       spin_unlock_irqrestore(&ehci->lock, flags);
+}
+
+static void ehci_clear_tt_buffer(struct ehci_hcd *ehci, struct ehci_qh *qh,
+               struct urb *urb, u32 token)
+{
+
+       /* If an async split transaction gets an error or is unlinked,
+        * the TT buffer may be left in an indeterminate state.  We
+        * have to clear the TT buffer.
+        *
+        * Note: this routine is never called for Isochronous transfers.
+        */
+       if (urb->dev->tt && !usb_pipeint(urb->pipe) && !qh->clearing_tt) {
+#ifdef DEBUG
+               struct usb_device *tt = urb->dev->tt->hub;
+               dev_dbg(&tt->dev,
+                       "clear tt buffer port %d, a%d ep%d t%08x\n",
+                       urb->dev->ttport, urb->dev->devnum,
+                       usb_pipeendpoint(urb->pipe), token);
+#endif /* DEBUG */
+               if (!ehci_is_TDI(ehci)
+                               || urb->dev->tt->hub !=
+                                  ehci_to_hcd(ehci)->self.root_hub) {
+                       if (usb_hub_clear_tt_buffer(urb) == 0)
+                               qh->clearing_tt = 1;
+               } else {
+
+                       /* REVISIT ARC-derived cores don't clear the root
+                        * hub TT buffer in this way...
+                        */
+               }
+       }
+}
+
+static int qtd_copy_status (
        struct ehci_hcd *ehci,
        struct urb *urb,
        size_t length,
        u32 token
 )
 {
+       int     status = -EINPROGRESS;
+
        /* count IN/OUT bytes, not SETUP (even short packets) */
        if (likely (QTD_PID (token) != 2))
                urb->actual_length += length - QTD_LENGTH (token);
 
        /* don't modify error codes */
-       if (unlikely (urb->status != -EINPROGRESS))
-               return;
+       if (unlikely(urb->unlinked))
+               return status;
 
        /* force cleanup after short read; not always an error */
        if (unlikely (IS_SHORT_READ (token)))
-               urb->status = -EREMOTEIO;
+               status = -EREMOTEIO;
 
        /* serious "can't proceed" faults reported by the hardware */
        if (token & QTD_STS_HALT) {
                if (token & QTD_STS_BABBLE) {
                        /* FIXME "must" disable babbling device's port too */
-                       urb->status = -EOVERFLOW;
+                       status = -EOVERFLOW;
+               /* CERR nonzero + halt --> stall */
+               } else if (QTD_CERR(token)) {
+                       status = -EPIPE;
+
+               /* In theory, more than one of the following bits can be set
+                * since they are sticky and the transaction is retried.
+                * Which to test first is rather arbitrary.
+                */
                } else if (token & QTD_STS_MMF) {
                        /* fs/ls interrupt xfer missed the complete-split */
-                       urb->status = -EPROTO;
+                       status = -EPROTO;
                } else if (token & QTD_STS_DBE) {
-                       urb->status = (QTD_PID (token) == 1) /* IN ? */
+                       status = (QTD_PID (token) == 1) /* IN ? */
                                ? -ENOSR  /* hc couldn't read data */
                                : -ECOMM; /* hc couldn't write data */
                } else if (token & QTD_STS_XACT) {
-                       /* timeout, bad crc, wrong PID, etc; retried */
-                       if (QTD_CERR (token))
-                               urb->status = -EPIPE;
-                       else {
-                               ehci_dbg (ehci, "devpath %s ep%d%s 3strikes\n",
-                                       urb->dev->devpath,
-                                       usb_pipeendpoint (urb->pipe),
-                                       usb_pipein (urb->pipe) ? "in" : "out");
-                               urb->status = -EPROTO;
-                       }
-               /* CERR nonzero + no errors + halt --> stall */
-               } else if (QTD_CERR (token))
-                       urb->status = -EPIPE;
-               else    /* unknown */
-                       urb->status = -EPROTO;
+                       /* timeout, bad CRC, wrong PID, etc */
+                       ehci_dbg(ehci, "devpath %s ep%d%s 3strikes\n",
+                               urb->dev->devpath,
+                               usb_pipeendpoint(urb->pipe),
+                               usb_pipein(urb->pipe) ? "in" : "out");
+                       status = -EPROTO;
+               } else {        /* unknown */
+                       status = -EPROTO;
+               }
 
                ehci_vdbg (ehci,
                        "dev%d ep%d%s qtd token %08x --> status %d\n",
                        usb_pipedevice (urb->pipe),
                        usb_pipeendpoint (urb->pipe),
                        usb_pipein (urb->pipe) ? "in" : "out",
-                       token, urb->status);
-
-               /* if async CSPLIT failed, try cleaning out the TT buffer */
-               if (urb->status != -EPIPE
-                               && urb->dev->tt && !usb_pipeint (urb->pipe)
-                               && ((token & QTD_STS_MMF) != 0
-                                       || QTD_CERR(token) == 0)
-                               && (!ehci_is_TDI(ehci)
-                                       || urb->dev->tt->hub !=
-                                          ehci_to_hcd(ehci)->self.root_hub)) {
-#ifdef DEBUG
-                       struct usb_device *tt = urb->dev->tt->hub;
-                       dev_dbg (&tt->dev,
-                               "clear tt buffer port %d, a%d ep%d t%08x\n",
-                               urb->dev->ttport, urb->dev->devnum,
-                               usb_pipeendpoint (urb->pipe), token);
-#endif /* DEBUG */
-                       usb_hub_tt_clear_buffer (urb->dev, urb->pipe);
-               }
+                       token, status);
        }
+
+       return status;
 }
 
 static void
-ehci_urb_done (struct ehci_hcd *ehci, struct urb *urb)
+ehci_urb_done(struct ehci_hcd *ehci, struct urb *urb, int status)
 __releases(ehci->lock)
 __acquires(ehci->lock)
 {
@@ -223,7 +262,7 @@ __acquires(ehci->lock)
                struct ehci_qh  *qh = (struct ehci_qh *) urb->hcpriv;
 
                /* S-mask in a QH means it's an interrupt urb */
-               if ((qh->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
+               if ((qh->hw->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
 
                        /* ... update hc-wide periodic stats (for usbfs) */
                        ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
@@ -231,46 +270,35 @@ __acquires(ehci->lock)
                qh_put (qh);
        }
 
-       spin_lock (&urb->lock);
-       urb->hcpriv = NULL;
-       switch (urb->status) {
-       case -EINPROGRESS:              /* success */
-               urb->status = 0;
-       default:                        /* fault */
-               COUNT (ehci->stats.complete);
-               break;
-       case -EREMOTEIO:                /* fault or normal */
-               if (!(urb->transfer_flags & URB_SHORT_NOT_OK))
-                       urb->status = 0;
-               COUNT (ehci->stats.complete);
-               break;
-       case -ECONNRESET:               /* canceled */
-       case -ENOENT:
-               COUNT (ehci->stats.unlink);
-               break;
+       if (unlikely(urb->unlinked)) {
+               COUNT(ehci->stats.unlink);
+       } else {
+               /* report non-error and short read status as zero */
+               if (status == -EINPROGRESS || status == -EREMOTEIO)
+                       status = 0;
+               COUNT(ehci->stats.complete);
        }
-       spin_unlock (&urb->lock);
 
 #ifdef EHCI_URB_TRACE
        ehci_dbg (ehci,
                "%s %s urb %p ep%d%s status %d len %d/%d\n",
-               __FUNCTION__, urb->dev->devpath, urb,
+               __func__, urb->dev->devpath, urb,
                usb_pipeendpoint (urb->pipe),
                usb_pipein (urb->pipe) ? "in" : "out",
-               urb->status,
+               status,
                urb->actual_length, urb->transfer_buffer_length);
 #endif
 
        /* complete() can reenter this HCD */
+       usb_hcd_unlink_urb_from_ep(ehci_to_hcd(ehci), urb);
        spin_unlock (&ehci->lock);
-       usb_hcd_giveback_urb (ehci_to_hcd(ehci), urb);
+       usb_hcd_giveback_urb(ehci_to_hcd(ehci), urb, status);
        spin_lock (&ehci->lock);
 }
 
 static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static void unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
 
-static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 
 /*
@@ -281,13 +309,14 @@ static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static unsigned
 qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-       struct ehci_qtd         *last = NULL, *end = qh->dummy;
+       struct ehci_qtd         *last, *end = qh->dummy;
        struct list_head        *entry, *tmp;
+       int                     last_status;
        int                     stopped;
        unsigned                count = 0;
-       int                     do_status = 0;
        u8                      state;
-       u32                     halt = HALT_BIT(ehci);
+       const __le32            halt = HALT_BIT(ehci);
+       struct ehci_qh_hw       *hw = qh->hw;
 
        if (unlikely (list_empty (&qh->qtd_list)))
                return count;
@@ -297,11 +326,20 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
         * they add urbs to this qh's queue or mark them for unlinking.
         *
         * NOTE:  unlinking expects to be done in queue order.
+        *
+        * It's a bug for qh->qh_state to be anything other than
+        * QH_STATE_IDLE, unless our caller is scan_async() or
+        * scan_periodic().
         */
        state = qh->qh_state;
        qh->qh_state = QH_STATE_COMPLETING;
        stopped = (state == QH_STATE_IDLE);
 
+ rescan:
+       last = NULL;
+       last_status = -EINPROGRESS;
+       qh->needs_rescan = 0;
+
        /* remove de-activated QTDs from front of queue.
         * after faults (including short reads), cleanup this urb
         * then let the queue advance.
@@ -312,18 +350,15 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
                struct urb      *urb;
                u32             token = 0;
 
-               /* ignore QHs that are currently inactive */
-               if (qh->hw_info1 & __constant_cpu_to_le32(QH_INACTIVATE))
-                       break;
-
                qtd = list_entry (entry, struct ehci_qtd, qtd_list);
                urb = qtd->urb;
 
                /* clean up any state from previous QTD ...*/
                if (last) {
                        if (likely (last->urb != urb)) {
-                               ehci_urb_done (ehci, last->urb);
+                               ehci_urb_done(ehci, last->urb, last_status);
                                count++;
+                               last_status = -EINPROGRESS;
                        }
                        ehci_qtd_free (ehci, last);
                        last = NULL;
@@ -338,13 +373,50 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
                token = hc32_to_cpu(ehci, qtd->hw_token);
 
                /* always clean up qtds the hc de-activated */
+ retry_xacterr:
                if ((token & QTD_STS_ACTIVE) == 0) {
 
+                       /* on STALL, error, and short reads this urb must
+                        * complete and all its qtds must be recycled.
+                        */
                        if ((token & QTD_STS_HALT) != 0) {
+
+                               /* retry transaction errors until we
+                                * reach the software xacterr limit
+                                */
+                               if ((token & QTD_STS_XACT) &&
+                                               QTD_CERR(token) == 0 &&
+                                               ++qh->xacterrs < QH_XACTERR_MAX &&
+                                               !urb->unlinked) {
+                                       ehci_dbg(ehci,
+       "detected XactErr len %zu/%zu retry %d\n",
+       qtd->length - QTD_LENGTH(token), qtd->length, qh->xacterrs);
+
+                                       /* reset the token in the qtd and the
+                                        * qh overlay (which still contains
+                                        * the qtd) so that we pick up from
+                                        * where we left off
+                                        */
+                                       token &= ~QTD_STS_HALT;
+                                       token |= QTD_STS_ACTIVE |
+                                                       (EHCI_TUNE_CERR << 10);
+                                       qtd->hw_token = cpu_to_hc32(ehci,
+                                                       token);
+                                       wmb();
+                                       hw->hw_token = cpu_to_hc32(ehci,
+                                                       token);
+                                       goto retry_xacterr;
+                               }
                                stopped = 1;
 
                        /* magic dummy for some short reads; qh won't advance.
                         * that silicon quirk can kick in with this dummy too.
+                        *
+                        * other short reads won't stop the queue, including
+                        * control transfers (status stage handles that) or
+                        * most other single-qtd reads ... the queue stops if
+                        * URB_SHORT_NOT_OK was set so the driver submitting
+                        * the urbs could clean it up.
                         */
                        } else if (IS_SHORT_READ (token)
                                        && !(qtd->hw_alt_next
@@ -358,66 +430,118 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
                                && HC_IS_RUNNING (ehci_to_hcd(ehci)->state))) {
                        break;
 
+               /* scan the whole queue for unlinks whenever it stops */
                } else {
                        stopped = 1;
 
-                       if (unlikely (!HC_IS_RUNNING (ehci_to_hcd(ehci)->state)))
-                               urb->status = -ESHUTDOWN;
+                       /* cancel everything if we halt, suspend, etc */
+                       if (!HC_IS_RUNNING(ehci_to_hcd(ehci)->state))
+                               last_status = -ESHUTDOWN;
 
-                       /* ignore active urbs unless some previous qtd
-                        * for the urb faulted (including short read) or
-                        * its urb was canceled.  we may patch qh or qtds.
+                       /* this qtd is active; skip it unless a previous qtd
+                        * for its urb faulted, or its urb was canceled.
                         */
-                       if (likely (urb->status == -EINPROGRESS))
+                       else if (last_status == -EINPROGRESS && !urb->unlinked)
                                continue;
 
-                       /* issue status after short control reads */
-                       if (unlikely (do_status != 0)
-                                       && QTD_PID (token) == 0 /* OUT */) {
-                               do_status = 0;
-                               continue;
-                       }
-
-                       /* token in overlay may be most current */
+                       /* qh unlinked; token in overlay may be most current */
                        if (state == QH_STATE_IDLE
                                        && cpu_to_hc32(ehci, qtd->qtd_dma)
-                                               == qh->hw_current)
-                               token = hc32_to_cpu(ehci, qh->hw_token);
+                                               == hw->hw_current) {
+                               token = hc32_to_cpu(ehci, hw->hw_token);
+
+                               /* An unlink may leave an incomplete
+                                * async transaction in the TT buffer.
+                                * We have to clear it.
+                                */
+                               ehci_clear_tt_buffer(ehci, qh, urb, token);
+                       }
 
                        /* force halt for unlinked or blocked qh, so we'll
                         * patch the qh later and so that completions can't
                         * activate it while we "know" it's stopped.
                         */
-                       if ((halt & qh->hw_token) == 0) {
+                       if ((halt & hw->hw_token) == 0) {
 halt:
-                               qh->hw_token |= halt;
+                               hw->hw_token |= halt;
                                wmb ();
                        }
                }
 
-               /* remove it from the queue */
-               spin_lock (&urb->lock);
-               qtd_copy_status (ehci, urb, qtd->length, token);
-               do_status = (urb->status == -EREMOTEIO)
-                               && usb_pipecontrol (urb->pipe);
-               spin_unlock (&urb->lock);
+               /* unless we already know the urb's status, collect qtd status
+                * and update count of bytes transferred.  in common short read
+                * cases with only one data qtd (including control transfers),
+                * queue processing won't halt.  but with two or more qtds (for
+                * example, with a 32 KB transfer), when the first qtd gets a
+                * short read the second must be removed by hand.
+                */
+               if (last_status == -EINPROGRESS) {
+                       last_status = qtd_copy_status(ehci, urb,
+                                       qtd->length, token);
+                       if (last_status == -EREMOTEIO
+                                       && (qtd->hw_alt_next
+                                               & EHCI_LIST_END(ehci)))
+                               last_status = -EINPROGRESS;
+
+                       /* As part of low/full-speed endpoint-halt processing
+                        * we must clear the TT buffer (11.17.5).
+                        */
+                       if (unlikely(last_status != -EINPROGRESS &&
+                                       last_status != -EREMOTEIO)) {
+                               /* The TT's in some hubs malfunction when they
+                                * receive this request following a STALL (they
+                                * stop sending isochronous packets).  Since a
+                                * STALL can't leave the TT buffer in a busy
+                                * state (if you believe Figures 11-48 - 11-51
+                                * in the USB 2.0 spec), we won't clear the TT
+                                * buffer in this case.  Strictly speaking this
+                                * is a violation of the spec.
+                                */
+                               if (last_status != -EPIPE)
+                                       ehci_clear_tt_buffer(ehci, qh, urb,
+                                                       token);
+                       }
+               }
 
+               /* if we're removing something not at the queue head,
+                * patch the hardware queue pointer.
+                */
                if (stopped && qtd->qtd_list.prev != &qh->qtd_list) {
                        last = list_entry (qtd->qtd_list.prev,
                                        struct ehci_qtd, qtd_list);
                        last->hw_next = qtd->hw_next;
                }
+
+               /* remove qtd; it's recycled after possible urb completion */
                list_del (&qtd->qtd_list);
                last = qtd;
+
+               /* reinit the xacterr counter for the next qtd */
+               qh->xacterrs = 0;
        }
 
        /* last urb's completion might still need calling */
        if (likely (last != NULL)) {
-               ehci_urb_done (ehci, last->urb);
+               ehci_urb_done(ehci, last->urb, last_status);
                count++;
                ehci_qtd_free (ehci, last);
        }
 
+       /* Do we need to rescan for URBs dequeued during a giveback? */
+       if (unlikely(qh->needs_rescan)) {
+               /* If the QH is already unlinked, do the rescan now. */
+               if (state == QH_STATE_IDLE)
+                       goto rescan;
+
+               /* Otherwise we have to wait until the QH is fully unlinked.
+                * Our caller will start an unlink if qh->needs_rescan is
+                * set.  But if an unlink has already started, nothing needs
+                * to be done.
+                */
+               if (state != QH_STATE_LINKED)
+                       qh->needs_rescan = 0;
+       }
+
        /* restore original state; caller must unlink or relink */
        qh->qh_state = state;
 
@@ -425,21 +549,26 @@ halt:
         * it after fault cleanup, or recovering from silicon wrongly
         * overlaying the dummy qtd (which reduces DMA chatter).
         */
-       if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END(ehci)) {
+       if (stopped != 0 || hw->hw_qtd_next == EHCI_LIST_END(ehci)) {
                switch (state) {
                case QH_STATE_IDLE:
                        qh_refresh(ehci, qh);
                        break;
                case QH_STATE_LINKED:
-                       /* should be rare for periodic transfers,
+                       /* We won't refresh a QH that's linked (after the HC
+                        * stopped the queue).  That avoids a race:
+                        *  - HC reads first part of QH;
+                        *  - CPU updates that first part and the token;
+                        *  - HC reads rest of that QH, including token
+                        * Result:  HC gets an inconsistent image, and then
+                        * DMAs to/from the wrong memory (corrupting it).
+                        *
+                        * That should be rare for interrupt transfers,
                         * except maybe high bandwidth ...
                         */
-                       if ((cpu_to_hc32(ehci, QH_SMASK)
-                                       & qh->hw_info2) != 0) {
-                               intr_deschedule (ehci, qh);
-                               (void) qh_schedule (ehci, qh);
-                       } else
-                               unlink_async (ehci, qh);
+
+                       /* Tell the caller to start an unlink */
+                       qh->needs_rescan = 1;
                        break;
                /* otherwise, unlink already started */
                }
@@ -487,9 +616,11 @@ qh_urb_transaction (
 ) {
        struct ehci_qtd         *qtd, *qtd_prev;
        dma_addr_t              buf;
-       int                     len, maxpacket;
+       int                     len, this_sg_len, maxpacket;
        int                     is_input;
        u32                     token;
+       int                     i;
+       struct scatterlist      *sg;
 
        /*
         * URBs map to sequences of QTDs:  one logical transaction
@@ -530,7 +661,20 @@ qh_urb_transaction (
        /*
         * data transfer stage:  buffer setup
         */
-       buf = urb->transfer_dma;
+       i = urb->num_sgs;
+       if (len > 0 && i > 0) {
+               sg = urb->sg->sg;
+               buf = sg_dma_address(sg);
+
+               /* urb->transfer_buffer_length may be smaller than the
+                * size of the scatterlist (or vice versa)
+                */
+               this_sg_len = min_t(int, sg_dma_len(sg), len);
+       } else {
+               sg = NULL;
+               buf = urb->transfer_dma;
+               this_sg_len = len;
+       }
 
        if (is_input)
                token |= (1 /* "in" */ << 8);
@@ -546,18 +690,31 @@ qh_urb_transaction (
        for (;;) {
                int this_qtd_len;
 
-               this_qtd_len = qtd_fill(ehci, qtd, buf, len, token, maxpacket);
+               this_qtd_len = qtd_fill(ehci, qtd, buf, this_sg_len, token,
+                               maxpacket);
+               this_sg_len -= this_qtd_len;
                len -= this_qtd_len;
                buf += this_qtd_len;
+
+               /*
+                * short reads advance to a "magic" dummy instead of the next
+                * qtd ... that forces the queue to stop, for manual cleanup.
+                * (this will usually be overridden later.)
+                */
                if (is_input)
-                       qtd->hw_alt_next = ehci->async->hw_alt_next;
+                       qtd->hw_alt_next = ehci->async->hw->hw_alt_next;
 
                /* qh makes control packets use qtd toggle; maybe switch it */
                if ((maxpacket & (this_qtd_len + (maxpacket - 1))) == 0)
                        token ^= QTD_TOGGLE;
 
-               if (likely (len <= 0))
-                       break;
+               if (likely(this_sg_len <= 0)) {
+                       if (--i <= 0 || len <= 0)
+                               break;
+                       sg = sg_next(sg);
+                       buf = sg_dma_address(sg);
+                       this_sg_len = min_t(int, sg_dma_len(sg), len);
+               }
 
                qtd_prev = qtd;
                qtd = ehci_qtd_alloc (ehci, flags);
@@ -568,8 +725,10 @@ qh_urb_transaction (
                list_add_tail (&qtd->qtd_list, head);
        }
 
-       /* unless the bulk/interrupt caller wants a chance to clean
-        * up after short reads, hc should advance qh past this urb
+       /*
+        * unless the caller requires manual cleanup after short reads,
+        * have the alt_next mechanism keep the queue running after the
+        * last data qtd (the only one, for control and most other cases).
         */
        if (likely ((urb->transfer_flags & URB_SHORT_NOT_OK) == 0
                                || usb_pipecontrol (urb->pipe)))
@@ -642,6 +801,8 @@ qh_make (
        u32                     info1 = 0, info2 = 0;
        int                     is_input, type;
        int                     maxp = 0;
+       struct usb_tt           *tt = urb->dev->tt;
+       struct ehci_qh_hw       *hw;
 
        if (!qh)
                return qh;
@@ -656,6 +817,14 @@ qh_make (
        type = usb_pipetype (urb->pipe);
        maxp = usb_maxpacket (urb->dev, urb->pipe, !is_input);
 
+       /* 1024 byte maxpacket is a hardware ceiling.  High bandwidth
+        * acts like up to 3KB, but is built from smaller packets.
+        */
+       if (max_packet(maxp) > 1024) {
+               ehci_dbg(ehci, "bogus qh maxpacket %d\n", max_packet(maxp));
+               goto done;
+       }
+
        /* Compute interrupt scheduling parameters just once, and save.
         * - allowing for high bandwidth, how many nsec/uframe are used?
         * - split transactions need a second CSPLIT uframe; same question
@@ -665,8 +834,9 @@ qh_make (
         * For control/bulk requests, the HC or TT handles these.
         */
        if (type == PIPE_INTERRUPT) {
-               qh->usecs = NS_TO_US (usb_calc_bus_time (USB_SPEED_HIGH, is_input, 0,
-                               hb_mult (maxp) * max_packet (maxp)));
+               qh->usecs = NS_TO_US(usb_calc_bus_time(USB_SPEED_HIGH,
+                               is_input, 0,
+                               hb_mult(maxp) * max_packet(maxp)));
                qh->start = NO_FRAME;
 
                if (urb->dev->speed == USB_SPEED_HIGH) {
@@ -684,7 +854,6 @@ qh_make (
                                goto done;
                        }
                } else {
-                       struct usb_tt   *tt = urb->dev->tt;
                        int             think_time;
 
                        /* gap is f(FS/LS transfer times) */
@@ -740,10 +909,8 @@ qh_make (
                /* set the address of the TT; for TDI's integrated
                 * root hub tt, leave it zeroed.
                 */
-               if (!ehci_is_TDI(ehci)
-                               || urb->dev->tt->hub !=
-                                       ehci_to_hcd(ehci)->self.root_hub)
-                       info2 |= urb->dev->tt->hub->devnum << 16;
+               if (tt && tt->hub != ehci_to_hcd(ehci)->self.root_hub)
+                       info2 |= tt->hub->devnum << 16;
 
                /* NOTE:  if (PIPE_INTERRUPT) { scheduler sets c-mask } */
 
@@ -758,7 +925,13 @@ qh_make (
                        info2 |= (EHCI_TUNE_MULT_HS << 30);
                } else if (type == PIPE_BULK) {
                        info1 |= (EHCI_TUNE_RL_HS << 28);
-                       info1 |= 512 << 16;     /* usb2 fixed maxpacket */
+                       /* The USB spec says that high speed bulk endpoints
+                        * always use 512 byte maxpacket.  But some device
+                        * vendors decided to ignore that, and MSFT is happy
+                        * to help them do so.  So now people expect to use
+                        * such nonconformant devices with Linux too; sigh.
+                        */
+                       info1 |= max_packet(maxp) << 16;
                        info2 |= (EHCI_TUNE_MULT_HS << 30);
                } else {                /* PIPE_INTERRUPT */
                        info1 |= max_packet (maxp) << 16;
@@ -776,8 +949,9 @@ done:
 
        /* init as live, toggle clear, advance to dummy */
        qh->qh_state = QH_STATE_IDLE;
-       qh->hw_info1 = cpu_to_hc32(ehci, info1);
-       qh->hw_info2 = cpu_to_hc32(ehci, info2);
+       hw = qh->hw;
+       hw->hw_info1 = cpu_to_hc32(ehci, info1);
+       hw->hw_info2 = cpu_to_hc32(ehci, info2);
        usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
        qh_refresh (ehci, qh);
        return qh;
@@ -792,6 +966,12 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        __hc32          dma = QH_NEXT(ehci, qh->qh_dma);
        struct ehci_qh  *head;
 
+       /* Don't link a QH if there's a Clear-TT-Buffer pending */
+       if (unlikely(qh->clearing_tt))
+               return;
+
+       WARN_ON(qh->qh_state != QH_STATE_IDLE);
+
        /* (re)start the async schedule? */
        head = ehci->async;
        timer_action_done (ehci, TIMER_ASYNC_OFF);
@@ -810,17 +990,18 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        }
 
        /* clear halt and/or toggle; and maybe recover from silicon quirk */
-       if (qh->qh_state == QH_STATE_IDLE)
-               qh_refresh (ehci, qh);
+       qh_refresh(ehci, qh);
 
        /* splice right after start */
        qh->qh_next = head->qh_next;
-       qh->hw_next = head->hw_next;
+       qh->hw->hw_next = head->hw->hw_next;
        wmb ();
 
        head->qh_next.qh = qh;
-       head->hw_next = dma;
+       head->hw->hw_next = dma;
 
+       qh_get(qh);
+       qh->xacterrs = 0;
        qh->qh_state = QH_STATE_LINKED;
        /* qtd completions reported later by interrupt */
 }
@@ -842,7 +1023,7 @@ static struct ehci_qh *qh_append_tds (
 )
 {
        struct ehci_qh          *qh = NULL;
-       u32                     qh_addr_mask = cpu_to_hc32(ehci, 0x7f);
+       __hc32                  qh_addr_mask = cpu_to_hc32(ehci, 0x7f);
 
        qh = (struct ehci_qh *) *ptr;
        if (unlikely (qh == NULL)) {
@@ -864,7 +1045,7 @@ static struct ehci_qh *qh_append_tds (
 
                         /* usb_reset_device() briefly reverts to address 0 */
                         if (usb_pipedevice (urb->pipe) == 0)
-                                qh->hw_info1 &= ~qh_addr_mask;
+                               qh->hw->hw_info1 &= ~qh_addr_mask;
                }
 
                /* just one way to queue requests: swap with the dummy qtd.
@@ -891,7 +1072,7 @@ static struct ehci_qh *qh_append_tds (
 
                        list_del (&qtd->qtd_list);
                        list_add (&dummy->qtd_list, qtd_list);
-                       __list_splice (qtd_list, qh->qtd_list.prev);
+                       list_splice_tail(qtd_list, &qh->qtd_list);
 
                        ehci_qtd_init(ehci, qtd, qtd->qtd_dma);
                        qh->dummy = qtd;
@@ -917,7 +1098,6 @@ static struct ehci_qh *qh_append_tds (
 static int
 submit_async (
        struct ehci_hcd         *ehci,
-       struct usb_host_endpoint *ep,
        struct urb              *urb,
        struct list_head        *qtd_list,
        gfp_t                   mem_flags
@@ -926,18 +1106,18 @@ submit_async (
        int                     epnum;
        unsigned long           flags;
        struct ehci_qh          *qh = NULL;
-       int                     rc = 0;
+       int                     rc;
 
        qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list);
-       epnum = ep->desc.bEndpointAddress;
+       epnum = urb->ep->desc.bEndpointAddress;
 
 #ifdef EHCI_URB_TRACE
        ehci_dbg (ehci,
                "%s %s urb %p ep%d%s len %d, qtd %p [qh %p]\n",
-               __FUNCTION__, urb->dev->devpath, urb,
+               __func__, urb->dev->devpath, urb,
                epnum & 0x0f, (epnum & USB_DIR_IN) ? "in" : "out",
                urb->transfer_buffer_length,
-               qtd, ep->hcpriv);
+               qtd, urb->ep->hcpriv);
 #endif
 
        spin_lock_irqsave (&ehci->lock, flags);
@@ -946,9 +1126,13 @@ submit_async (
                rc = -ESHUTDOWN;
                goto done;
        }
+       rc = usb_hcd_link_urb_to_ep(ehci_to_hcd(ehci), urb);
+       if (unlikely(rc))
+               goto done;
 
-       qh = qh_append_tds (ehci, urb, qtd_list, epnum, &ep->hcpriv);
+       qh = qh_append_tds(ehci, urb, qtd_list, epnum, &urb->ep->hcpriv);
        if (unlikely(qh == NULL)) {
+               usb_hcd_unlink_urb_from_ep(ehci_to_hcd(ehci), urb);
                rc = -ENOMEM;
                goto done;
        }
@@ -957,7 +1141,7 @@ submit_async (
         * the HC and TT handle it when the TT has a buffer ready.
         */
        if (likely (qh->qh_state == QH_STATE_IDLE))
-               qh_link_async (ehci, qh_get (qh));
+               qh_link_async(ehci, qh);
  done:
        spin_unlock_irqrestore (&ehci->lock, flags);
        if (unlikely (qh == NULL))
@@ -974,7 +1158,7 @@ static void end_unlink_async (struct ehci_hcd *ehci)
        struct ehci_qh          *qh = ehci->reclaim;
        struct ehci_qh          *next;
 
-       timer_action_done (ehci, TIMER_IAA_WATCHDOG);
+       iaa_watchdog_done(ehci);
 
        // qh->hw_next = cpu_to_hc32(qh->qh_dma);
        qh->qh_state = QH_STATE_IDLE;
@@ -984,7 +1168,6 @@ static void end_unlink_async (struct ehci_hcd *ehci)
        /* other unlink(s) may be pending (in QH_STATE_UNLINK_WAIT) */
        next = qh->reclaim;
        ehci->reclaim = next;
-       ehci->reclaim_ready = 0;
        qh->reclaim = NULL;
 
        qh_completions (ehci, qh);
@@ -993,8 +1176,6 @@ static void end_unlink_async (struct ehci_hcd *ehci)
                        && HC_IS_RUNNING (ehci_to_hcd(ehci)->state))
                qh_link_async (ehci, qh);
        else {
-               qh_put (qh);            // refcount from async list
-
                /* it's not free to turn the async schedule on/off; leave it
                 * active but idle for a while once it empties.
                 */
@@ -1002,6 +1183,7 @@ static void end_unlink_async (struct ehci_hcd *ehci)
                                && ehci->async->qh_next.qh == NULL)
                        timer_action (ehci, TIMER_ASYNC_OFF);
        }
+       qh_put(qh);                     /* refcount from async list */
 
        if (next) {
                ehci->reclaim = NULL;
@@ -1048,11 +1230,12 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        while (prev->qh_next.qh != qh)
                prev = prev->qh_next.qh;
 
-       prev->hw_next = qh->hw_next;
+       prev->hw->hw_next = qh->hw->hw_next;
        prev->qh_next = qh->qh_next;
        wmb ();
 
-       if (unlikely (ehci_to_hcd(ehci)->state == HC_STATE_HALT)) {
+       /* If the controller isn't running, we don't have to wait for it */
+       if (unlikely(!HC_IS_RUNNING(ehci_to_hcd(ehci)->state))) {
                /* if (unlikely (qh->reclaim != 0))
                 *      this will recurse, probably not much
                 */
@@ -1060,11 +1243,10 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
                return;
        }
 
-       ehci->reclaim_ready = 0;
        cmd |= CMD_IAAD;
        ehci_writel(ehci, cmd, &ehci->regs->command);
        (void)ehci_readl(ehci, &ehci->regs->command);
-       timer_action (ehci, TIMER_IAA_WATCHDOG);
+       iaa_watchdog_start(ehci);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -1074,8 +1256,7 @@ static void scan_async (struct ehci_hcd *ehci)
        struct ehci_qh          *qh;
        enum ehci_timer_action  action = TIMER_IO_WATCHDOG;
 
-       if (!++(ehci->stamp))
-               ehci->stamp++;
+       ehci->stamp = ehci_readl(ehci, &ehci->regs->frame_index);
        timer_action_done (ehci, TIMER_ASYNC_SHRINK);
 rescan:
        qh = ehci->async->qh_next.qh;
@@ -1094,24 +1275,28 @@ rescan:
                                qh = qh_get (qh);
                                qh->stamp = ehci->stamp;
                                temp = qh_completions (ehci, qh);
+                               if (qh->needs_rescan)
+                                       unlink_async(ehci, qh);
                                qh_put (qh);
                                if (temp != 0) {
                                        goto rescan;
                                }
                        }
 
-                       /* unlink idle entries, reducing HC PCI usage as well
+                       /* unlink idle entries, reducing DMA usage as well
                         * as HCD schedule-scanning costs.  delay for any qh
                         * we just scanned, there's a not-unusual case that it
                         * doesn't stay idle for long.
                         * (plus, avoids some kind of re-activation race.)
                         */
-                       if (list_empty (&qh->qtd_list)) {
-                               if (qh->stamp == ehci->stamp)
+                       if (list_empty(&qh->qtd_list)
+                                       && qh->qh_state == QH_STATE_LINKED) {
+                               if (!ehci->reclaim
+                                       && ((ehci->stamp - qh->stamp) & 0x1fff)
+                                               >= (EHCI_SHRINK_FRAMES * 8))
+                                       start_unlink_async(ehci, qh);
+                               else
                                        action = TIMER_ASYNC_SHRINK;
-                               else if (!ehci->reclaim
-                                           && qh->qh_state == QH_STATE_LINKED)
-                                       start_unlink_async (ehci, qh);
                        }
 
                        qh = qh->qh_next.qh;