qlge: Fix lock/mutex warnings.
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 89ea9c7..6168071 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -34,7 +34,6 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -1859,11 +1858,41 @@ static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
 {
        struct rx_ring *rx_ring = container_of(napi, struct rx_ring, napi);
        struct ql_adapter *qdev = rx_ring->qdev;
-       int work_done = ql_clean_inbound_rx_ring(rx_ring, budget);
+       struct rx_ring *trx_ring;
+       int i, work_done = 0;
+       struct intr_context *ctx = &qdev->intr_context[rx_ring->cq_id];
 
        QPRINTK(qdev, RX_STATUS, DEBUG, "Enter, NAPI POLL cq_id = %d.\n",
                rx_ring->cq_id);
 
+       /* Service the TX rings first.  They start
+        * right after the RSS rings. */
+       for (i = qdev->rss_ring_count; i < qdev->rx_ring_count; i++) {
+               trx_ring = &qdev->rx_ring[i];
+               /* If this TX completion ring belongs to this vector and
+                * it's not empty then service it.
+                */
+               if ((ctx->irq_mask & (1 << trx_ring->cq_id)) &&
+                       (ql_read_sh_reg(trx_ring->prod_idx_sh_reg) !=
+                                       trx_ring->cnsmr_idx)) {
+                       QPRINTK(qdev, INTR, DEBUG,
+                               "%s: Servicing TX completion ring %d.\n",
+                               __func__, trx_ring->cq_id);
+                       ql_clean_outbound_rx_ring(trx_ring);
+               }
+       }
+
+       /*
+        * Now service the RSS ring if it's active.
+        */
+       if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) !=
+                                       rx_ring->cnsmr_idx) {
+               QPRINTK(qdev, INTR, DEBUG,
+                       "%s: Servicing RX completion ring %d.\n",
+                       __func__, rx_ring->cq_id);
+               work_done = ql_clean_inbound_rx_ring(rx_ring, budget);
+       }
+
        if (work_done < budget) {
                napi_complete(napi);
                ql_enable_completion_interrupt(qdev, rx_ring->irq);
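
A standalone user-space sketch of the dispatch test used above (not part of the
patch; struct ring, needs_service() and the sample values are invented for
illustration).  A completion ring is serviced only when its cq_id bit is set in
the vector's irq_mask and the hardware-written producer index differs from the
driver's consumer index:

    #include <stdio.h>

    /* Illustrative stand-ins for the driver's rx_ring fields. */
    struct ring {
            int cq_id;
            unsigned int prod_idx;          /* advanced by hardware   */
            unsigned int cnsmr_idx;         /* advanced by the driver */
    };

    /* Same test as the patch: right vector AND ring not empty. */
    static int needs_service(const struct ring *r, unsigned int irq_mask)
    {
            return (irq_mask & (1u << r->cq_id)) &&
                   (r->prod_idx != r->cnsmr_idx);
    }

    int main(void)
    {
            struct ring rings[] = {
                    { 2, 10, 10 },  /* this vector, but idle      */
                    { 3, 12,  9 },  /* this vector, has work      */
                    { 6,  4,  1 },  /* busy, but another vector's */
            };
            unsigned int irq_mask = (1u << 0) | (1u << 2) | (1u << 3);
            unsigned int i;

            for (i = 0; i < sizeof(rings) / sizeof(rings[0]); i++)
                    printf("cq_id %d: %s\n", rings[i].cq_id,
                           needs_service(&rings[i], irq_mask) ?
                           "service" : "skip");
            return 0;
    }
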
@@ -1896,12 +1925,10 @@ static void ql_vlan_rx_add_vid(struct net_device *ndev, u16 vid)
        status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
        if (status)
                return;
-       spin_lock(&qdev->hw_lock);
        if (ql_set_mac_addr_reg
            (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
                QPRINTK(qdev, IFUP, ERR, "Failed to init vlan address.\n");
        }
-       spin_unlock(&qdev->hw_lock);
        ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
 }
 
@@ -1915,48 +1942,14 @@ static void ql_vlan_rx_kill_vid(struct net_device *ndev, u16 vid)
        if (status)
                return;
 
-       spin_lock(&qdev->hw_lock);
        if (ql_set_mac_addr_reg
            (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
                QPRINTK(qdev, IFUP, ERR, "Failed to clear vlan address.\n");
        }
-       spin_unlock(&qdev->hw_lock);
        ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
 
 }
 
-/* Worker thread to process a given rx_ring that is dedicated
- * to outbound completions.
- */
-static void ql_tx_clean(struct work_struct *work)
-{
-       struct rx_ring *rx_ring =
-           container_of(work, struct rx_ring, rx_work.work);
-       ql_clean_outbound_rx_ring(rx_ring);
-       ql_enable_completion_interrupt(rx_ring->qdev, rx_ring->irq);
-
-}
-
-/* Worker thread to process a given rx_ring that is dedicated
- * to inbound completions.
- */
-static void ql_rx_clean(struct work_struct *work)
-{
-       struct rx_ring *rx_ring =
-           container_of(work, struct rx_ring, rx_work.work);
-       ql_clean_inbound_rx_ring(rx_ring, 64);
-       ql_enable_completion_interrupt(rx_ring->qdev, rx_ring->irq);
-}
-
-/* MSI-X Multiple Vector Interrupt Handler for outbound completions. */
-static irqreturn_t qlge_msix_tx_isr(int irq, void *dev_id)
-{
-       struct rx_ring *rx_ring = dev_id;
-       queue_delayed_work_on(rx_ring->cpu, rx_ring->qdev->q_workqueue,
-                             &rx_ring->rx_work, 0);
-       return IRQ_HANDLED;
-}
-
 /* MSI-X Multiple Vector Interrupt Handler for inbound completions. */
 static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id)
 {
@@ -1976,7 +1969,6 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
        struct ql_adapter *qdev = rx_ring->qdev;
        struct intr_context *intr_context = &qdev->intr_context[0];
        u32 var;
-       int i;
        int work_done = 0;
 
        spin_lock(&qdev->hw_lock);
@@ -2004,54 +1996,33 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
        /*
         * Check MPI processor activity.
         */
-       if (var & STS_PI) {
+       if ((var & STS_PI) &&
+               (ql_read32(qdev, INTR_MASK) & INTR_MASK_PI)) {
                /*
                 * We've got an async event or mailbox completion.
                 * Handle it and clear the source of the interrupt.
                 */
                QPRINTK(qdev, INTR, ERR, "Got MPI processor interrupt.\n");
                ql_disable_completion_interrupt(qdev, intr_context->intr);
-               queue_delayed_work_on(smp_processor_id(), qdev->workqueue,
-                                     &qdev->mpi_work, 0);
+               ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
+               queue_delayed_work_on(smp_processor_id(),
+                               qdev->workqueue, &qdev->mpi_work, 0);
                work_done++;
        }
 
        /*
-        * Check the default queue and wake handler if active.
+        * Get the bit-mask that shows the active queues for this
+        * pass.  Compare it to the queues that this irq services
+        * and call napi if there's a match.
         */
-       rx_ring = &qdev->rx_ring[0];
-       if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) != rx_ring->cnsmr_idx) {
-               QPRINTK(qdev, INTR, INFO, "Waking handler for rx_ring[0].\n");
-               ql_disable_completion_interrupt(qdev, intr_context->intr);
-               queue_delayed_work_on(smp_processor_id(), qdev->q_workqueue,
-                                     &rx_ring->rx_work, 0);
-               work_done++;
-       }
-
-       if (!test_bit(QL_MSIX_ENABLED, &qdev->flags)) {
-               /*
-                * Start the DPC for each active queue.
-                */
-               for (i = 1; i < qdev->rx_ring_count; i++) {
-                       rx_ring = &qdev->rx_ring[i];
-                       if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) !=
-                           rx_ring->cnsmr_idx) {
-                               QPRINTK(qdev, INTR, INFO,
-                                       "Waking handler for rx_ring[%d].\n", i);
-                               ql_disable_completion_interrupt(qdev,
-                                                               intr_context->
-                                                               intr);
-                               if (i >= qdev->rss_ring_count)
-                                       queue_delayed_work_on(rx_ring->cpu,
-                                                             qdev->q_workqueue,
-                                                             &rx_ring->rx_work,
-                                                             0);
-                               else
-                                       napi_schedule(&rx_ring->napi);
-                               work_done++;
-                       }
-               }
-       }
+       var = ql_read32(qdev, ISR1);
+       if (var & intr_context->irq_mask) {
+               QPRINTK(qdev, INTR, INFO,
+                       "Waking handler for rx_ring[0].\n");
+               ql_disable_completion_interrupt(qdev, intr_context->intr);
+               napi_schedule(&rx_ring->napi);
+               work_done++;
+       }
        ql_enable_completion_interrupt(qdev, intr_context->intr);
        return work_done ? IRQ_HANDLED : IRQ_NONE;
 }
@@ -2129,7 +2100,7 @@ static void ql_hw_csum_setup(struct sk_buff *skb,
                                    iph->daddr, len, iph->protocol, 0);
 }
 
-static int qlge_send(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
 {
        struct tx_ring_desc *tx_ring_desc;
        struct ob_mac_iocb_req *mac_iocb_ptr;
@@ -2656,7 +2627,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
            FLAGS_LI;           /* Load irq delay values */
        if (rx_ring->lbq_len) {
                cqicb->flags |= FLAGS_LL;       /* Load lbq values */
-               tmp = (u64)rx_ring->lbq_base_dma;;
+               tmp = (u64)rx_ring->lbq_base_dma;
                base_indirect_ptr = (__le64 *) rx_ring->lbq_base_indirect;
                page_entries = 0;
                do {
@@ -2680,7 +2651,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
        }
        if (rx_ring->sbq_len) {
                cqicb->flags |= FLAGS_LS;       /* Load sbq values */
-               tmp = (u64)rx_ring->sbq_base_dma;;
+               tmp = (u64)rx_ring->sbq_base_dma;
                base_indirect_ptr = (__le64 *) rx_ring->sbq_base_indirect;
                page_entries = 0;
                do {
@@ -2703,35 +2674,9 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
        }
        switch (rx_ring->type) {
        case TX_Q:
-               /* If there's only one interrupt, then we use
-                * worker threads to process the outbound
-                * completion handling rx_rings. We do this so
-                * they can be run on multiple CPUs. There is
-                * room to play with this more where we would only
-                * run in a worker if there are more than x number
-                * of outbound completions on the queue and more
-                * than one queue active.  Some threshold that
-                * would indicate a benefit in spite of the cost
-                * of a context switch.
-                * If there's more than one interrupt, then the
-                * outbound completions are processed in the ISR.
-                */
-               if (!test_bit(QL_MSIX_ENABLED, &qdev->flags))
-                       INIT_DELAYED_WORK(&rx_ring->rx_work, ql_tx_clean);
-               else {
-                       /* With all debug warnings on we see a WARN_ON message
-                        * when we free the skb in the interrupt context.
-                        */
-                       INIT_DELAYED_WORK(&rx_ring->rx_work, ql_tx_clean);
-               }
                cqicb->irq_delay = cpu_to_le16(qdev->tx_coalesce_usecs);
                cqicb->pkt_delay = cpu_to_le16(qdev->tx_max_coalesced_frames);
                break;
-       case DEFAULT_Q:
-               INIT_DELAYED_WORK(&rx_ring->rx_work, ql_rx_clean);
-               cqicb->irq_delay = 0;
-               cqicb->pkt_delay = 0;
-               break;
        case RX_Q:
                /* Inbound completion handling rx_rings run in
                 * separate NAPI contexts.
@@ -2815,17 +2760,20 @@ static void ql_disable_msix(struct ql_adapter *qdev)
        }
 }
 
+/* We start by trying to get the number of vectors
+ * stored in qdev->intr_count. If we don't get that
+ * many then we reduce the count and try again.
+ */
 static void ql_enable_msix(struct ql_adapter *qdev)
 {
-       int i;
+       int i, err;
 
-       qdev->intr_count = 1;
        /* Get the MSIX vectors. */
        if (irq_type == MSIX_IRQ) {
                /* Try to alloc space for the msix struct,
                 * if it fails then go to MSI/legacy.
                 */
-               qdev->msi_x_entry = kcalloc(qdev->rx_ring_count,
+               qdev->msi_x_entry = kcalloc(qdev->intr_count,
                                            sizeof(struct msix_entry),
                                            GFP_KERNEL);
                if (!qdev->msi_x_entry) {
@@ -2833,26 +2781,36 @@ static void ql_enable_msix(struct ql_adapter *qdev)
                        goto msi;
                }
 
-               for (i = 0; i < qdev->rx_ring_count; i++)
+               for (i = 0; i < qdev->intr_count; i++)
                        qdev->msi_x_entry[i].entry = i;
 
-               if (!pci_enable_msix
-                   (qdev->pdev, qdev->msi_x_entry, qdev->rx_ring_count)) {
-                       set_bit(QL_MSIX_ENABLED, &qdev->flags);
-                       qdev->intr_count = qdev->rx_ring_count;
-                       QPRINTK(qdev, IFUP, DEBUG,
-                               "MSI-X Enabled, got %d vectors.\n",
-                               qdev->intr_count);
-                       return;
-               } else {
+               /* Loop to get our vectors.  We start with
+                * what we want and settle for what we get.
+                */
+               do {
+                       err = pci_enable_msix(qdev->pdev,
+                               qdev->msi_x_entry, qdev->intr_count);
+                       if (err > 0)
+                               qdev->intr_count = err;
+               } while (err > 0);
+
+               if (err < 0) {
                        kfree(qdev->msi_x_entry);
                        qdev->msi_x_entry = NULL;
                        QPRINTK(qdev, IFUP, WARNING,
                                "MSI-X Enable failed, trying MSI.\n");
+                       qdev->intr_count = 1;
                        irq_type = MSI_IRQ;
+               } else if (err == 0) {
+                       set_bit(QL_MSIX_ENABLED, &qdev->flags);
+                       QPRINTK(qdev, IFUP, INFO,
+                               "MSI-X Enabled, got %d vectors.\n",
+                               qdev->intr_count);
+                       return;
                }
        }
 msi:
+       qdev->intr_count = 1;
        if (irq_type == MSI_IRQ) {
                if (!pci_enable_msi(qdev->pdev)) {
                        set_bit(QL_MSI_ENABLED, &qdev->flags);
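
For context on the retry loop added above (not part of the patch): the
pci_enable_msix() of this era returns 0 on success, a negative errno on
failure, or a positive count of vectors the system can actually provide, in
which case the caller is expected to retry with that smaller count.  A
standalone sketch of the same negotiation, where try_enable_msix() and the
vector counts are invented stand-ins for the real PCI call:

    #include <stdio.h>

    /* Stub for pci_enable_msix(): pretend only 5 vectors are available. */
    static int try_enable_msix(int requested)
    {
            const int available = 5;

            if (requested <= available)
                    return 0;       /* success */
            return available;       /* positive: retry with this count */
    }

    int main(void)
    {
            int intr_count = 8;     /* start with what we want... */
            int err;

            do {
                    err = try_enable_msix(intr_count);
                    if (err > 0)
                            intr_count = err;  /* ...settle for what we get */
            } while (err > 0);

            if (err == 0)
                    printf("MSI-X enabled, got %d vectors\n", intr_count);
            else
                    printf("MSI-X failed (%d), fall back to MSI\n", err);
            return 0;
    }
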
@@ -2865,6 +2823,71 @@ msi:
        QPRINTK(qdev, IFUP, DEBUG, "Running with legacy interrupts.\n");
 }
 
+/* Each vector services 1 RSS ring and 1 or more
+ * TX completion rings.  This function loops through
+ * the TX completion rings and assigns the vector that
+ * will service it.  An example would be if there are
+ * 2 vectors (so 2 RSS rings) and 8 TX completion rings.
+ * This would mean that vector 0 would service RSS ring 0
+ * and TX completion rings 0,1,2 and 3.  Vector 1 would
+ * service RSS ring 1 and TX completion rings 4,5,6 and 7.
+ */
+static void ql_set_tx_vect(struct ql_adapter *qdev)
+{
+       int i, j, vect;
+       u32 tx_rings_per_vector = qdev->tx_ring_count / qdev->intr_count;
+
+       if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
+               /* Assign irq vectors to TX rx_rings.*/
+               for (vect = 0, j = 0, i = qdev->rss_ring_count;
+                                        i < qdev->rx_ring_count; i++) {
+                       if (j == tx_rings_per_vector) {
+                               vect++;
+                               j = 0;
+                       }
+                       qdev->rx_ring[i].irq = vect;
+                       j++;
+               }
+       } else {
+               /* For single vector all rings have an irq
+                * of zero.
+                */
+               for (i = 0; i < qdev->rx_ring_count; i++)
+                       qdev->rx_ring[i].irq = 0;
+       }
+}
+
+/* Set the interrupt mask for this vector.  Each vector
+ * will service 1 RSS ring and 1 or more TX completion
+ * rings.  This function sets up a bit mask per vector
+ * that indicates which rings it services.
+ */
+static void ql_set_irq_mask(struct ql_adapter *qdev, struct intr_context *ctx)
+{
+       int j, vect = ctx->intr;
+       u32 tx_rings_per_vector = qdev->tx_ring_count / qdev->intr_count;
+
+       if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
+               /* Add the RSS ring serviced by this vector
+                * to the mask.
+                */
+               ctx->irq_mask = (1 << qdev->rx_ring[vect].cq_id);
+               /* Add the TX ring(s) serviced by this vector
+                * to the mask. */
+               for (j = 0; j < tx_rings_per_vector; j++) {
+                       ctx->irq_mask |=
+                       (1 << qdev->rx_ring[qdev->rss_ring_count +
+                       (vect * tx_rings_per_vector) + j].cq_id);
+               }
+       } else {
+               /* For single vector we just shift each queue's
+                * ID into the mask.
+                */
+               for (j = 0; j < qdev->rx_ring_count; j++)
+                       ctx->irq_mask |= (1 << qdev->rx_ring[j].cq_id);
+       }
+}
+
 /*
  * Here we build the intr_context structures based on
  * our rx_ring count and intr vector count.
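
The vector/ring arithmetic in ql_set_tx_vect() and ql_set_irq_mask() above can
be checked in isolation.  A user-space sketch of the worked example from the
comment (2 vectors, 8 TX completion rings, so RSS rings take cq_id 0..1 and TX
completion rings take cq_id 2..9; the program and its constants are
illustrative only, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            const int intr_count = 2;       /* MSI-X vectors == RSS rings */
            const int tx_ring_count = 8;    /* TX completion rings        */
            const int rss_ring_count = intr_count;
            const int tx_per_vector = tx_ring_count / intr_count;
            int vect, j;

            for (vect = 0; vect < intr_count; vect++) {
                    /* Bit for the RSS ring this vector owns... */
                    unsigned int irq_mask = 1u << vect;

                    /* ...plus one bit per TX completion ring it owns. */
                    for (j = 0; j < tx_per_vector; j++)
                            irq_mask |= 1u << (rss_ring_count +
                                               vect * tx_per_vector + j);

                    printf("vector %d: irq_mask = 0x%03x\n", vect, irq_mask);
            }
            return 0;       /* prints 0x03d and 0x3c2 */
    }
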
@@ -2876,18 +2899,19 @@ static void ql_resolve_queues_to_irqs(struct ql_adapter *qdev)
        int i = 0;
        struct intr_context *intr_context = &qdev->intr_context[0];
 
-       ql_enable_msix(qdev);
-
        if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
                /* Each rx_ring has it's
                 * own intr_context since we have separate
                 * vectors for each queue.
-                * This only true when MSI-X is enabled.
                 */
                for (i = 0; i < qdev->intr_count; i++, intr_context++) {
                        qdev->rx_ring[i].irq = i;
                        intr_context->intr = i;
                        intr_context->qdev = qdev;
+                       /* Set up this vector's bit-mask that indicates
+                        * which queues it services.
+                        */
+                       ql_set_irq_mask(qdev, intr_context);
                        /*
                         * We set up each vectors enable/disable/read bits so
                         * there's no bit/mask calculations in the critical path.
@@ -2904,20 +2928,21 @@ static void ql_resolve_queues_to_irqs(struct ql_adapter *qdev)
                            INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK |
                            INTR_EN_TYPE_READ | INTR_EN_IHD_MASK | INTR_EN_IHD |
                            i;
-
-                       if (i < qdev->rss_ring_count) {
-                               /*
-                                * Inbound queues handle unicast frames only.
+                       if (i == 0) {
+                               /* The first vector/queue handles
+                                * broadcast/multicast, fatal errors,
+                                * and firmware events.  This is in addition
+                                * to normal inbound NAPI processing.
                                 */
-                               intr_context->handler = qlge_msix_rx_isr;
+                               intr_context->handler = qlge_isr;
                                sprintf(intr_context->name, "%s-rx-%d",
                                        qdev->ndev->name, i);
                        } else {
                                /*
-                                * Outbound queue is for outbound completions only.
+                                * Inbound queues handle unicast frames only.
                                 */
-                               intr_context->handler = qlge_msix_tx_isr;
-                               sprintf(intr_context->name, "%s-tx-%d",
+                               intr_context->handler = qlge_msix_rx_isr;
+                               sprintf(intr_context->name, "%s-rx-%d",
                                        qdev->ndev->name, i);
                        }
                }
@@ -2944,9 +2969,17 @@ static void ql_resolve_queues_to_irqs(struct ql_adapter *qdev)
                 */
                intr_context->handler = qlge_isr;
                sprintf(intr_context->name, "%s-single_irq", qdev->ndev->name);
-               for (i = 0; i < qdev->rx_ring_count; i++)
-                       qdev->rx_ring[i].irq = 0;
+               /* Set up this vector's bit-mask that indicates
+                * which queues it services. In this case there is
+                * a single vector so it will service all RSS and
+                * TX completion rings.
+                */
+               ql_set_irq_mask(qdev, intr_context);
        }
+       /* Tell the TX completion rings which MSIx vector
+        * they will be using.
+        */
+       ql_set_tx_vect(qdev);
 }
 
 static void ql_free_irq(struct ql_adapter *qdev)
@@ -3106,14 +3139,14 @@ static int ql_route_initialize(struct ql_adapter *qdev)
 {
        int status = 0;
 
-       status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
+       /* Clear all the entries in the routing table. */
+       status = ql_clear_routing_entries(qdev);
        if (status)
                return status;
 
-       /* Clear all the entries in the routing table. */
-       status = ql_clear_routing_entries(qdev);
+       status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
        if (status)
-               goto exit;
+               return status;
 
        status = ql_set_routing_reg(qdev, RT_IDX_ALL_ERR_SLOT, RT_IDX_ERR, 1);
        if (status) {
@@ -3315,7 +3348,6 @@ static void ql_display_dev_info(struct net_device *ndev)
 static int ql_adapter_down(struct ql_adapter *qdev)
 {
        int i, status = 0;
-       struct rx_ring *rx_ring;
 
        ql_link_off(qdev);
 
@@ -3329,27 +3361,8 @@ static int ql_adapter_down(struct ql_adapter *qdev)
        cancel_delayed_work_sync(&qdev->mpi_idc_work);
        cancel_delayed_work_sync(&qdev->mpi_port_cfg_work);
 
-       /* The default queue at index 0 is always processed in
-        * a workqueue.
-        */
-       cancel_delayed_work_sync(&qdev->rx_ring[0].rx_work);
-
-       /* The rest of the rx_rings are processed in
-        * a workqueue only if it's a single interrupt
-        * environment (MSI/Legacy).
-        */
-       for (i = 1; i < qdev->rx_ring_count; i++) {
-               rx_ring = &qdev->rx_ring[i];
-               /* Only the RSS rings use NAPI on multi irq
-                * environment.  Outbound completion processing
-                * is done in interrupt context.
-                */
-               if (i <= qdev->rss_ring_count) {
-                       napi_disable(&rx_ring->napi);
-               } else {
-                       cancel_delayed_work_sync(&rx_ring->rx_work);
-               }
-       }
+       for (i = 0; i < qdev->rss_ring_count; i++)
+               napi_disable(&qdev->rx_ring[i].napi);
 
        clear_bit(QL_ADAPTER_UP, &qdev->flags);
 
@@ -3364,12 +3377,10 @@ static int ql_adapter_down(struct ql_adapter *qdev)
 
        ql_free_rx_buffers(qdev);
 
-       spin_lock(&qdev->hw_lock);
        status = ql_adapter_reset(qdev);
        if (status)
                QPRINTK(qdev, IFDOWN, ERR, "reset(func #%d) FAILED!\n",
                        qdev->func);
-       spin_unlock(&qdev->hw_lock);
        return status;
 }
 
@@ -3438,40 +3449,20 @@ static int ql_configure_rings(struct ql_adapter *qdev)
        int i;
        struct rx_ring *rx_ring;
        struct tx_ring *tx_ring;
-       int cpu_cnt = num_online_cpus();
-
-       /*
-        * For each processor present we allocate one
-        * rx_ring for outbound completions, and one
-        * rx_ring for inbound completions.  Plus there is
-        * always the one default queue.  For the CPU
-        * counts we end up with the following rx_rings:
-        * rx_ring count =
-        *  one default queue +
-        *  (CPU count * outbound completion rx_ring) +
-        *  (CPU count * inbound (RSS) completion rx_ring)
-        * To keep it simple we limit the total number of
-        * queues to < 32, so we truncate CPU to 8.
-        * This limitation can be removed when requested.
-        */
-
-       if (cpu_cnt > MAX_CPUS)
-               cpu_cnt = MAX_CPUS;
-
-       /*
-        * rx_ring[0] is always the default queue.
+       int cpu_cnt = min(MAX_CPUS, (int)num_online_cpus());
+
+       /* In a perfect world we have one RSS ring for each CPU
+        * and each has it's own vector.  To do that we ask for
+        * cpu_cnt vectors.  ql_enable_msix() will adjust the
+        * vector count to what we actually get.  We then
+        * allocate an RSS ring for each.
+        * Essentially, we are doing min(cpu_count, msix_vector_count).
         */
-       /* Allocate outbound completion ring for each CPU. */
+       qdev->intr_count = cpu_cnt;
+       ql_enable_msix(qdev);
+       /* Adjust the RSS ring count to the actual vector count. */
+       qdev->rss_ring_count = qdev->intr_count;
        qdev->tx_ring_count = cpu_cnt;
-       /* Allocate inbound completion (RSS) ring for each CPU. */
-       qdev->rss_ring_count = cpu_cnt;
-       /*
-        * qdev->rx_ring_count:
-        * Total number of rx_rings.  This includes the one
-        * default queue, a number of outbound completion
-        * handler rx_rings, and the number of inbound
-        * completion handler rx_rings.
-        */
        qdev->rx_ring_count = qdev->tx_ring_count + qdev->rss_ring_count;
 
        for (i = 0; i < qdev->tx_ring_count; i++) {
@@ -3485,9 +3476,9 @@ static int ql_configure_rings(struct ql_adapter *qdev)
 
                /*
                 * The completion queue ID for the tx rings start
-                * immediately after the default Q ID, which is zero.
+                * immediately after the rss rings.
                 */
-               tx_ring->cq_id = i + qdev->rss_ring_count;
+               tx_ring->cq_id = qdev->rss_ring_count + i;
        }
 
        for (i = 0; i < qdev->rx_ring_count; i++) {
@@ -3497,7 +3488,9 @@ static int ql_configure_rings(struct ql_adapter *qdev)
                rx_ring->cq_id = i;
                rx_ring->cpu = i % cpu_cnt;     /* CPU to run handler on. */
                if (i < qdev->rss_ring_count) {
-                       /* Inbound completions (RSS) queues */
+                       /*
+                        * Inbound (RSS) queues.
+                        */
                        rx_ring->cq_len = qdev->rx_ring_size;
                        rx_ring->cq_size =
                            rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb);
@@ -3589,7 +3582,6 @@ static void qlge_set_multicast_list(struct net_device *ndev)
        status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
        if (status)
                return;
-       spin_lock(&qdev->hw_lock);
        /*
         * Set or clear promiscuous mode if a
         * transition is taking place.
@@ -3666,7 +3658,6 @@ static void qlge_set_multicast_list(struct net_device *ndev)
                }
        }
 exit:
-       spin_unlock(&qdev->hw_lock);
        ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
 }
 
@@ -3686,10 +3677,8 @@ static int qlge_set_mac_address(struct net_device *ndev, void *p)
        status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
        if (status)
                return status;
-       spin_lock(&qdev->hw_lock);
        status = ql_set_mac_addr_reg(qdev, (u8 *) ndev->dev_addr,
                        MAC_ADDR_TYPE_CAM_MAC, qdev->func * MAX_CQ);
-       spin_unlock(&qdev->hw_lock);
        if (status)
                QPRINTK(qdev, HW, ERR, "Failed to load MAC address.\n");
        ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
@@ -3707,7 +3696,7 @@ static void ql_asic_reset_work(struct work_struct *work)
        struct ql_adapter *qdev =
            container_of(work, struct ql_adapter, asic_reset_work.work);
        int status;
-
+       rtnl_lock();
        status = ql_adapter_down(qdev);
        if (status)
                goto error;
@@ -3715,12 +3704,12 @@ static void ql_asic_reset_work(struct work_struct *work)
        status = ql_adapter_up(qdev);
        if (status)
                goto error;
-
+       rtnl_unlock();
        return;
 error:
        QPRINTK(qdev, IFUP, ALERT,
                "Driver up/down cycle failed, closing device\n");
-       rtnl_lock();
+
        set_bit(QL_ADAPTER_UP, &qdev->flags);
        dev_close(qdev->ndev);
        rtnl_unlock();
@@ -3813,10 +3802,7 @@ static void ql_release_all(struct pci_dev *pdev)
                destroy_workqueue(qdev->workqueue);
                qdev->workqueue = NULL;
        }
-       if (qdev->q_workqueue) {
-               destroy_workqueue(qdev->q_workqueue);
-               qdev->q_workqueue = NULL;
-       }
+
        if (qdev->reg_base)
                iounmap(qdev->reg_base);
        if (qdev->doorbell_area)
@@ -3839,11 +3825,14 @@ static int __devinit ql_init_device(struct pci_dev *pdev,
                return err;
        }
 
+       qdev->ndev = ndev;
+       qdev->pdev = pdev;
+       pci_set_drvdata(pdev, ndev);
        pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        if (pos <= 0) {
                dev_err(&pdev->dev, PFX "Cannot find PCI Express capability, "
                        "aborting.\n");
-               goto err_out;
+               return pos;
        } else {
                pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &val16);
                val16 &= ~PCI_EXP_DEVCTL_NOSNOOP_EN;
@@ -3856,7 +3845,7 @@ static int __devinit ql_init_device(struct pci_dev *pdev,
        err = pci_request_regions(pdev, DRV_NAME);
        if (err) {
                dev_err(&pdev->dev, "PCI region request failed.\n");
-               goto err_out;
+               return err;
        }
 
        pci_set_master(pdev);
@@ -3874,7 +3863,6 @@ static int __devinit ql_init_device(struct pci_dev *pdev,
                goto err_out;
        }
 
-       pci_set_drvdata(pdev, ndev);
        qdev->reg_base =
            ioremap_nocache(pci_resource_start(pdev, 1),
                            pci_resource_len(pdev, 1));
@@ -3894,8 +3882,6 @@ static int __devinit ql_init_device(struct pci_dev *pdev,
                goto err_out;
        }
 
-       qdev->ndev = ndev;
-       qdev->pdev = pdev;
        err = ql_get_board_info(qdev);
        if (err) {
                dev_err(&pdev->dev, "Register access failed.\n");
@@ -3929,15 +3915,12 @@ static int __devinit ql_init_device(struct pci_dev *pdev,
         * Set up the operating parameters.
         */
        qdev->rx_csum = 1;
-
-       qdev->q_workqueue = create_workqueue(ndev->name);
        qdev->workqueue = create_singlethread_workqueue(ndev->name);
        INIT_DELAYED_WORK(&qdev->asic_reset_work, ql_asic_reset_work);
        INIT_DELAYED_WORK(&qdev->mpi_reset_work, ql_mpi_reset_work);
        INIT_DELAYED_WORK(&qdev->mpi_work, ql_mpi_work);
        INIT_DELAYED_WORK(&qdev->mpi_port_cfg_work, ql_mpi_port_cfg_work);
        INIT_DELAYED_WORK(&qdev->mpi_idc_work, ql_mpi_idc_work);
-       mutex_init(&qdev->mpi_mutex);
        init_completion(&qdev->ide_completion);
 
        if (!cards_found) {