SAFE public projects git trees. - safe/jmp/linux-2.6/blob - drivers/net/cxgb3/sge.c

   1 /*
   2  * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 #include <linux/skbuff.h>
  33 #include <linux/netdevice.h>
  34 #include <linux/etherdevice.h>
  35 #include <linux/if_vlan.h>
  36 #include <linux/ip.h>
  37 #include <linux/tcp.h>
  38 #include <linux/dma-mapping.h>
  39 #include "common.h"
  40 #include "regs.h"
  41 #include "sge_defs.h"
  42 #include "t3_cpl.h"
  43 #include "firmware_exports.h"
  44
/* NOTE(review): USE_GTS is not referenced in this part of the file - confirm its meaning at the use site. */
  45 #define USE_GTS 0
  46
/* NOTE(review): presumably the size of small Rx buffers in bytes - confirm where the free lists are sized. */
  47 #define SGE_RX_SM_BUF_SIZE 1536
  48
/* Rx packets of at most this many bytes are copied into a new sk_buff and their buffer is recycled. */
  49 #define SGE_RX_COPY_THRES  256
/* Number of leading bytes copied into the sk_buff linear area when the rest is attached as a page fragment. */
  50 #define SGE_RX_PULL_LEN    128
  51
  52 /*
  53  * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  54  * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  55  * directly.
  56  */
  57 #define FL0_PG_CHUNK_SIZE  2048
  58
  59 #define SGE_RX_DROP_THRES 16
  60
  61 /*
  62  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  63  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  64  */
  65 #define TX_RECLAIM_PERIOD (HZ / 4)
  66
  67 /* WR size in bytes */
  68 #define WR_LEN (WR_FLITS * 8)
  69
  70 /*
  71  * Types of Tx queues in each queue set.  Order here matters, do not change.
 * The values are used as indices into a queue set's txq[] array.
  72  */
  73 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  74
  75 /* Values for sge_txq.flags; each value occupies its own bit */
  76 enum {
  77         TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  78         TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  79 };
  80
/* Tx descriptor as stored in a Tx ring: TX_DESC_FLITS big-endian 64-bit flits. */
  81 struct tx_desc {
  82         __be64 flit[TX_DESC_FLITS];
  83 };
  84
/* HW Rx (free-list) descriptor; all fields are big endian. */
  85 struct rx_desc {
  86         __be32 addr_lo;         /* low 32 bits of the buffer's DMA address */
  87         __be32 len_gen;         /* written with V_FLD_GEN1(gen) when a buffer is posted */
  88         __be32 gen2;            /* written with V_FLD_GEN2(gen) when a buffer is posted */
  89         __be32 addr_hi;         /* high 32 bits of the buffer's DMA address */
  90 };
  91
  92 struct tx_sw_desc {             /* SW state per Tx descriptor */
  93         struct sk_buff *skb;    /* packet using this descriptor; non-NULL means an SGL is present */
  94         u8 eop;       /* set if last descriptor for packet */
  95         u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
  96         u8 fragidx;   /* first page fragment associated with descriptor */
  97         s8 sflit;     /* start flit of first SGL entry in descriptor */
  98 };
  99
 100 struct rx_sw_desc {                /* SW state per Rx descriptor */
 101         union {                 /* which member is valid follows the free list's use_pages flag */
 102                 struct sk_buff *skb;            /* buffer is an sk_buff */
 103                 struct fl_pg_chunk pg_chunk;    /* buffer is a chunk of a page */
 104         };
 105         DECLARE_PCI_UNMAP_ADDR(dma_addr);       /* DMA address of the buffer, used to unmap/sync it */
 106 };
 107
 108 struct rsp_desc {               /* response queue descriptor */
 109         struct rss_header rss_hdr;
 110         __be32 flags;
 111         __be32 len_cq;
 112         u8 imm_data[47];        /* immediate data; get_imm_packet() copies IMMED_PKT_SIZE bytes from here */
 113         u8 intr_gen;
 114 };
 115
 116 /*
 117  * Holds unmapping information for Tx packets that need deferred unmapping.
 118  * This structure lives at skb->head and must be allocated by callers.
 119  */
 120 struct deferred_unmap_info {
 121         struct pci_dev *pdev;   /* NOTE(review): presumably the device the buffers were mapped for - confirm */
 122         dma_addr_t addr[MAX_SKB_FRAGS + 1];     /* one slot per page fragment plus one more (presumably the linear part) */
 123 };
 124
 125 /*
 126  * Maps a number of flits to the number of Tx descriptors that can hold them.
 127  * The formula is
 128  *
 129  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 130  *
 131  * HW allows up to 4 descriptors to be combined into a WR.
 *
 * The table is indexed by the flit count (entry 0 is for 0 flits), and its
 * contents depend on SGE_NUM_GENBITS.  flits_to_desc() is the accessor and
 * BUG()s on an index beyond the end of the table.
 132  */
 133 static u8 flit_desc_map[] = {
 134         0,
 135 #if SGE_NUM_GENBITS == 1
 136         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 137         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 138         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 139         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 140 #elif SGE_NUM_GENBITS == 2
 141         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 142         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 143         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 144         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 145 #else
 146 # error "SGE_NUM_GENBITS must be 1 or 2"
 147 #endif
 148 };
 149
 150 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 151 {
 152         return container_of(q, struct sge_qset, fl[qidx]);
 153 }
 154
 155 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 156 {
 157         return container_of(q, struct sge_qset, rspq);
 158 }
 159
 160 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 161 {
 162         return container_of(q, struct sge_qset, txq[qidx]);
 163 }
 164
 165 /**
 166  *      refill_rspq - replenish an SGE response queue
 167  *      @adapter: the adapter
 168  *      @q: the response queue to replenish
 169  *      @credits: how many new responses to make available
 170  *
 171  *      Replenishes a response queue by making the supplied number of responses
 172  *      available to HW.  This is done with a single write to the
 *      A_SG_RSPQ_CREDIT_RETURN register carrying the queue's context id and
 *      the credit count.
 173  */
 174 static inline void refill_rspq(struct adapter *adapter,
 175                                const struct sge_rspq *q, unsigned int credits)
 176 {
 177         rmb();  /* read barrier issued before the credits are returned */
 178         t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 179                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 180 }
 181
 182 /**
 183  *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 184  *
 185  *      Returns true if the platfrom needs sk_buff unmapping.  The compiler
 186  *      optimizes away unecessary code if this returns true.
 187  */
 188 static inline int need_skb_unmap(void)
 189 {
 190         /*
 191          * This structure is used to tell if the platfrom needs buffer
 192          * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 193          */
 194         struct dummy {
 195                 DECLARE_PCI_UNMAP_ADDR(addr);
 196         };
 197
 198         return sizeof(struct dummy) != 0;
 199 }
 200
 201 /**
 202  *      unmap_skb - unmap a packet main body and its page fragments
 203  *      @skb: the packet
 204  *      @q: the Tx queue containing Tx descriptors for the packet
 205  *      @cidx: index of Tx descriptor
 206  *      @pdev: the PCI device
 207  *
 208  *      Unmap the main body of an sk_buff and its page fragments, if any.
 209  *      Because of the fairly complicated structure of our SGLs and the desire
 210  *      to conserve space for metadata, the information necessary to unmap an
 211  *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 212  *      descriptors (the physical addresses of the various data buffers), and
 213  *      the SW descriptor state (assorted indices).  The send functions
 214  *      initialize the indices for the first packet descriptor so we can unmap
 215  *      the buffers held in the first Tx descriptor here, and we have enough
 216  *      information at this point to set the state for the next Tx descriptor.
 217  *
 218  *      Note that it is possible to clean up the first descriptor of a packet
 219  *      before the send routines have written the next descriptors, but this
 220  *      race does not cause any problem.  We just end up writing the unmapping
 221  *      info for the descriptor first.
 *
 *      Only the buffers referenced from descriptor @cidx are unmapped by one
 *      call; if the SGL continues, the SW state of the following descriptor
 *      (wrapping at the end of the ring) is primed for the next call.
 222  */
 223 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 224                              unsigned int cidx, struct pci_dev *pdev)
 225 {
 226         const struct sg_ent *sgp;
 227         struct tx_sw_desc *d = &q->sdesc[cidx];
 228         int nfrags, frag_idx, curflit, j = d->addr_idx;
 229
 /* locate the first SGL entry of this descriptor inside the HW descriptor */
 230         sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 231         frag_idx = d->fragidx;
 232
 /* first descriptor of the packet: addr[0] holds the linear part, if there is one */
 233         if (frag_idx == 0 && skb_headlen(skb)) {
 234                 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 235                                  skb_headlen(skb), PCI_DMA_TODEVICE);
 236                 j = 1;  /* page fragments start in the second address slot */
 237         }
 238
 239         curflit = d->sflit + 1 + j;     /* flit position of the next address to process */
 240         nfrags = skb_shinfo(skb)->nr_frags;
 241
 /* unmap page fragments until they run out or we reach the end of this descriptor */
 242         while (frag_idx < nfrags && curflit < WR_FLITS) {
 243                 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 244                                skb_shinfo(skb)->frags[frag_idx].size,
 245                                PCI_DMA_TODEVICE);
 246                 j ^= 1;         /* alternate between the two address slots of an sg_ent */
 247                 if (j == 0) {   /* both slots consumed: advance to the next sg_ent ... */
 248                         sgp++;
 249                         curflit++;      /* ... which costs one additional flit */
 250                 }
 251                 curflit++;
 252                 frag_idx++;
 253         }
 254
 255         if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 256                 d = cidx + 1 == q->size ? q->sdesc : d + 1;     /* next SW descriptor, wrapping around */
 257                 d->fragidx = frag_idx;
 258                 d->addr_idx = j;
 259                 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 260         }
 261 }
 262
 263 /**
 264  *      free_tx_desc - reclaims Tx descriptors and their buffers
 265  *      @adapter: the adapter
 266  *      @q: the Tx queue to reclaim descriptors from
 267  *      @n: the number of descriptors to reclaim
 268  *
 269  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 270  *      Tx buffers.  Called with the Tx queue lock held.
 271  */
 272 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 273                          unsigned int n)
 274 {
 275         struct tx_sw_desc *d;
 276         struct pci_dev *pdev = adapter->pdev;
 277         unsigned int cidx = q->cidx;
 278
 279         const int need_unmap = need_skb_unmap() &&
 280                                q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 281
 282         d = &q->sdesc[cidx];
 283         while (n--) {
 284                 if (d->skb) {   /* an SGL is present */
 285                         if (need_unmap)
 286                                 unmap_skb(d->skb, q, cidx, pdev);
 287                         if (d->eop)
 288                                 kfree_skb(d->skb);
 289                 }
 290                 ++d;
 291                 if (++cidx == q->size) {
 292                         cidx = 0;
 293                         d = q->sdesc;
 294                 }
 295         }
 296         q->cidx = cidx;
 297 }
 298
 299 /**
 300  *      reclaim_completed_tx - reclaims completed Tx descriptors
 301  *      @adapter: the adapter
 302  *      @q: the Tx queue to reclaim completed descriptors from
 303  *
 304  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 305  *      and frees the associated buffers if possible.  Called with the Tx
 306  *      queue's lock held.
 307  */
 308 static inline void reclaim_completed_tx(struct adapter *adapter,
 309                                         struct sge_txq *q)
 310 {
 311         unsigned int reclaim = q->processed - q->cleaned;
 312
 313         if (reclaim) {
 314                 free_tx_desc(adapter, q, reclaim);
 315                 q->cleaned += reclaim;
 316                 q->in_use -= reclaim;
 317         }
 318 }
 319
 320 /**
 321  *      should_restart_tx - are there enough resources to restart a Tx queue?
 322  *      @q: the Tx queue
 323  *
 324  *      Checks if there are enough descriptors to restart a suspended Tx queue.
 325  */
 326 static inline int should_restart_tx(const struct sge_txq *q)
 327 {
 328         unsigned int r = q->processed - q->cleaned;
 329
 330         return q->in_use - r < (q->size >> 1);
 331 }
 332
 333 /**
 334  *      free_rx_bufs - free the Rx buffers on an SGE free list
 335  *      @pdev: the PCI device associated with the adapter
 336  *      @rxq: the SGE free list to clean up
 337  *
 338  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 339  *      this queue should be stopped before calling this function.
 340  */
 341 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 342 {
 343         unsigned int cidx = q->cidx;
 344
 345         while (q->credits--) {
 346                 struct rx_sw_desc *d = &q->sdesc[cidx];
 347
 348                 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 349                                  q->buf_size, PCI_DMA_FROMDEVICE);
 350                 if (q->use_pages) {
 351                         put_page(d->pg_chunk.page);
 352                         d->pg_chunk.page = NULL;
 353                 } else {
 354                         kfree_skb(d->skb);
 355                         d->skb = NULL;
 356                 }
 357                 if (++cidx == q->size)
 358                         cidx = 0;
 359         }
 360
 361         if (q->pg_chunk.page) {
 362                 __free_page(q->pg_chunk.page);
 363                 q->pg_chunk.page = NULL;
 364         }
 365 }
 366
 367 /**
 368  *      add_one_rx_buf - add a packet buffer to a free-buffer list
 369  *      @va:  buffer start VA
 370  *      @len: the buffer length
 371  *      @d: the HW Rx descriptor to write
 372  *      @sd: the SW Rx descriptor to write
 373  *      @gen: the generation bit value
 374  *      @pdev: the PCI device associated with the adapter
 375  *
 376  *      Add a buffer of the given length to the supplied HW and SW Rx
 377  *      descriptors.
 378  */
 379 static inline int add_one_rx_buf(void *va, unsigned int len,
 380                                  struct rx_desc *d, struct rx_sw_desc *sd,
 381                                  unsigned int gen, struct pci_dev *pdev)
 382 {
 383         dma_addr_t mapping;
 384
 385         mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 386         if (unlikely(pci_dma_mapping_error(mapping)))
 387                 return -ENOMEM;
 388
 389         pci_unmap_addr_set(sd, dma_addr, mapping);
 390
 391         d->addr_lo = cpu_to_be32(mapping);
 392         d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 393         wmb();
 394         d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 395         d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 396         return 0;
 397 }
 398
 399 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
 400 {
 401         if (!q->pg_chunk.page) {
 402                 q->pg_chunk.page = alloc_page(gfp);
 403                 if (unlikely(!q->pg_chunk.page))
 404                         return -ENOMEM;
 405                 q->pg_chunk.va = page_address(q->pg_chunk.page);
 406                 q->pg_chunk.offset = 0;
 407         }
 408         sd->pg_chunk = q->pg_chunk;
 409
 410         q->pg_chunk.offset += q->buf_size;
 411         if (q->pg_chunk.offset == PAGE_SIZE)
 412                 q->pg_chunk.page = NULL;
 413         else {
 414                 q->pg_chunk.va += q->buf_size;
 415                 get_page(q->pg_chunk.page);
 416         }
 417         return 0;
 418 }
 419
 420 /**
 421  *      refill_fl - refill an SGE free-buffer list
 422  *      @adapter: the adapter
 423  *      @q: the free-list to refill
 424  *      @n: the number of new buffers to allocate
 425  *      @gfp: the gfp flags for allocating new buffers
 426  *
 427  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 428  *      allocated with the supplied gfp flags.  The caller must assure that
 429  *      @n does not exceed the queue's capacity.
 430  */
 431 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 432 {
 433         void *buf_start;
 434         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 435         struct rx_desc *d = &q->desc[q->pidx];
 436         unsigned int count = 0;
 437
 438         while (n--) {
 439                 int err;
 440
 441                 if (q->use_pages) {
 442                         if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
 443 nomem:                          q->alloc_failed++;
 444                                 break;
 445                         }
 446                         buf_start = sd->pg_chunk.va;
 447                 } else {
 448                         struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 449
 450                         if (!skb)
 451                                 goto nomem;
 452
 453                         sd->skb = skb;
 454                         buf_start = skb->data;
 455                 }
 456
 457                 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
 458                                      adap->pdev);
 459                 if (unlikely(err)) {
 460                         if (!q->use_pages) {
 461                                 kfree_skb(sd->skb);
 462                                 sd->skb = NULL;
 463                         }
 464                         break;
 465                 }
 466
 467                 d++;
 468                 sd++;
 469                 if (++q->pidx == q->size) {
 470                         q->pidx = 0;
 471                         q->gen ^= 1;
 472                         sd = q->sdesc;
 473                         d = q->desc;
 474                 }
 475                 q->credits++;
 476                 count++;
 477         }
 478         wmb();
 479         if (likely(count))
 480                 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 481
 482         return count;
 483 }
 484
 485 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 486 {
 487         refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
 488 }
 489
 490 /**
 491  *      recycle_rx_buf - recycle a receive buffer
 492  *      @adapter: the adapter
 493  *      @q: the SGE free list
 494  *      @idx: index of buffer to recycle
 495  *
 496  *      Recycles the specified buffer on the given free list by adding it at
 497  *      the next available slot on the list.
 498  */
 499 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 500                            unsigned int idx)
 501 {
 502         struct rx_desc *from = &q->desc[idx];
 503         struct rx_desc *to = &q->desc[q->pidx];
 504
 505         q->sdesc[q->pidx] = q->sdesc[idx];
 506         to->addr_lo = from->addr_lo;    /* already big endian */
 507         to->addr_hi = from->addr_hi;    /* likewise */
 508         wmb();
 509         to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 510         to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 511         q->credits++;
 512
 513         if (++q->pidx == q->size) {
 514                 q->pidx = 0;
 515                 q->gen ^= 1;
 516         }
 517         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 518 }
 519
 520 /**
 521  *      alloc_ring - allocate resources for an SGE descriptor ring
 522  *      @pdev: the PCI device
 523  *      @nelem: the number of descriptors
 524  *      @elem_size: the size of each descriptor
 525  *      @sw_size: the size of the SW state associated with each ring element
 526  *      @phys: the physical address of the allocated ring
 527  *      @metadata: address of the array holding the SW state for the ring
 528  *
 529  *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 530  *      free buffer lists, or response queues.  Each SGE ring requires
 531  *      space for its HW descriptors plus, optionally, space for the SW state
 532  *      associated with each HW entry (the metadata).  The function returns
 533  *      three values: the virtual address for the HW ring (the return value
 534  *      of the function), the physical address of the HW ring, and the address
 535  *      of the SW ring.
 536  */
 537 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 538                         size_t sw_size, dma_addr_t * phys, void *metadata)
 539 {
 540         size_t len = nelem * elem_size;
 541         void *s = NULL;
 542         void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 543
 544         if (!p)
 545                 return NULL;
 546         if (sw_size) {
 547                 s = kcalloc(nelem, sw_size, GFP_KERNEL);
 548
 549                 if (!s) {
 550                         dma_free_coherent(&pdev->dev, len, p, *phys);
 551                         return NULL;
 552                 }
 553         }
 554         if (metadata)
 555                 *(void **)metadata = s;
 556         memset(p, 0, len);
 557         return p;
 558 }
 559
 560 /**
 561  *      t3_reset_qset - reset a sge qset
 562  *      @q: the queue set
 563  *
 564  *      Reset the qset structure.
 565  *      the NAPI structure is preserved in the event of
 566  *      the qset's reincarnation, for example during EEH recovery.
 567  */
 568 static void t3_reset_qset(struct sge_qset *q)
 569 {
 570         if (q->adap &&
 571             !(q->adap->flags & NAPI_INIT)) {
 572                 memset(q, 0, sizeof(*q));
 573                 return;
 574         }
 575
 576         q->adap = NULL;
 577         memset(&q->rspq, 0, sizeof(q->rspq));
 578         memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 579         memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 580         q->txq_stopped = 0;
 581         memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
 582 }
 583
 584
 585 /**
 586  *      free_qset - free the resources of an SGE queue set
 587  *      @adapter: the adapter owning the queue set
 588  *      @q: the queue set
 589  *
 590  *      Release the HW and SW resources associated with an SGE queue set, such
 591  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 592  *      queue set must be quiesced prior to calling this.
 593  */
 594 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 595 {
 596         int i;
 597         struct pci_dev *pdev = adapter->pdev;
 598
 599         if (q->tx_reclaim_timer.function)
 600                 del_timer_sync(&q->tx_reclaim_timer);
 601
 602         for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 603                 if (q->fl[i].desc) {
 604                         spin_lock_irq(&adapter->sge.reg_lock);
 605                         t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 606                         spin_unlock_irq(&adapter->sge.reg_lock);
 607                         free_rx_bufs(pdev, &q->fl[i]);
 608                         kfree(q->fl[i].sdesc);
 609                         dma_free_coherent(&pdev->dev,
 610                                           q->fl[i].size *
 611                                           sizeof(struct rx_desc), q->fl[i].desc,
 612                                           q->fl[i].phys_addr);
 613                 }
 614
 615         for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 616                 if (q->txq[i].desc) {
 617                         spin_lock_irq(&adapter->sge.reg_lock);
 618                         t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 619                         spin_unlock_irq(&adapter->sge.reg_lock);
 620                         if (q->txq[i].sdesc) {
 621                                 free_tx_desc(adapter, &q->txq[i],
 622                                              q->txq[i].in_use);
 623                                 kfree(q->txq[i].sdesc);
 624                         }
 625                         dma_free_coherent(&pdev->dev,
 626                                           q->txq[i].size *
 627                                           sizeof(struct tx_desc),
 628                                           q->txq[i].desc, q->txq[i].phys_addr);
 629                         __skb_queue_purge(&q->txq[i].sendq);
 630                 }
 631
 632         if (q->rspq.desc) {
 633                 spin_lock_irq(&adapter->sge.reg_lock);
 634                 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 635                 spin_unlock_irq(&adapter->sge.reg_lock);
 636                 dma_free_coherent(&pdev->dev,
 637                                   q->rspq.size * sizeof(struct rsp_desc),
 638                                   q->rspq.desc, q->rspq.phys_addr);
 639         }
 640
 641         t3_reset_qset(q);
 642 }
 643
 644 /**
 645  *      init_qset_cntxt - initialize an SGE queue set context info
 646  *      @qs: the queue set
 647  *      @id: the queue set id
 648  *
 649  *      Initializes the TIDs and context ids for the queues of a queue set.
 650  */
 651 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 652 {
 653         qs->rspq.cntxt_id = id;
 654         qs->fl[0].cntxt_id = 2 * id;
 655         qs->fl[1].cntxt_id = 2 * id + 1;
 656         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 657         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 658         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 659         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 660         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 661 }
 662
 663 /**
 664  *      sgl_len - calculates the size of an SGL of the given capacity
 665  *      @n: the number of SGL entries
 666  *
 667  *      Calculates the number of flits needed for a scatter/gather list that
 668  *      can hold the given number of entries.
 669  */
 670 static inline unsigned int sgl_len(unsigned int n)
 671 {
 672         /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 673         return (3 * n) / 2 + (n & 1);
 674 }
 675
 676 /**
 677  *      flits_to_desc - returns the num of Tx descriptors for the given flits
 678  *      @n: the number of flits
 679  *
 680  *      Calculates the number of Tx descriptors needed for the supplied number
 681  *      of flits.
 682  */
 683 static inline unsigned int flits_to_desc(unsigned int n)
 684 {
 685         BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 686         return flit_desc_map[n];
 687 }
 688
 689 /**
 690  *      get_packet - return the next ingress packet buffer from a free list
 691  *      @adap: the adapter that received the packet
 692  *      @fl: the SGE free list holding the packet
 693  *      @len: the packet length including any SGE padding
 694  *      @drop_thres: # of remaining buffers before we start dropping packets
 695  *
 696  *      Get the next packet from a free list and complete setup of the
 697  *      sk_buff.  If the packet is small we make a copy and recycle the
 698  *      original buffer, otherwise we use the original buffer itself.  If a
 699  *      positive drop threshold is supplied packets are dropped and their
 700  *      buffers recycled if (a) the number of remaining buffers is under the
 701  *      threshold and the packet is too big to copy, or (b) the packet should
 702  *      be copied but there is no memory for the copy.
 *
 *      Returns the sk_buff holding the packet, or NULL if the packet was
 *      dropped.  The free list's consumer index is not advanced here.
 703  */
 704 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 705                                   unsigned int len, unsigned int drop_thres)
 706 {
 707         struct sk_buff *skb = NULL;
 708         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 709
 710         prefetch(sd->skb->data);
 711         fl->credits--;  /* buffer consumed; recycle_rx_buf() re-adds the credit if it is recycled */
 712
 713         if (len <= SGE_RX_COPY_THRES) {
 714                 skb = alloc_skb(len, GFP_ATOMIC);
 715                 if (likely(skb != NULL)) {
 716                         __skb_put(skb, len);
 /* the buffer stays mapped: sync it for the CPU around the copy only */
 717                         pci_dma_sync_single_for_cpu(adap->pdev,
 718                                             pci_unmap_addr(sd, dma_addr), len,
 719                                             PCI_DMA_FROMDEVICE);
 720                         memcpy(skb->data, sd->skb->data, len);
 721                         pci_dma_sync_single_for_device(adap->pdev,
 722                                             pci_unmap_addr(sd, dma_addr), len,
 723                                             PCI_DMA_FROMDEVICE);
 724                 } else if (!drop_thres)
 725                         goto use_orig_buf;      /* no memory for a copy and dropping is disabled */
 726 recycle:
 /* skb is either the copy or NULL (packet dropped) at this point */
 727                 recycle_rx_buf(adap, fl, fl->cidx);
 728                 return skb;
 729         }
 730
 731         if (unlikely(fl->credits < drop_thres))
 732                 goto recycle;   /* running low on buffers: drop, skb is still NULL */
 733
 734 use_orig_buf:
 /* hand the original buffer up the stack and replace it on the free list */
 735         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 736                          fl->buf_size, PCI_DMA_FROMDEVICE);
 737         skb = sd->skb;
 738         skb_put(skb, len);
 739         __refill_fl(adap, fl);
 740         return skb;
 741 }
 742
 743 /**
 744  *      get_packet_pg - return the next ingress packet buffer from a free list
 745  *      @adap: the adapter that received the packet
 746  *      @fl: the SGE free list holding the packet
 747  *      @len: the packet length including any SGE padding
 748  *      @drop_thres: # of remaining buffers before we start dropping packets
 749  *
 750  *      Get the next packet from a free list populated with page chunks.
 751  *      If the packet is small we make a copy and recycle the original buffer,
 752  *      otherwise we attach the original buffer as a page fragment to a fresh
 753  *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 754  *      and their buffers recycled if (a) the number of remaining buffers is
 755  *      under the threshold and the packet is too big to copy, or (b) there's
 756  *      no system memory.
 757  *
 758  *      Note: this function is similar to @get_packet but deals with Rx buffers
 759  *      that are page chunks rather than sk_buffs.
 *
 *      Returns the new sk_buff, or NULL.  When NULL is returned because an
 *      sk_buff could not be allocated and @drop_thres is 0, the buffer has
 *      been neither consumed nor recycled and the credit count is unchanged.
 760  */
 761 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 762                                      unsigned int len, unsigned int drop_thres)
 763 {
 764         struct sk_buff *skb = NULL;
 765         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 766
 767         if (len <= SGE_RX_COPY_THRES) {
 768                 skb = alloc_skb(len, GFP_ATOMIC);
 769                 if (likely(skb != NULL)) {
 770                         __skb_put(skb, len);
 /* the chunk stays mapped: sync it for the CPU around the copy only */
 771                         pci_dma_sync_single_for_cpu(adap->pdev,
 772                                             pci_unmap_addr(sd, dma_addr), len,
 773                                             PCI_DMA_FROMDEVICE);
 774                         memcpy(skb->data, sd->pg_chunk.va, len);
 775                         pci_dma_sync_single_for_device(adap->pdev,
 776                                             pci_unmap_addr(sd, dma_addr), len,
 777                                             PCI_DMA_FROMDEVICE);
 778                 } else if (!drop_thres)
 779                         return NULL;    /* free list left untouched */
 780 recycle:
 /* skb is either the copy or NULL (packet dropped); the decrement is undone by recycle_rx_buf() */
 781                 fl->credits--;
 782                 recycle_rx_buf(adap, fl, fl->cidx);
 783                 return skb;
 784         }
 785
 /* credits has not been decremented yet here, hence <= rather than < */
 786         if (unlikely(fl->credits <= drop_thres))
 787                 goto recycle;
 788
 789         skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 790         if (unlikely(!skb)) {
 791                 if (!drop_thres)
 792                         return NULL;    /* free list left untouched */
 793                 goto recycle;
 794         }
 795
 796         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 797                          fl->buf_size, PCI_DMA_FROMDEVICE);
 /* copy the first SGE_RX_PULL_LEN bytes into the linear area ... */
 798         __skb_put(skb, SGE_RX_PULL_LEN);
 799         memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 /* ... and attach the remainder of the chunk as page fragment 0 */
 800         skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
 801                            sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 802                            len - SGE_RX_PULL_LEN);
 803         skb->len = len;
 804         skb->data_len = len - SGE_RX_PULL_LEN;
 805         skb->truesize += skb->data_len;
 806
 807         fl->credits--;
 808         /*
 809          * We do not refill FLs here, we let the caller do it to overlap a
 810          * prefetch.
 811          */
 812         return skb;
 813 }
 814
 815 /**
 816  *      get_imm_packet - return the next ingress packet buffer from a response
 817  *      @resp: the response descriptor containing the packet data
 818  *
 819  *      Return a packet containing the immediate data of the given response.
 820  */
 821 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 822 {
 823         struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 824
 825         if (skb) {
 826                 __skb_put(skb, IMMED_PKT_SIZE);
 827                 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 828         }
 829         return skb;
 830 }
 831
 832 /**
 833  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 834  *      @skb: the packet
 835  *
 836  *      Returns the number of Tx descriptors needed for the given Ethernet
 837  *      packet.  Ethernet packets require addition of WR and CPL headers.
 838  */
 839 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 840 {
 841         unsigned int flits;
 842
 843         if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 844                 return 1;
 845
 846         flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 847         if (skb_shinfo(skb)->gso_size)
 848                 flits++;
 849         return flits_to_desc(flits);
 850 }
 851
 852 /**
 853  *      make_sgl - populate a scatter/gather list for a packet
 854  *      @skb: the packet
 855  *      @sgp: the SGL to populate
 856  *      @start: start address of skb main body data to include in the SGL
 857  *      @len: length of skb main body data to include in the SGL
 858  *      @pdev: the PCI device
 859  *
 860  *      Generates a scatter/gather list for the buffers that make up a packet
 861  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 862  *      appropriately.
 863  */
 864 static inline unsigned int make_sgl(const struct sk_buff *skb,
 865                                     struct sg_ent *sgp, unsigned char *start,
 866                                     unsigned int len, struct pci_dev *pdev)
 867 {
 868         dma_addr_t mapping;
 869         unsigned int i, j = 0, nfrags;
 870
 871         if (len) {
 872                 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 873                 sgp->len[0] = cpu_to_be32(len);
 874                 sgp->addr[0] = cpu_to_be64(mapping);
 875                 j = 1;
 876         }
 877
 878         nfrags = skb_shinfo(skb)->nr_frags;
 879         for (i = 0; i < nfrags; i++) {
 880                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 881
 882                 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 883                                        frag->size, PCI_DMA_TODEVICE);
 884                 sgp->len[j] = cpu_to_be32(frag->size);
 885                 sgp->addr[j] = cpu_to_be64(mapping);
 886                 j ^= 1;
 887                 if (j == 0)
 888                         ++sgp;
 889         }
 890         if (j)
 891                 sgp->len[j] = 0;
 892         return ((nfrags + (len != 0)) * 3) / 2 + j;
 893 }
 894
 895 /**
 896  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 897  *      @adap: the adapter
 898  *      @q: the Tx queue
 899  *
 900  *      Ring the doorbel if a Tx queue is asleep.  There is a natural race,
 901  *      where the HW is going to sleep just after we checked, however,
 902  *      then the interrupt handler will detect the outstanding TX packet
 903  *      and ring the doorbell for us.
 904  *
 905  *      When GTS is disabled we unconditionally ring the doorbell.
 906  */
 907 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
 908 {
 909 #if USE_GTS
 910         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 911         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 912                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 913                 t3_write_reg(adap, A_SG_KDOORBELL,
 914                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 915         }
 916 #else
 917         wmb();                  /* write descriptors before telling HW */
 918         t3_write_reg(adap, A_SG_KDOORBELL,
 919                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 920 #endif
 921 }
 922
 923 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
 924 {
 925 #if SGE_NUM_GENBITS == 2
 926         d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
 927 #endif
 928 }
 929
 930 /**
 931  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 932  *      @ndesc: number of Tx descriptors spanned by the SGL
 933  *      @skb: the packet corresponding to the WR
 934  *      @d: first Tx descriptor to be written
 935  *      @pidx: index of above descriptors
 936  *      @q: the SGE Tx queue
 937  *      @sgl: the SGL
 938  *      @flits: number of flits to the start of the SGL in the first descriptor
 939  *      @sgl_flits: the SGL size in flits
 940  *      @gen: the Tx descriptor generation
 941  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 942  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 943  *
 944  *      Write a work request header and an associated SGL.  If the SGL is
 945  *      small enough to fit into one Tx descriptor it has already been written
 946  *      and we just need to write the WR header.  Otherwise we distribute the
 947  *      SGL across the number of descriptors it spans.
 948  */
 949 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
 950                              struct tx_desc *d, unsigned int pidx,
 951                              const struct sge_txq *q,
 952                              const struct sg_ent *sgl,
 953                              unsigned int flits, unsigned int sgl_flits,
 954                              unsigned int gen, __be32 wr_hi,
 955                              __be32 wr_lo)
 956 {
 957         struct work_request_hdr *wrp = (struct work_request_hdr *)d;
 958         struct tx_sw_desc *sd = &q->sdesc[pidx];
 959
 960         sd->skb = skb;
 961         if (need_skb_unmap()) {
 962                 sd->fragidx = 0;
 963                 sd->addr_idx = 0;
 964                 sd->sflit = flits;
 965         }
 966
 967         if (likely(ndesc == 1)) {
 968                 sd->eop = 1;
 969                 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 970                                    V_WR_SGLSFLT(flits)) | wr_hi;
 971                 wmb();
 972                 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
 973                                    V_WR_GEN(gen)) | wr_lo;
 974                 wr_gen2(d, gen);
 975         } else {
 976                 unsigned int ogen = gen;
 977                 const u64 *fp = (const u64 *)sgl;
 978                 struct work_request_hdr *wp = wrp;
 979
 980                 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 981                                    V_WR_SGLSFLT(flits)) | wr_hi;
 982
 983                 while (sgl_flits) {
 984                         unsigned int avail = WR_FLITS - flits;
 985
 986                         if (avail > sgl_flits)
 987                                 avail = sgl_flits;
 988                         memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
 989                         sgl_flits -= avail;
 990                         ndesc--;
 991                         if (!sgl_flits)
 992                                 break;
 993
 994                         fp += avail;
 995                         d++;
 996                         sd->eop = 0;
 997                         sd++;
 998                         if (++pidx == q->size) {
 999                                 pidx = 0;
1000                                 gen ^= 1;
1001                                 d = q->desc;
1002                                 sd = q->sdesc;
1003                         }
1004
1005                         sd->skb = skb;
1006                         wrp = (struct work_request_hdr *)d;
1007                         wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1008                                            V_WR_SGLSFLT(1)) | wr_hi;
1009                         wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1010                                                         sgl_flits + 1)) |
1011                                            V_WR_GEN(gen)) | wr_lo;
1012                         wr_gen2(d, gen);
1013                         flits = 1;
1014                 }
1015                 sd->eop = 1;
1016                 wrp->wr_hi |= htonl(F_WR_EOP);
1017                 wmb();
1018                 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1019                 wr_gen2((struct tx_desc *)wp, ogen);
1020                 WARN_ON(ndesc != 0);
1021         }
1022 }
1023
1024 /**
1025  *      write_tx_pkt_wr - write a TX_PKT work request
1026  *      @adap: the adapter
1027  *      @skb: the packet to send
1028  *      @pi: the egress interface
1029  *      @pidx: index of the first Tx descriptor to write
1030  *      @gen: the generation value to use
1031  *      @q: the Tx queue
1032  *      @ndesc: number of descriptors the packet will occupy
1033  *      @compl: the value of the COMPL bit to use
1034  *
1035  *      Generate a TX_PKT work request to send the supplied packet.
1036  */
1037 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1038                             const struct port_info *pi,
1039                             unsigned int pidx, unsigned int gen,
1040                             struct sge_txq *q, unsigned int ndesc,
1041                             unsigned int compl)
1042 {
1043         unsigned int flits, sgl_flits, cntrl, tso_info;
1044         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1045         struct tx_desc *d = &q->desc[pidx];
1046         struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1047
1048         cpl->len = htonl(skb->len | 0x80000000);
1049         cntrl = V_TXPKT_INTF(pi->port_id);
1050
1051         if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1052                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1053
1054         tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1055         if (tso_info) {
1056                 int eth_type;
1057                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1058
1059                 d->flit[2] = 0;
1060                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1061                 hdr->cntrl = htonl(cntrl);
1062                 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1063                     CPL_ETH_II : CPL_ETH_II_VLAN;
1064                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1065                     V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1066                     V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1067                 hdr->lso_info = htonl(tso_info);
1068                 flits = 3;
1069         } else {
1070                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1071                 cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1072                 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1073                 cpl->cntrl = htonl(cntrl);
1074
1075                 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1076                         q->sdesc[pidx].skb = NULL;
1077                         if (!skb->data_len)
1078                                 skb_copy_from_linear_data(skb, &d->flit[2],
1079                                                           skb->len);
1080                         else
1081                                 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1082
1083                         flits = (skb->len + 7) / 8 + 2;
1084                         cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1085                                               V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1086                                               | F_WR_SOP | F_WR_EOP | compl);
1087                         wmb();
1088                         cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1089                                               V_WR_TID(q->token));
1090                         wr_gen2(d, gen);
1091                         kfree_skb(skb);
1092                         return;
1093                 }
1094
1095                 flits = 2;
1096         }
1097
1098         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1099         sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1100
1101         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1102                          htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1103                          htonl(V_WR_TID(q->token)));
1104 }
1105
1106 static inline void t3_stop_queue(struct net_device *dev, struct sge_qset *qs,
1107                                  struct sge_txq *q)
1108 {
1109         netif_stop_queue(dev);
1110         set_bit(TXQ_ETH, &qs->txq_stopped);
1111         q->stops++;
1112 }
1113
1114 /**
1115  *      eth_xmit - add a packet to the Ethernet Tx queue
1116  *      @skb: the packet
1117  *      @dev: the egress net device
1118  *
1119  *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1120  */
1121 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1122 {
1123         unsigned int ndesc, pidx, credits, gen, compl;
1124         const struct port_info *pi = netdev_priv(dev);
1125         struct adapter *adap = pi->adapter;
1126         struct sge_qset *qs = pi->qs;
1127         struct sge_txq *q = &qs->txq[TXQ_ETH];
1128
1129         /*
1130          * The chip min packet length is 9 octets but play safe and reject
1131          * anything shorter than an Ethernet header.
1132          */
1133         if (unlikely(skb->len < ETH_HLEN)) {
1134                 dev_kfree_skb(skb);
1135                 return NETDEV_TX_OK;
1136         }
1137
1138         spin_lock(&q->lock);
1139         reclaim_completed_tx(adap, q);
1140
1141         credits = q->size - q->in_use;
1142         ndesc = calc_tx_descs(skb);
1143
1144         if (unlikely(credits < ndesc)) {
1145                 t3_stop_queue(dev, qs, q);
1146                 dev_err(&adap->pdev->dev,
1147                         "%s: Tx ring %u full while queue awake!\n",
1148                         dev->name, q->cntxt_id & 7);
1149                 spin_unlock(&q->lock);
1150                 return NETDEV_TX_BUSY;
1151         }
1152
1153         q->in_use += ndesc;
1154         if (unlikely(credits - ndesc < q->stop_thres)) {
1155                 t3_stop_queue(dev, qs, q);
1156
1157                 if (should_restart_tx(q) &&
1158                     test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1159                         q->restarts++;
1160                         netif_wake_queue(dev);
1161                 }
1162         }
1163
1164         gen = q->gen;
1165         q->unacked += ndesc;
1166         compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1167         q->unacked &= 7;
1168         pidx = q->pidx;
1169         q->pidx += ndesc;
1170         if (q->pidx >= q->size) {
1171                 q->pidx -= q->size;
1172                 q->gen ^= 1;
1173         }
1174
1175         /* update port statistics */
1176         if (skb->ip_summed == CHECKSUM_COMPLETE)
1177                 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1178         if (skb_shinfo(skb)->gso_size)
1179                 qs->port_stats[SGE_PSTAT_TSO]++;
1180         if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1181                 qs->port_stats[SGE_PSTAT_VLANINS]++;
1182
1183         dev->trans_start = jiffies;
1184         spin_unlock(&q->lock);
1185
1186         /*
1187          * We do not use Tx completion interrupts to free DMAd Tx packets.
1188          * This is good for performamce but means that we rely on new Tx
1189          * packets arriving to run the destructors of completed packets,
1190          * which open up space in their sockets' send queues.  Sometimes
1191          * we do not get such new packets causing Tx to stall.  A single
1192          * UDP transmitter is a good example of this situation.  We have
1193          * a clean up timer that periodically reclaims completed packets
1194          * but it doesn't run often enough (nor do we want it to) to prevent
1195          * lengthy stalls.  A solution to this problem is to run the
1196          * destructor early, after the packet is queued but before it's DMAd.
1197          * A cons is that we lie to socket memory accounting, but the amount
1198          * of extra memory is reasonable (limited by the number of Tx
1199          * descriptors), the packets do actually get freed quickly by new
1200          * packets almost always, and for protocols like TCP that wait for
1201          * acks to really free up the data the extra memory is even less.
1202          * On the positive side we run the destructors on the sending CPU
1203          * rather than on a potentially different completing CPU, usually a
1204          * good thing.  We also run them without holding our Tx queue lock,
1205          * unlike what reclaim_completed_tx() would otherwise do.
1206          *
1207          * Run the destructor before telling the DMA engine about the packet
1208          * to make sure it doesn't complete and get freed prematurely.
1209          */
1210         if (likely(!skb_shared(skb)))
1211                 skb_orphan(skb);
1212
1213         write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1214         check_ring_tx_db(adap, q);
1215         return NETDEV_TX_OK;
1216 }
1217
1218 /**
1219  *      write_imm - write a packet into a Tx descriptor as immediate data
1220  *      @d: the Tx descriptor to write
1221  *      @skb: the packet
1222  *      @len: the length of packet data to write as immediate data
1223  *      @gen: the generation bit value to write
1224  *
1225  *      Writes a packet as immediate data into a Tx descriptor.  The packet
1226  *      contains a work request at its beginning.  We must write the packet
1227  *      carefully so the SGE doesn't read it accidentally before it's written
1228  *      in its entirety.
1229  */
1230 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1231                              unsigned int len, unsigned int gen)
1232 {
1233         struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1234         struct work_request_hdr *to = (struct work_request_hdr *)d;
1235
1236         if (likely(!skb->data_len))
1237                 memcpy(&to[1], &from[1], len - sizeof(*from));
1238         else
1239                 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1240
1241         to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1242                                         V_WR_BCNTLFLT(len & 7));
1243         wmb();
1244         to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1245                                         V_WR_LEN((len + 7) / 8));
1246         wr_gen2(d, gen);
1247         kfree_skb(skb);
1248 }
1249
1250 /**
1251  *      check_desc_avail - check descriptor availability on a send queue
1252  *      @adap: the adapter
1253  *      @q: the send queue
1254  *      @skb: the packet needing the descriptors
1255  *      @ndesc: the number of Tx descriptors needed
1256  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1257  *
1258  *      Checks if the requested number of Tx descriptors is available on an
1259  *      SGE send queue.  If the queue is already suspended or not enough
1260  *      descriptors are available the packet is queued for later transmission.
1261  *      Must be called with the Tx queue locked.
1262  *
1263  *      Returns 0 if enough descriptors are available, 1 if there aren't
1264  *      enough descriptors and the packet has been queued, and 2 if the caller
1265  *      needs to retry because there weren't enough descriptors at the
1266  *      beginning of the call but some freed up in the mean time.
1267  */
1268 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1269                                    struct sk_buff *skb, unsigned int ndesc,
1270                                    unsigned int qid)
1271 {
1272         if (unlikely(!skb_queue_empty(&q->sendq))) {
1273               addq_exit:__skb_queue_tail(&q->sendq, skb);
1274                 return 1;
1275         }
1276         if (unlikely(q->size - q->in_use < ndesc)) {
1277                 struct sge_qset *qs = txq_to_qset(q, qid);
1278
1279                 set_bit(qid, &qs->txq_stopped);
1280                 smp_mb__after_clear_bit();
1281
1282                 if (should_restart_tx(q) &&
1283                     test_and_clear_bit(qid, &qs->txq_stopped))
1284                         return 2;
1285
1286                 q->stops++;
1287                 goto addq_exit;
1288         }
1289         return 0;
1290 }
1291
1292 /**
1293  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1294  *      @q: the SGE control Tx queue
1295  *
1296  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1297  *      that send only immediate data (presently just the control queues) and
1298  *      thus do not have any sk_buffs to release.
1299  */
1300 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1301 {
1302         unsigned int reclaim = q->processed - q->cleaned;
1303
1304         q->in_use -= reclaim;
1305         q->cleaned += reclaim;
1306 }
1307
1308 static inline int immediate(const struct sk_buff *skb)
1309 {
1310         return skb->len <= WR_LEN;
1311 }
1312
1313 /**
1314  *      ctrl_xmit - send a packet through an SGE control Tx queue
1315  *      @adap: the adapter
1316  *      @q: the control queue
1317  *      @skb: the packet
1318  *
1319  *      Send a packet through an SGE control Tx queue.  Packets sent through
1320  *      a control queue must fit entirely as immediate data in a single Tx
1321  *      descriptor and have no page fragments.
1322  */
1323 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1324                      struct sk_buff *skb)
1325 {
1326         int ret;
1327         struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1328
1329         if (unlikely(!immediate(skb))) {
1330                 WARN_ON(1);
1331                 dev_kfree_skb(skb);
1332                 return NET_XMIT_SUCCESS;
1333         }
1334
1335         wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1336         wrp->wr_lo = htonl(V_WR_TID(q->token));
1337
1338         spin_lock(&q->lock);
1339       again:reclaim_completed_tx_imm(q);
1340
1341         ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1342         if (unlikely(ret)) {
1343                 if (ret == 1) {
1344                         spin_unlock(&q->lock);
1345                         return NET_XMIT_CN;
1346                 }
1347                 goto again;
1348         }
1349
1350         write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1351
1352         q->in_use++;
1353         if (++q->pidx >= q->size) {
1354                 q->pidx = 0;
1355                 q->gen ^= 1;
1356         }
1357         spin_unlock(&q->lock);
1358         wmb();
1359         t3_write_reg(adap, A_SG_KDOORBELL,
1360                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1361         return NET_XMIT_SUCCESS;
1362 }
1363
1364 /**
1365  *      restart_ctrlq - restart a suspended control queue
1366  *      @qs: the queue set cotaining the control queue
1367  *
1368  *      Resumes transmission on a suspended Tx control queue.
1369  */
1370 static void restart_ctrlq(unsigned long data)
1371 {
1372         struct sk_buff *skb;
1373         struct sge_qset *qs = (struct sge_qset *)data;
1374         struct sge_txq *q = &qs->txq[TXQ_CTRL];
1375
1376         spin_lock(&q->lock);
1377       again:reclaim_completed_tx_imm(q);
1378
1379         while (q->in_use < q->size &&
1380                (skb = __skb_dequeue(&q->sendq)) != NULL) {
1381
1382                 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1383
1384                 if (++q->pidx >= q->size) {
1385                         q->pidx = 0;
1386                         q->gen ^= 1;
1387                 }
1388                 q->in_use++;
1389         }
1390
1391         if (!skb_queue_empty(&q->sendq)) {
1392                 set_bit(TXQ_CTRL, &qs->txq_stopped);
1393                 smp_mb__after_clear_bit();
1394
1395                 if (should_restart_tx(q) &&
1396                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1397                         goto again;
1398                 q->stops++;
1399         }
1400
1401         spin_unlock(&q->lock);
1402         wmb();
1403         t3_write_reg(qs->adap, A_SG_KDOORBELL,
1404                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1405 }
1406
1407 /*
1408  * Send a management message through control queue 0
1409  */
1410 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1411 {
1412         int ret;
1413         local_bh_disable();
1414         ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1415         local_bh_enable();
1416
1417         return ret;
1418 }
1419
1420 /**
1421  *      deferred_unmap_destructor - unmap a packet when it is freed
1422  *      @skb: the packet
1423  *
1424  *      This is the packet destructor used for Tx packets that need to remain
1425  *      mapped until they are freed rather than until their Tx descriptors are
1426  *      freed.
1427  */
1428 static void deferred_unmap_destructor(struct sk_buff *skb)
1429 {
1430         int i;
1431         const dma_addr_t *p;
1432         const struct skb_shared_info *si;
1433         const struct deferred_unmap_info *dui;
1434
1435         dui = (struct deferred_unmap_info *)skb->head;
1436         p = dui->addr;
1437
1438         if (skb->tail - skb->transport_header)
1439                 pci_unmap_single(dui->pdev, *p++,
1440                                  skb->tail - skb->transport_header,
1441                                  PCI_DMA_TODEVICE);
1442
1443         si = skb_shinfo(skb);
1444         for (i = 0; i < si->nr_frags; i++)
1445                 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1446                                PCI_DMA_TODEVICE);
1447 }
1448
1449 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1450                                      const struct sg_ent *sgl, int sgl_flits)
1451 {
1452         dma_addr_t *p;
1453         struct deferred_unmap_info *dui;
1454
1455         dui = (struct deferred_unmap_info *)skb->head;
1456         dui->pdev = pdev;
1457         for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1458                 *p++ = be64_to_cpu(sgl->addr[0]);
1459                 *p++ = be64_to_cpu(sgl->addr[1]);
1460         }
1461         if (sgl_flits)
1462                 *p = be64_to_cpu(sgl->addr[0]);
1463 }
1464
1465 /**
1466  *      write_ofld_wr - write an offload work request
1467  *      @adap: the adapter
1468  *      @skb: the packet to send
1469  *      @q: the Tx queue
1470  *      @pidx: index of the first Tx descriptor to write
1471  *      @gen: the generation value to use
1472  *      @ndesc: number of descriptors the packet will occupy
1473  *
1474  *      Write an offload work request to send the supplied packet.  The packet
1475  *      data already carry the work request with most fields populated.
1476  */
1477 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1478                           struct sge_txq *q, unsigned int pidx,
1479                           unsigned int gen, unsigned int ndesc)
1480 {
1481         unsigned int sgl_flits, flits;
1482         struct work_request_hdr *from;
1483         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1484         struct tx_desc *d = &q->desc[pidx];
1485
1486         if (immediate(skb)) {
1487                 q->sdesc[pidx].skb = NULL;
1488                 write_imm(d, skb, skb->len, gen);
1489                 return;
1490         }
1491
1492         /* Only TX_DATA builds SGLs */
1493
1494         from = (struct work_request_hdr *)skb->data;
1495         memcpy(&d->flit[1], &from[1],
1496                skb_transport_offset(skb) - sizeof(*from));
1497
1498         flits = skb_transport_offset(skb) / 8;
1499         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1500         sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1501                              skb->tail - skb->transport_header,
1502                              adap->pdev);
1503         if (need_skb_unmap()) {
1504                 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1505                 skb->destructor = deferred_unmap_destructor;
1506         }
1507
1508         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1509                          gen, from->wr_hi, from->wr_lo);
1510 }
1511
1512 /**
1513  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1514  *      @skb: the packet
1515  *
1516  *      Returns the number of Tx descriptors needed for the given offload
1517  *      packet.  These packets are already fully constructed.
1518  */
1519 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1520 {
1521         unsigned int flits, cnt;
1522
1523         if (skb->len <= WR_LEN)
1524                 return 1;       /* packet fits as immediate data */
1525
1526         flits = skb_transport_offset(skb) / 8;  /* headers */
1527         cnt = skb_shinfo(skb)->nr_frags;
1528         if (skb->tail != skb->transport_header)
1529                 cnt++;
1530         return flits_to_desc(flits + sgl_len(cnt));
1531 }
1532
1533 /**
1534  *      ofld_xmit - send a packet through an offload queue
1535  *      @adap: the adapter
1536  *      @q: the Tx offload queue
1537  *      @skb: the packet
1538  *
1539  *      Send an offload packet through an SGE offload queue.
1540  */
1541 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1542                      struct sk_buff *skb)
1543 {
1544         int ret;
1545         unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1546
1547         spin_lock(&q->lock);
1548       again:reclaim_completed_tx(adap, q);
1549
1550         ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1551         if (unlikely(ret)) {
1552                 if (ret == 1) {
1553                         skb->priority = ndesc;  /* save for restart */
1554                         spin_unlock(&q->lock);
1555                         return NET_XMIT_CN;
1556                 }
1557                 goto again;
1558         }
1559
1560         gen = q->gen;
1561         q->in_use += ndesc;
1562         pidx = q->pidx;
1563         q->pidx += ndesc;
1564         if (q->pidx >= q->size) {
1565                 q->pidx -= q->size;
1566                 q->gen ^= 1;
1567         }
1568         spin_unlock(&q->lock);
1569
1570         write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1571         check_ring_tx_db(adap, q);
1572         return NET_XMIT_SUCCESS;
1573 }
1574
1575 /**
1576  *      restart_offloadq - restart a suspended offload queue
1577  *      @qs: the queue set cotaining the offload queue
1578  *
1579  *      Resumes transmission on a suspended Tx offload queue.
1580  */
1581 static void restart_offloadq(unsigned long data)
1582 {
1583         struct sk_buff *skb;
1584         struct sge_qset *qs = (struct sge_qset *)data;
1585         struct sge_txq *q = &qs->txq[TXQ_OFLD];
1586         const struct port_info *pi = netdev_priv(qs->netdev);
1587         struct adapter *adap = pi->adapter;
1588
1589         spin_lock(&q->lock);
1590       again:reclaim_completed_tx(adap, q);
1591
1592         while ((skb = skb_peek(&q->sendq)) != NULL) {
1593                 unsigned int gen, pidx;
1594                 unsigned int ndesc = skb->priority;
1595
1596                 if (unlikely(q->size - q->in_use < ndesc)) {
1597                         set_bit(TXQ_OFLD, &qs->txq_stopped);
1598                         smp_mb__after_clear_bit();
1599
1600                         if (should_restart_tx(q) &&
1601                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1602                                 goto again;
1603                         q->stops++;
1604                         break;
1605                 }
1606
1607                 gen = q->gen;
1608                 q->in_use += ndesc;
1609                 pidx = q->pidx;
1610                 q->pidx += ndesc;
1611                 if (q->pidx >= q->size) {
1612                         q->pidx -= q->size;
1613                         q->gen ^= 1;
1614                 }
1615                 __skb_unlink(skb, &q->sendq);
1616                 spin_unlock(&q->lock);
1617
1618                 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1619                 spin_lock(&q->lock);
1620         }
1621         spin_unlock(&q->lock);
1622
1623 #if USE_GTS
1624         set_bit(TXQ_RUNNING, &q->flags);
1625         set_bit(TXQ_LAST_PKT_DB, &q->flags);
1626 #endif
1627         wmb();
1628         t3_write_reg(adap, A_SG_KDOORBELL,
1629                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1630 }
1631
1632 /**
1633  *      queue_set - return the queue set a packet should use
1634  *      @skb: the packet
1635  *
1636  *      Maps a packet to the SGE queue set it should use.  The desired queue
1637  *      set is carried in bits 1-3 in the packet's priority.
1638  */
1639 static inline int queue_set(const struct sk_buff *skb)
1640 {
1641         return skb->priority >> 1;
1642 }
1643
1644 /**
1645  *      is_ctrl_pkt - return whether an offload packet is a control packet
1646  *      @skb: the packet
1647  *
1648  *      Determines whether an offload packet should use an OFLD or a CTRL
1649  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1650  */
1651 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1652 {
1653         return skb->priority & 1;
1654 }
1655
1656 /**
1657  *      t3_offload_tx - send an offload packet
1658  *      @tdev: the offload device to send to
1659  *      @skb: the packet
1660  *
1661  *      Sends an offload packet.  We use the packet priority to select the
1662  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1663  *      should be sent as regular or control, bits 1-3 select the queue set.
1664  */
1665 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1666 {
1667         struct adapter *adap = tdev2adap(tdev);
1668         struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1669
1670         if (unlikely(is_ctrl_pkt(skb)))
1671                 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1672
1673         return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1674 }
1675
1676 /**
1677  *      offload_enqueue - add an offload packet to an SGE offload receive queue
1678  *      @q: the SGE response queue
1679  *      @skb: the packet
1680  *
1681  *      Add a new offload packet to an SGE response queue's offload packet
1682  *      queue.  If the packet is the first on the queue it schedules the RX
1683  *      softirq to process the queue.
1684  */
1685 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1686 {
1687         skb->next = skb->prev = NULL;
1688         if (q->rx_tail)
1689                 q->rx_tail->next = skb;
1690         else {
1691                 struct sge_qset *qs = rspq_to_qset(q);
1692
1693                 napi_schedule(&qs->napi);
1694                 q->rx_head = skb;
1695         }
1696         q->rx_tail = skb;
1697 }
1698
1699 /**
1700  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1701  *      @tdev: the offload device that will be receiving the packets
1702  *      @q: the SGE response queue that assembled the bundle
1703  *      @skbs: the partial bundle
1704  *      @n: the number of packets in the bundle
1705  *
1706  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1707  */
1708 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1709                                           struct sge_rspq *q,
1710                                           struct sk_buff *skbs[], int n)
1711 {
1712         if (n) {
1713                 q->offload_bundles++;
1714                 tdev->recv(tdev, skbs, n);
1715         }
1716 }
1717
1718 /**
1719  *      ofld_poll - NAPI handler for offload packets in interrupt mode
1720  *      @dev: the network device doing the polling
1721  *      @budget: polling budget
1722  *
1723  *      The NAPI handler for offload packets when a response queue is serviced
1724  *      by the hard interrupt handler, i.e., when it's operating in non-polling
1725  *      mode.  Creates small packet batches and sends them through the offload
1726  *      receive handler.  Batches need to be of modest size as we do prefetches
1727  *      on the packets in each.
1728  */
1729 static int ofld_poll(struct napi_struct *napi, int budget)
1730 {
1731         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1732         struct sge_rspq *q = &qs->rspq;
1733         struct adapter *adapter = qs->adap;
1734         int work_done = 0;
1735
1736         while (work_done < budget) {
1737                 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1738                 int ngathered;
1739
1740                 spin_lock_irq(&q->lock);
1741                 head = q->rx_head;
1742                 if (!head) {
1743                         napi_complete(napi);
1744                         spin_unlock_irq(&q->lock);
1745                         return work_done;
1746                 }
1747
1748                 tail = q->rx_tail;
1749                 q->rx_head = q->rx_tail = NULL;
1750                 spin_unlock_irq(&q->lock);
1751
1752                 for (ngathered = 0; work_done < budget && head; work_done++) {
1753                         prefetch(head->data);
1754                         skbs[ngathered] = head;
1755                         head = head->next;
1756                         skbs[ngathered]->next = NULL;
1757                         if (++ngathered == RX_BUNDLE_SIZE) {
1758                                 q->offload_bundles++;
1759                                 adapter->tdev.recv(&adapter->tdev, skbs,
1760                                                    ngathered);
1761                                 ngathered = 0;
1762                         }
1763                 }
1764                 if (head) {     /* splice remaining packets back onto Rx queue */
1765                         spin_lock_irq(&q->lock);
1766                         tail->next = q->rx_head;
1767                         if (!q->rx_head)
1768                                 q->rx_tail = tail;
1769                         q->rx_head = head;
1770                         spin_unlock_irq(&q->lock);
1771                 }
1772                 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1773         }
1774
1775         return work_done;
1776 }
1777
1778 /**
1779  *      rx_offload - process a received offload packet
1780  *      @tdev: the offload device receiving the packet
1781  *      @rq: the response queue that received the packet
1782  *      @skb: the packet
1783  *      @rx_gather: a gather list of packets if we are building a bundle
1784  *      @gather_idx: index of the next available slot in the bundle
1785  *
1786  *      Process an ingress offload pakcet and add it to the offload ingress
1787  *      queue.  Returns the index of the next available slot in the bundle.
1788  */
1789 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1790                              struct sk_buff *skb, struct sk_buff *rx_gather[],
1791                              unsigned int gather_idx)
1792 {
1793         skb_reset_mac_header(skb);
1794         skb_reset_network_header(skb);
1795         skb_reset_transport_header(skb);
1796
1797         if (rq->polling) {
1798                 rx_gather[gather_idx++] = skb;
1799                 if (gather_idx == RX_BUNDLE_SIZE) {
1800                         tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1801                         gather_idx = 0;
1802                         rq->offload_bundles++;
1803                 }
1804         } else
1805                 offload_enqueue(rq, skb);
1806
1807         return gather_idx;
1808 }
1809
1810 /**
1811  *      restart_tx - check whether to restart suspended Tx queues
1812  *      @qs: the queue set to resume
1813  *
1814  *      Restarts suspended Tx queues of an SGE queue set if they have enough
1815  *      free resources to resume operation.
1816  */
1817 static void restart_tx(struct sge_qset *qs)
1818 {
1819         if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1820             should_restart_tx(&qs->txq[TXQ_ETH]) &&
1821             test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1822                 qs->txq[TXQ_ETH].restarts++;
1823                 if (netif_running(qs->netdev))
1824                         netif_wake_queue(qs->netdev);
1825         }
1826
1827         if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1828             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1829             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1830                 qs->txq[TXQ_OFLD].restarts++;
1831                 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1832         }
1833         if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1834             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1835             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1836                 qs->txq[TXQ_CTRL].restarts++;
1837                 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1838         }
1839 }
1840
1841 /**
1842  *      rx_eth - process an ingress ethernet packet
1843  *      @adap: the adapter
1844  *      @rq: the response queue that received the packet
1845  *      @skb: the packet
1846  *      @pad: amount of padding at the start of the buffer
1847  *
1848  *      Process an ingress ethernet pakcet and deliver it to the stack.
1849  *      The padding is 2 if the packet was delivered in an Rx buffer and 0
1850  *      if it was immediate data in a response.
1851  */
1852 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1853                    struct sk_buff *skb, int pad)
1854 {
1855         struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1856         struct port_info *pi;
1857
1858         skb_pull(skb, sizeof(*p) + pad);
1859         skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1860         skb->dev->last_rx = jiffies;
1861         pi = netdev_priv(skb->dev);
1862         if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
1863             !p->fragment) {
1864                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1865                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1866         } else
1867                 skb->ip_summed = CHECKSUM_NONE;
1868
1869         if (unlikely(p->vlan_valid)) {
1870                 struct vlan_group *grp = pi->vlan_grp;
1871
1872                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1873                 if (likely(grp))
1874                         __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1875                                           rq->polling);
1876                 else
1877                         dev_kfree_skb_any(skb);
1878         } else if (rq->polling)
1879                 netif_receive_skb(skb);
1880         else
1881                 netif_rx(skb);
1882 }
1883
1884 /**
1885  *      handle_rsp_cntrl_info - handles control information in a response
1886  *      @qs: the queue set corresponding to the response
1887  *      @flags: the response control flags
1888  *
1889  *      Handles the control information of an SGE response, such as GTS
1890  *      indications and completion credits for the queue set's Tx queues.
1891  *      HW coalesces credits, we don't do any extra SW coalescing.
1892  */
1893 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
1894 {
1895         unsigned int credits;
1896
1897 #if USE_GTS
1898         if (flags & F_RSPD_TXQ0_GTS)
1899                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1900 #endif
1901
1902         credits = G_RSPD_TXQ0_CR(flags);
1903         if (credits)
1904                 qs->txq[TXQ_ETH].processed += credits;
1905
1906         credits = G_RSPD_TXQ2_CR(flags);
1907         if (credits)
1908                 qs->txq[TXQ_CTRL].processed += credits;
1909
1910 # if USE_GTS
1911         if (flags & F_RSPD_TXQ1_GTS)
1912                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1913 # endif
1914         credits = G_RSPD_TXQ1_CR(flags);
1915         if (credits)
1916                 qs->txq[TXQ_OFLD].processed += credits;
1917 }
1918
1919 /**
1920  *      check_ring_db - check if we need to ring any doorbells
1921  *      @adapter: the adapter
1922  *      @qs: the queue set whose Tx queues are to be examined
1923  *      @sleeping: indicates which Tx queue sent GTS
1924  *
1925  *      Checks if some of a queue set's Tx queues need to ring their doorbells
1926  *      to resume transmission after idling while they still have unprocessed
1927  *      descriptors.
1928  */
1929 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1930                           unsigned int sleeping)
1931 {
1932         if (sleeping & F_RSPD_TXQ0_GTS) {
1933                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1934
1935                 if (txq->cleaned + txq->in_use != txq->processed &&
1936                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1937                         set_bit(TXQ_RUNNING, &txq->flags);
1938                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1939                                      V_EGRCNTX(txq->cntxt_id));
1940                 }
1941         }
1942
1943         if (sleeping & F_RSPD_TXQ1_GTS) {
1944                 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1945
1946                 if (txq->cleaned + txq->in_use != txq->processed &&
1947                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1948                         set_bit(TXQ_RUNNING, &txq->flags);
1949                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1950                                      V_EGRCNTX(txq->cntxt_id));
1951                 }
1952         }
1953 }
1954
1955 /**
1956  *      is_new_response - check if a response is newly written
1957  *      @r: the response descriptor
1958  *      @q: the response queue
1959  *
1960  *      Returns true if a response descriptor contains a yet unprocessed
1961  *      response.
1962  */
1963 static inline int is_new_response(const struct rsp_desc *r,
1964                                   const struct sge_rspq *q)
1965 {
1966         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1967 }
1968
/* GTS indications for Tx queues 0 and 1 in a response's flags. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ1_GTS | F_RSPD_TXQ0_GTS)

/* All control information in a response's flags: Tx credits plus GTS bits. */
#define RSPD_CTRL_MASK (V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR) | \
                        RSPD_GTS_MASK)

/*
 * Holdoff applied to the next interrupt when memory is short, in units of
 * 0.1us (i.e. 250us).
 */
#define NOMEM_INTR_DELAY 2500
1977
1978 /**
1979  *      process_responses - process responses from an SGE response queue
1980  *      @adap: the adapter
1981  *      @qs: the queue set to which the response queue belongs
1982  *      @budget: how many responses can be processed in this round
1983  *
1984  *      Process responses from an SGE response queue up to the supplied budget.
1985  *      Responses include received packets as well as credits and other events
1986  *      for the queues that belong to the response queue's queue set.
1987  *      A negative budget is effectively unlimited.
1988  *
1989  *      Additionally choose the interrupt holdoff time for the next interrupt
1990  *      on this queue.  If the system is under memory shortage use a fairly
1991  *      long delay to help recovery.
1992  */
1993 static int process_responses(struct adapter *adap, struct sge_qset *qs,
1994                              int budget)
1995 {
1996         struct sge_rspq *q = &qs->rspq;
1997         struct rsp_desc *r = &q->desc[q->cidx];
1998         int budget_left = budget;
1999         unsigned int sleeping = 0;
2000         struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2001         int ngathered = 0;
2002
2003         q->next_holdoff = q->holdoff_tmr;
2004
2005         while (likely(budget_left && is_new_response(r, q))) {
2006                 int eth, ethpad = 2;
2007                 struct sk_buff *skb = NULL;
2008                 u32 len, flags = ntohl(r->flags);
2009                 __be32 rss_hi = *(const __be32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
2010
2011                 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2012
2013                 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2014                         skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2015                         if (!skb)
2016                                 goto no_mem;
2017
2018                         memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2019                         skb->data[0] = CPL_ASYNC_NOTIF;
2020                         rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2021                         q->async_notif++;
2022                 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2023                         skb = get_imm_packet(r);
2024                         if (unlikely(!skb)) {
2025 no_mem:
2026                                 q->next_holdoff = NOMEM_INTR_DELAY;
2027                                 q->nomem++;
2028                                 /* consume one credit since we tried */
2029                                 budget_left--;
2030                                 break;
2031                         }
2032                         q->imm_data++;
2033                         ethpad = 0;
2034                 } else if ((len = ntohl(r->len_cq)) != 0) {
2035                         struct sge_fl *fl;
2036
2037                         fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2038                         if (fl->use_pages) {
2039                                 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2040
2041                                 prefetch(addr);
2042 #if L1_CACHE_BYTES < 128
2043                                 prefetch(addr + L1_CACHE_BYTES);
2044 #endif
2045                                 __refill_fl(adap, fl);
2046
2047                                 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2048                                                  eth ? SGE_RX_DROP_THRES : 0);
2049                         } else
2050                                 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2051                                                  eth ? SGE_RX_DROP_THRES : 0);
2052                         if (unlikely(!skb)) {
2053                                 if (!eth)
2054                                         goto no_mem;
2055                                 q->rx_drops++;
2056                         } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2057                                 __skb_pull(skb, 2);
2058
2059                         if (++fl->cidx == fl->size)
2060                                 fl->cidx = 0;
2061                 } else
2062                         q->pure_rsps++;
2063
2064                 if (flags & RSPD_CTRL_MASK) {
2065                         sleeping |= flags & RSPD_GTS_MASK;
2066                         handle_rsp_cntrl_info(qs, flags);
2067                 }
2068
2069                 r++;
2070                 if (unlikely(++q->cidx == q->size)) {
2071                         q->cidx = 0;
2072                         q->gen ^= 1;
2073                         r = q->desc;
2074                 }
2075                 prefetch(r);
2076
2077                 if (++q->credits >= (q->size / 4)) {
2078                         refill_rspq(adap, q, q->credits);
2079                         q->credits = 0;
2080                 }
2081
2082                 if (likely(skb != NULL)) {
2083                         if (eth)
2084                                 rx_eth(adap, q, skb, ethpad);
2085                         else {
2086                                 q->offload_pkts++;
2087                                 /* Preserve the RSS info in csum & priority */
2088                                 skb->csum = rss_hi;
2089                                 skb->priority = rss_lo;
2090                                 ngathered = rx_offload(&adap->tdev, q, skb,
2091                                                        offload_skbs,
2092                                                        ngathered);
2093                         }
2094                 }
2095                 --budget_left;
2096         }
2097
2098         deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2099         if (sleeping)
2100                 check_ring_db(adap, qs, sleeping);
2101
2102         smp_mb();               /* commit Tx queue .processed updates */
2103         if (unlikely(qs->txq_stopped != 0))
2104                 restart_tx(qs);
2105
2106         budget -= budget_left;
2107         return budget;
2108 }
2109
2110 static inline int is_pure_response(const struct rsp_desc *r)
2111 {
2112         u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2113
2114         return (n | r->len_cq) == 0;
2115 }
2116
2117 /**
2118  *      napi_rx_handler - the NAPI handler for Rx processing
2119  *      @napi: the napi instance
2120  *      @budget: how many packets we can process in this round
2121  *
2122  *      Handler for new data events when using NAPI.
2123  */
2124 static int napi_rx_handler(struct napi_struct *napi, int budget)
2125 {
2126         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2127         struct adapter *adap = qs->adap;
2128         int work_done = process_responses(adap, qs, budget);
2129
2130         if (likely(work_done < budget)) {
2131                 napi_complete(napi);
2132
2133                 /*
2134                  * Because we don't atomically flush the following
2135                  * write it is possible that in very rare cases it can
2136                  * reach the device in a way that races with a new
2137                  * response being written plus an error interrupt
2138                  * causing the NAPI interrupt handler below to return
2139                  * unhandled status to the OS.  To protect against
2140                  * this would require flushing the write and doing
2141                  * both the write and the flush with interrupts off.
2142                  * Way too expensive and unjustifiable given the
2143                  * rarity of the race.
2144                  *
2145                  * The race cannot happen at all with MSI-X.
2146                  */
2147                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2148                              V_NEWTIMER(qs->rspq.next_holdoff) |
2149                              V_NEWINDEX(qs->rspq.cidx));
2150         }
2151         return work_done;
2152 }
2153
2154 /*
2155  * Returns true if the device is already scheduled for polling.
2156  */
2157 static inline int napi_is_scheduled(struct napi_struct *napi)
2158 {
2159         return test_bit(NAPI_STATE_SCHED, &napi->state);
2160 }
2161
2162 /**
2163  *      process_pure_responses - process pure responses from a response queue
2164  *      @adap: the adapter
2165  *      @qs: the queue set owning the response queue
2166  *      @r: the first pure response to process
2167  *
2168  *      A simpler version of process_responses() that handles only pure (i.e.,
2169  *      non data-carrying) responses.  Such respones are too light-weight to
2170  *      justify calling a softirq under NAPI, so we handle them specially in
2171  *      the interrupt handler.  The function is called with a pointer to a
2172  *      response, which the caller must ensure is a valid pure response.
2173  *
2174  *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2175  */
2176 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2177                                   struct rsp_desc *r)
2178 {
2179         struct sge_rspq *q = &qs->rspq;
2180         unsigned int sleeping = 0;
2181
2182         do {
2183                 u32 flags = ntohl(r->flags);
2184
2185                 r++;
2186                 if (unlikely(++q->cidx == q->size)) {
2187                         q->cidx = 0;
2188                         q->gen ^= 1;
2189                         r = q->desc;
2190                 }
2191                 prefetch(r);
2192
2193                 if (flags & RSPD_CTRL_MASK) {
2194                         sleeping |= flags & RSPD_GTS_MASK;
2195                         handle_rsp_cntrl_info(qs, flags);
2196                 }
2197
2198                 q->pure_rsps++;
2199                 if (++q->credits >= (q->size / 4)) {
2200                         refill_rspq(adap, q, q->credits);
2201                         q->credits = 0;
2202                 }
2203         } while (is_new_response(r, q) && is_pure_response(r));
2204
2205         if (sleeping)
2206                 check_ring_db(adap, qs, sleeping);
2207
2208         smp_mb();               /* commit Tx queue .processed updates */
2209         if (unlikely(qs->txq_stopped != 0))
2210                 restart_tx(qs);
2211
2212         return is_new_response(r, q);
2213 }
2214
2215 /**
2216  *      handle_responses - decide what to do with new responses in NAPI mode
2217  *      @adap: the adapter
2218  *      @q: the response queue
2219  *
2220  *      This is used by the NAPI interrupt handlers to decide what to do with
2221  *      new SGE responses.  If there are no new responses it returns -1.  If
2222  *      there are new responses and they are pure (i.e., non-data carrying)
2223  *      it handles them straight in hard interrupt context as they are very
2224  *      cheap and don't deliver any packets.  Finally, if there are any data
2225  *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2226  *      schedules NAPI, 0 if all new responses were pure.
2227  *
2228  *      The caller must ascertain NAPI is not already running.
2229  */
2230 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2231 {
2232         struct sge_qset *qs = rspq_to_qset(q);
2233         struct rsp_desc *r = &q->desc[q->cidx];
2234
2235         if (!is_new_response(r, q))
2236                 return -1;
2237         if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2238                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2239                              V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2240                 return 0;
2241         }
2242         napi_schedule(&qs->napi);
2243         return 1;
2244 }
2245
2246 /*
2247  * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2248  * (i.e., response queue serviced in hard interrupt).
2249  */
2250 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2251 {
2252         struct sge_qset *qs = cookie;
2253         struct adapter *adap = qs->adap;
2254         struct sge_rspq *q = &qs->rspq;
2255
2256         spin_lock(&q->lock);
2257         if (process_responses(adap, qs, -1) == 0)
2258                 q->unhandled_irqs++;
2259         t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2260                      V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2261         spin_unlock(&q->lock);
2262         return IRQ_HANDLED;
2263 }
2264
2265 /*
2266  * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2267  * (i.e., response queue serviced by NAPI polling).
2268  */
2269 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2270 {
2271         struct sge_qset *qs = cookie;
2272         struct sge_rspq *q = &qs->rspq;
2273
2274         spin_lock(&q->lock);
2275
2276         if (handle_responses(qs->adap, q) < 0)
2277                 q->unhandled_irqs++;
2278         spin_unlock(&q->lock);
2279         return IRQ_HANDLED;
2280 }
2281
2282 /*
2283  * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2284  * SGE response queues as well as error and other async events as they all use
2285  * the same MSI vector.  We use one SGE response queue per port in this mode
2286  * and protect all response queues with queue 0's lock.
2287  */
2288 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2289 {
2290         int new_packets = 0;
2291         struct adapter *adap = cookie;
2292         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2293
2294         spin_lock(&q->lock);
2295
2296         if (process_responses(adap, &adap->sge.qs[0], -1)) {
2297                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2298                              V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2299                 new_packets = 1;
2300         }
2301
2302         if (adap->params.nports == 2 &&
2303             process_responses(adap, &adap->sge.qs[1], -1)) {
2304                 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2305
2306                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2307                              V_NEWTIMER(q1->next_holdoff) |
2308                              V_NEWINDEX(q1->cidx));
2309                 new_packets = 1;
2310         }
2311
2312         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2313                 q->unhandled_irqs++;
2314
2315         spin_unlock(&q->lock);
2316         return IRQ_HANDLED;
2317 }
2318
2319 static int rspq_check_napi(struct sge_qset *qs)
2320 {
2321         struct sge_rspq *q = &qs->rspq;
2322
2323         if (!napi_is_scheduled(&qs->napi) &&
2324             is_new_response(&q->desc[q->cidx], q)) {
2325                 napi_schedule(&qs->napi);
2326                 return 1;
2327         }
2328         return 0;
2329 }
2330
2331 /*
2332  * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2333  * by NAPI polling).  Handles data events from SGE response queues as well as
2334  * error and other async events as they all use the same MSI vector.  We use
2335  * one SGE response queue per port in this mode and protect all response
2336  * queues with queue 0's lock.
2337  */
2338 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2339 {
2340         int new_packets;
2341         struct adapter *adap = cookie;
2342         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2343
2344         spin_lock(&q->lock);
2345
2346         new_packets = rspq_check_napi(&adap->sge.qs[0]);
2347         if (adap->params.nports == 2)
2348                 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2349         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2350                 q->unhandled_irqs++;
2351
2352         spin_unlock(&q->lock);
2353         return IRQ_HANDLED;
2354 }
2355
2356 /*
2357  * A helper function that processes responses and issues GTS.
2358  */
2359 static inline int process_responses_gts(struct adapter *adap,
2360                                         struct sge_rspq *rq)
2361 {
2362         int work;
2363
2364         work = process_responses(adap, rspq_to_qset(rq), -1);
2365         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2366                      V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2367         return work;
2368 }
2369
2370 /*
2371  * The legacy INTx interrupt handler.  This needs to handle data events from
2372  * SGE response queues as well as error and other async events as they all use
2373  * the same interrupt pin.  We use one SGE response queue per port in this mode
2374  * and protect all response queues with queue 0's lock.
2375  */
2376 static irqreturn_t t3_intr(int irq, void *cookie)
2377 {
2378         int work_done, w0, w1;
2379         struct adapter *adap = cookie;
2380         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2381         struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2382
2383         spin_lock(&q0->lock);
2384
2385         w0 = is_new_response(&q0->desc[q0->cidx], q0);
2386         w1 = adap->params.nports == 2 &&
2387             is_new_response(&q1->desc[q1->cidx], q1);
2388
2389         if (likely(w0 | w1)) {
2390                 t3_write_reg(adap, A_PL_CLI, 0);
2391                 t3_read_reg(adap, A_PL_CLI);    /* flush */
2392
2393                 if (likely(w0))
2394                         process_responses_gts(adap, q0);
2395
2396                 if (w1)
2397                         process_responses_gts(adap, q1);
2398
2399                 work_done = w0 | w1;
2400         } else
2401                 work_done = t3_slow_intr_handler(adap);
2402
2403         spin_unlock(&q0->lock);
2404         return IRQ_RETVAL(work_done != 0);
2405 }
2406
2407 /*
2408  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2409  * Handles data events from SGE response queues as well as error and other
2410  * async events as they all use the same interrupt pin.  We use one SGE
2411  * response queue per port in this mode and protect all response queues with
2412  * queue 0's lock.
2413  */
2414 static irqreturn_t t3b_intr(int irq, void *cookie)
2415 {
2416         u32 map;
2417         struct adapter *adap = cookie;
2418         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2419
2420         t3_write_reg(adap, A_PL_CLI, 0);
2421         map = t3_read_reg(adap, A_SG_DATA_INTR);
2422
2423         if (unlikely(!map))     /* shared interrupt, most likely */
2424                 return IRQ_NONE;
2425
2426         spin_lock(&q0->lock);
2427
2428         if (unlikely(map & F_ERRINTR))
2429                 t3_slow_intr_handler(adap);
2430
2431         if (likely(map & 1))
2432                 process_responses_gts(adap, q0);
2433
2434         if (map & 2)
2435                 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2436
2437         spin_unlock(&q0->lock);
2438         return IRQ_HANDLED;
2439 }
2440
2441 /*
2442  * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2443  * Handles data events from SGE response queues as well as error and other
2444  * async events as they all use the same interrupt pin.  We use one SGE
2445  * response queue per port in this mode and protect all response queues with
2446  * queue 0's lock.
2447  */
2448 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2449 {
2450         u32 map;
2451         struct adapter *adap = cookie;
2452         struct sge_qset *qs0 = &adap->sge.qs[0];
2453         struct sge_rspq *q0 = &qs0->rspq;
2454
2455         t3_write_reg(adap, A_PL_CLI, 0);
2456         map = t3_read_reg(adap, A_SG_DATA_INTR);
2457
2458         if (unlikely(!map))     /* shared interrupt, most likely */
2459                 return IRQ_NONE;
2460
2461         spin_lock(&q0->lock);
2462
2463         if (unlikely(map & F_ERRINTR))
2464                 t3_slow_intr_handler(adap);
2465
2466         if (likely(map & 1))
2467                 napi_schedule(&qs0->napi);
2468
2469         if (map & 2)
2470                 napi_schedule(&adap->sge.qs[1].napi);
2471
2472         spin_unlock(&q0->lock);
2473         return IRQ_HANDLED;
2474 }
2475
2476 /**
2477  *      t3_intr_handler - select the top-level interrupt handler
2478  *      @adap: the adapter
2479  *      @polling: whether using NAPI to service response queues
2480  *
2481  *      Selects the top-level interrupt handler based on the type of interrupts
2482  *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2483  *      response queues.
2484  */
2485 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2486 {
2487         if (adap->flags & USING_MSIX)
2488                 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2489         if (adap->flags & USING_MSI)
2490                 return polling ? t3_intr_msi_napi : t3_intr_msi;
2491         if (adap->params.rev > 0)
2492                 return polling ? t3b_intr_napi : t3b_intr;
2493         return t3_intr;
2494 }
2495
/* SGE interrupt cause bits reported as parity errors. */
#define SGE_PARERR \
	(F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
	 F_IRPARITYERROR | \
	 V_ITPARITYERROR(M_ITPARITYERROR) | \
	 V_FLPARITYERROR(M_FLPARITYERROR) | \
	 F_LODRBPARITYERROR | F_HIDRBPARITYERROR | \
	 F_LORCQPARITYERROR | F_HIRCQPARITYERROR)

/* SGE interrupt cause bits reported as framing errors. */
#define SGE_FRAMINGERR \
	(F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)

/* SGE interrupt cause bits that lead to t3_fatal_err() being called. */
#define SGE_FATALERR \
	(SGE_PARERR | SGE_FRAMINGERR | \
	 F_RSPQCREDITOVERFOW | F_RSPQDISABLED)
2504
2505 /**
2506  *      t3_sge_err_intr_handler - SGE async event interrupt handler
2507  *      @adapter: the adapter
2508  *
2509  *      Interrupt handler for SGE asynchronous (non-data) events.
2510  */
2511 void t3_sge_err_intr_handler(struct adapter *adapter)
2512 {
2513         unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2514
2515         if (status & SGE_PARERR)
2516                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2517                          status & SGE_PARERR);
2518         if (status & SGE_FRAMINGERR)
2519                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2520                          status & SGE_FRAMINGERR);
2521
2522         if (status & F_RSPQCREDITOVERFOW)
2523                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2524
2525         if (status & F_RSPQDISABLED) {
2526                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2527
2528                 CH_ALERT(adapter,
2529                          "packet delivered to disabled response queue "
2530                          "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2531         }
2532
2533         if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2534                 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2535                          status & F_HIPIODRBDROPERR ? "high" : "lo");
2536
2537         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2538         if (status &  SGE_FATALERR)
2539                 t3_fatal_err(adapter);
2540 }
2541
2542 /**
2543  *      sge_timer_cb - perform periodic maintenance of an SGE qset
2544  *      @data: the SGE queue set to maintain
2545  *
2546  *      Runs periodically from a timer to perform maintenance of an SGE queue
2547  *      set.  It performs two tasks:
2548  *
2549  *      a) Cleans up any completed Tx descriptors that may still be pending.
2550  *      Normal descriptor cleanup happens when new packets are added to a Tx
2551  *      queue so this timer is relatively infrequent and does any cleanup only
2552  *      if the Tx queue has not seen any new packets in a while.  We make a
2553  *      best effort attempt to reclaim descriptors, in that we don't wait
2554  *      around if we cannot get a queue's lock (which most likely is because
2555  *      someone else is queueing new packets and so will also handle the clean
2556  *      up).  Since control queues use immediate data exclusively we don't
2557  *      bother cleaning them up here.
2558  *
2559  *      b) Replenishes Rx queues that have run out due to memory shortage.
2560  *      Normally new Rx buffers are added when existing ones are consumed but
2561  *      when out of memory a queue can become empty.  We try to add only a few
2562  *      buffers here, the queue will be replenished fully as these new buffers
2563  *      are used up if memory shortage has subsided.
2564  */
2565 static void sge_timer_cb(unsigned long data)
2566 {
2567         spinlock_t *lock;
2568         struct sge_qset *qs = (struct sge_qset *)data;
2569         struct adapter *adap = qs->adap;
2570
2571         if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2572                 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2573                 spin_unlock(&qs->txq[TXQ_ETH].lock);
2574         }
2575         if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2576                 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2577                 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2578         }
2579         lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2580                                             &adap->sge.qs[0].rspq.lock;
2581         if (spin_trylock_irq(lock)) {
2582                 if (!napi_is_scheduled(&qs->napi)) {
2583                         u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2584
2585                         if (qs->fl[0].credits < qs->fl[0].size)
2586                                 __refill_fl(adap, &qs->fl[0]);
2587                         if (qs->fl[1].credits < qs->fl[1].size)
2588                                 __refill_fl(adap, &qs->fl[1]);
2589
2590                         if (status & (1 << qs->rspq.cntxt_id)) {
2591                                 qs->rspq.starved++;
2592                                 if (qs->rspq.credits) {
2593                                         refill_rspq(adap, &qs->rspq, 1);
2594                                         qs->rspq.credits--;
2595                                         qs->rspq.restarted++;
2596                                         t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2597                                                      1 << qs->rspq.cntxt_id);
2598                                 }
2599                         }
2600                 }
2601                 spin_unlock_irq(lock);
2602         }
2603         mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2604 }
2605
2606 /**
2607  *      t3_update_qset_coalesce - update coalescing settings for a queue set
2608  *      @qs: the SGE queue set
2609  *      @p: new queue set parameters
2610  *
2611  *      Update the coalescing settings for an SGE queue set.  Nothing is done
2612  *      if the queue set is not initialized yet.
2613  */
2614 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2615 {
2616         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2617         qs->rspq.polling = p->polling;
2618         qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2619 }
2620
2621 /**
2622  *      t3_sge_alloc_qset - initialize an SGE queue set
2623  *      @adapter: the adapter
2624  *      @id: the queue set id
2625  *      @nports: how many Ethernet ports will be using this queue set
2626  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2627  *      @p: configuration parameters for this queue set
2628  *      @ntxq: number of Tx queues for the queue set
2629  *      @netdev: net device associated with this queue set
2630  *
2631  *      Allocate resources and initialize an SGE queue set.  A queue set
2632  *      comprises a response queue, two Rx free-buffer queues, and up to 3
2633  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2634  *      queue, offload queue, and control queue.
2635  */
2636 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2637                       int irq_vec_idx, const struct qset_params *p,
2638                       int ntxq, struct net_device *dev)
2639 {
2640         int i, avail, ret = -ENOMEM;
2641         struct sge_qset *q = &adapter->sge.qs[id];
2642
2643         init_qset_cntxt(q, id);
2644         init_timer(&q->tx_reclaim_timer);
2645         q->tx_reclaim_timer.data = (unsigned long)q;
2646         q->tx_reclaim_timer.function = sge_timer_cb;
2647
2648         q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2649                                    sizeof(struct rx_desc),
2650                                    sizeof(struct rx_sw_desc),
2651                                    &q->fl[0].phys_addr, &q->fl[0].sdesc);
2652         if (!q->fl[0].desc)
2653                 goto err;
2654
2655         q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2656                                    sizeof(struct rx_desc),
2657                                    sizeof(struct rx_sw_desc),
2658                                    &q->fl[1].phys_addr, &q->fl[1].sdesc);
2659         if (!q->fl[1].desc)
2660                 goto err;
2661
2662         q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2663                                   sizeof(struct rsp_desc), 0,
2664                                   &q->rspq.phys_addr, NULL);
2665         if (!q->rspq.desc)
2666                 goto err;
2667
2668         for (i = 0; i < ntxq; ++i) {
2669                 /*
2670                  * The control queue always uses immediate data so does not
2671                  * need to keep track of any sk_buffs.
2672                  */
2673                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2674
2675                 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2676                                             sizeof(struct tx_desc), sz,
2677                                             &q->txq[i].phys_addr,
2678                                             &q->txq[i].sdesc);
2679                 if (!q->txq[i].desc)
2680                         goto err;
2681
2682                 q->txq[i].gen = 1;
2683                 q->txq[i].size = p->txq_size[i];
2684                 spin_lock_init(&q->txq[i].lock);
2685                 skb_queue_head_init(&q->txq[i].sendq);
2686         }
2687
2688         tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2689                      (unsigned long)q);
2690         tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2691                      (unsigned long)q);
2692
2693         q->fl[0].gen = q->fl[1].gen = 1;
2694         q->fl[0].size = p->fl_size;
2695         q->fl[1].size = p->jumbo_size;
2696
2697         q->rspq.gen = 1;
2698         q->rspq.size = p->rspq_size;
2699         spin_lock_init(&q->rspq.lock);
2700
2701         q->txq[TXQ_ETH].stop_thres = nports *
2702             flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2703
2704 #if FL0_PG_CHUNK_SIZE > 0
2705         q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2706 #else
2707         q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2708 #endif
2709         q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2710         q->fl[1].buf_size = is_offload(adapter) ?
2711                 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2712                 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2713
2714         spin_lock_irq(&adapter->sge.reg_lock);
2715
2716         /* FL threshold comparison uses < */
2717         ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2718                                    q->rspq.phys_addr, q->rspq.size,
2719                                    q->fl[0].buf_size, 1, 0);
2720         if (ret)
2721                 goto err_unlock;
2722
2723         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2724                 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2725                                           q->fl[i].phys_addr, q->fl[i].size,
2726                                           q->fl[i].buf_size, p->cong_thres, 1,
2727                                           0);
2728                 if (ret)
2729                         goto err_unlock;
2730         }
2731
2732         ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2733                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2734                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2735                                  1, 0);
2736         if (ret)
2737                 goto err_unlock;
2738
2739         if (ntxq > 1) {
2740                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2741                                          USE_GTS, SGE_CNTXT_OFLD, id,
2742                                          q->txq[TXQ_OFLD].phys_addr,
2743                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
2744                 if (ret)
2745                         goto err_unlock;
2746         }
2747
2748         if (ntxq > 2) {
2749                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2750                                          SGE_CNTXT_CTRL, id,
2751                                          q->txq[TXQ_CTRL].phys_addr,
2752                                          q->txq[TXQ_CTRL].size,
2753                                          q->txq[TXQ_CTRL].token, 1, 0);
2754                 if (ret)
2755                         goto err_unlock;
2756         }
2757
2758         spin_unlock_irq(&adapter->sge.reg_lock);
2759
2760         q->adap = adapter;
2761         q->netdev = dev;
2762         t3_update_qset_coalesce(q, p);
2763         avail = refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2764         if (!avail) {
2765                 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2766                 goto err;
2767         }
2768         if (avail < q->fl[0].size)
2769                 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2770                         avail);
2771
2772         avail = refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2773         if (avail < q->fl[1].size)
2774                 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2775                         avail);
2776         refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2777
2778         t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2779                      V_NEWTIMER(q->rspq.holdoff_tmr));
2780
2781         mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2782         return 0;
2783
2784 err_unlock:
2785         spin_unlock_irq(&adapter->sge.reg_lock);
2786 err:
2787         t3_free_qset(adapter, q);
2788         return ret;
2789 }
2790
2791 /**
2792  *      t3_free_sge_resources - free SGE resources
2793  *      @adap: the adapter
2794  *
2795  *      Frees resources used by the SGE queue sets.
2796  */
2797 void t3_free_sge_resources(struct adapter *adap)
2798 {
2799         int i;
2800
2801         for (i = 0; i < SGE_QSETS; ++i)
2802                 t3_free_qset(adap, &adap->sge.qs[i]);
2803 }
2804
2805 /**
2806  *      t3_sge_start - enable SGE
2807  *      @adap: the adapter
2808  *
2809  *      Enables the SGE for DMAs.  This is the last step in starting packet
2810  *      transfers.
2811  */
2812 void t3_sge_start(struct adapter *adap)
2813 {
2814         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2815 }
2816
2817 /**
2818  *      t3_sge_stop - disable SGE operation
2819  *      @adap: the adapter
2820  *
2821  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
2822  *      from error interrupts) or from normal process context.  In the latter
2823  *      case it also disables any pending queue restart tasklets.  Note that
2824  *      if it is called in interrupt context it cannot disable the restart
2825  *      tasklets as it cannot wait, however the tasklets will have no effect
2826  *      since the doorbells are disabled and the driver will call this again
2827  *      later from process context, at which time the tasklets will be stopped
2828  *      if they are still running.
2829  */
2830 void t3_sge_stop(struct adapter *adap)
2831 {
2832         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2833         if (!in_interrupt()) {
2834                 int i;
2835
2836                 for (i = 0; i < SGE_QSETS; ++i) {
2837                         struct sge_qset *qs = &adap->sge.qs[i];
2838
2839                         tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2840                         tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2841                 }
2842         }
2843 }
2844
2845 /**
2846  *      t3_sge_init - initialize SGE
2847  *      @adap: the adapter
2848  *      @p: the SGE parameters
2849  *
2850  *      Performs SGE initialization needed every time after a chip reset.
2851  *      We do not initialize any of the queue sets here, instead the driver
2852  *      top-level must request those individually.  We also do not enable DMA
2853  *      here, that should be done after the queues have been set up.
2854  */
2855 void t3_sge_init(struct adapter *adap, struct sge_params *p)
2856 {
2857         unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2858
2859         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2860             F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
2861             V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2862             V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2863 #if SGE_NUM_GENBITS == 1
2864         ctrl |= F_EGRGENCTRL;
2865 #endif
2866         if (adap->params.rev > 0) {
2867                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2868                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2869         }
2870         t3_write_reg(adap, A_SG_CONTROL, ctrl);
2871         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2872                      V_LORCQDRBTHRSH(512));
2873         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2874         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
2875                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
2876         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
2877                      adap->params.rev < T3_REV_C ? 1000 : 500);
2878         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2879         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2880         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2881         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2882         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2883 }
2884
2885 /**
2886  *      t3_sge_prep - one-time SGE initialization
2887  *      @adap: the associated adapter
2888  *      @p: SGE parameters
2889  *
2890  *      Performs one-time initialization of SGE SW state.  Includes determining
2891  *      defaults for the assorted SGE parameters, which admins can change until
2892  *      they are used to initialize the SGE.
2893  */
2894 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
2895 {
2896         int i;
2897
2898         p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2899             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2900
2901         for (i = 0; i < SGE_QSETS; ++i) {
2902                 struct qset_params *q = p->qset + i;
2903
2904                 q->polling = adap->params.rev > 0;
2905                 q->coalesce_usecs = 5;
2906                 q->rspq_size = 1024;
2907                 q->fl_size = 1024;
2908                 q->jumbo_size = 512;
2909                 q->txq_size[TXQ_ETH] = 1024;
2910                 q->txq_size[TXQ_OFLD] = 1024;
2911                 q->txq_size[TXQ_CTRL] = 256;
2912                 q->cong_thres = 0;
2913         }
2914
2915         spin_lock_init(&adap->sge.reg_lock);
2916 }
2917
2918 /**
2919  *      t3_get_desc - dump an SGE descriptor for debugging purposes
2920  *      @qs: the queue set
2921  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2922  *      @idx: the descriptor index in the queue
2923  *      @data: where to dump the descriptor contents
2924  *
2925  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
2926  *      size of the descriptor.
2927  */
2928 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2929                 unsigned char *data)
2930 {
2931         if (qnum >= 6)
2932                 return -EINVAL;
2933
2934         if (qnum < 3) {
2935                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2936                         return -EINVAL;
2937                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2938                 return sizeof(struct tx_desc);
2939         }
2940
2941         if (qnum == 3) {
2942                 if (!qs->rspq.desc || idx >= qs->rspq.size)
2943                         return -EINVAL;
2944                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2945                 return sizeof(struct rsp_desc);
2946         }
2947
2948         qnum -= 4;
2949         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2950                 return -EINVAL;
2951         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2952         return sizeof(struct rx_desc);
2953 }