SAFE public projects git trees. - safe/jmp/linux-2.6/blob - drivers/net/cxgb3/sge.c

   1 /*
   2  * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 #include <linux/skbuff.h>
  33 #include <linux/netdevice.h>
  34 #include <linux/etherdevice.h>
  35 #include <linux/if_vlan.h>
  36 #include <linux/ip.h>
  37 #include <linux/tcp.h>
  38 #include <linux/dma-mapping.h>
  39 #include <net/arp.h>
  40 #include "common.h"
  41 #include "regs.h"
  42 #include "sge_defs.h"
  43 #include "t3_cpl.h"
  44 #include "firmware_exports.h"
  45
  46 #define USE_GTS 0
  47
     /* NOTE(review): not referenced in this part of the file; presumably the
      * byte size of the small sk_buff Rx buffers - confirm against its users. */
  48 #define SGE_RX_SM_BUF_SIZE 1536
  49
     /*
      * Rx packets of at most SGE_RX_COPY_THRES bytes are copied into a freshly
      * allocated sk_buff and their Rx buffer is recycled (see get_packet() and
      * get_packet_pg()).  SGE_RX_PULL_LEN is the number of leading bytes that
      * get_packet_pg() copies into the linear area of a new sk_buff before it
      * attaches the rest of the page chunk as a fragment.
      */
  50 #define SGE_RX_COPY_THRES  256
  51 #define SGE_RX_PULL_LEN    128
  52
  53 /*
  54  * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  55  * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  56  * directly.
  57  */
  58 #define FL0_PG_CHUNK_SIZE  2048
  59 #define FL0_PG_ORDER 0
     /*
      * FL1 counterparts of the two FL0 settings above, selected from PAGE_SIZE:
      * 16 KB chunks from order-0 pages when pages are larger than 8 KB,
      * otherwise 8 KB chunks from order-1 allocations.
      */
  60 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
  61 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
  62
     /* NOTE(review): presumably the drop_thres value handed to get_packet() and
      * get_packet_pg(); its users are outside this part of the file - confirm. */
  63 #define SGE_RX_DROP_THRES 16
  64
  65 /*
  66  * Max number of Rx buffers we replenish at a time.
  67  */
  68 #define MAX_RX_REFILL 16U
  69 /*
  70  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  71  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  72  */
  73 #define TX_RECLAIM_PERIOD (HZ / 4)
  74
  75 /* WR size in bytes */
     /* A flit is 8 bytes (struct tx_desc stores flits as __be64). */
  76 #define WR_LEN (WR_FLITS * 8)
  77
  78 /*
  79  * Types of Tx queues in each queue set.  Order here matters, do not change.
  80  */
  81 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
     /* The values above are used as indices into sge_qset.txq[]. */
  82
  83 /* Values for sge_txq.flags */
  84 enum {
  85         TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  86         TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  87 };
  88
  89 struct tx_desc {
             /* one Tx descriptor: TX_DESC_FLITS big-endian 64-bit flits */
  90         __be64 flit[TX_DESC_FLITS];
  91 };
  92
  93 struct rx_desc {
             /*
              * Free-list descriptor, all fields big endian.  add_one_rx_buf()
              * and recycle_rx_buf() write the two address words first and the
              * two generation words after a wmb().
              */
  94         __be32 addr_lo;         /* low 32 bits of the buffer DMA address */
  95         __be32 len_gen;         /* written with V_FLD_GEN1(gen) */
  96         __be32 gen2;            /* written with V_FLD_GEN2(gen) */
  97         __be32 addr_hi;         /* high 32 bits of the buffer DMA address */
  98 };
  99
 100 struct tx_sw_desc {             /* SW state per Tx descriptor */
             /*
              * Non-NULL when the descriptor carries an SGL for this packet;
              * free_tx_desc() frees it on the descriptor that has eop set.
              */
 101         struct sk_buff *skb;
 102         u8 eop;       /* set if last descriptor for packet */
 103         u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
 104         u8 fragidx;   /* first page fragment associated with descriptor */
 105         s8 sflit;     /* start flit of first SGL entry in descriptor */
 106 };
 107
 108 struct rx_sw_desc {                /* SW state per Rx descriptor */
             /*
              * Which member is live depends on the owning free list:
              * pg_chunk when sge_fl.use_pages is set, skb otherwise
              * (see clear_rx_desc()).
              */
 109         union {
 110                 struct sk_buff *skb;
 111                 struct fl_pg_chunk pg_chunk;
 112         };
             /* DMA address of the buffer, stored by add_one_rx_buf() */
 113         DECLARE_PCI_UNMAP_ADDR(dma_addr);
 114 };
 115
 116 struct rsp_desc {               /* response queue descriptor */
             /*
              * NOTE(review): field meanings below are inferred from the names
              * only; the code that parses responses is outside this part of
              * the file - confirm before relying on them.
              */
 117         struct rss_header rss_hdr;
 118         __be32 flags;           /* big-endian flag word */
 119         __be32 len_cq;          /* big-endian; presumably length/CQ info */
 120         u8 imm_data[47];        /* presumably inline (immediate) data */
 121         u8 intr_gen;            /* presumably interrupt/generation bits */
 122 };
 123
 124 /*
 125  * Holds unmapping information for Tx packets that need deferred unmapping.
 126  * This structure lives at skb->head and must be allocated by callers.
 127  */
 128 struct deferred_unmap_info {
 129         struct pci_dev *pdev;   /* PCI device for the deferred unmapping */
             /*
              * One slot per possible page fragment plus one more -
              * presumably for the sk_buff's linear data; confirm against the
              * code that fills this in.
              */
 130         dma_addr_t addr[MAX_SKB_FRAGS + 1];
 131 };
 132
 133 /*
 134  * Maps a number of flits to the number of Tx descriptors that can hold them.
 135  * The formula is
 136  *
 137  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 138  *
 139  * HW allows up to 4 descriptors to be combined into a WR.
 140  */
 141 static u8 flit_desc_map[] = {
 142         0,
 143 #if SGE_NUM_GENBITS == 1
 144         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 145         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 146         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 147         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 148 #elif SGE_NUM_GENBITS == 2
 149         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 150         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 151         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 152         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 153 #else
 154 # error "SGE_NUM_GENBITS must be 1 or 2"
 155 #endif
 156 };
 157
 158 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 159 {
 160         return container_of(q, struct sge_qset, fl[qidx]);
 161 }
 162
 163 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 164 {
 165         return container_of(q, struct sge_qset, rspq);
 166 }
 167
 168 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 169 {
 170         return container_of(q, struct sge_qset, txq[qidx]);
 171 }
 172
 173 /**
 174  *      refill_rspq - replenish an SGE response queue
 175  *      @adapter: the adapter
 176  *      @q: the response queue to replenish
 177  *      @credits: how many new responses to make available
 178  *
 179  *      Replenishes a response queue by making the supplied number of responses
 180  *      available to HW.
 181  */
 182 static inline void refill_rspq(struct adapter *adapter,
 183                                const struct sge_rspq *q, unsigned int credits)
 184 {
 185         rmb();
 186         t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 187                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 188 }
 189
 190 /**
 191  *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 192  *
 193  *      Returns true if the platfrom needs sk_buff unmapping.  The compiler
 194  *      optimizes away unecessary code if this returns true.
 195  */
 196 static inline int need_skb_unmap(void)
 197 {
 198         /*
 199          * This structure is used to tell if the platfrom needs buffer
 200          * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 201          */
 202         struct dummy {
 203                 DECLARE_PCI_UNMAP_ADDR(addr);
 204         };
 205
 206         return sizeof(struct dummy) != 0;
 207 }
 208
 209 /**
 210  *      unmap_skb - unmap a packet main body and its page fragments
 211  *      @skb: the packet
 212  *      @q: the Tx queue containing Tx descriptors for the packet
 213  *      @cidx: index of Tx descriptor
 214  *      @pdev: the PCI device
 215  *
 216  *      Unmap the main body of an sk_buff and its page fragments, if any.
 217  *      Because of the fairly complicated structure of our SGLs and the desire
 218  *      to conserve space for metadata, the information necessary to unmap an
 219  *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 220  *      descriptors (the physical addresses of the various data buffers), and
 221  *      the SW descriptor state (assorted indices).  The send functions
 222  *      initialize the indices for the first packet descriptor so we can unmap
 223  *      the buffers held in the first Tx descriptor here, and we have enough
 224  *      information at this point to set the state for the next Tx descriptor.
 225  *
 226  *      Note that it is possible to clean up the first descriptor of a packet
 227  *      before the send routines have written the next descriptors, but this
 228  *      race does not cause any problem.  We just end up writing the unmapping
 229  *      info for the descriptor first.
 230  */
 231 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 232                              unsigned int cidx, struct pci_dev *pdev)
 233 {
 234         const struct sg_ent *sgp;
 235         struct tx_sw_desc *d = &q->sdesc[cidx];
             /* j selects which of the two addresses in the current sg_ent is next */
 236         int nfrags, frag_idx, curflit, j = d->addr_idx;
 237
             /* first SGL entry of this descriptor starts at flit d->sflit */
 238         sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 239         frag_idx = d->fragidx;
 240
             /*
              * No fragment handled yet and the skb has linear data: the head
              * mapping occupies addr[0], so unmap it and continue at addr[1].
              */
 241         if (frag_idx == 0 && skb_headlen(skb)) {
 242                 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 243                                  skb_headlen(skb), PCI_DMA_TODEVICE);
 244                 j = 1;
 245         }
 246
             /*
              * Flit index of the next address to process.  The "+ 1"
              * presumably skips the flit holding the entry's lengths, which
              * would precede its two address flits (cf. sgl_len()) - confirm
              * against the struct sg_ent definition.
              */
 247         curflit = d->sflit + 1 + j;
 248         nfrags = skb_shinfo(skb)->nr_frags;
 249
             /* unmap the page fragments whose addresses lie in this descriptor */
 250         while (frag_idx < nfrags && curflit < WR_FLITS) {
 251                 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 252                                skb_shinfo(skb)->frags[frag_idx].size,
 253                                PCI_DMA_TODEVICE);
 254                 j ^= 1;
                     /* both addresses used: go to the next sg_ent, one extra flit */
 255                 if (j == 0) {
 256                         sgp++;
 257                         curflit++;
 258                 }
 259                 curflit++;
 260                 frag_idx++;
 261         }
 262
             /*
              * Fragments remain: store the resume state (fragment index,
              * address index, start flit) in the SW descriptor that follows
              * cidx, wrapping to the start of the ring at q->size.
              */
 263         if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 264                 d = cidx + 1 == q->size ? q->sdesc : d + 1;
 265                 d->fragidx = frag_idx;
 266                 d->addr_idx = j;
 267                 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 268         }
 269 }
 270
 271 /**
 272  *      free_tx_desc - reclaims Tx descriptors and their buffers
 273  *      @adapter: the adapter
 274  *      @q: the Tx queue to reclaim descriptors from
 275  *      @n: the number of descriptors to reclaim
 276  *
 277  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 278  *      Tx buffers.  Called with the Tx queue lock held.
 279  */
 280 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 281                          unsigned int n)
 282 {
 283         struct tx_sw_desc *d;
 284         struct pci_dev *pdev = adapter->pdev;
 285         unsigned int cidx = q->cidx;
 286
 287         const int need_unmap = need_skb_unmap() &&
 288                                q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 289
 290         d = &q->sdesc[cidx];
 291         while (n--) {
 292                 if (d->skb) {   /* an SGL is present */
 293                         if (need_unmap)
 294                                 unmap_skb(d->skb, q, cidx, pdev);
 295                         if (d->eop)
 296                                 kfree_skb(d->skb);
 297                 }
 298                 ++d;
 299                 if (++cidx == q->size) {
 300                         cidx = 0;
 301                         d = q->sdesc;
 302                 }
 303         }
 304         q->cidx = cidx;
 305 }
 306
 307 /**
 308  *      reclaim_completed_tx - reclaims completed Tx descriptors
 309  *      @adapter: the adapter
 310  *      @q: the Tx queue to reclaim completed descriptors from
 311  *
 312  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 313  *      and frees the associated buffers if possible.  Called with the Tx
 314  *      queue's lock held.
 315  */
 316 static inline void reclaim_completed_tx(struct adapter *adapter,
 317                                         struct sge_txq *q)
 318 {
 319         unsigned int reclaim = q->processed - q->cleaned;
 320
 321         if (reclaim) {
 322                 free_tx_desc(adapter, q, reclaim);
 323                 q->cleaned += reclaim;
 324                 q->in_use -= reclaim;
 325         }
 326 }
 327
 328 /**
 329  *      should_restart_tx - are there enough resources to restart a Tx queue?
 330  *      @q: the Tx queue
 331  *
 332  *      Checks if there are enough descriptors to restart a suspended Tx queue.
 333  */
 334 static inline int should_restart_tx(const struct sge_txq *q)
 335 {
 336         unsigned int r = q->processed - q->cleaned;
 337
 338         return q->in_use - r < (q->size >> 1);
 339 }
 340
 341 static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
 342 {
 343         if (q->use_pages) {
 344                 if (d->pg_chunk.page)
 345                         put_page(d->pg_chunk.page);
 346                 d->pg_chunk.page = NULL;
 347         } else {
 348                 kfree_skb(d->skb);
 349                 d->skb = NULL;
 350         }
 351 }
 352
 353 /**
 354  *      free_rx_bufs - free the Rx buffers on an SGE free list
 355  *      @pdev: the PCI device associated with the adapter
 356  *      @rxq: the SGE free list to clean up
 357  *
 358  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 359  *      this queue should be stopped before calling this function.
 360  */
 361 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 362 {
 363         unsigned int cidx = q->cidx;
 364
 365         while (q->credits--) {
 366                 struct rx_sw_desc *d = &q->sdesc[cidx];
 367
 368                 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 369                                  q->buf_size, PCI_DMA_FROMDEVICE);
 370                 clear_rx_desc(q, d);
 371                 if (++cidx == q->size)
 372                         cidx = 0;
 373         }
 374
 375         if (q->pg_chunk.page) {
 376                 __free_pages(q->pg_chunk.page, q->order);
 377                 q->pg_chunk.page = NULL;
 378         }
 379 }
 380
 381 /**
 382  *      add_one_rx_buf - add a packet buffer to a free-buffer list
 383  *      @va:  buffer start VA
 384  *      @len: the buffer length
 385  *      @d: the HW Rx descriptor to write
 386  *      @sd: the SW Rx descriptor to write
 387  *      @gen: the generation bit value
 388  *      @pdev: the PCI device associated with the adapter
 389  *
 390  *      Add a buffer of the given length to the supplied HW and SW Rx
 391  *      descriptors.
 392  */
 393 static inline int add_one_rx_buf(void *va, unsigned int len,
 394                                  struct rx_desc *d, struct rx_sw_desc *sd,
 395                                  unsigned int gen, struct pci_dev *pdev)
 396 {
 397         dma_addr_t mapping;
 398
 399         mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 400         if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 401                 return -ENOMEM;
 402
 403         pci_unmap_addr_set(sd, dma_addr, mapping);
 404
 405         d->addr_lo = cpu_to_be32(mapping);
 406         d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 407         wmb();
 408         d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 409         d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 410         return 0;
 411 }
 412
 413 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
 414                           unsigned int order)
 415 {
 416         if (!q->pg_chunk.page) {
 417                 q->pg_chunk.page = alloc_pages(gfp, order);
 418                 if (unlikely(!q->pg_chunk.page))
 419                         return -ENOMEM;
 420                 q->pg_chunk.va = page_address(q->pg_chunk.page);
 421                 q->pg_chunk.offset = 0;
 422         }
 423         sd->pg_chunk = q->pg_chunk;
 424
 425         q->pg_chunk.offset += q->buf_size;
 426         if (q->pg_chunk.offset == (PAGE_SIZE << order))
 427                 q->pg_chunk.page = NULL;
 428         else {
 429                 q->pg_chunk.va += q->buf_size;
 430                 get_page(q->pg_chunk.page);
 431         }
 432         return 0;
 433 }
 434
 435 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 436 {
 437         if (q->pend_cred >= q->credits / 4) {
 438                 q->pend_cred = 0;
 439                 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 440         }
 441 }
 442
 443 /**
 444  *      refill_fl - refill an SGE free-buffer list
 445  *      @adapter: the adapter
 446  *      @q: the free-list to refill
 447  *      @n: the number of new buffers to allocate
 448  *      @gfp: the gfp flags for allocating new buffers
 449  *
 450  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 451  *      allocated with the supplied gfp flags.  The caller must assure that
 452  *      @n does not exceed the queue's capacity.
 453  */
 454 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 455 {
 456         void *buf_start;
 457         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 458         struct rx_desc *d = &q->desc[q->pidx];
 459         unsigned int count = 0;
 460
 461         while (n--) {
 462                 int err;
 463
 464                 if (q->use_pages) {
 465                         if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
 466 nomem:                          q->alloc_failed++;
 467                                 break;
 468                         }
 469                         buf_start = sd->pg_chunk.va;
 470                 } else {
 471                         struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 472
 473                         if (!skb)
 474                                 goto nomem;
 475
 476                         sd->skb = skb;
 477                         buf_start = skb->data;
 478                 }
 479
 480                 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
 481                                      adap->pdev);
 482                 if (unlikely(err)) {
 483                         clear_rx_desc(q, sd);
 484                         break;
 485                 }
 486
 487                 d++;
 488                 sd++;
 489                 if (++q->pidx == q->size) {
 490                         q->pidx = 0;
 491                         q->gen ^= 1;
 492                         sd = q->sdesc;
 493                         d = q->desc;
 494                 }
 495                 count++;
 496         }
 497
 498         q->credits += count;
 499         q->pend_cred += count;
 500         ring_fl_db(adap, q);
 501
 502         return count;
 503 }
 504
 505 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 506 {
 507         refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 508                   GFP_ATOMIC | __GFP_COMP);
 509 }
 510
 511 /**
 512  *      recycle_rx_buf - recycle a receive buffer
 513  *      @adapter: the adapter
 514  *      @q: the SGE free list
 515  *      @idx: index of buffer to recycle
 516  *
 517  *      Recycles the specified buffer on the given free list by adding it at
 518  *      the next available slot on the list.
 519  */
 520 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 521                            unsigned int idx)
 522 {
 523         struct rx_desc *from = &q->desc[idx];
 524         struct rx_desc *to = &q->desc[q->pidx];
 525
 526         q->sdesc[q->pidx] = q->sdesc[idx];
 527         to->addr_lo = from->addr_lo;    /* already big endian */
 528         to->addr_hi = from->addr_hi;    /* likewise */
 529         wmb();
 530         to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 531         to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 532
 533         if (++q->pidx == q->size) {
 534                 q->pidx = 0;
 535                 q->gen ^= 1;
 536         }
 537
 538         q->credits++;
 539         q->pend_cred++;
 540         ring_fl_db(adap, q);
 541 }
 542
 543 /**
 544  *      alloc_ring - allocate resources for an SGE descriptor ring
 545  *      @pdev: the PCI device
 546  *      @nelem: the number of descriptors
 547  *      @elem_size: the size of each descriptor
 548  *      @sw_size: the size of the SW state associated with each ring element
 549  *      @phys: the physical address of the allocated ring
 550  *      @metadata: address of the array holding the SW state for the ring
 551  *
 552  *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 553  *      free buffer lists, or response queues.  Each SGE ring requires
 554  *      space for its HW descriptors plus, optionally, space for the SW state
 555  *      associated with each HW entry (the metadata).  The function returns
 556  *      three values: the virtual address for the HW ring (the return value
 557  *      of the function), the physical address of the HW ring, and the address
 558  *      of the SW ring.
 559  */
 560 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 561                         size_t sw_size, dma_addr_t * phys, void *metadata)
 562 {
 563         size_t len = nelem * elem_size;
 564         void *s = NULL;
 565         void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 566
 567         if (!p)
 568                 return NULL;
 569         if (sw_size && metadata) {
 570                 s = kcalloc(nelem, sw_size, GFP_KERNEL);
 571
 572                 if (!s) {
 573                         dma_free_coherent(&pdev->dev, len, p, *phys);
 574                         return NULL;
 575                 }
 576                 *(void **)metadata = s;
 577         }
 578         memset(p, 0, len);
 579         return p;
 580 }
 581
 582 /**
 583  *      t3_reset_qset - reset a sge qset
 584  *      @q: the queue set
 585  *
 586  *      Reset the qset structure.
 587  *      the NAPI structure is preserved in the event of
 588  *      the qset's reincarnation, for example during EEH recovery.
 589  */
 590 static void t3_reset_qset(struct sge_qset *q)
 591 {
 592         if (q->adap &&
 593             !(q->adap->flags & NAPI_INIT)) {
 594                 memset(q, 0, sizeof(*q));
 595                 return;
 596         }
 597
 598         q->adap = NULL;
 599         memset(&q->rspq, 0, sizeof(q->rspq));
 600         memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 601         memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 602         q->txq_stopped = 0;
 603         q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 604         q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
 605 }
 606
 607
 608 /**
 609  *      free_qset - free the resources of an SGE queue set
 610  *      @adapter: the adapter owning the queue set
 611  *      @q: the queue set
 612  *
 613  *      Release the HW and SW resources associated with an SGE queue set, such
 614  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 615  *      queue set must be quiesced prior to calling this.
 616  */
 617 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 618 {
 619         int i;
 620         struct pci_dev *pdev = adapter->pdev;
 621
 622         for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 623                 if (q->fl[i].desc) {
 624                         spin_lock_irq(&adapter->sge.reg_lock);
 625                         t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 626                         spin_unlock_irq(&adapter->sge.reg_lock);
 627                         free_rx_bufs(pdev, &q->fl[i]);
 628                         kfree(q->fl[i].sdesc);
 629                         dma_free_coherent(&pdev->dev,
 630                                           q->fl[i].size *
 631                                           sizeof(struct rx_desc), q->fl[i].desc,
 632                                           q->fl[i].phys_addr);
 633                 }
 634
 635         for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 636                 if (q->txq[i].desc) {
 637                         spin_lock_irq(&adapter->sge.reg_lock);
 638                         t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 639                         spin_unlock_irq(&adapter->sge.reg_lock);
 640                         if (q->txq[i].sdesc) {
 641                                 free_tx_desc(adapter, &q->txq[i],
 642                                              q->txq[i].in_use);
 643                                 kfree(q->txq[i].sdesc);
 644                         }
 645                         dma_free_coherent(&pdev->dev,
 646                                           q->txq[i].size *
 647                                           sizeof(struct tx_desc),
 648                                           q->txq[i].desc, q->txq[i].phys_addr);
 649                         __skb_queue_purge(&q->txq[i].sendq);
 650                 }
 651
 652         if (q->rspq.desc) {
 653                 spin_lock_irq(&adapter->sge.reg_lock);
 654                 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 655                 spin_unlock_irq(&adapter->sge.reg_lock);
 656                 dma_free_coherent(&pdev->dev,
 657                                   q->rspq.size * sizeof(struct rsp_desc),
 658                                   q->rspq.desc, q->rspq.phys_addr);
 659         }
 660
 661         t3_reset_qset(q);
 662 }
 663
 664 /**
 665  *      init_qset_cntxt - initialize an SGE queue set context info
 666  *      @qs: the queue set
 667  *      @id: the queue set id
 668  *
 669  *      Initializes the TIDs and context ids for the queues of a queue set.
 670  */
 671 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 672 {
 673         qs->rspq.cntxt_id = id;
 674         qs->fl[0].cntxt_id = 2 * id;
 675         qs->fl[1].cntxt_id = 2 * id + 1;
 676         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 677         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 678         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 679         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 680         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 681 }
 682
 683 /**
 684  *      sgl_len - calculates the size of an SGL of the given capacity
 685  *      @n: the number of SGL entries
 686  *
 687  *      Calculates the number of flits needed for a scatter/gather list that
 688  *      can hold the given number of entries.
 689  */
 690 static inline unsigned int sgl_len(unsigned int n)
 691 {
 692         /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 693         return (3 * n) / 2 + (n & 1);
 694 }
 695
 696 /**
 697  *      flits_to_desc - returns the num of Tx descriptors for the given flits
 698  *      @n: the number of flits
 699  *
 700  *      Calculates the number of Tx descriptors needed for the supplied number
 701  *      of flits.
 702  */
 703 static inline unsigned int flits_to_desc(unsigned int n)
 704 {
 705         BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 706         return flit_desc_map[n];
 707 }
 708
 709 /**
 710  *      get_packet - return the next ingress packet buffer from a free list
 711  *      @adap: the adapter that received the packet
 712  *      @fl: the SGE free list holding the packet
 713  *      @len: the packet length including any SGE padding
 714  *      @drop_thres: # of remaining buffers before we start dropping packets
 715  *
 716  *      Get the next packet from a free list and complete setup of the
 717  *      sk_buff.  If the packet is small we make a copy and recycle the
 718  *      original buffer, otherwise we use the original buffer itself.  If a
 719  *      positive drop threshold is supplied packets are dropped and their
 720  *      buffers recycled if (a) the number of remaining buffers is under the
 721  *      threshold and the packet is too big to copy, or (b) the packet should
 722  *      be copied but there is no memory for the copy.
 723  */
 724 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 725                                   unsigned int len, unsigned int drop_thres)
 726 {
 727         struct sk_buff *skb = NULL;
 728         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 729
 730         prefetch(sd->skb->data);
             /* the buffer at cidx is consumed; recycle_rx_buf() re-adds a credit */
 731         fl->credits--;
 732
             /* small packet: copy it out and re-post the original buffer */
 733         if (len <= SGE_RX_COPY_THRES) {
 734                 skb = alloc_skb(len, GFP_ATOMIC);
 735                 if (likely(skb != NULL)) {
 736                         __skb_put(skb, len);
                             /* sync for the CPU around the copy, then back to the device */
 737                         pci_dma_sync_single_for_cpu(adap->pdev,
 738                                             pci_unmap_addr(sd, dma_addr), len,
 739                                             PCI_DMA_FROMDEVICE);
 740                         memcpy(skb->data, sd->skb->data, len);
 741                         pci_dma_sync_single_for_device(adap->pdev,
 742                                             pci_unmap_addr(sd, dma_addr), len,
 743                                             PCI_DMA_FROMDEVICE);
                     /* no memory for the copy and dropping not allowed: pass the buffer up */
 744                 } else if (!drop_thres)
 745                         goto use_orig_buf;
                     /*
                      * Reached after a successful copy (skb is the copy) or when the
                      * packet is dropped (skb is still NULL, so NULL is returned).
                      */
 746 recycle:
 747                 recycle_rx_buf(adap, fl, fl->cidx);
 748                 return skb;
 749         }
 750
             /*
              * Large packet: if the list has fewer credits than the drop
              * threshold and not a single buffer could be added, drop the
              * packet and recycle its buffer.
              */
 751         if (unlikely(fl->credits < drop_thres) &&
 752             refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 753                       GFP_ATOMIC | __GFP_COMP) == 0)
 754                 goto recycle;
 755
             /* hand the original sk_buff to the caller and refill the list */
 756 use_orig_buf:
 757         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 758                          fl->buf_size, PCI_DMA_FROMDEVICE);
 759         skb = sd->skb;
 760         skb_put(skb, len);
 761         __refill_fl(adap, fl);
 762         return skb;
 763 }
 764
 765 /**
 766  *      get_packet_pg - return the next ingress packet buffer from a free list
 767  *      @adap: the adapter that received the packet
 768  *      @fl: the SGE free list holding the packet
 769  *      @len: the packet length including any SGE padding
 770  *      @drop_thres: # of remaining buffers before we start dropping packets
 771  *
 772  *      Get the next packet from a free list populated with page chunks.
 773  *      If the packet is small we make a copy and recycle the original buffer,
 774  *      otherwise we attach the original buffer as a page fragment to a fresh
 775  *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 776  *      and their buffers recycled if (a) the number of remaining buffers is
 777  *      under the threshold and the packet is too big to copy, or (b) there's
 778  *      no system memory.
 779  *
 780  *      Note: this function is similar to @get_packet but deals with Rx buffers
 781  *      that are page chunks rather than sk_buffs.
 782  */
 783 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 784                                      struct sge_rspq *q, unsigned int len,
 785                                      unsigned int drop_thres)
 786 {
 787         struct sk_buff *newskb, *skb;
 788         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 789
 790         newskb = skb = q->pg_skb;
 791
 792         if (!skb && (len <= SGE_RX_COPY_THRES)) {
 793                 newskb = alloc_skb(len, GFP_ATOMIC);
 794                 if (likely(newskb != NULL)) {
 795                         __skb_put(newskb, len);
 796                         pci_dma_sync_single_for_cpu(adap->pdev,
 797                                             pci_unmap_addr(sd, dma_addr), len,
 798                                             PCI_DMA_FROMDEVICE);
 799                         memcpy(newskb->data, sd->pg_chunk.va, len);
 800                         pci_dma_sync_single_for_device(adap->pdev,
 801                                             pci_unmap_addr(sd, dma_addr), len,
 802                                             PCI_DMA_FROMDEVICE);
 803                 } else if (!drop_thres)
 804                         return NULL;
 805 recycle:
 806                 fl->credits--;
 807                 recycle_rx_buf(adap, fl, fl->cidx);
 808                 q->rx_recycle_buf++;
 809                 return newskb;
 810         }
 811
 812         if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 813                 goto recycle;
 814
 815         if (!skb)
 816                 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 817         if (unlikely(!newskb)) {
 818                 if (!drop_thres)
 819                         return NULL;
 820                 goto recycle;
 821         }
 822
 823         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 824                          fl->buf_size, PCI_DMA_FROMDEVICE);
 825         if (!skb) {
 826                 __skb_put(newskb, SGE_RX_PULL_LEN);
 827                 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 828                 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 829                                    sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 830                                    len - SGE_RX_PULL_LEN);
 831                 newskb->len = len;
 832                 newskb->data_len = len - SGE_RX_PULL_LEN;
 833         } else {
 834                 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 835                                    sd->pg_chunk.page,
 836                                    sd->pg_chunk.offset, len);
 837                 newskb->len += len;
 838                 newskb->data_len += len;
 839         }
 840         newskb->truesize += newskb->data_len;
 841
 842         fl->credits--;
 843         /*
 844          * We do not refill FLs here, we let the caller do it to overlap a
 845          * prefetch.
 846          */
 847         return newskb;
 848 }
 849
 850 /**
 851  *      get_imm_packet - return the next ingress packet buffer from a response
 852  *      @resp: the response descriptor containing the packet data
 853  *
 854  *      Return a packet containing the immediate data of the given response.
 855  */
 856 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 857 {
 858         struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 859
 860         if (skb) {
 861                 __skb_put(skb, IMMED_PKT_SIZE);
 862                 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 863         }
 864         return skb;
 865 }
 866
 867 /**
 868  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 869  *      @skb: the packet
 870  *
 871  *      Returns the number of Tx descriptors needed for the given Ethernet
 872  *      packet.  Ethernet packets require addition of WR and CPL headers.
 873  */
 874 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 875 {
 876         unsigned int flits;
 877
 878         if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 879                 return 1;
 880
 881         flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 882         if (skb_shinfo(skb)->gso_size)
 883                 flits++;
 884         return flits_to_desc(flits);
 885 }
 886
 887 /**
 888  *      make_sgl - populate a scatter/gather list for a packet
 889  *      @skb: the packet
 890  *      @sgp: the SGL to populate
 891  *      @start: start address of skb main body data to include in the SGL
 892  *      @len: length of skb main body data to include in the SGL
 893  *      @pdev: the PCI device
 894  *
 895  *      Generates a scatter/gather list for the buffers that make up a packet
 896  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 897  *      appropriately.
 898  */
 899 static inline unsigned int make_sgl(const struct sk_buff *skb,
 900                                     struct sg_ent *sgp, unsigned char *start,
 901                                     unsigned int len, struct pci_dev *pdev)
 902 {
 903         dma_addr_t mapping;
 904         unsigned int i, j = 0, nfrags;
 905
 906         if (len) {
 907                 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 908                 sgp->len[0] = cpu_to_be32(len);
 909                 sgp->addr[0] = cpu_to_be64(mapping);
 910                 j = 1;
 911         }
 912
 913         nfrags = skb_shinfo(skb)->nr_frags;
 914         for (i = 0; i < nfrags; i++) {
 915                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 916
 917                 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 918                                        frag->size, PCI_DMA_TODEVICE);
 919                 sgp->len[j] = cpu_to_be32(frag->size);
 920                 sgp->addr[j] = cpu_to_be64(mapping);
 921                 j ^= 1;
 922                 if (j == 0)
 923                         ++sgp;
 924         }
 925         if (j)
 926                 sgp->len[j] = 0;
 927         return ((nfrags + (len != 0)) * 3) / 2 + j;
 928 }
 929
 930 /**
 931  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 932  *      @adap: the adapter
 933  *      @q: the Tx queue
 934  *
 935  *      Ring the doorbel if a Tx queue is asleep.  There is a natural race,
 936  *      where the HW is going to sleep just after we checked, however,
 937  *      then the interrupt handler will detect the outstanding TX packet
 938  *      and ring the doorbell for us.
 939  *
 940  *      When GTS is disabled we unconditionally ring the doorbell.
 941  */
 942 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
 943 {
 944 #if USE_GTS
 945         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 946         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 947                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 948                 t3_write_reg(adap, A_SG_KDOORBELL,
 949                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 950         }
 951 #else
 952         wmb();                  /* write descriptors before telling HW */
 953         t3_write_reg(adap, A_SG_KDOORBELL,
 954                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 955 #endif
 956 }
 957
/*
 * When SGE_NUM_GENBITS is 2, store the generation value @gen (converted to
 * big endian) in the last flit of Tx descriptor @d.  Otherwise this is a
 * no-op.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
        d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
#endif
}
 964
/**
 *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *      @ndesc: number of Tx descriptors spanned by the SGL
 *      @skb: the packet corresponding to the WR
 *      @d: first Tx descriptor to be written
 *      @pidx: index of the above descriptor
 *      @q: the SGE Tx queue
 *      @sgl: the SGL
 *      @flits: number of flits to the start of the SGL in the first descriptor
 *      @sgl_flits: the SGL size in flits
 *      @gen: the Tx descriptor generation
 *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *      Write a work request header and an associated SGL.  If the SGL is
 *      small enough to fit into one Tx descriptor it has already been written
 *      and we just need to write the WR header.  Otherwise we distribute the
 *      SGL across the number of descriptors it spans.
 *
 *      In both cases the wr_lo word of the first descriptor, which carries
 *      its generation value, is written last and only after a write barrier.
 */
static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
                             struct tx_desc *d, unsigned int pidx,
                             const struct sge_txq *q,
                             const struct sg_ent *sgl,
                             unsigned int flits, unsigned int sgl_flits,
                             unsigned int gen, __be32 wr_hi,
                             __be32 wr_lo)
{
        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
        struct tx_sw_desc *sd = &q->sdesc[pidx];

        /* record the packet in the SW descriptor of the first HW descriptor */
        sd->skb = skb;
        if (need_skb_unmap()) {
                sd->fragidx = 0;
                sd->addr_idx = 0;
                sd->sflit = flits;
        }

        if (likely(ndesc == 1)) {
                /* single descriptor: the WR both starts and ends here */
                sd->eop = 1;
                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
                                   V_WR_SGLSFLT(flits)) | wr_hi;
                /* order the writes above before the generation is written */
                wmb();
                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
                                   V_WR_GEN(gen)) | wr_lo;
                wr_gen2(d, gen);
        } else {
                /* remember the first descriptor and its generation */
                unsigned int ogen = gen;
                const u64 *fp = (const u64 *)sgl;
                struct work_request_hdr *wp = wrp;

                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
                                   V_WR_SGLSFLT(flits)) | wr_hi;

                while (sgl_flits) {
                        /* room left for SGL flits in the current descriptor */
                        unsigned int avail = WR_FLITS - flits;

                        if (avail > sgl_flits)
                                avail = sgl_flits;
                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
                        sgl_flits -= avail;
                        ndesc--;
                        if (!sgl_flits)
                                break;

                        /* advance to the next descriptor, wrapping the ring */
                        fp += avail;
                        d++;
                        sd->eop = 0;
                        sd++;
                        if (++pidx == q->size) {
                                pidx = 0;
                                gen ^= 1;
                                d = q->desc;
                                sd = q->sdesc;
                        }

                        /* continuation header: the SGL resumes at flit 1 */
                        sd->skb = skb;
                        wrp = (struct work_request_hdr *)d;
                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
                                           V_WR_SGLSFLT(1)) | wr_hi;
                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
                                                        sgl_flits + 1)) |
                                           V_WR_GEN(gen)) | wr_lo;
                        wr_gen2(d, gen);
                        flits = 1;
                }
                /* mark the last descriptor written as the end of the WR */
                sd->eop = 1;
                wrp->wr_hi |= htonl(F_WR_EOP);
                /* first descriptor's wr_lo goes last, after the barrier */
                wmb();
                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
                wr_gen2((struct tx_desc *)wp, ogen);
                /* the SGL must have used exactly the descriptors reserved */
                WARN_ON(ndesc != 0);
        }
}
1058
1059 /**
1060  *      write_tx_pkt_wr - write a TX_PKT work request
1061  *      @adap: the adapter
1062  *      @skb: the packet to send
1063  *      @pi: the egress interface
1064  *      @pidx: index of the first Tx descriptor to write
1065  *      @gen: the generation value to use
1066  *      @q: the Tx queue
1067  *      @ndesc: number of descriptors the packet will occupy
1068  *      @compl: the value of the COMPL bit to use
1069  *
1070  *      Generate a TX_PKT work request to send the supplied packet.
1071  */
1072 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1073                             const struct port_info *pi,
1074                             unsigned int pidx, unsigned int gen,
1075                             struct sge_txq *q, unsigned int ndesc,
1076                             unsigned int compl)
1077 {
1078         unsigned int flits, sgl_flits, cntrl, tso_info;
1079         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1080         struct tx_desc *d = &q->desc[pidx];
1081         struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1082
1083         cpl->len = htonl(skb->len | 0x80000000);
1084         cntrl = V_TXPKT_INTF(pi->port_id);
1085
1086         if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1087                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1088
1089         tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1090         if (tso_info) {
1091                 int eth_type;
1092                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1093
1094                 d->flit[2] = 0;
1095                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1096                 hdr->cntrl = htonl(cntrl);
1097                 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1098                     CPL_ETH_II : CPL_ETH_II_VLAN;
1099                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1100                     V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1101                     V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1102                 hdr->lso_info = htonl(tso_info);
1103                 flits = 3;
1104         } else {
1105                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1106                 cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1107                 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1108                 cpl->cntrl = htonl(cntrl);
1109
1110                 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1111                         q->sdesc[pidx].skb = NULL;
1112                         if (!skb->data_len)
1113                                 skb_copy_from_linear_data(skb, &d->flit[2],
1114                                                           skb->len);
1115                         else
1116                                 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1117
1118                         flits = (skb->len + 7) / 8 + 2;
1119                         cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1120                                               V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1121                                               | F_WR_SOP | F_WR_EOP | compl);
1122                         wmb();
1123                         cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1124                                               V_WR_TID(q->token));
1125                         wr_gen2(d, gen);
1126                         kfree_skb(skb);
1127                         return;
1128                 }
1129
1130                 flits = 2;
1131         }
1132
1133         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1134         sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1135
1136         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1137                          htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1138                          htonl(V_WR_TID(q->token)));
1139 }
1140
/*
 * Stop the net device Tx queue @txq, flag the Ethernet Tx queue as stopped
 * in the txq_stopped bitmap of queue set @qs and bump the stop counter of
 * @q.
 */
static inline void t3_stop_tx_queue(struct netdev_queue *txq,
                                    struct sge_qset *qs, struct sge_txq *q)
{
        netif_tx_stop_queue(txq);
        set_bit(TXQ_ETH, &qs->txq_stopped);
        q->stops++;
}
1148
/**
 *      t3_eth_xmit - add a packet to the Ethernet Tx queue
 *      @skb: the packet
 *      @dev: the egress net device
 *
 *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
 *
 *      Returns NETDEV_TX_OK when the packet has been consumed (queued to the
 *      HW or dropped for being too short) and NETDEV_TX_BUSY when the Tx
 *      ring has too few free descriptors for it.
 */
int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
        int qidx;
        unsigned int ndesc, pidx, credits, gen, compl;
        const struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
        struct netdev_queue *txq;
        struct sge_qset *qs;
        struct sge_txq *q;

        /*
         * The chip min packet length is 9 octets but play safe and reject
         * anything shorter than an Ethernet header.
         */
        if (unlikely(skb->len < ETH_HLEN)) {
                dev_kfree_skb(skb);
                return NETDEV_TX_OK;
        }

        /* the skb's queue mapping selects both the queue set and the txq */
        qidx = skb_get_queue_mapping(skb);
        qs = &pi->qs[qidx];
        q = &qs->txq[TXQ_ETH];
        txq = netdev_get_tx_queue(dev, qidx);

        spin_lock(&q->lock);
        reclaim_completed_tx(adap, q);

        credits = q->size - q->in_use;
        ndesc = calc_tx_descs(skb);

        if (unlikely(credits < ndesc)) {
                t3_stop_tx_queue(txq, qs, q);
                dev_err(&adap->pdev->dev,
                        "%s: Tx ring %u full while queue awake!\n",
                        dev->name, q->cntxt_id & 7);
                spin_unlock(&q->lock);
                return NETDEV_TX_BUSY;
        }

        q->in_use += ndesc;
        /*
         * Stop the queue once the credits left after this packet fall below
         * the stop threshold, but wake it again right away if
         * should_restart_tx() says so and the stopped bit is still set.
         */
        if (unlikely(credits - ndesc < q->stop_thres)) {
                t3_stop_tx_queue(txq, qs, q);

                if (should_restart_tx(q) &&
                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
                        q->restarts++;
                        netif_tx_wake_queue(txq);
                }
        }

        gen = q->gen;
        q->unacked += ndesc;
        /*
         * Bit 3 of the running unacked count is moved into the WR COMPL bit
         * position, then the count is reduced modulo 8.
         */
        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
        q->unacked &= 7;
        /* reserve ndesc descriptors, flipping the generation on wrap-around */
        pidx = q->pidx;
        q->pidx += ndesc;
        if (q->pidx >= q->size) {
                q->pidx -= q->size;
                q->gen ^= 1;
        }

        /* update port statistics */
        if (skb->ip_summed == CHECKSUM_COMPLETE)
                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
        if (skb_shinfo(skb)->gso_size)
                qs->port_stats[SGE_PSTAT_TSO]++;
        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
                qs->port_stats[SGE_PSTAT_VLANINS]++;

        dev->trans_start = jiffies;
        spin_unlock(&q->lock);

        /*
         * We do not use Tx completion interrupts to free DMAd Tx packets.
         * This is good for performance but means that we rely on new Tx
         * packets arriving to run the destructors of completed packets,
         * which open up space in their sockets' send queues.  Sometimes
         * we do not get such new packets causing Tx to stall.  A single
         * UDP transmitter is a good example of this situation.  We have
         * a clean up timer that periodically reclaims completed packets
         * but it doesn't run often enough (nor do we want it to) to prevent
         * lengthy stalls.  A solution to this problem is to run the
         * destructor early, after the packet is queued but before it's DMAd.
         * A con is that we lie to socket memory accounting, but the amount
         * of extra memory is reasonable (limited by the number of Tx
         * descriptors), the packets do actually get freed quickly by new
         * packets almost always, and for protocols like TCP that wait for
         * acks to really free up the data the extra memory is even less.
         * On the positive side we run the destructors on the sending CPU
         * rather than on a potentially different completing CPU, usually a
         * good thing.  We also run them without holding our Tx queue lock,
         * unlike what reclaim_completed_tx() would otherwise do.
         *
         * Run the destructor before telling the DMA engine about the packet
         * to make sure it doesn't complete and get freed prematurely.
         */
        if (likely(!skb_shared(skb)))
                skb_orphan(skb);

        /* the descriptors are filled in outside the lock, then HW is told */
        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
        check_ring_tx_db(adap, q);
        return NETDEV_TX_OK;
}
1259
1260 /**
1261  *      write_imm - write a packet into a Tx descriptor as immediate data
1262  *      @d: the Tx descriptor to write
1263  *      @skb: the packet
1264  *      @len: the length of packet data to write as immediate data
1265  *      @gen: the generation bit value to write
1266  *
1267  *      Writes a packet as immediate data into a Tx descriptor.  The packet
1268  *      contains a work request at its beginning.  We must write the packet
1269  *      carefully so the SGE doesn't read it accidentally before it's written
1270  *      in its entirety.
1271  */
1272 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1273                              unsigned int len, unsigned int gen)
1274 {
1275         struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1276         struct work_request_hdr *to = (struct work_request_hdr *)d;
1277
1278         if (likely(!skb->data_len))
1279                 memcpy(&to[1], &from[1], len - sizeof(*from));
1280         else
1281                 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1282
1283         to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1284                                         V_WR_BCNTLFLT(len & 7));
1285         wmb();
1286         to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1287                                         V_WR_LEN((len + 7) / 8));
1288         wr_gen2(d, gen);
1289         kfree_skb(skb);
1290 }
1291
1292 /**
1293  *      check_desc_avail - check descriptor availability on a send queue
1294  *      @adap: the adapter
1295  *      @q: the send queue
1296  *      @skb: the packet needing the descriptors
1297  *      @ndesc: the number of Tx descriptors needed
1298  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1299  *
1300  *      Checks if the requested number of Tx descriptors is available on an
1301  *      SGE send queue.  If the queue is already suspended or not enough
1302  *      descriptors are available the packet is queued for later transmission.
1303  *      Must be called with the Tx queue locked.
1304  *
1305  *      Returns 0 if enough descriptors are available, 1 if there aren't
1306  *      enough descriptors and the packet has been queued, and 2 if the caller
1307  *      needs to retry because there weren't enough descriptors at the
1308  *      beginning of the call but some freed up in the mean time.
1309  */
1310 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1311                                    struct sk_buff *skb, unsigned int ndesc,
1312                                    unsigned int qid)
1313 {
1314         if (unlikely(!skb_queue_empty(&q->sendq))) {
1315               addq_exit:__skb_queue_tail(&q->sendq, skb);
1316                 return 1;
1317         }
1318         if (unlikely(q->size - q->in_use < ndesc)) {
1319                 struct sge_qset *qs = txq_to_qset(q, qid);
1320
1321                 set_bit(qid, &qs->txq_stopped);
1322                 smp_mb__after_clear_bit();
1323
1324                 if (should_restart_tx(q) &&
1325                     test_and_clear_bit(qid, &qs->txq_stopped))
1326                         return 2;
1327
1328                 q->stops++;
1329                 goto addq_exit;
1330         }
1331         return 0;
1332 }
1333
1334 /**
1335  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1336  *      @q: the SGE control Tx queue
1337  *
1338  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1339  *      that send only immediate data (presently just the control queues) and
1340  *      thus do not have any sk_buffs to release.
1341  */
1342 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1343 {
1344         unsigned int reclaim = q->processed - q->cleaned;
1345
1346         q->in_use -= reclaim;
1347         q->cleaned += reclaim;
1348 }
1349
1350 static inline int immediate(const struct sk_buff *skb)
1351 {
1352         return skb->len <= WR_LEN;
1353 }
1354
1355 /**
1356  *      ctrl_xmit - send a packet through an SGE control Tx queue
1357  *      @adap: the adapter
1358  *      @q: the control queue
1359  *      @skb: the packet
1360  *
1361  *      Send a packet through an SGE control Tx queue.  Packets sent through
1362  *      a control queue must fit entirely as immediate data in a single Tx
1363  *      descriptor and have no page fragments.
1364  */
1365 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1366                      struct sk_buff *skb)
1367 {
1368         int ret;
1369         struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1370
1371         if (unlikely(!immediate(skb))) {
1372                 WARN_ON(1);
1373                 dev_kfree_skb(skb);
1374                 return NET_XMIT_SUCCESS;
1375         }
1376
1377         wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1378         wrp->wr_lo = htonl(V_WR_TID(q->token));
1379
1380         spin_lock(&q->lock);
1381       again:reclaim_completed_tx_imm(q);
1382
1383         ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1384         if (unlikely(ret)) {
1385                 if (ret == 1) {
1386                         spin_unlock(&q->lock);
1387                         return NET_XMIT_CN;
1388                 }
1389                 goto again;
1390         }
1391
1392         write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1393
1394         q->in_use++;
1395         if (++q->pidx >= q->size) {
1396                 q->pidx = 0;
1397                 q->gen ^= 1;
1398         }
1399         spin_unlock(&q->lock);
1400         wmb();
1401         t3_write_reg(adap, A_SG_KDOORBELL,
1402                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1403         return NET_XMIT_SUCCESS;
1404 }
1405
1406 /**
1407  *      restart_ctrlq - restart a suspended control queue
1408  *      @qs: the queue set cotaining the control queue
1409  *
1410  *      Resumes transmission on a suspended Tx control queue.
1411  */
1412 static void restart_ctrlq(unsigned long data)
1413 {
1414         struct sk_buff *skb;
1415         struct sge_qset *qs = (struct sge_qset *)data;
1416         struct sge_txq *q = &qs->txq[TXQ_CTRL];
1417
1418         spin_lock(&q->lock);
1419       again:reclaim_completed_tx_imm(q);
1420
1421         while (q->in_use < q->size &&
1422                (skb = __skb_dequeue(&q->sendq)) != NULL) {
1423
1424                 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1425
1426                 if (++q->pidx >= q->size) {
1427                         q->pidx = 0;
1428                         q->gen ^= 1;
1429                 }
1430                 q->in_use++;
1431         }
1432
1433         if (!skb_queue_empty(&q->sendq)) {
1434                 set_bit(TXQ_CTRL, &qs->txq_stopped);
1435                 smp_mb__after_clear_bit();
1436
1437                 if (should_restart_tx(q) &&
1438                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1439                         goto again;
1440                 q->stops++;
1441         }
1442
1443         spin_unlock(&q->lock);
1444         wmb();
1445         t3_write_reg(qs->adap, A_SG_KDOORBELL,
1446                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1447 }
1448
1449 /*
1450  * Send a management message through control queue 0
1451  */
1452 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1453 {
1454         int ret;
1455         local_bh_disable();
1456         ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1457         local_bh_enable();
1458
1459         return ret;
1460 }
1461
1462 /**
1463  *      deferred_unmap_destructor - unmap a packet when it is freed
1464  *      @skb: the packet
1465  *
1466  *      This is the packet destructor used for Tx packets that need to remain
1467  *      mapped until they are freed rather than until their Tx descriptors are
1468  *      freed.
1469  */
1470 static void deferred_unmap_destructor(struct sk_buff *skb)
1471 {
1472         int i;
1473         const dma_addr_t *p;
1474         const struct skb_shared_info *si;
1475         const struct deferred_unmap_info *dui;
1476
1477         dui = (struct deferred_unmap_info *)skb->head;
1478         p = dui->addr;
1479
1480         if (skb->tail - skb->transport_header)
1481                 pci_unmap_single(dui->pdev, *p++,
1482                                  skb->tail - skb->transport_header,
1483                                  PCI_DMA_TODEVICE);
1484
1485         si = skb_shinfo(skb);
1486         for (i = 0; i < si->nr_frags; i++)
1487                 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1488                                PCI_DMA_TODEVICE);
1489 }
1490
1491 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1492                                      const struct sg_ent *sgl, int sgl_flits)
1493 {
1494         dma_addr_t *p;
1495         struct deferred_unmap_info *dui;
1496
1497         dui = (struct deferred_unmap_info *)skb->head;
1498         dui->pdev = pdev;
1499         for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1500                 *p++ = be64_to_cpu(sgl->addr[0]);
1501                 *p++ = be64_to_cpu(sgl->addr[1]);
1502         }
1503         if (sgl_flits)
1504                 *p = be64_to_cpu(sgl->addr[0]);
1505 }
1506
1507 /**
1508  *      write_ofld_wr - write an offload work request
1509  *      @adap: the adapter
1510  *      @skb: the packet to send
1511  *      @q: the Tx queue
1512  *      @pidx: index of the first Tx descriptor to write
1513  *      @gen: the generation value to use
1514  *      @ndesc: number of descriptors the packet will occupy
1515  *
1516  *      Write an offload work request to send the supplied packet.  The packet
1517  *      data already carry the work request with most fields populated.
1518  */
1519 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1520                           struct sge_txq *q, unsigned int pidx,
1521                           unsigned int gen, unsigned int ndesc)
1522 {
1523         unsigned int sgl_flits, flits;
1524         struct work_request_hdr *from;
1525         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1526         struct tx_desc *d = &q->desc[pidx];
1527
1528         if (immediate(skb)) {
1529                 q->sdesc[pidx].skb = NULL;
1530                 write_imm(d, skb, skb->len, gen);
1531                 return;
1532         }
1533
1534         /* Only TX_DATA builds SGLs */
1535
1536         from = (struct work_request_hdr *)skb->data;
1537         memcpy(&d->flit[1], &from[1],
1538                skb_transport_offset(skb) - sizeof(*from));
1539
1540         flits = skb_transport_offset(skb) / 8;
1541         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1542         sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1543                              skb->tail - skb->transport_header,
1544                              adap->pdev);
1545         if (need_skb_unmap()) {
1546                 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1547                 skb->destructor = deferred_unmap_destructor;
1548         }
1549
1550         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1551                          gen, from->wr_hi, from->wr_lo);
1552 }
1553
1554 /**
1555  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1556  *      @skb: the packet
1557  *
1558  *      Returns the number of Tx descriptors needed for the given offload
1559  *      packet.  These packets are already fully constructed.
1560  */
1561 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1562 {
1563         unsigned int flits, cnt;
1564
1565         if (skb->len <= WR_LEN)
1566                 return 1;       /* packet fits as immediate data */
1567
1568         flits = skb_transport_offset(skb) / 8;  /* headers */
1569         cnt = skb_shinfo(skb)->nr_frags;
1570         if (skb->tail != skb->transport_header)
1571                 cnt++;
1572         return flits_to_desc(flits + sgl_len(cnt));
1573 }
1574
1575 /**
1576  *      ofld_xmit - send a packet through an offload queue
1577  *      @adap: the adapter
1578  *      @q: the Tx offload queue
1579  *      @skb: the packet
1580  *
1581  *      Send an offload packet through an SGE offload queue.
1582  */
1583 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1584                      struct sk_buff *skb)
1585 {
1586         int ret;
1587         unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1588
1589         spin_lock(&q->lock);
1590       again:reclaim_completed_tx(adap, q);
1591
1592         ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1593         if (unlikely(ret)) {
1594                 if (ret == 1) {
1595                         skb->priority = ndesc;  /* save for restart */
1596                         spin_unlock(&q->lock);
1597                         return NET_XMIT_CN;
1598                 }
1599                 goto again;
1600         }
1601
1602         gen = q->gen;
1603         q->in_use += ndesc;
1604         pidx = q->pidx;
1605         q->pidx += ndesc;
1606         if (q->pidx >= q->size) {
1607                 q->pidx -= q->size;
1608                 q->gen ^= 1;
1609         }
1610         spin_unlock(&q->lock);
1611
1612         write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1613         check_ring_tx_db(adap, q);
1614         return NET_XMIT_SUCCESS;
1615 }
1616
1617 /**
1618  *      restart_offloadq - restart a suspended offload queue
1619  *      @qs: the queue set cotaining the offload queue
1620  *
1621  *      Resumes transmission on a suspended Tx offload queue.
1622  */
1623 static void restart_offloadq(unsigned long data)
1624 {
1625         struct sk_buff *skb;
1626         struct sge_qset *qs = (struct sge_qset *)data;
1627         struct sge_txq *q = &qs->txq[TXQ_OFLD];
1628         const struct port_info *pi = netdev_priv(qs->netdev);
1629         struct adapter *adap = pi->adapter;
1630
1631         spin_lock(&q->lock);
1632       again:reclaim_completed_tx(adap, q);
1633
1634         while ((skb = skb_peek(&q->sendq)) != NULL) {
1635                 unsigned int gen, pidx;
1636                 unsigned int ndesc = skb->priority;
1637
1638                 if (unlikely(q->size - q->in_use < ndesc)) {
1639                         set_bit(TXQ_OFLD, &qs->txq_stopped);
1640                         smp_mb__after_clear_bit();
1641
1642                         if (should_restart_tx(q) &&
1643                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1644                                 goto again;
1645                         q->stops++;
1646                         break;
1647                 }
1648
1649                 gen = q->gen;
1650                 q->in_use += ndesc;
1651                 pidx = q->pidx;
1652                 q->pidx += ndesc;
1653                 if (q->pidx >= q->size) {
1654                         q->pidx -= q->size;
1655                         q->gen ^= 1;
1656                 }
1657                 __skb_unlink(skb, &q->sendq);
1658                 spin_unlock(&q->lock);
1659
1660                 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1661                 spin_lock(&q->lock);
1662         }
1663         spin_unlock(&q->lock);
1664
1665 #if USE_GTS
1666         set_bit(TXQ_RUNNING, &q->flags);
1667         set_bit(TXQ_LAST_PKT_DB, &q->flags);
1668 #endif
1669         wmb();
1670         t3_write_reg(adap, A_SG_KDOORBELL,
1671                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1672 }
1673
1674 /**
1675  *      queue_set - return the queue set a packet should use
1676  *      @skb: the packet
1677  *
1678  *      Maps a packet to the SGE queue set it should use.  The desired queue
1679  *      set is carried in bits 1-3 in the packet's priority.
1680  */
1681 static inline int queue_set(const struct sk_buff *skb)
1682 {
1683         return skb->priority >> 1;
1684 }
1685
1686 /**
1687  *      is_ctrl_pkt - return whether an offload packet is a control packet
1688  *      @skb: the packet
1689  *
1690  *      Determines whether an offload packet should use an OFLD or a CTRL
1691  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1692  */
1693 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1694 {
1695         return skb->priority & 1;
1696 }
1697
1698 /**
1699  *      t3_offload_tx - send an offload packet
1700  *      @tdev: the offload device to send to
1701  *      @skb: the packet
1702  *
1703  *      Sends an offload packet.  We use the packet priority to select the
1704  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1705  *      should be sent as regular or control, bits 1-3 select the queue set.
1706  */
1707 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1708 {
1709         struct adapter *adap = tdev2adap(tdev);
1710         struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1711
1712         if (unlikely(is_ctrl_pkt(skb)))
1713                 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1714
1715         return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1716 }
1717
1718 /**
1719  *      offload_enqueue - add an offload packet to an SGE offload receive queue
1720  *      @q: the SGE response queue
1721  *      @skb: the packet
1722  *
1723  *      Add a new offload packet to an SGE response queue's offload packet
1724  *      queue.  If the packet is the first on the queue it schedules the RX
1725  *      softirq to process the queue.
1726  */
1727 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1728 {
1729         int was_empty = skb_queue_empty(&q->rx_queue);
1730
1731         __skb_queue_tail(&q->rx_queue, skb);
1732
1733         if (was_empty) {
1734                 struct sge_qset *qs = rspq_to_qset(q);
1735
1736                 napi_schedule(&qs->napi);
1737         }
1738 }
1739
1740 /**
1741  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1742  *      @tdev: the offload device that will be receiving the packets
1743  *      @q: the SGE response queue that assembled the bundle
1744  *      @skbs: the partial bundle
1745  *      @n: the number of packets in the bundle
1746  *
1747  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1748  */
1749 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1750                                           struct sge_rspq *q,
1751                                           struct sk_buff *skbs[], int n)
1752 {
1753         if (n) {
1754                 q->offload_bundles++;
1755                 tdev->recv(tdev, skbs, n);
1756         }
1757 }
1758
1759 /**
1760  *      ofld_poll - NAPI handler for offload packets in interrupt mode
1761  *      @dev: the network device doing the polling
1762  *      @budget: polling budget
1763  *
1764  *      The NAPI handler for offload packets when a response queue is serviced
1765  *      by the hard interrupt handler, i.e., when it's operating in non-polling
1766  *      mode.  Creates small packet batches and sends them through the offload
1767  *      receive handler.  Batches need to be of modest size as we do prefetches
1768  *      on the packets in each.
1769  */
1770 static int ofld_poll(struct napi_struct *napi, int budget)
1771 {
1772         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1773         struct sge_rspq *q = &qs->rspq;
1774         struct adapter *adapter = qs->adap;
1775         int work_done = 0;
1776
1777         while (work_done < budget) {
1778                 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1779                 struct sk_buff_head queue;
1780                 int ngathered;
1781
1782                 spin_lock_irq(&q->lock);
1783                 __skb_queue_head_init(&queue);
1784                 skb_queue_splice_init(&q->rx_queue, &queue);
1785                 if (skb_queue_empty(&queue)) {
1786                         napi_complete(napi);
1787                         spin_unlock_irq(&q->lock);
1788                         return work_done;
1789                 }
1790                 spin_unlock_irq(&q->lock);
1791
1792                 ngathered = 0;
1793                 skb_queue_walk_safe(&queue, skb, tmp) {
1794                         if (work_done >= budget)
1795                                 break;
1796                         work_done++;
1797
1798                         __skb_unlink(skb, &queue);
1799                         prefetch(skb->data);
1800                         skbs[ngathered] = skb;
1801                         if (++ngathered == RX_BUNDLE_SIZE) {
1802                                 q->offload_bundles++;
1803                                 adapter->tdev.recv(&adapter->tdev, skbs,
1804                                                    ngathered);
1805                                 ngathered = 0;
1806                         }
1807                 }
1808                 if (!skb_queue_empty(&queue)) {
1809                         /* splice remaining packets back onto Rx queue */
1810                         spin_lock_irq(&q->lock);
1811                         skb_queue_splice(&queue, &q->rx_queue);
1812                         spin_unlock_irq(&q->lock);
1813                 }
1814                 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1815         }
1816
1817         return work_done;
1818 }
1819
1820 /**
1821  *      rx_offload - process a received offload packet
1822  *      @tdev: the offload device receiving the packet
1823  *      @rq: the response queue that received the packet
1824  *      @skb: the packet
1825  *      @rx_gather: a gather list of packets if we are building a bundle
1826  *      @gather_idx: index of the next available slot in the bundle
1827  *
1828  *      Process an ingress offload pakcet and add it to the offload ingress
1829  *      queue.  Returns the index of the next available slot in the bundle.
1830  */
1831 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1832                              struct sk_buff *skb, struct sk_buff *rx_gather[],
1833                              unsigned int gather_idx)
1834 {
1835         skb_reset_mac_header(skb);
1836         skb_reset_network_header(skb);
1837         skb_reset_transport_header(skb);
1838
1839         if (rq->polling) {
1840                 rx_gather[gather_idx++] = skb;
1841                 if (gather_idx == RX_BUNDLE_SIZE) {
1842                         tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1843                         gather_idx = 0;
1844                         rq->offload_bundles++;
1845                 }
1846         } else
1847                 offload_enqueue(rq, skb);
1848
1849         return gather_idx;
1850 }
1851
1852 /**
1853  *      restart_tx - check whether to restart suspended Tx queues
1854  *      @qs: the queue set to resume
1855  *
1856  *      Restarts suspended Tx queues of an SGE queue set if they have enough
1857  *      free resources to resume operation.
1858  */
1859 static void restart_tx(struct sge_qset *qs)
1860 {
1861         if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1862             should_restart_tx(&qs->txq[TXQ_ETH]) &&
1863             test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1864                 qs->txq[TXQ_ETH].restarts++;
1865                 if (netif_running(qs->netdev))
1866                         netif_tx_wake_queue(qs->tx_q);
1867         }
1868
1869         if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1870             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1871             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1872                 qs->txq[TXQ_OFLD].restarts++;
1873                 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1874         }
1875         if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1876             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1877             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1878                 qs->txq[TXQ_CTRL].restarts++;
1879                 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1880         }
1881 }
1882
1883 /**
1884  *      cxgb3_arp_process - process an ARP request probing a private IP address
1885  *      @adapter: the adapter
1886  *      @skb: the skbuff containing the ARP request
1887  *
1888  *      Check if the ARP request is probing the private IP address
1889  *      dedicated to iSCSI, generate an ARP reply if so.
1890  */
1891 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1892 {
1893         struct net_device *dev = skb->dev;
1894         struct port_info *pi;
1895         struct arphdr *arp;
1896         unsigned char *arp_ptr;
1897         unsigned char *sha;
1898         __be32 sip, tip;
1899
1900         if (!dev)
1901                 return;
1902
1903         skb_reset_network_header(skb);
1904         arp = arp_hdr(skb);
1905
1906         if (arp->ar_op != htons(ARPOP_REQUEST))
1907                 return;
1908
1909         arp_ptr = (unsigned char *)(arp + 1);
1910         sha = arp_ptr;
1911         arp_ptr += dev->addr_len;
1912         memcpy(&sip, arp_ptr, sizeof(sip));
1913         arp_ptr += sizeof(sip);
1914         arp_ptr += dev->addr_len;
1915         memcpy(&tip, arp_ptr, sizeof(tip));
1916
1917         pi = netdev_priv(dev);
1918         if (tip != pi->iscsi_ipv4addr)
1919                 return;
1920
1921         arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1922                  dev->dev_addr, sha);
1923
1924 }
1925
1926 static inline int is_arp(struct sk_buff *skb)
1927 {
1928         return skb->protocol == htons(ETH_P_ARP);
1929 }
1930
1931 /**
1932  *      rx_eth - process an ingress ethernet packet
1933  *      @adap: the adapter
1934  *      @rq: the response queue that received the packet
1935  *      @skb: the packet
1936  *      @pad: amount of padding at the start of the buffer
1937  *
1938  *      Process an ingress ethernet pakcet and deliver it to the stack.
1939  *      The padding is 2 if the packet was delivered in an Rx buffer and 0
1940  *      if it was immediate data in a response.
1941  */
1942 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1943                    struct sk_buff *skb, int pad, int lro)
1944 {
1945         struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1946         struct sge_qset *qs = rspq_to_qset(rq);
1947         struct port_info *pi;
1948
1949         skb_pull(skb, sizeof(*p) + pad);
1950         skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1951         pi = netdev_priv(skb->dev);
1952         if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
1953             !p->fragment) {
1954                 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1955                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1956         } else
1957                 skb->ip_summed = CHECKSUM_NONE;
1958         skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
1959
1960         if (unlikely(p->vlan_valid)) {
1961                 struct vlan_group *grp = pi->vlan_grp;
1962
1963                 qs->port_stats[SGE_PSTAT_VLANEX]++;
1964                 if (likely(grp))
1965                         if (lro)
1966                                 vlan_gro_receive(&qs->napi, grp,
1967                                                  ntohs(p->vlan), skb);
1968                         else {
1969                                 if (unlikely(pi->iscsi_ipv4addr &&
1970                                     is_arp(skb))) {
1971                                         unsigned short vtag = ntohs(p->vlan) &
1972                                                                 VLAN_VID_MASK;
1973                                         skb->dev = vlan_group_get_device(grp,
1974                                                                          vtag);
1975                                         cxgb3_arp_process(adap, skb);
1976                                 }
1977                                 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1978                                                   rq->polling);
1979                         }
1980                 else
1981                         dev_kfree_skb_any(skb);
1982         } else if (rq->polling) {
1983                 if (lro)
1984                         napi_gro_receive(&qs->napi, skb);
1985                 else {
1986                         if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1987                                 cxgb3_arp_process(adap, skb);
1988                         netif_receive_skb(skb);
1989                 }
1990         } else
1991                 netif_rx(skb);
1992 }
1993
1994 static inline int is_eth_tcp(u32 rss)
1995 {
1996         return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1997 }
1998
1999 /**
2000  *      lro_add_page - add a page chunk to an LRO session
2001  *      @adap: the adapter
2002  *      @qs: the associated queue set
2003  *      @fl: the free list containing the page chunk to add
2004  *      @len: packet length
2005  *      @complete: Indicates the last fragment of a frame
2006  *
2007  *      Add a received packet contained in a page chunk to an existing LRO
2008  *      session.
2009  */
2010 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2011                          struct sge_fl *fl, int len, int complete)
2012 {
2013         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2014         struct cpl_rx_pkt *cpl;
2015         struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2016         int nr_frags = qs->lro_frag_tbl.nr_frags;
2017         int frag_len = qs->lro_frag_tbl.len;
2018         int offset = 0;
2019
2020         if (!nr_frags) {
2021                 offset = 2 + sizeof(struct cpl_rx_pkt);
2022                 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2023         }
2024
2025         fl->credits--;
2026
2027         len -= offset;
2028         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2029                          fl->buf_size, PCI_DMA_FROMDEVICE);
2030
2031         rx_frag += nr_frags;
2032         rx_frag->page = sd->pg_chunk.page;
2033         rx_frag->page_offset = sd->pg_chunk.offset + offset;
2034         rx_frag->size = len;
2035         frag_len += len;
2036         qs->lro_frag_tbl.nr_frags++;
2037         qs->lro_frag_tbl.len = frag_len;
2038
2039         if (!complete)
2040                 return;
2041
2042         qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2043         cpl = qs->lro_va;
2044
2045         if (unlikely(cpl->vlan_valid)) {
2046                 struct net_device *dev = qs->netdev;
2047                 struct port_info *pi = netdev_priv(dev);
2048                 struct vlan_group *grp = pi->vlan_grp;
2049
2050                 if (likely(grp != NULL)) {
2051                         vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2052                                        &qs->lro_frag_tbl);
2053                         goto out;
2054                 }
2055         }
2056         napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2057
2058 out:
2059         qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2060 }
2061
2062 /**
2063  *      handle_rsp_cntrl_info - handles control information in a response
2064  *      @qs: the queue set corresponding to the response
2065  *      @flags: the response control flags
2066  *
2067  *      Handles the control information of an SGE response, such as GTS
2068  *      indications and completion credits for the queue set's Tx queues.
2069  *      HW coalesces credits, we don't do any extra SW coalescing.
2070  */
2071 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2072 {
2073         unsigned int credits;
2074
2075 #if USE_GTS
2076         if (flags & F_RSPD_TXQ0_GTS)
2077                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2078 #endif
2079
2080         credits = G_RSPD_TXQ0_CR(flags);
2081         if (credits)
2082                 qs->txq[TXQ_ETH].processed += credits;
2083
2084         credits = G_RSPD_TXQ2_CR(flags);
2085         if (credits)
2086                 qs->txq[TXQ_CTRL].processed += credits;
2087
2088 # if USE_GTS
2089         if (flags & F_RSPD_TXQ1_GTS)
2090                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2091 # endif
2092         credits = G_RSPD_TXQ1_CR(flags);
2093         if (credits)
2094                 qs->txq[TXQ_OFLD].processed += credits;
2095 }
2096
2097 /**
2098  *      check_ring_db - check if we need to ring any doorbells
2099  *      @adapter: the adapter
2100  *      @qs: the queue set whose Tx queues are to be examined
2101  *      @sleeping: indicates which Tx queue sent GTS
2102  *
2103  *      Checks if some of a queue set's Tx queues need to ring their doorbells
2104  *      to resume transmission after idling while they still have unprocessed
2105  *      descriptors.
2106  */
2107 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2108                           unsigned int sleeping)
2109 {
2110         if (sleeping & F_RSPD_TXQ0_GTS) {
2111                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2112
2113                 if (txq->cleaned + txq->in_use != txq->processed &&
2114                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2115                         set_bit(TXQ_RUNNING, &txq->flags);
2116                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2117                                      V_EGRCNTX(txq->cntxt_id));
2118                 }
2119         }
2120
2121         if (sleeping & F_RSPD_TXQ1_GTS) {
2122                 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2123
2124                 if (txq->cleaned + txq->in_use != txq->processed &&
2125                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2126                         set_bit(TXQ_RUNNING, &txq->flags);
2127                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2128                                      V_EGRCNTX(txq->cntxt_id));
2129                 }
2130         }
2131 }
2132
2133 /**
2134  *      is_new_response - check if a response is newly written
2135  *      @r: the response descriptor
2136  *      @q: the response queue
2137  *
2138  *      Returns true if a response descriptor contains a yet unprocessed
2139  *      response.
2140  */
2141 static inline int is_new_response(const struct rsp_desc *r,
2142                                   const struct sge_rspq *q)
2143 {
2144         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2145 }
2146
2147 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2148 {
2149         q->pg_skb = NULL;
2150         q->rx_recycle_buf = 0;
2151 }
2152
/* Response flag bits that carry a GTS indication for Tx queues 0 and 1. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ1_GTS | F_RSPD_TXQ0_GTS)

/* All Tx-related control bits of a response: the credit fields plus GTS. */
#define RSPD_CTRL_MASK (V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR) | \
			RSPD_GTS_MASK)

/*
 * How long to delay the next interrupt in case of memory shortage, in units
 * of 0.1us (i.e. 250us).
 */
#define NOMEM_INTR_DELAY 2500
2161
2162 /**
2163  *      process_responses - process responses from an SGE response queue
2164  *      @adap: the adapter
2165  *      @qs: the queue set to which the response queue belongs
2166  *      @budget: how many responses can be processed in this round
2167  *
2168  *      Process responses from an SGE response queue up to the supplied budget.
2169  *      Responses include received packets as well as credits and other events
2170  *      for the queues that belong to the response queue's queue set.
2171  *      A negative budget is effectively unlimited.
2172  *
2173  *      Additionally choose the interrupt holdoff time for the next interrupt
2174  *      on this queue.  If the system is under memory shortage use a fairly
2175  *      long delay to help recovery.
2176  */
2177 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2178                              int budget)
2179 {
2180         struct sge_rspq *q = &qs->rspq;
2181         struct rsp_desc *r = &q->desc[q->cidx];
2182         int budget_left = budget;
2183         unsigned int sleeping = 0;
2184         struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2185         int ngathered = 0;
2186
2187         q->next_holdoff = q->holdoff_tmr;
2188
2189         while (likely(budget_left && is_new_response(r, q))) {
2190                 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2191                 struct sk_buff *skb = NULL;
2192                 u32 len, flags = ntohl(r->flags);
2193                 __be32 rss_hi = *(const __be32 *)r,
2194                        rss_lo = r->rss_hdr.rss_hash_val;
2195
2196                 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2197
2198                 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2199                         skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2200                         if (!skb)
2201                                 goto no_mem;
2202
2203                         memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2204                         skb->data[0] = CPL_ASYNC_NOTIF;
2205                         rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2206                         q->async_notif++;
2207                 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2208                         skb = get_imm_packet(r);
2209                         if (unlikely(!skb)) {
2210 no_mem:
2211                                 q->next_holdoff = NOMEM_INTR_DELAY;
2212                                 q->nomem++;
2213                                 /* consume one credit since we tried */
2214                                 budget_left--;
2215                                 break;
2216                         }
2217                         q->imm_data++;
2218                         ethpad = 0;
2219                 } else if ((len = ntohl(r->len_cq)) != 0) {
2220                         struct sge_fl *fl;
2221
2222                         lro &= eth && is_eth_tcp(rss_hi);
2223
2224                         fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2225                         if (fl->use_pages) {
2226                                 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2227
2228                                 prefetch(addr);
2229 #if L1_CACHE_BYTES < 128
2230                                 prefetch(addr + L1_CACHE_BYTES);
2231 #endif
2232                                 __refill_fl(adap, fl);
2233                                 if (lro > 0) {
2234                                         lro_add_page(adap, qs, fl,
2235                                                      G_RSPD_LEN(len),
2236                                                      flags & F_RSPD_EOP);
2237                                          goto next_fl;
2238                                 }
2239
2240                                 skb = get_packet_pg(adap, fl, q,
2241                                                     G_RSPD_LEN(len),
2242                                                     eth ?
2243                                                     SGE_RX_DROP_THRES : 0);
2244                                 q->pg_skb = skb;
2245                         } else
2246                                 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2247                                                  eth ? SGE_RX_DROP_THRES : 0);
2248                         if (unlikely(!skb)) {
2249                                 if (!eth)
2250                                         goto no_mem;
2251                                 q->rx_drops++;
2252                         } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2253                                 __skb_pull(skb, 2);
2254 next_fl:
2255                         if (++fl->cidx == fl->size)
2256                                 fl->cidx = 0;
2257                 } else
2258                         q->pure_rsps++;
2259
2260                 if (flags & RSPD_CTRL_MASK) {
2261                         sleeping |= flags & RSPD_GTS_MASK;
2262                         handle_rsp_cntrl_info(qs, flags);
2263                 }
2264
2265                 r++;
2266                 if (unlikely(++q->cidx == q->size)) {
2267                         q->cidx = 0;
2268                         q->gen ^= 1;
2269                         r = q->desc;
2270                 }
2271                 prefetch(r);
2272
2273                 if (++q->credits >= (q->size / 4)) {
2274                         refill_rspq(adap, q, q->credits);
2275                         q->credits = 0;
2276                 }
2277
2278                 packet_complete = flags &
2279                                   (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2280                                    F_RSPD_ASYNC_NOTIF);
2281
2282                 if (skb != NULL && packet_complete) {
2283                         if (eth)
2284                                 rx_eth(adap, q, skb, ethpad, lro);
2285                         else {
2286                                 q->offload_pkts++;
2287                                 /* Preserve the RSS info in csum & priority */
2288                                 skb->csum = rss_hi;
2289                                 skb->priority = rss_lo;
2290                                 ngathered = rx_offload(&adap->tdev, q, skb,
2291                                                        offload_skbs,
2292                                                        ngathered);
2293                         }
2294
2295                         if (flags & F_RSPD_EOP)
2296                                 clear_rspq_bufstate(q);
2297                 }
2298                 --budget_left;
2299         }
2300
2301         deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2302
2303         if (sleeping)
2304                 check_ring_db(adap, qs, sleeping);
2305
2306         smp_mb();               /* commit Tx queue .processed updates */
2307         if (unlikely(qs->txq_stopped != 0))
2308                 restart_tx(qs);
2309
2310         budget -= budget_left;
2311         return budget;
2312 }
2313
2314 static inline int is_pure_response(const struct rsp_desc *r)
2315 {
2316         __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2317
2318         return (n | r->len_cq) == 0;
2319 }
2320
2321 /**
2322  *      napi_rx_handler - the NAPI handler for Rx processing
2323  *      @napi: the napi instance
2324  *      @budget: how many packets we can process in this round
2325  *
2326  *      Handler for new data events when using NAPI.
2327  */
2328 static int napi_rx_handler(struct napi_struct *napi, int budget)
2329 {
2330         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2331         struct adapter *adap = qs->adap;
2332         int work_done = process_responses(adap, qs, budget);
2333
2334         if (likely(work_done < budget)) {
2335                 napi_complete(napi);
2336
2337                 /*
2338                  * Because we don't atomically flush the following
2339                  * write it is possible that in very rare cases it can
2340                  * reach the device in a way that races with a new
2341                  * response being written plus an error interrupt
2342                  * causing the NAPI interrupt handler below to return
2343                  * unhandled status to the OS.  To protect against
2344                  * this would require flushing the write and doing
2345                  * both the write and the flush with interrupts off.
2346                  * Way too expensive and unjustifiable given the
2347                  * rarity of the race.
2348                  *
2349                  * The race cannot happen at all with MSI-X.
2350                  */
2351                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2352                              V_NEWTIMER(qs->rspq.next_holdoff) |
2353                              V_NEWINDEX(qs->rspq.cidx));
2354         }
2355         return work_done;
2356 }
2357
2358 /*
2359  * Returns true if the device is already scheduled for polling.
2360  */
2361 static inline int napi_is_scheduled(struct napi_struct *napi)
2362 {
2363         return test_bit(NAPI_STATE_SCHED, &napi->state);
2364 }
2365
2366 /**
2367  *      process_pure_responses - process pure responses from a response queue
2368  *      @adap: the adapter
2369  *      @qs: the queue set owning the response queue
2370  *      @r: the first pure response to process
2371  *
2372  *      A simpler version of process_responses() that handles only pure (i.e.,
2373  *      non data-carrying) responses.  Such respones are too light-weight to
2374  *      justify calling a softirq under NAPI, so we handle them specially in
2375  *      the interrupt handler.  The function is called with a pointer to a
2376  *      response, which the caller must ensure is a valid pure response.
2377  *
2378  *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2379  */
2380 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2381                                   struct rsp_desc *r)
2382 {
2383         struct sge_rspq *q = &qs->rspq;
2384         unsigned int sleeping = 0;
2385
2386         do {
2387                 u32 flags = ntohl(r->flags);
2388
2389                 r++;
2390                 if (unlikely(++q->cidx == q->size)) {
2391                         q->cidx = 0;
2392                         q->gen ^= 1;
2393                         r = q->desc;
2394                 }
2395                 prefetch(r);
2396
2397                 if (flags & RSPD_CTRL_MASK) {
2398                         sleeping |= flags & RSPD_GTS_MASK;
2399                         handle_rsp_cntrl_info(qs, flags);
2400                 }
2401
2402                 q->pure_rsps++;
2403                 if (++q->credits >= (q->size / 4)) {
2404                         refill_rspq(adap, q, q->credits);
2405                         q->credits = 0;
2406                 }
2407         } while (is_new_response(r, q) && is_pure_response(r));
2408
2409         if (sleeping)
2410                 check_ring_db(adap, qs, sleeping);
2411
2412         smp_mb();               /* commit Tx queue .processed updates */
2413         if (unlikely(qs->txq_stopped != 0))
2414                 restart_tx(qs);
2415
2416         return is_new_response(r, q);
2417 }
2418
2419 /**
2420  *      handle_responses - decide what to do with new responses in NAPI mode
2421  *      @adap: the adapter
2422  *      @q: the response queue
2423  *
2424  *      This is used by the NAPI interrupt handlers to decide what to do with
2425  *      new SGE responses.  If there are no new responses it returns -1.  If
2426  *      there are new responses and they are pure (i.e., non-data carrying)
2427  *      it handles them straight in hard interrupt context as they are very
2428  *      cheap and don't deliver any packets.  Finally, if there are any data
2429  *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2430  *      schedules NAPI, 0 if all new responses were pure.
2431  *
2432  *      The caller must ascertain NAPI is not already running.
2433  */
2434 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2435 {
2436         struct sge_qset *qs = rspq_to_qset(q);
2437         struct rsp_desc *r = &q->desc[q->cidx];
2438
2439         if (!is_new_response(r, q))
2440                 return -1;
2441         if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2442                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2443                              V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2444                 return 0;
2445         }
2446         napi_schedule(&qs->napi);
2447         return 1;
2448 }
2449
2450 /*
2451  * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2452  * (i.e., response queue serviced in hard interrupt).
2453  */
2454 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2455 {
2456         struct sge_qset *qs = cookie;
2457         struct adapter *adap = qs->adap;
2458         struct sge_rspq *q = &qs->rspq;
2459
2460         spin_lock(&q->lock);
2461         if (process_responses(adap, qs, -1) == 0)
2462                 q->unhandled_irqs++;
2463         t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2464                      V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2465         spin_unlock(&q->lock);
2466         return IRQ_HANDLED;
2467 }
2468
2469 /*
2470  * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2471  * (i.e., response queue serviced by NAPI polling).
2472  */
2473 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2474 {
2475         struct sge_qset *qs = cookie;
2476         struct sge_rspq *q = &qs->rspq;
2477
2478         spin_lock(&q->lock);
2479
2480         if (handle_responses(qs->adap, q) < 0)
2481                 q->unhandled_irqs++;
2482         spin_unlock(&q->lock);
2483         return IRQ_HANDLED;
2484 }
2485
2486 /*
2487  * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2488  * SGE response queues as well as error and other async events as they all use
2489  * the same MSI vector.  We use one SGE response queue per port in this mode
2490  * and protect all response queues with queue 0's lock.
2491  */
2492 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2493 {
2494         int new_packets = 0;
2495         struct adapter *adap = cookie;
2496         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2497
2498         spin_lock(&q->lock);
2499
2500         if (process_responses(adap, &adap->sge.qs[0], -1)) {
2501                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2502                              V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2503                 new_packets = 1;
2504         }
2505
2506         if (adap->params.nports == 2 &&
2507             process_responses(adap, &adap->sge.qs[1], -1)) {
2508                 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2509
2510                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2511                              V_NEWTIMER(q1->next_holdoff) |
2512                              V_NEWINDEX(q1->cidx));
2513                 new_packets = 1;
2514         }
2515
2516         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2517                 q->unhandled_irqs++;
2518
2519         spin_unlock(&q->lock);
2520         return IRQ_HANDLED;
2521 }
2522
2523 static int rspq_check_napi(struct sge_qset *qs)
2524 {
2525         struct sge_rspq *q = &qs->rspq;
2526
2527         if (!napi_is_scheduled(&qs->napi) &&
2528             is_new_response(&q->desc[q->cidx], q)) {
2529                 napi_schedule(&qs->napi);
2530                 return 1;
2531         }
2532         return 0;
2533 }
2534
2535 /*
2536  * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2537  * by NAPI polling).  Handles data events from SGE response queues as well as
2538  * error and other async events as they all use the same MSI vector.  We use
2539  * one SGE response queue per port in this mode and protect all response
2540  * queues with queue 0's lock.
2541  */
2542 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2543 {
2544         int new_packets;
2545         struct adapter *adap = cookie;
2546         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2547
2548         spin_lock(&q->lock);
2549
2550         new_packets = rspq_check_napi(&adap->sge.qs[0]);
2551         if (adap->params.nports == 2)
2552                 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2553         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2554                 q->unhandled_irqs++;
2555
2556         spin_unlock(&q->lock);
2557         return IRQ_HANDLED;
2558 }
2559
2560 /*
2561  * A helper function that processes responses and issues GTS.
2562  */
2563 static inline int process_responses_gts(struct adapter *adap,
2564                                         struct sge_rspq *rq)
2565 {
2566         int work;
2567
2568         work = process_responses(adap, rspq_to_qset(rq), -1);
2569         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2570                      V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2571         return work;
2572 }
2573
2574 /*
2575  * The legacy INTx interrupt handler.  This needs to handle data events from
2576  * SGE response queues as well as error and other async events as they all use
2577  * the same interrupt pin.  We use one SGE response queue per port in this mode
2578  * and protect all response queues with queue 0's lock.
2579  */
2580 static irqreturn_t t3_intr(int irq, void *cookie)
2581 {
2582         int work_done, w0, w1;
2583         struct adapter *adap = cookie;
2584         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2585         struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2586
2587         spin_lock(&q0->lock);
2588
2589         w0 = is_new_response(&q0->desc[q0->cidx], q0);
2590         w1 = adap->params.nports == 2 &&
2591             is_new_response(&q1->desc[q1->cidx], q1);
2592
2593         if (likely(w0 | w1)) {
2594                 t3_write_reg(adap, A_PL_CLI, 0);
2595                 t3_read_reg(adap, A_PL_CLI);    /* flush */
2596
2597                 if (likely(w0))
2598                         process_responses_gts(adap, q0);
2599
2600                 if (w1)
2601                         process_responses_gts(adap, q1);
2602
2603                 work_done = w0 | w1;
2604         } else
2605                 work_done = t3_slow_intr_handler(adap);
2606
2607         spin_unlock(&q0->lock);
2608         return IRQ_RETVAL(work_done != 0);
2609 }
2610
2611 /*
2612  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2613  * Handles data events from SGE response queues as well as error and other
2614  * async events as they all use the same interrupt pin.  We use one SGE
2615  * response queue per port in this mode and protect all response queues with
2616  * queue 0's lock.
2617  */
2618 static irqreturn_t t3b_intr(int irq, void *cookie)
2619 {
2620         u32 map;
2621         struct adapter *adap = cookie;
2622         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2623
2624         t3_write_reg(adap, A_PL_CLI, 0);
2625         map = t3_read_reg(adap, A_SG_DATA_INTR);
2626
2627         if (unlikely(!map))     /* shared interrupt, most likely */
2628                 return IRQ_NONE;
2629
2630         spin_lock(&q0->lock);
2631
2632         if (unlikely(map & F_ERRINTR))
2633                 t3_slow_intr_handler(adap);
2634
2635         if (likely(map & 1))
2636                 process_responses_gts(adap, q0);
2637
2638         if (map & 2)
2639                 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2640
2641         spin_unlock(&q0->lock);
2642         return IRQ_HANDLED;
2643 }
2644
2645 /*
2646  * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2647  * Handles data events from SGE response queues as well as error and other
2648  * async events as they all use the same interrupt pin.  We use one SGE
2649  * response queue per port in this mode and protect all response queues with
2650  * queue 0's lock.
2651  */
2652 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2653 {
2654         u32 map;
2655         struct adapter *adap = cookie;
2656         struct sge_qset *qs0 = &adap->sge.qs[0];
2657         struct sge_rspq *q0 = &qs0->rspq;
2658
2659         t3_write_reg(adap, A_PL_CLI, 0);
2660         map = t3_read_reg(adap, A_SG_DATA_INTR);
2661
2662         if (unlikely(!map))     /* shared interrupt, most likely */
2663                 return IRQ_NONE;
2664
2665         spin_lock(&q0->lock);
2666
2667         if (unlikely(map & F_ERRINTR))
2668                 t3_slow_intr_handler(adap);
2669
2670         if (likely(map & 1))
2671                 napi_schedule(&qs0->napi);
2672
2673         if (map & 2)
2674                 napi_schedule(&adap->sge.qs[1].napi);
2675
2676         spin_unlock(&q0->lock);
2677         return IRQ_HANDLED;
2678 }
2679
2680 /**
2681  *      t3_intr_handler - select the top-level interrupt handler
2682  *      @adap: the adapter
2683  *      @polling: whether using NAPI to service response queues
2684  *
2685  *      Selects the top-level interrupt handler based on the type of interrupts
2686  *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2687  *      response queues.
2688  */
2689 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2690 {
2691         if (adap->flags & USING_MSIX)
2692                 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2693         if (adap->flags & USING_MSI)
2694                 return polling ? t3_intr_msi_napi : t3_intr_msi;
2695         if (adap->params.rev > 0)
2696                 return polling ? t3b_intr_napi : t3b_intr;
2697         return t3_intr;
2698 }
2699
2700 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2701                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2702                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2703                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2704                     F_HIRCQPARITYERROR)
2705 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2706 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2707                       F_RSPQDISABLED)
2708
2709 /**
2710  *      t3_sge_err_intr_handler - SGE async event interrupt handler
2711  *      @adapter: the adapter
2712  *
2713  *      Interrupt handler for SGE asynchronous (non-data) events.
2714  */
2715 void t3_sge_err_intr_handler(struct adapter *adapter)
2716 {
2717         unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2718
2719         if (status & SGE_PARERR)
2720                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2721                          status & SGE_PARERR);
2722         if (status & SGE_FRAMINGERR)
2723                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2724                          status & SGE_FRAMINGERR);
2725
2726         if (status & F_RSPQCREDITOVERFOW)
2727                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2728
2729         if (status & F_RSPQDISABLED) {
2730                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2731
2732                 CH_ALERT(adapter,
2733                          "packet delivered to disabled response queue "
2734                          "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2735         }
2736
2737         if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2738                 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2739                          status & F_HIPIODRBDROPERR ? "high" : "lo");
2740
2741         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2742         if (status &  SGE_FATALERR)
2743                 t3_fatal_err(adapter);
2744 }
2745
2746 /**
2747  *      sge_timer_cb - perform periodic maintenance of an SGE qset
2748  *      @data: the SGE queue set to maintain
2749  *
2750  *      Runs periodically from a timer to perform maintenance of an SGE queue
2751  *      set.  It performs two tasks:
2752  *
2753  *      a) Cleans up any completed Tx descriptors that may still be pending.
2754  *      Normal descriptor cleanup happens when new packets are added to a Tx
2755  *      queue so this timer is relatively infrequent and does any cleanup only
2756  *      if the Tx queue has not seen any new packets in a while.  We make a
2757  *      best effort attempt to reclaim descriptors, in that we don't wait
2758  *      around if we cannot get a queue's lock (which most likely is because
2759  *      someone else is queueing new packets and so will also handle the clean
2760  *      up).  Since control queues use immediate data exclusively we don't
2761  *      bother cleaning them up here.
2762  *
2763  *      b) Replenishes Rx queues that have run out due to memory shortage.
2764  *      Normally new Rx buffers are added when existing ones are consumed but
2765  *      when out of memory a queue can become empty.  We try to add only a few
2766  *      buffers here, the queue will be replenished fully as these new buffers
2767  *      are used up if memory shortage has subsided.
2768  */
2769 static void sge_timer_cb(unsigned long data)
2770 {
2771         spinlock_t *lock;
2772         struct sge_qset *qs = (struct sge_qset *)data;
2773         struct adapter *adap = qs->adap;
2774
2775         if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2776                 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2777                 spin_unlock(&qs->txq[TXQ_ETH].lock);
2778         }
2779         if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2780                 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2781                 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2782         }
2783         lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2784                                             &adap->sge.qs[0].rspq.lock;
2785         if (spin_trylock_irq(lock)) {
2786                 if (!napi_is_scheduled(&qs->napi)) {
2787                         u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2788
2789                         if (qs->fl[0].credits < qs->fl[0].size)
2790                                 __refill_fl(adap, &qs->fl[0]);
2791                         if (qs->fl[1].credits < qs->fl[1].size)
2792                                 __refill_fl(adap, &qs->fl[1]);
2793
2794                         if (status & (1 << qs->rspq.cntxt_id)) {
2795                                 qs->rspq.starved++;
2796                                 if (qs->rspq.credits) {
2797                                         refill_rspq(adap, &qs->rspq, 1);
2798                                         qs->rspq.credits--;
2799                                         qs->rspq.restarted++;
2800                                         t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2801                                                      1 << qs->rspq.cntxt_id);
2802                                 }
2803                         }
2804                 }
2805                 spin_unlock_irq(lock);
2806         }
2807         mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2808 }
2809
2810 /**
2811  *      t3_update_qset_coalesce - update coalescing settings for a queue set
2812  *      @qs: the SGE queue set
2813  *      @p: new queue set parameters
2814  *
2815  *      Update the coalescing settings for an SGE queue set.  Nothing is done
2816  *      if the queue set is not initialized yet.
2817  */
2818 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2819 {
2820         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2821         qs->rspq.polling = p->polling;
2822         qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2823 }
2824
2825 /**
2826  *      t3_sge_alloc_qset - initialize an SGE queue set
2827  *      @adapter: the adapter
2828  *      @id: the queue set id
2829  *      @nports: how many Ethernet ports will be using this queue set
2830  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2831  *      @p: configuration parameters for this queue set
2832  *      @ntxq: number of Tx queues for the queue set
2833  *      @netdev: net device associated with this queue set
2834  *      @netdevq: net device TX queue associated with this queue set
2835  *
2836  *      Allocate resources and initialize an SGE queue set.  A queue set
2837  *      comprises a response queue, two Rx free-buffer queues, and up to 3
2838  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2839  *      queue, offload queue, and control queue.
2840  */
2841 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2842                       int irq_vec_idx, const struct qset_params *p,
2843                       int ntxq, struct net_device *dev,
2844                       struct netdev_queue *netdevq)
2845 {
2846         int i, avail, ret = -ENOMEM;
2847         struct sge_qset *q = &adapter->sge.qs[id];
2848
2849         init_qset_cntxt(q, id);
2850         setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
2851
2852         q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2853                                    sizeof(struct rx_desc),
2854                                    sizeof(struct rx_sw_desc),
2855                                    &q->fl[0].phys_addr, &q->fl[0].sdesc);
2856         if (!q->fl[0].desc)
2857                 goto err;
2858
2859         q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2860                                    sizeof(struct rx_desc),
2861                                    sizeof(struct rx_sw_desc),
2862                                    &q->fl[1].phys_addr, &q->fl[1].sdesc);
2863         if (!q->fl[1].desc)
2864                 goto err;
2865
2866         q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2867                                   sizeof(struct rsp_desc), 0,
2868                                   &q->rspq.phys_addr, NULL);
2869         if (!q->rspq.desc)
2870                 goto err;
2871
2872         for (i = 0; i < ntxq; ++i) {
2873                 /*
2874                  * The control queue always uses immediate data so does not
2875                  * need to keep track of any sk_buffs.
2876                  */
2877                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2878
2879                 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2880                                             sizeof(struct tx_desc), sz,
2881                                             &q->txq[i].phys_addr,
2882                                             &q->txq[i].sdesc);
2883                 if (!q->txq[i].desc)
2884                         goto err;
2885
2886                 q->txq[i].gen = 1;
2887                 q->txq[i].size = p->txq_size[i];
2888                 spin_lock_init(&q->txq[i].lock);
2889                 skb_queue_head_init(&q->txq[i].sendq);
2890         }
2891
2892         tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2893                      (unsigned long)q);
2894         tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2895                      (unsigned long)q);
2896
2897         q->fl[0].gen = q->fl[1].gen = 1;
2898         q->fl[0].size = p->fl_size;
2899         q->fl[1].size = p->jumbo_size;
2900
2901         q->rspq.gen = 1;
2902         q->rspq.size = p->rspq_size;
2903         spin_lock_init(&q->rspq.lock);
2904         skb_queue_head_init(&q->rspq.rx_queue);
2905
2906         q->txq[TXQ_ETH].stop_thres = nports *
2907             flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2908
2909 #if FL0_PG_CHUNK_SIZE > 0
2910         q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2911 #else
2912         q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2913 #endif
2914 #if FL1_PG_CHUNK_SIZE > 0
2915         q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2916 #else
2917         q->fl[1].buf_size = is_offload(adapter) ?
2918                 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2919                 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2920 #endif
2921
2922         q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2923         q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2924         q->fl[0].order = FL0_PG_ORDER;
2925         q->fl[1].order = FL1_PG_ORDER;
2926
2927         spin_lock_irq(&adapter->sge.reg_lock);
2928
2929         /* FL threshold comparison uses < */
2930         ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2931                                    q->rspq.phys_addr, q->rspq.size,
2932                                    q->fl[0].buf_size, 1, 0);
2933         if (ret)
2934                 goto err_unlock;
2935
2936         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2937                 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2938                                           q->fl[i].phys_addr, q->fl[i].size,
2939                                           q->fl[i].buf_size, p->cong_thres, 1,
2940                                           0);
2941                 if (ret)
2942                         goto err_unlock;
2943         }
2944
2945         ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2946                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2947                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2948                                  1, 0);
2949         if (ret)
2950                 goto err_unlock;
2951
2952         if (ntxq > 1) {
2953                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2954                                          USE_GTS, SGE_CNTXT_OFLD, id,
2955                                          q->txq[TXQ_OFLD].phys_addr,
2956                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
2957                 if (ret)
2958                         goto err_unlock;
2959         }
2960
2961         if (ntxq > 2) {
2962                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2963                                          SGE_CNTXT_CTRL, id,
2964                                          q->txq[TXQ_CTRL].phys_addr,
2965                                          q->txq[TXQ_CTRL].size,
2966                                          q->txq[TXQ_CTRL].token, 1, 0);
2967                 if (ret)
2968                         goto err_unlock;
2969         }
2970
2971         spin_unlock_irq(&adapter->sge.reg_lock);
2972
2973         q->adap = adapter;
2974         q->netdev = dev;
2975         q->tx_q = netdevq;
2976         t3_update_qset_coalesce(q, p);
2977
2978         avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2979                           GFP_KERNEL | __GFP_COMP);
2980         if (!avail) {
2981                 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2982                 goto err;
2983         }
2984         if (avail < q->fl[0].size)
2985                 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2986                         avail);
2987
2988         avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2989                           GFP_KERNEL | __GFP_COMP);
2990         if (avail < q->fl[1].size)
2991                 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2992                         avail);
2993         refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2994
2995         t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2996                      V_NEWTIMER(q->rspq.holdoff_tmr));
2997
2998         mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2999         return 0;
3000
3001 err_unlock:
3002         spin_unlock_irq(&adapter->sge.reg_lock);
3003 err:
3004         t3_free_qset(adapter, q);
3005         return ret;
3006 }
3007
3008 /**
3009  *      t3_stop_sge_timers - stop SGE timer call backs
3010  *      @adap: the adapter
3011  *
3012  *      Stops each SGE queue set's timer call back
3013  */
3014 void t3_stop_sge_timers(struct adapter *adap)
3015 {
3016         int i;
3017
3018         for (i = 0; i < SGE_QSETS; ++i) {
3019                 struct sge_qset *q = &adap->sge.qs[i];
3020
3021                 if (q->tx_reclaim_timer.function)
3022                         del_timer_sync(&q->tx_reclaim_timer);
3023         }
3024 }
3025
3026 /**
3027  *      t3_free_sge_resources - free SGE resources
3028  *      @adap: the adapter
3029  *
3030  *      Frees resources used by the SGE queue sets.
3031  */
3032 void t3_free_sge_resources(struct adapter *adap)
3033 {
3034         int i;
3035
3036         for (i = 0; i < SGE_QSETS; ++i)
3037                 t3_free_qset(adap, &adap->sge.qs[i]);
3038 }
3039
3040 /**
3041  *      t3_sge_start - enable SGE
3042  *      @adap: the adapter
3043  *
3044  *      Enables the SGE for DMAs.  This is the last step in starting packet
3045  *      transfers.
3046  */
3047 void t3_sge_start(struct adapter *adap)
3048 {
3049         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3050 }
3051
3052 /**
3053  *      t3_sge_stop - disable SGE operation
3054  *      @adap: the adapter
3055  *
3056  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
3057  *      from error interrupts) or from normal process context.  In the latter
3058  *      case it also disables any pending queue restart tasklets.  Note that
3059  *      if it is called in interrupt context it cannot disable the restart
3060  *      tasklets as it cannot wait, however the tasklets will have no effect
3061  *      since the doorbells are disabled and the driver will call this again
3062  *      later from process context, at which time the tasklets will be stopped
3063  *      if they are still running.
3064  */
3065 void t3_sge_stop(struct adapter *adap)
3066 {
3067         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3068         if (!in_interrupt()) {
3069                 int i;
3070
3071                 for (i = 0; i < SGE_QSETS; ++i) {
3072                         struct sge_qset *qs = &adap->sge.qs[i];
3073
3074                         tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3075                         tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3076                 }
3077         }
3078 }
3079
3080 /**
3081  *      t3_sge_init - initialize SGE
3082  *      @adap: the adapter
3083  *      @p: the SGE parameters
3084  *
3085  *      Performs SGE initialization needed every time after a chip reset.
3086  *      We do not initialize any of the queue sets here, instead the driver
3087  *      top-level must request those individually.  We also do not enable DMA
3088  *      here, that should be done after the queues have been set up.
3089  */
3090 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3091 {
3092         unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3093
3094         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3095             F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3096             V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3097             V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3098 #if SGE_NUM_GENBITS == 1
3099         ctrl |= F_EGRGENCTRL;
3100 #endif
3101         if (adap->params.rev > 0) {
3102                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3103                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3104         }
3105         t3_write_reg(adap, A_SG_CONTROL, ctrl);
3106         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3107                      V_LORCQDRBTHRSH(512));
3108         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3109         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3110                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3111         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3112                      adap->params.rev < T3_REV_C ? 1000 : 500);
3113         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3114         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3115         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3116         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3117         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3118 }
3119
3120 /**
3121  *      t3_sge_prep - one-time SGE initialization
3122  *      @adap: the associated adapter
3123  *      @p: SGE parameters
3124  *
3125  *      Performs one-time initialization of SGE SW state.  Includes determining
3126  *      defaults for the assorted SGE parameters, which admins can change until
3127  *      they are used to initialize the SGE.
3128  */
3129 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3130 {
3131         int i;
3132
3133         p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3134             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3135
3136         for (i = 0; i < SGE_QSETS; ++i) {
3137                 struct qset_params *q = p->qset + i;
3138
3139                 q->polling = adap->params.rev > 0;
3140                 q->coalesce_usecs = 5;
3141                 q->rspq_size = 1024;
3142                 q->fl_size = 1024;
3143                 q->jumbo_size = 512;
3144                 q->txq_size[TXQ_ETH] = 1024;
3145                 q->txq_size[TXQ_OFLD] = 1024;
3146                 q->txq_size[TXQ_CTRL] = 256;
3147                 q->cong_thres = 0;
3148         }
3149
3150         spin_lock_init(&adap->sge.reg_lock);
3151 }
3152
3153 /**
3154  *      t3_get_desc - dump an SGE descriptor for debugging purposes
3155  *      @qs: the queue set
3156  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3157  *      @idx: the descriptor index in the queue
3158  *      @data: where to dump the descriptor contents
3159  *
3160  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3161  *      size of the descriptor.
3162  */
3163 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3164                 unsigned char *data)
3165 {
3166         if (qnum >= 6)
3167                 return -EINVAL;
3168
3169         if (qnum < 3) {
3170                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3171                         return -EINVAL;
3172                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3173                 return sizeof(struct tx_desc);
3174         }
3175
3176         if (qnum == 3) {
3177                 if (!qs->rspq.desc || idx >= qs->rspq.size)
3178                         return -EINVAL;
3179                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3180                 return sizeof(struct rsp_desc);
3181         }
3182
3183         qnum -= 4;
3184         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3185                 return -EINVAL;
3186         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3187         return sizeof(struct rx_desc);
3188 }