2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
44 #include "firmware_exports.h"
48 #define SGE_RX_SM_BUF_SIZE 1536
50 #define SGE_RX_COPY_THRES 256
51 #define SGE_RX_PULL_LEN 128
54 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
55 * It must be a divisor of PAGE_SIZE. If set to 0, FL0 will use sk_buffs
58 #define FL0_PG_CHUNK_SIZE 2048
59 #define FL0_PG_ORDER 0
60 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
61 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
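/*
 * For example, with 4 KB pages FL0 carves each order-0 page into two 2 KB
 * chunks, while FL1 allocates order-1 (8 KB) groups used as a single 8 KB
 * chunk; with 64 KB pages FL1 instead takes four 16 KB chunks from one
 * order-0 page.
 */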
63 #define SGE_RX_DROP_THRES 16
66 * Max number of Rx buffers we replenish at a time.
68 #define MAX_RX_REFILL 16U
70 * Period of the Tx buffer reclaim timer. This timer does not need to run
71 * frequently as Tx buffers are usually reclaimed by new Tx packets.
73 #define TX_RECLAIM_PERIOD (HZ / 4)
75 /* WR size in bytes */
76 #define WR_LEN (WR_FLITS * 8)
79 * Types of Tx queues in each queue set. Order here matters, do not change.
81 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
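/*
 * TXQ_ETH, TXQ_OFLD and TXQ_CTRL correspond to the HW's TXQ0, TXQ1 and TXQ2
 * credit/GTS fields (see handle_rsp_cntrl_info()), one reason the order above
 * must be preserved.
 */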
83 /* Values for sge_txq.flags */
85 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
86 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
90 __be64 flit[TX_DESC_FLITS];
100 struct tx_sw_desc { /* SW state per Tx descriptor */
102 u8 eop; /* set if last descriptor for packet */
103 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u8 fragidx; /* first page fragment associated with descriptor */
105 s8 sflit; /* start flit of first SGL entry in descriptor */
108 struct rx_sw_desc { /* SW state per Rx descriptor */
111 struct fl_pg_chunk pg_chunk;
113 DECLARE_PCI_UNMAP_ADDR(dma_addr);
116 struct rsp_desc { /* response queue descriptor */
117 struct rss_header rss_hdr;
125 * Holds unmapping information for Tx packets that need deferred unmapping.
126 * This structure lives at skb->head and must be allocated by callers.
128 struct deferred_unmap_info {
129 struct pci_dev *pdev;
130 dma_addr_t addr[MAX_SKB_FRAGS + 1];
134 * Maps a number of flits to the number of Tx descriptors that can hold them.
137 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
139 * HW allows up to 4 descriptors to be combined into a WR.
141 static u8 flit_desc_map[] = {
143 #if SGE_NUM_GENBITS == 1
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
148 #elif SGE_NUM_GENBITS == 2
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
154 # error "SGE_NUM_GENBITS must be 1 or 2"
158 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
160 return container_of(q, struct sge_qset, fl[qidx]);
163 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
165 return container_of(q, struct sge_qset, rspq);
168 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
170 return container_of(q, struct sge_qset, txq[qidx]);
174 * refill_rspq - replenish an SGE response queue
175 * @adapter: the adapter
176 * @q: the response queue to replenish
177 * @credits: how many new responses to make available
179 * Replenishes a response queue by making the supplied number of responses
182 static inline void refill_rspq(struct adapter *adapter,
183 const struct sge_rspq *q, unsigned int credits)
186 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
187 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
191 * need_skb_unmap - does the platform need unmapping of sk_buffs?
193 * Returns true if the platform needs sk_buff unmapping. The compiler
194 * optimizes away unnecessary code if this returns false.
196 static inline int need_skb_unmap(void)
199 * This structure is used to tell if the platform needs buffer
200 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
203 DECLARE_PCI_UNMAP_ADDR(addr);
206 return sizeof(struct dummy) != 0;
210 * unmap_skb - unmap a packet main body and its page fragments
212 * @q: the Tx queue containing Tx descriptors for the packet
213 * @cidx: index of Tx descriptor
214 * @pdev: the PCI device
216 * Unmap the main body of an sk_buff and its page fragments, if any.
217 * Because of the fairly complicated structure of our SGLs and the desire
218 * to conserve space for metadata, the information necessary to unmap an
219 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
220 * descriptors (the physical addresses of the various data buffers), and
221 * the SW descriptor state (assorted indices). The send functions
222 * initialize the indices for the first packet descriptor so we can unmap
223 * the buffers held in the first Tx descriptor here, and we have enough
224 * information at this point to set the state for the next Tx descriptor.
226 * Note that it is possible to clean up the first descriptor of a packet
227 * before the send routines have written the next descriptors, but this
228 * race does not cause any problem. We just end up writing the unmapping
229 * info for the descriptor first.
231 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
232 unsigned int cidx, struct pci_dev *pdev)
234 const struct sg_ent *sgp;
235 struct tx_sw_desc *d = &q->sdesc[cidx];
236 int nfrags, frag_idx, curflit, j = d->addr_idx;
238 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
239 frag_idx = d->fragidx;
241 if (frag_idx == 0 && skb_headlen(skb)) {
242 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
243 skb_headlen(skb), PCI_DMA_TODEVICE);
247 curflit = d->sflit + 1 + j;
248 nfrags = skb_shinfo(skb)->nr_frags;
250 while (frag_idx < nfrags && curflit < WR_FLITS) {
251 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
252 skb_shinfo(skb)->frags[frag_idx].size,
263 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
264 d = cidx + 1 == q->size ? q->sdesc : d + 1;
265 d->fragidx = frag_idx;
267 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
272 * free_tx_desc - reclaims Tx descriptors and their buffers
273 * @adapter: the adapter
274 * @q: the Tx queue to reclaim descriptors from
275 * @n: the number of descriptors to reclaim
277 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
278 * Tx buffers. Called with the Tx queue lock held.
280 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
283 struct tx_sw_desc *d;
284 struct pci_dev *pdev = adapter->pdev;
285 unsigned int cidx = q->cidx;
287 const int need_unmap = need_skb_unmap() &&
288 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
292 if (d->skb) { /* an SGL is present */
294 unmap_skb(d->skb, q, cidx, pdev);
299 if (++cidx == q->size) {
308 * reclaim_completed_tx - reclaims completed Tx descriptors
309 * @adapter: the adapter
310 * @q: the Tx queue to reclaim completed descriptors from
312 * Reclaims Tx descriptors that the SGE has indicated it has processed,
313 * and frees the associated buffers if possible. Called with the Tx
316 static inline void reclaim_completed_tx(struct adapter *adapter,
319 unsigned int reclaim = q->processed - q->cleaned;
322 free_tx_desc(adapter, q, reclaim);
323 q->cleaned += reclaim;
324 q->in_use -= reclaim;
329 * should_restart_tx - are there enough resources to restart a Tx queue?
332 * Checks if there are enough descriptors to restart a suspended Tx queue.
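 * We only restart once enough descriptors have completed that, after
 * reclaiming them, more than half of the ring would be free.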
334 static inline int should_restart_tx(const struct sge_txq *q)
336 unsigned int r = q->processed - q->cleaned;
338 return q->in_use - r < (q->size >> 1);
342 * free_rx_bufs - free the Rx buffers on an SGE free list
343 * @pdev: the PCI device associated with the adapter
344 * @rxq: the SGE free list to clean up
346 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
347 * this queue should be stopped before calling this function.
349 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
351 unsigned int cidx = q->cidx;
353 while (q->credits--) {
354 struct rx_sw_desc *d = &q->sdesc[cidx];
356 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
357 q->buf_size, PCI_DMA_FROMDEVICE);
359 if (d->pg_chunk.page)
360 put_page(d->pg_chunk.page);
361 d->pg_chunk.page = NULL;
366 if (++cidx == q->size)
370 if (q->pg_chunk.page) {
371 __free_pages(q->pg_chunk.page, q->order);
372 q->pg_chunk.page = NULL;
377 * add_one_rx_buf - add a packet buffer to a free-buffer list
378 * @va: buffer start VA
379 * @len: the buffer length
380 * @d: the HW Rx descriptor to write
381 * @sd: the SW Rx descriptor to write
382 * @gen: the generation bit value
383 * @pdev: the PCI device associated with the adapter
385 * Add a buffer of the given length to the supplied HW and SW Rx
388 static inline int add_one_rx_buf(void *va, unsigned int len,
389 struct rx_desc *d, struct rx_sw_desc *sd,
390 unsigned int gen, struct pci_dev *pdev)
394 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
395 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
398 pci_unmap_addr_set(sd, dma_addr, mapping);
400 d->addr_lo = cpu_to_be32(mapping);
401 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
403 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
404 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
408 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
411 if (!q->pg_chunk.page) {
412 q->pg_chunk.page = alloc_pages(gfp, order);
413 if (unlikely(!q->pg_chunk.page))
415 q->pg_chunk.va = page_address(q->pg_chunk.page);
416 q->pg_chunk.offset = 0;
418 sd->pg_chunk = q->pg_chunk;
420 q->pg_chunk.offset += q->buf_size;
421 if (q->pg_chunk.offset == (PAGE_SIZE << order))
422 q->pg_chunk.page = NULL;
424 q->pg_chunk.va += q->buf_size;
425 get_page(q->pg_chunk.page);
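/*
 * ring_fl_db - ring the doorbell for a free-buffer list
 *
 * Tell the HW about newly added free-list buffers. The doorbell write is
 * batched: it is issued only once at least a quarter of the list's credits
 * are pending.
 */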
430 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
432 if (q->pend_cred >= q->credits / 4) {
434 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
439 * refill_fl - refill an SGE free-buffer list
440 * @adapter: the adapter
441 * @q: the free-list to refill
442 * @n: the number of new buffers to allocate
443 * @gfp: the gfp flags for allocating new buffers
445 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
446 * allocated with the supplied gfp flags. The caller must ensure that
447 * @n does not exceed the queue's capacity.
449 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
452 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
453 struct rx_desc *d = &q->desc[q->pidx];
454 unsigned int count = 0;
460 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
461 nomem: q->alloc_failed++;
464 buf_start = sd->pg_chunk.va;
466 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
472 buf_start = skb->data;
475 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
487 if (++q->pidx == q->size) {
497 q->pend_cred += count;
503 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
505 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
506 GFP_ATOMIC | __GFP_COMP);
510 * recycle_rx_buf - recycle a receive buffer
511 * @adapter: the adapter
512 * @q: the SGE free list
513 * @idx: index of buffer to recycle
515 * Recycles the specified buffer on the given free list by adding it at
516 * the next available slot on the list.
518 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
521 struct rx_desc *from = &q->desc[idx];
522 struct rx_desc *to = &q->desc[q->pidx];
524 q->sdesc[q->pidx] = q->sdesc[idx];
525 to->addr_lo = from->addr_lo; /* already big endian */
526 to->addr_hi = from->addr_hi; /* likewise */
528 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
529 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
531 if (++q->pidx == q->size) {
542 * alloc_ring - allocate resources for an SGE descriptor ring
543 * @pdev: the PCI device
544 * @nelem: the number of descriptors
545 * @elem_size: the size of each descriptor
546 * @sw_size: the size of the SW state associated with each ring element
547 * @phys: the physical address of the allocated ring
548 * @metadata: address of the array holding the SW state for the ring
550 * Allocates resources for an SGE descriptor ring, such as Tx queues,
551 * free buffer lists, or response queues. Each SGE ring requires
552 * space for its HW descriptors plus, optionally, space for the SW state
553 * associated with each HW entry (the metadata). The function returns
554 * three values: the virtual address for the HW ring (the return value
555 * of the function), the physical address of the HW ring, and the address
558 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
559 size_t sw_size, dma_addr_t * phys, void *metadata)
561 size_t len = nelem * elem_size;
563 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
567 if (sw_size && metadata) {
568 s = kcalloc(nelem, sw_size, GFP_KERNEL);
571 dma_free_coherent(&pdev->dev, len, p, *phys);
574 *(void **)metadata = s;
581 * t3_reset_qset - reset an SGE qset
584 * Reset the qset structure.
585 * The NAPI structure is preserved in the event of
586 * the qset's reincarnation, for example during EEH recovery.
588 static void t3_reset_qset(struct sge_qset *q)
591 !(q->adap->flags & NAPI_INIT)) {
592 memset(q, 0, sizeof(*q));
597 memset(&q->rspq, 0, sizeof(q->rspq));
598 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
599 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
601 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
602 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
607 * free_qset - free the resources of an SGE queue set
608 * @adapter: the adapter owning the queue set
611 * Release the HW and SW resources associated with an SGE queue set, such
612 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
613 * queue set must be quiesced prior to calling this.
615 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
618 struct pci_dev *pdev = adapter->pdev;
620 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
622 spin_lock_irq(&adapter->sge.reg_lock);
623 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
624 spin_unlock_irq(&adapter->sge.reg_lock);
625 free_rx_bufs(pdev, &q->fl[i]);
626 kfree(q->fl[i].sdesc);
627 dma_free_coherent(&pdev->dev,
629 sizeof(struct rx_desc), q->fl[i].desc,
633 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
634 if (q->txq[i].desc) {
635 spin_lock_irq(&adapter->sge.reg_lock);
636 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
637 spin_unlock_irq(&adapter->sge.reg_lock);
638 if (q->txq[i].sdesc) {
639 free_tx_desc(adapter, &q->txq[i],
641 kfree(q->txq[i].sdesc);
643 dma_free_coherent(&pdev->dev,
645 sizeof(struct tx_desc),
646 q->txq[i].desc, q->txq[i].phys_addr);
647 __skb_queue_purge(&q->txq[i].sendq);
651 spin_lock_irq(&adapter->sge.reg_lock);
652 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
653 spin_unlock_irq(&adapter->sge.reg_lock);
654 dma_free_coherent(&pdev->dev,
655 q->rspq.size * sizeof(struct rsp_desc),
656 q->rspq.desc, q->rspq.phys_addr);
663 * init_qset_cntxt - initialize an SGE queue set context info
665 * @id: the queue set id
667 * Initializes the TIDs and context ids for the queues of a queue set.
669 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
671 qs->rspq.cntxt_id = id;
672 qs->fl[0].cntxt_id = 2 * id;
673 qs->fl[1].cntxt_id = 2 * id + 1;
674 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
675 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
676 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
677 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
678 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
682 * sgl_len - calculates the size of an SGL of the given capacity
683 * @n: the number of SGL entries
685 * Calculates the number of flits needed for a scatter/gather list that
686 * can hold the given number of entries.
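 * For example, n = 3 entries take (3 * 3) / 2 + 1 = 5 flits: one full
 * 3-flit sg_ent holding two address/length pairs plus 2 flits for the odd
 * entry.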
688 static inline unsigned int sgl_len(unsigned int n)
690 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
691 return (3 * n) / 2 + (n & 1);
695 * flits_to_desc - returns the num of Tx descriptors for the given flits
696 * @n: the number of flits
698 * Calculates the number of Tx descriptors needed for the supplied number
701 static inline unsigned int flits_to_desc(unsigned int n)
703 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
704 return flit_desc_map[n];
708 * get_packet - return the next ingress packet buffer from a free list
709 * @adap: the adapter that received the packet
710 * @fl: the SGE free list holding the packet
711 * @len: the packet length including any SGE padding
712 * @drop_thres: # of remaining buffers before we start dropping packets
714 * Get the next packet from a free list and complete setup of the
715 * sk_buff. If the packet is small we make a copy and recycle the
716 * original buffer, otherwise we use the original buffer itself. If a
717 * positive drop threshold is supplied packets are dropped and their
718 * buffers recycled if (a) the number of remaining buffers is under the
719 * threshold and the packet is too big to copy, or (b) the packet should
720 * be copied but there is no memory for the copy.
722 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
723 unsigned int len, unsigned int drop_thres)
725 struct sk_buff *skb = NULL;
726 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
728 prefetch(sd->skb->data);
731 if (len <= SGE_RX_COPY_THRES) {
732 skb = alloc_skb(len, GFP_ATOMIC);
733 if (likely(skb != NULL)) {
735 pci_dma_sync_single_for_cpu(adap->pdev,
736 pci_unmap_addr(sd, dma_addr), len,
738 memcpy(skb->data, sd->skb->data, len);
739 pci_dma_sync_single_for_device(adap->pdev,
740 pci_unmap_addr(sd, dma_addr), len,
742 } else if (!drop_thres)
745 recycle_rx_buf(adap, fl, fl->cidx);
749 if (unlikely(fl->credits < drop_thres) &&
750 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
751 GFP_ATOMIC | __GFP_COMP) == 0)
755 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
756 fl->buf_size, PCI_DMA_FROMDEVICE);
759 __refill_fl(adap, fl);
764 * get_packet_pg - return the next ingress packet buffer from a free list
765 * @adap: the adapter that received the packet
766 * @fl: the SGE free list holding the packet
767 * @len: the packet length including any SGE padding
768 * @drop_thres: # of remaining buffers before we start dropping packets
770 * Get the next packet from a free list populated with page chunks.
771 * If the packet is small we make a copy and recycle the original buffer,
772 * otherwise we attach the original buffer as a page fragment to a fresh
773 * sk_buff. If a positive drop threshold is supplied packets are dropped
774 * and their buffers recycled if (a) the number of remaining buffers is
775 * under the threshold and the packet is too big to copy, or (b) there's
778 * Note: this function is similar to @get_packet but deals with Rx buffers
779 * that are page chunks rather than sk_buffs.
781 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
782 struct sge_rspq *q, unsigned int len,
783 unsigned int drop_thres)
785 struct sk_buff *newskb, *skb;
786 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
788 newskb = skb = q->pg_skb;
790 if (!skb && (len <= SGE_RX_COPY_THRES)) {
791 newskb = alloc_skb(len, GFP_ATOMIC);
792 if (likely(newskb != NULL)) {
793 __skb_put(newskb, len);
794 pci_dma_sync_single_for_cpu(adap->pdev,
795 pci_unmap_addr(sd, dma_addr), len,
797 memcpy(newskb->data, sd->pg_chunk.va, len);
798 pci_dma_sync_single_for_device(adap->pdev,
799 pci_unmap_addr(sd, dma_addr), len,
801 } else if (!drop_thres)
805 recycle_rx_buf(adap, fl, fl->cidx);
810 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
814 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
815 if (unlikely(!newskb)) {
821 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
822 fl->buf_size, PCI_DMA_FROMDEVICE);
824 __skb_put(newskb, SGE_RX_PULL_LEN);
825 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
826 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
827 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
828 len - SGE_RX_PULL_LEN);
830 newskb->data_len = len - SGE_RX_PULL_LEN;
832 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
834 sd->pg_chunk.offset, len);
836 newskb->data_len += len;
838 newskb->truesize += newskb->data_len;
842 * We do not refill FLs here; we let the caller do it to overlap a
849 * get_imm_packet - return the next ingress packet buffer from a response
850 * @resp: the response descriptor containing the packet data
852 * Return a packet containing the immediate data of the given response.
854 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
856 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
859 __skb_put(skb, IMMED_PKT_SIZE);
860 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
866 * calc_tx_descs - calculate the number of Tx descriptors for a packet
869 * Returns the number of Tx descriptors needed for the given Ethernet
870 * packet. Ethernet packets require addition of WR and CPL headers.
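 * For example, a packet with a linear head and two page fragments needs
 * sgl_len(3) + 2 = 7 flits (SGL plus the WR and CPL headers), one more
 * with TSO.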
872 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
876 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
879 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
880 if (skb_shinfo(skb)->gso_size)
882 return flits_to_desc(flits);
886 * make_sgl - populate a scatter/gather list for a packet
888 * @sgp: the SGL to populate
889 * @start: start address of skb main body data to include in the SGL
890 * @len: length of skb main body data to include in the SGL
891 * @pdev: the PCI device
893 * Generates a scatter/gather list for the buffers that make up a packet
894 * and returns the SGL size in 8-byte words. The caller must size the SGL
897 static inline unsigned int make_sgl(const struct sk_buff *skb,
898 struct sg_ent *sgp, unsigned char *start,
899 unsigned int len, struct pci_dev *pdev)
902 unsigned int i, j = 0, nfrags;
905 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
906 sgp->len[0] = cpu_to_be32(len);
907 sgp->addr[0] = cpu_to_be64(mapping);
911 nfrags = skb_shinfo(skb)->nr_frags;
912 for (i = 0; i < nfrags; i++) {
913 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
915 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
916 frag->size, PCI_DMA_TODEVICE);
917 sgp->len[j] = cpu_to_be32(frag->size);
918 sgp->addr[j] = cpu_to_be64(mapping);
925 return ((nfrags + (len != 0)) * 3) / 2 + j;
929 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
933 * Ring the doorbell if a Tx queue is asleep. There is a natural race
934 * where the HW goes to sleep just after we check; in that case, however,
935 * the interrupt handler will detect the outstanding TX packet
936 * and ring the doorbell for us.
938 * When GTS is disabled we unconditionally ring the doorbell.
940 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
943 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
944 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
945 set_bit(TXQ_LAST_PKT_DB, &q->flags);
946 t3_write_reg(adap, A_SG_KDOORBELL,
947 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
950 wmb(); /* write descriptors before telling HW */
951 t3_write_reg(adap, A_SG_KDOORBELL,
952 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
956 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
958 #if SGE_NUM_GENBITS == 2
959 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
964 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
965 * @ndesc: number of Tx descriptors spanned by the SGL
966 * @skb: the packet corresponding to the WR
967 * @d: first Tx descriptor to be written
968 * @pidx: index of above descriptors
969 * @q: the SGE Tx queue
971 * @flits: number of flits to the start of the SGL in the first descriptor
972 * @sgl_flits: the SGL size in flits
973 * @gen: the Tx descriptor generation
974 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
975 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
977 * Write a work request header and an associated SGL. If the SGL is
978 * small enough to fit into one Tx descriptor it has already been written
979 * and we just need to write the WR header. Otherwise we distribute the
980 * SGL across the number of descriptors it spans.
982 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
983 struct tx_desc *d, unsigned int pidx,
984 const struct sge_txq *q,
985 const struct sg_ent *sgl,
986 unsigned int flits, unsigned int sgl_flits,
987 unsigned int gen, __be32 wr_hi,
990 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
991 struct tx_sw_desc *sd = &q->sdesc[pidx];
994 if (need_skb_unmap()) {
1000 if (likely(ndesc == 1)) {
1002 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1003 V_WR_SGLSFLT(flits)) | wr_hi;
1005 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1006 V_WR_GEN(gen)) | wr_lo;
1009 unsigned int ogen = gen;
1010 const u64 *fp = (const u64 *)sgl;
1011 struct work_request_hdr *wp = wrp;
1013 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1014 V_WR_SGLSFLT(flits)) | wr_hi;
1017 unsigned int avail = WR_FLITS - flits;
1019 if (avail > sgl_flits)
1021 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1031 if (++pidx == q->size) {
1039 wrp = (struct work_request_hdr *)d;
1040 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1041 V_WR_SGLSFLT(1)) | wr_hi;
1042 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1044 V_WR_GEN(gen)) | wr_lo;
1049 wrp->wr_hi |= htonl(F_WR_EOP);
1051 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1052 wr_gen2((struct tx_desc *)wp, ogen);
1053 WARN_ON(ndesc != 0);
1058 * write_tx_pkt_wr - write a TX_PKT work request
1059 * @adap: the adapter
1060 * @skb: the packet to send
1061 * @pi: the egress interface
1062 * @pidx: index of the first Tx descriptor to write
1063 * @gen: the generation value to use
1065 * @ndesc: number of descriptors the packet will occupy
1066 * @compl: the value of the COMPL bit to use
1068 * Generate a TX_PKT work request to send the supplied packet.
1070 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1071 const struct port_info *pi,
1072 unsigned int pidx, unsigned int gen,
1073 struct sge_txq *q, unsigned int ndesc,
1076 unsigned int flits, sgl_flits, cntrl, tso_info;
1077 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1078 struct tx_desc *d = &q->desc[pidx];
1079 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1081 cpl->len = htonl(skb->len | 0x80000000);
1082 cntrl = V_TXPKT_INTF(pi->port_id);
1084 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1085 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1087 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1090 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1093 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1094 hdr->cntrl = htonl(cntrl);
1095 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1096 CPL_ETH_II : CPL_ETH_II_VLAN;
1097 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1098 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1099 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1100 hdr->lso_info = htonl(tso_info);
1103 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1104 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1105 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1106 cpl->cntrl = htonl(cntrl);
1108 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1109 q->sdesc[pidx].skb = NULL;
1111 skb_copy_from_linear_data(skb, &d->flit[2],
1114 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1116 flits = (skb->len + 7) / 8 + 2;
1117 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1118 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1119 | F_WR_SOP | F_WR_EOP | compl);
1121 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1122 V_WR_TID(q->token));
1131 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1132 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1134 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1135 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1136 htonl(V_WR_TID(q->token)));
1139 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1140 struct sge_qset *qs, struct sge_txq *q)
1142 netif_tx_stop_queue(txq);
1143 set_bit(TXQ_ETH, &qs->txq_stopped);
1148 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1150 * @dev: the egress net device
1152 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1154 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1157 unsigned int ndesc, pidx, credits, gen, compl;
1158 const struct port_info *pi = netdev_priv(dev);
1159 struct adapter *adap = pi->adapter;
1160 struct netdev_queue *txq;
1161 struct sge_qset *qs;
1165 * The chip's minimum packet length is 9 octets, but play it safe and reject
1166 * anything shorter than an Ethernet header.
1168 if (unlikely(skb->len < ETH_HLEN)) {
1170 return NETDEV_TX_OK;
1173 qidx = skb_get_queue_mapping(skb);
1175 q = &qs->txq[TXQ_ETH];
1176 txq = netdev_get_tx_queue(dev, qidx);
1178 spin_lock(&q->lock);
1179 reclaim_completed_tx(adap, q);
1181 credits = q->size - q->in_use;
1182 ndesc = calc_tx_descs(skb);
1184 if (unlikely(credits < ndesc)) {
1185 t3_stop_tx_queue(txq, qs, q);
1186 dev_err(&adap->pdev->dev,
1187 "%s: Tx ring %u full while queue awake!\n",
1188 dev->name, q->cntxt_id & 7);
1189 spin_unlock(&q->lock);
1190 return NETDEV_TX_BUSY;
1194 if (unlikely(credits - ndesc < q->stop_thres)) {
1195 t3_stop_tx_queue(txq, qs, q);
1197 if (should_restart_tx(q) &&
1198 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1200 netif_tx_wake_queue(txq);
1205 q->unacked += ndesc;
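	/* Request a completion (WR COMPL bit) roughly once per 8 descriptors queued */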
1206 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1210 if (q->pidx >= q->size) {
1215 /* update port statistics */
1216 if (skb->ip_summed == CHECKSUM_COMPLETE)
1217 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1218 if (skb_shinfo(skb)->gso_size)
1219 qs->port_stats[SGE_PSTAT_TSO]++;
1220 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1221 qs->port_stats[SGE_PSTAT_VLANINS]++;
1223 dev->trans_start = jiffies;
1224 spin_unlock(&q->lock);
1227 * We do not use Tx completion interrupts to free DMAd Tx packets.
1228 * This is good for performance but means that we rely on new Tx
1229 * packets arriving to run the destructors of completed packets,
1230 * which open up space in their sockets' send queues. Sometimes
1231 * we do not get such new packets, causing Tx to stall. A single
1232 * UDP transmitter is a good example of this situation. We have
1233 * a clean up timer that periodically reclaims completed packets
1234 * but it doesn't run often enough (nor do we want it to) to prevent
1235 * lengthy stalls. A solution to this problem is to run the
1236 * destructor early, after the packet is queued but before it's DMAd.
1237 * A downside is that we lie to socket memory accounting, but the amount
1238 * of extra memory is reasonable (limited by the number of Tx
1239 * descriptors), the packets do actually get freed quickly by new
1240 * packets almost always, and for protocols like TCP that wait for
1241 * acks to really free up the data the extra memory is even less.
1242 * On the positive side we run the destructors on the sending CPU
1243 * rather than on a potentially different completing CPU, usually a
1244 * good thing. We also run them without holding our Tx queue lock,
1245 * unlike what reclaim_completed_tx() would otherwise do.
1247 * Run the destructor before telling the DMA engine about the packet
1248 * to make sure it doesn't complete and get freed prematurely.
1250 if (likely(!skb_shared(skb)))
1253 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1254 check_ring_tx_db(adap, q);
1255 return NETDEV_TX_OK;
1259 * write_imm - write a packet into a Tx descriptor as immediate data
1260 * @d: the Tx descriptor to write
1262 * @len: the length of packet data to write as immediate data
1263 * @gen: the generation bit value to write
1265 * Writes a packet as immediate data into a Tx descriptor. The packet
1266 * contains a work request at its beginning. We must write the packet
1267 * carefully so the SGE doesn't read it accidentally before it's written
1270 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1271 unsigned int len, unsigned int gen)
1273 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1274 struct work_request_hdr *to = (struct work_request_hdr *)d;
1276 if (likely(!skb->data_len))
1277 memcpy(&to[1], &from[1], len - sizeof(*from));
1279 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1281 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1282 V_WR_BCNTLFLT(len & 7));
1284 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1285 V_WR_LEN((len + 7) / 8));
1291 * check_desc_avail - check descriptor availability on a send queue
1292 * @adap: the adapter
1293 * @q: the send queue
1294 * @skb: the packet needing the descriptors
1295 * @ndesc: the number of Tx descriptors needed
1296 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1298 * Checks if the requested number of Tx descriptors is available on an
1299 * SGE send queue. If the queue is already suspended or not enough
1300 * descriptors are available the packet is queued for later transmission.
1301 * Must be called with the Tx queue locked.
1303 * Returns 0 if enough descriptors are available, 1 if there aren't
1304 * enough descriptors and the packet has been queued, and 2 if the caller
1305 * needs to retry because there weren't enough descriptors at the
1306 * beginning of the call but some freed up in the meantime.
1308 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1309 struct sk_buff *skb, unsigned int ndesc,
1312 if (unlikely(!skb_queue_empty(&q->sendq))) {
1313 addq_exit:__skb_queue_tail(&q->sendq, skb);
1316 if (unlikely(q->size - q->in_use < ndesc)) {
1317 struct sge_qset *qs = txq_to_qset(q, qid);
1319 set_bit(qid, &qs->txq_stopped);
1320 smp_mb__after_clear_bit();
1322 if (should_restart_tx(q) &&
1323 test_and_clear_bit(qid, &qs->txq_stopped))
1333 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1334 * @q: the SGE control Tx queue
1336 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1337 * that send only immediate data (presently just the control queues) and
1338 * thus do not have any sk_buffs to release.
1340 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1342 unsigned int reclaim = q->processed - q->cleaned;
1344 q->in_use -= reclaim;
1345 q->cleaned += reclaim;
1348 static inline int immediate(const struct sk_buff *skb)
1350 return skb->len <= WR_LEN;
1354 * ctrl_xmit - send a packet through an SGE control Tx queue
1355 * @adap: the adapter
1356 * @q: the control queue
1359 * Send a packet through an SGE control Tx queue. Packets sent through
1360 * a control queue must fit entirely as immediate data in a single Tx
1361 * descriptor and have no page fragments.
1363 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1364 struct sk_buff *skb)
1367 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1369 if (unlikely(!immediate(skb))) {
1372 return NET_XMIT_SUCCESS;
1375 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1376 wrp->wr_lo = htonl(V_WR_TID(q->token));
1378 spin_lock(&q->lock);
1379 again:reclaim_completed_tx_imm(q);
1381 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1382 if (unlikely(ret)) {
1384 spin_unlock(&q->lock);
1390 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1393 if (++q->pidx >= q->size) {
1397 spin_unlock(&q->lock);
1399 t3_write_reg(adap, A_SG_KDOORBELL,
1400 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1401 return NET_XMIT_SUCCESS;
1405 * restart_ctrlq - restart a suspended control queue
1406 * @qs: the queue set containing the control queue
1408 * Resumes transmission on a suspended Tx control queue.
1410 static void restart_ctrlq(unsigned long data)
1412 struct sk_buff *skb;
1413 struct sge_qset *qs = (struct sge_qset *)data;
1414 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1416 spin_lock(&q->lock);
1417 again:reclaim_completed_tx_imm(q);
1419 while (q->in_use < q->size &&
1420 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1422 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1424 if (++q->pidx >= q->size) {
1431 if (!skb_queue_empty(&q->sendq)) {
1432 set_bit(TXQ_CTRL, &qs->txq_stopped);
1433 smp_mb__after_clear_bit();
1435 if (should_restart_tx(q) &&
1436 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1441 spin_unlock(&q->lock);
1443 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1444 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1448 * Send a management message through control queue 0
1450 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1454 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1461 * deferred_unmap_destructor - unmap a packet when it is freed
1464 * This is the packet destructor used for Tx packets that need to remain
1465 * mapped until they are freed rather than until their Tx descriptors are
1468 static void deferred_unmap_destructor(struct sk_buff *skb)
1471 const dma_addr_t *p;
1472 const struct skb_shared_info *si;
1473 const struct deferred_unmap_info *dui;
1475 dui = (struct deferred_unmap_info *)skb->head;
1478 if (skb->tail - skb->transport_header)
1479 pci_unmap_single(dui->pdev, *p++,
1480 skb->tail - skb->transport_header,
1483 si = skb_shinfo(skb);
1484 for (i = 0; i < si->nr_frags; i++)
1485 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1489 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1490 const struct sg_ent *sgl, int sgl_flits)
1493 struct deferred_unmap_info *dui;
1495 dui = (struct deferred_unmap_info *)skb->head;
1497 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1498 *p++ = be64_to_cpu(sgl->addr[0]);
1499 *p++ = be64_to_cpu(sgl->addr[1]);
1502 *p = be64_to_cpu(sgl->addr[0]);
1506 * write_ofld_wr - write an offload work request
1507 * @adap: the adapter
1508 * @skb: the packet to send
1510 * @pidx: index of the first Tx descriptor to write
1511 * @gen: the generation value to use
1512 * @ndesc: number of descriptors the packet will occupy
1514 * Write an offload work request to send the supplied packet. The packet
1515 * data already carry the work request with most fields populated.
1517 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1518 struct sge_txq *q, unsigned int pidx,
1519 unsigned int gen, unsigned int ndesc)
1521 unsigned int sgl_flits, flits;
1522 struct work_request_hdr *from;
1523 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1524 struct tx_desc *d = &q->desc[pidx];
1526 if (immediate(skb)) {
1527 q->sdesc[pidx].skb = NULL;
1528 write_imm(d, skb, skb->len, gen);
1532 /* Only TX_DATA builds SGLs */
1534 from = (struct work_request_hdr *)skb->data;
1535 memcpy(&d->flit[1], &from[1],
1536 skb_transport_offset(skb) - sizeof(*from));
1538 flits = skb_transport_offset(skb) / 8;
1539 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1540 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1541 skb->tail - skb->transport_header,
1543 if (need_skb_unmap()) {
1544 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1545 skb->destructor = deferred_unmap_destructor;
1548 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1549 gen, from->wr_hi, from->wr_lo);
1553 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1556 * Returns the number of Tx descriptors needed for the given offload
1557 * packet. These packets are already fully constructed.
1559 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1561 unsigned int flits, cnt;
1563 if (skb->len <= WR_LEN)
1564 return 1; /* packet fits as immediate data */
1566 flits = skb_transport_offset(skb) / 8; /* headers */
1567 cnt = skb_shinfo(skb)->nr_frags;
1568 if (skb->tail != skb->transport_header)
1570 return flits_to_desc(flits + sgl_len(cnt));
1574 * ofld_xmit - send a packet through an offload queue
1575 * @adap: the adapter
1576 * @q: the Tx offload queue
1579 * Send an offload packet through an SGE offload queue.
1581 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1582 struct sk_buff *skb)
1585 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1587 spin_lock(&q->lock);
1588 again:reclaim_completed_tx(adap, q);
1590 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1591 if (unlikely(ret)) {
1593 skb->priority = ndesc; /* save for restart */
1594 spin_unlock(&q->lock);
1604 if (q->pidx >= q->size) {
1608 spin_unlock(&q->lock);
1610 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1611 check_ring_tx_db(adap, q);
1612 return NET_XMIT_SUCCESS;
1616 * restart_offloadq - restart a suspended offload queue
1617 * @qs: the queue set containing the offload queue
1619 * Resumes transmission on a suspended Tx offload queue.
1621 static void restart_offloadq(unsigned long data)
1623 struct sk_buff *skb;
1624 struct sge_qset *qs = (struct sge_qset *)data;
1625 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1626 const struct port_info *pi = netdev_priv(qs->netdev);
1627 struct adapter *adap = pi->adapter;
1629 spin_lock(&q->lock);
1630 again:reclaim_completed_tx(adap, q);
1632 while ((skb = skb_peek(&q->sendq)) != NULL) {
1633 unsigned int gen, pidx;
1634 unsigned int ndesc = skb->priority;
1636 if (unlikely(q->size - q->in_use < ndesc)) {
1637 set_bit(TXQ_OFLD, &qs->txq_stopped);
1638 smp_mb__after_clear_bit();
1640 if (should_restart_tx(q) &&
1641 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1651 if (q->pidx >= q->size) {
1655 __skb_unlink(skb, &q->sendq);
1656 spin_unlock(&q->lock);
1658 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1659 spin_lock(&q->lock);
1661 spin_unlock(&q->lock);
1664 set_bit(TXQ_RUNNING, &q->flags);
1665 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1668 t3_write_reg(adap, A_SG_KDOORBELL,
1669 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1673 * queue_set - return the queue set a packet should use
1676 * Maps a packet to the SGE queue set it should use. The desired queue
1677 * set is carried in bits 1-3 in the packet's priority.
1679 static inline int queue_set(const struct sk_buff *skb)
1681 return skb->priority >> 1;
1685 * is_ctrl_pkt - return whether an offload packet is a control packet
1688 * Determines whether an offload packet should use an OFLD or a CTRL
1689 * Tx queue. This is indicated by bit 0 in the packet's priority.
1691 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1693 return skb->priority & 1;
1697 * t3_offload_tx - send an offload packet
1698 * @tdev: the offload device to send to
1701 * Sends an offload packet. We use the packet priority to select the
1702 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1703 * should be sent as regular or control, bits 1-3 select the queue set.
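 * For example, a priority of 5 (binary 101) selects queue set 2 and the
 * CTRL queue.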
1705 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1707 struct adapter *adap = tdev2adap(tdev);
1708 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1710 if (unlikely(is_ctrl_pkt(skb)))
1711 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1713 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1717 * offload_enqueue - add an offload packet to an SGE offload receive queue
1718 * @q: the SGE response queue
1721 * Add a new offload packet to an SGE response queue's offload packet
1722 * queue. If the packet is the first on the queue it schedules the RX
1723 * softirq to process the queue.
1725 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1727 int was_empty = skb_queue_empty(&q->rx_queue);
1729 __skb_queue_tail(&q->rx_queue, skb);
1732 struct sge_qset *qs = rspq_to_qset(q);
1734 napi_schedule(&qs->napi);
1739 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1740 * @tdev: the offload device that will be receiving the packets
1741 * @q: the SGE response queue that assembled the bundle
1742 * @skbs: the partial bundle
1743 * @n: the number of packets in the bundle
1745 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1747 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1749 struct sk_buff *skbs[], int n)
1752 q->offload_bundles++;
1753 tdev->recv(tdev, skbs, n);
1758 * ofld_poll - NAPI handler for offload packets in interrupt mode
1759 * @napi: the NAPI instance doing the polling
1760 * @budget: polling budget
1762 * The NAPI handler for offload packets when a response queue is serviced
1763 * by the hard interrupt handler, i.e., when it's operating in non-polling
1764 * mode. Creates small packet batches and sends them through the offload
1765 * receive handler. Batches need to be of modest size as we do prefetches
1766 * on the packets in each.
1768 static int ofld_poll(struct napi_struct *napi, int budget)
1770 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1771 struct sge_rspq *q = &qs->rspq;
1772 struct adapter *adapter = qs->adap;
1775 while (work_done < budget) {
1776 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1777 struct sk_buff_head queue;
1780 spin_lock_irq(&q->lock);
1781 __skb_queue_head_init(&queue);
1782 skb_queue_splice_init(&q->rx_queue, &queue);
1783 if (skb_queue_empty(&queue)) {
1784 napi_complete(napi);
1785 spin_unlock_irq(&q->lock);
1788 spin_unlock_irq(&q->lock);
1791 skb_queue_walk_safe(&queue, skb, tmp) {
1792 if (work_done >= budget)
1796 __skb_unlink(skb, &queue);
1797 prefetch(skb->data);
1798 skbs[ngathered] = skb;
1799 if (++ngathered == RX_BUNDLE_SIZE) {
1800 q->offload_bundles++;
1801 adapter->tdev.recv(&adapter->tdev, skbs,
1806 if (!skb_queue_empty(&queue)) {
1807 /* splice remaining packets back onto Rx queue */
1808 spin_lock_irq(&q->lock);
1809 skb_queue_splice(&queue, &q->rx_queue);
1810 spin_unlock_irq(&q->lock);
1812 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1819 * rx_offload - process a received offload packet
1820 * @tdev: the offload device receiving the packet
1821 * @rq: the response queue that received the packet
1823 * @rx_gather: a gather list of packets if we are building a bundle
1824 * @gather_idx: index of the next available slot in the bundle
1826 * Process an ingress offload packet and add it to the offload ingress
1827 * queue. Returns the index of the next available slot in the bundle.
1829 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1830 struct sk_buff *skb, struct sk_buff *rx_gather[],
1831 unsigned int gather_idx)
1833 skb_reset_mac_header(skb);
1834 skb_reset_network_header(skb);
1835 skb_reset_transport_header(skb);
1838 rx_gather[gather_idx++] = skb;
1839 if (gather_idx == RX_BUNDLE_SIZE) {
1840 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1842 rq->offload_bundles++;
1845 offload_enqueue(rq, skb);
1851 * restart_tx - check whether to restart suspended Tx queues
1852 * @qs: the queue set to resume
1854 * Restarts suspended Tx queues of an SGE queue set if they have enough
1855 * free resources to resume operation.
1857 static void restart_tx(struct sge_qset *qs)
1859 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1860 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1861 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1862 qs->txq[TXQ_ETH].restarts++;
1863 if (netif_running(qs->netdev))
1864 netif_tx_wake_queue(qs->tx_q);
1867 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1868 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1869 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1870 qs->txq[TXQ_OFLD].restarts++;
1871 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1873 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1874 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1875 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1876 qs->txq[TXQ_CTRL].restarts++;
1877 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1882 * cxgb3_arp_process - process an ARP request probing a private IP address
1883 * @adapter: the adapter
1884 * @skb: the skbuff containing the ARP request
1886 * Check if the ARP request is probing the private IP address
1887 * dedicated to iSCSI, and generate an ARP reply if so.
1889 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1891 struct net_device *dev = skb->dev;
1892 struct port_info *pi;
1894 unsigned char *arp_ptr;
1901 skb_reset_network_header(skb);
1904 if (arp->ar_op != htons(ARPOP_REQUEST))
1907 arp_ptr = (unsigned char *)(arp + 1);
1909 arp_ptr += dev->addr_len;
1910 memcpy(&sip, arp_ptr, sizeof(sip));
1911 arp_ptr += sizeof(sip);
1912 arp_ptr += dev->addr_len;
1913 memcpy(&tip, arp_ptr, sizeof(tip));
1915 pi = netdev_priv(dev);
1916 if (tip != pi->iscsi_ipv4addr)
1919 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1920 dev->dev_addr, sha);
1924 static inline int is_arp(struct sk_buff *skb)
1926 return skb->protocol == htons(ETH_P_ARP);
1930 * rx_eth - process an ingress ethernet packet
1931 * @adap: the adapter
1932 * @rq: the response queue that received the packet
1934 * @pad: amount of padding at the start of the buffer
1936 * Process an ingress Ethernet packet and deliver it to the stack.
1937 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1938 * if it was immediate data in a response.
1940 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1941 struct sk_buff *skb, int pad, int lro)
1943 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1944 struct sge_qset *qs = rspq_to_qset(rq);
1945 struct port_info *pi;
1947 skb_pull(skb, sizeof(*p) + pad);
1948 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1949 pi = netdev_priv(skb->dev);
1950 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
1952 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1953 skb->ip_summed = CHECKSUM_UNNECESSARY;
1955 skb->ip_summed = CHECKSUM_NONE;
1956 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
1958 if (unlikely(p->vlan_valid)) {
1959 struct vlan_group *grp = pi->vlan_grp;
1961 qs->port_stats[SGE_PSTAT_VLANEX]++;
1964 vlan_gro_receive(&qs->napi, grp,
1965 ntohs(p->vlan), skb);
1967 if (unlikely(pi->iscsi_ipv4addr &&
1969 unsigned short vtag = ntohs(p->vlan) &
1971 skb->dev = vlan_group_get_device(grp,
1973 cxgb3_arp_process(adap, skb);
1975 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1979 dev_kfree_skb_any(skb);
1980 } else if (rq->polling) {
1982 napi_gro_receive(&qs->napi, skb);
1984 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1985 cxgb3_arp_process(adap, skb);
1986 netif_receive_skb(skb);
1992 static inline int is_eth_tcp(u32 rss)
1994 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1998 * lro_add_page - add a page chunk to an LRO session
1999 * @adap: the adapter
2000 * @qs: the associated queue set
2001 * @fl: the free list containing the page chunk to add
2002 * @len: packet length
2003 * @complete: Indicates the last fragment of a frame
2005 * Add a received packet contained in a page chunk to an existing LRO
2008 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2009 struct sge_fl *fl, int len, int complete)
2011 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2012 struct cpl_rx_pkt *cpl;
2013 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2014 int nr_frags = qs->lro_frag_tbl.nr_frags;
2015 int frag_len = qs->lro_frag_tbl.len;
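	/* A page-chunk Rx buffer holds 2 bytes of SGE padding, then the
	 * CPL_RX_PKT header, then the payload. */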
2019 offset = 2 + sizeof(struct cpl_rx_pkt);
2020 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2026 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2027 fl->buf_size, PCI_DMA_FROMDEVICE);
2029 rx_frag += nr_frags;
2030 rx_frag->page = sd->pg_chunk.page;
2031 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2032 rx_frag->size = len;
2034 qs->lro_frag_tbl.nr_frags++;
2035 qs->lro_frag_tbl.len = frag_len;
2040 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2043 if (unlikely(cpl->vlan_valid)) {
2044 struct net_device *dev = qs->netdev;
2045 struct port_info *pi = netdev_priv(dev);
2046 struct vlan_group *grp = pi->vlan_grp;
2048 if (likely(grp != NULL)) {
2049 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2054 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2057 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2061 * handle_rsp_cntrl_info - handles control information in a response
2062 * @qs: the queue set corresponding to the response
2063 * @flags: the response control flags
2065 * Handles the control information of an SGE response, such as GTS
2066 * indications and completion credits for the queue set's Tx queues.
2067 * HW coalesces credits, we don't do any extra SW coalescing.
2069 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2071 unsigned int credits;
2074 if (flags & F_RSPD_TXQ0_GTS)
2075 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2078 credits = G_RSPD_TXQ0_CR(flags);
2080 qs->txq[TXQ_ETH].processed += credits;
2082 credits = G_RSPD_TXQ2_CR(flags);
2084 qs->txq[TXQ_CTRL].processed += credits;
2087 if (flags & F_RSPD_TXQ1_GTS)
2088 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2090 credits = G_RSPD_TXQ1_CR(flags);
2092 qs->txq[TXQ_OFLD].processed += credits;
2096 * check_ring_db - check if we need to ring any doorbells
2097 * @adapter: the adapter
2098 * @qs: the queue set whose Tx queues are to be examined
2099 * @sleeping: indicates which Tx queue sent GTS
2101 * Checks if some of a queue set's Tx queues need to ring their doorbells
2102 * to resume transmission after idling while they still have unprocessed
2105 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2106 unsigned int sleeping)
2108 if (sleeping & F_RSPD_TXQ0_GTS) {
2109 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2111 if (txq->cleaned + txq->in_use != txq->processed &&
2112 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2113 set_bit(TXQ_RUNNING, &txq->flags);
2114 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2115 V_EGRCNTX(txq->cntxt_id));
2119 if (sleeping & F_RSPD_TXQ1_GTS) {
2120 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2122 if (txq->cleaned + txq->in_use != txq->processed &&
2123 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2124 set_bit(TXQ_RUNNING, &txq->flags);
2125 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2126 V_EGRCNTX(txq->cntxt_id));
2132 * is_new_response - check if a response is newly written
2133 * @r: the response descriptor
2134 * @q: the response queue
2136 * Returns true if a response descriptor contains a yet unprocessed
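 * response. The generation bit in each descriptor is compared against the
 * queue's current generation, which flips each time the ring wraps, so a
 * match marks a newly written entry.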
2139 static inline int is_new_response(const struct rsp_desc *r,
2140 const struct sge_rspq *q)
2142 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2145 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2148 q->rx_recycle_buf = 0;
2151 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2152 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2153 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2154 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2155 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2157 /* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
2158 #define NOMEM_INTR_DELAY 2500
2161 * process_responses - process responses from an SGE response queue
2162 * @adap: the adapter
2163 * @qs: the queue set to which the response queue belongs
2164 * @budget: how many responses can be processed in this round
2166 * Process responses from an SGE response queue up to the supplied budget.
2167 * Responses include received packets as well as credits and other events
2168 * for the queues that belong to the response queue's queue set.
2169 * A negative budget is effectively unlimited.
2171 * Additionally choose the interrupt holdoff time for the next interrupt
2172 * on this queue. If the system is under memory shortage, use a fairly
2173 * long delay to help recovery.
2175 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2178 struct sge_rspq *q = &qs->rspq;
2179 struct rsp_desc *r = &q->desc[q->cidx];
2180 int budget_left = budget;
2181 unsigned int sleeping = 0;
2182 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2185 q->next_holdoff = q->holdoff_tmr;
2187 while (likely(budget_left && is_new_response(r, q))) {
2188 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2189 struct sk_buff *skb = NULL;
2190 u32 len, flags = ntohl(r->flags);
2191 __be32 rss_hi = *(const __be32 *)r,
2192 rss_lo = r->rss_hdr.rss_hash_val;
2194 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2196 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2197 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2201 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2202 skb->data[0] = CPL_ASYNC_NOTIF;
2203 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2205 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2206 skb = get_imm_packet(r);
2207 if (unlikely(!skb)) {
2209 q->next_holdoff = NOMEM_INTR_DELAY;
2211 /* consume one credit since we tried */
2217 } else if ((len = ntohl(r->len_cq)) != 0) {
2220 lro &= eth && is_eth_tcp(rss_hi);
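/* The FLQ bit in len_cq selects which of the two free lists supplied
 * the buffer for this response. */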
2222 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2223 if (fl->use_pages) {
2224 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2227 #if L1_CACHE_BYTES < 128
2228 prefetch(addr + L1_CACHE_BYTES);
2230 __refill_fl(adap, fl);
2232 lro_add_page(adap, qs, fl,
2234 flags & F_RSPD_EOP);
2238 skb = get_packet_pg(adap, fl, q,
2241 SGE_RX_DROP_THRES : 0);
2244 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2245 eth ? SGE_RX_DROP_THRES : 0);
2246 if (unlikely(!skb)) {
2250 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2253 if (++fl->cidx == fl->size)
2258 if (flags & RSPD_CTRL_MASK) {
2259 sleeping |= flags & RSPD_GTS_MASK;
2260 handle_rsp_cntrl_info(qs, flags);
2264 if (unlikely(++q->cidx == q->size)) {
2271 if (++q->credits >= (q->size / 4)) {
2272 refill_rspq(adap, q, q->credits);
2276 packet_complete = flags &
2277 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2278 F_RSPD_ASYNC_NOTIF);
2280 if (skb != NULL && packet_complete) {
2282 rx_eth(adap, q, skb, ethpad, lro);
2285 /* Preserve the RSS info in csum & priority */
2287 skb->priority = rss_lo;
2288 ngathered = rx_offload(&adap->tdev, q, skb,
2293 if (flags & F_RSPD_EOP)
2294 clear_rspq_bufstate(q);
2299 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2302 check_ring_db(adap, qs, sleeping);
2304 smp_mb(); /* commit Tx queue .processed updates */
2305 if (unlikely(qs->txq_stopped != 0))
2308 budget -= budget_left;
2312 static inline int is_pure_response(const struct rsp_desc *r)
2314 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
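/* "Pure" means the response carries no async notification, no immediate
 * data, and no free-list buffer (len_cq == 0), so there is nothing to
 * deliver to the stack. */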
2316 return (n | r->len_cq) == 0;
2320 * napi_rx_handler - the NAPI handler for Rx processing
2321 * @napi: the napi instance
2322 * @budget: how many packets we can process in this round
2324 * Handler for new data events when using NAPI.
2326 static int napi_rx_handler(struct napi_struct *napi, int budget)
2328 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2329 struct adapter *adap = qs->adap;
2330 int work_done = process_responses(adap, qs, budget);
2332 if (likely(work_done < budget)) {
2333 napi_complete(napi);
2336 * Because we don't atomically flush the following
2337 * write it is possible that in very rare cases it can
2338 * reach the device in a way that races with a new
2339 * response being written plus an error interrupt
2340 * causing the NAPI interrupt handler below to return
2341 * unhandled status to the OS. To protect against
2342 * this would require flushing the write and doing
2343 * both the write and the flush with interrupts off.
2344 * Way too expensive and unjustifiable given the
2345 * rarity of the race.
2347 * The race cannot happen at all with MSI-X.
2349 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2350 V_NEWTIMER(qs->rspq.next_holdoff) |
2351 V_NEWINDEX(qs->rspq.cidx));
2357 * Returns true if the device is already scheduled for polling.
2359 static inline int napi_is_scheduled(struct napi_struct *napi)
2361 return test_bit(NAPI_STATE_SCHED, &napi->state);
2365 * process_pure_responses - process pure responses from a response queue
2366 * @adap: the adapter
2367 * @qs: the queue set owning the response queue
2368 * @r: the first pure response to process
2370 * A simpler version of process_responses() that handles only pure (i.e.,
2371 * non data-carrying) responses. Such responses are too lightweight to
2372 * justify calling a softirq under NAPI, so we handle them specially in
2373 * the interrupt handler. The function is called with a pointer to a
2374 * response, which the caller must ensure is a valid pure response.
2376 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2378 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2381 struct sge_rspq *q = &qs->rspq;
2382 unsigned int sleeping = 0;
2385 u32 flags = ntohl(r->flags);
2388 if (unlikely(++q->cidx == q->size)) {
2395 if (flags & RSPD_CTRL_MASK) {
2396 sleeping |= flags & RSPD_GTS_MASK;
2397 handle_rsp_cntrl_info(qs, flags);
2401 if (++q->credits >= (q->size / 4)) {
2402 refill_rspq(adap, q, q->credits);
2405 } while (is_new_response(r, q) && is_pure_response(r));
2408 check_ring_db(adap, qs, sleeping);
2410 smp_mb(); /* commit Tx queue .processed updates */
2411 if (unlikely(qs->txq_stopped != 0))
2414 return is_new_response(r, q);
2418 * handle_responses - decide what to do with new responses in NAPI mode
2419 * @adap: the adapter
2420 * @q: the response queue
2422 * This is used by the NAPI interrupt handlers to decide what to do with
2423 * new SGE responses. If there are no new responses it returns -1. If
2424 * there are new responses and they are pure (i.e., non-data carrying)
2425 * it handles them straight in hard interrupt context as they are very
2426 * cheap and don't deliver any packets. Finally, if there are any data
2427 * signaling responses it schedules the NAPI handler. Returns 1 if it
2428 * schedules NAPI, 0 if all new responses were pure.
2430 * The caller must ascertain NAPI is not already running.
2432 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2434 struct sge_qset *qs = rspq_to_qset(q);
2435 struct rsp_desc *r = &q->desc[q->cidx];
2437 if (!is_new_response(r, q))
2439 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2440 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2441 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2444 napi_schedule(&qs->napi);
2449 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2450 * (i.e., response queue serviced in hard interrupt).
2452 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2454 struct sge_qset *qs = cookie;
2455 struct adapter *adap = qs->adap;
2456 struct sge_rspq *q = &qs->rspq;
2458 spin_lock(&q->lock);
2459 if (process_responses(adap, qs, -1) == 0)
2460 q->unhandled_irqs++;
2461 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2462 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2463 spin_unlock(&q->lock);
2468 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2469 * (i.e., response queue serviced by NAPI polling).
2471 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2473 struct sge_qset *qs = cookie;
2474 struct sge_rspq *q = &qs->rspq;
2476 spin_lock(&q->lock);
2478 if (handle_responses(qs->adap, q) < 0)
2479 q->unhandled_irqs++;
2480 spin_unlock(&q->lock);
2485 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2486 * SGE response queues as well as error and other async events as they all use
2487 * the same MSI vector. We use one SGE response queue per port in this mode
2488 * and protect all response queues with queue 0's lock.
2490 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2492 int new_packets = 0;
2493 struct adapter *adap = cookie;
2494 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2496 spin_lock(&q->lock);
2498 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2499 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2500 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2504 if (adap->params.nports == 2 &&
2505 process_responses(adap, &adap->sge.qs[1], -1)) {
2506 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2508 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2509 V_NEWTIMER(q1->next_holdoff) |
2510 V_NEWINDEX(q1->cidx));
2514 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2515 q->unhandled_irqs++;
2517 spin_unlock(&q->lock);
2521 static int rspq_check_napi(struct sge_qset *qs)
2523 struct sge_rspq *q = &qs->rspq;
2525 if (!napi_is_scheduled(&qs->napi) &&
2526 is_new_response(&q->desc[q->cidx], q)) {
2527 napi_schedule(&qs->napi);
2534 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2535 * by NAPI polling). Handles data events from SGE response queues as well as
2536 * error and other async events as they all use the same MSI vector. We use
2537 * one SGE response queue per port in this mode and protect all response
2538 * queues with queue 0's lock.
2540 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2543 struct adapter *adap = cookie;
2544 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2546 spin_lock(&q->lock);
2548 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2549 if (adap->params.nports == 2)
2550 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2551 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2552 q->unhandled_irqs++;
2554 spin_unlock(&q->lock);
2559 * A helper function that processes responses and issues GTS.
2561 static inline int process_responses_gts(struct adapter *adap,
2562 struct sge_rspq *rq)
2566 work = process_responses(adap, rspq_to_qset(rq), -1);
2567 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2568 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2573 * The legacy INTx interrupt handler. This needs to handle data events from
2574 * SGE response queues as well as error and other async events as they all use
2575 * the same interrupt pin. We use one SGE response queue per port in this mode
2576 * and protect all response queues with queue 0's lock.
2578 static irqreturn_t t3_intr(int irq, void *cookie)
2580 int work_done, w0, w1;
2581 struct adapter *adap = cookie;
2582 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2583 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2585 spin_lock(&q0->lock);
2587 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2588 w1 = adap->params.nports == 2 &&
2589 is_new_response(&q1->desc[q1->cidx], q1);
2591 if (likely(w0 | w1)) {
2592 t3_write_reg(adap, A_PL_CLI, 0);
2593 t3_read_reg(adap, A_PL_CLI); /* flush */
2596 process_responses_gts(adap, q0);
2599 process_responses_gts(adap, q1);
2601 work_done = w0 | w1;
2603 work_done = t3_slow_intr_handler(adap);
2605 spin_unlock(&q0->lock);
2606 return IRQ_RETVAL(work_done != 0);
2610 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2611 * Handles data events from SGE response queues as well as error and other
2612 * async events as they all use the same interrupt pin. We use one SGE
2613 * response queue per port in this mode and protect all response queues with queue 0's lock.
2616 static irqreturn_t t3b_intr(int irq, void *cookie)
2619 struct adapter *adap = cookie;
2620 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2622 t3_write_reg(adap, A_PL_CLI, 0);
2623 map = t3_read_reg(adap, A_SG_DATA_INTR);
2625 if (unlikely(!map)) /* shared interrupt, most likely */
2628 spin_lock(&q0->lock);
2630 if (unlikely(map & F_ERRINTR))
2631 t3_slow_intr_handler(adap);
2633 if (likely(map & 1))
2634 process_responses_gts(adap, q0);
2637 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2639 spin_unlock(&q0->lock);
2644 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2645 * Handles data events from SGE response queues as well as error and other
2646 * async events as they all use the same interrupt pin. We use one SGE
2647 * response queue per port in this mode and protect all response queues with queue 0's lock.
2650 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2653 struct adapter *adap = cookie;
2654 struct sge_qset *qs0 = &adap->sge.qs[0];
2655 struct sge_rspq *q0 = &qs0->rspq;
2657 t3_write_reg(adap, A_PL_CLI, 0);
2658 map = t3_read_reg(adap, A_SG_DATA_INTR);
2660 if (unlikely(!map)) /* shared interrupt, most likely */
2663 spin_lock(&q0->lock);
2665 if (unlikely(map & F_ERRINTR))
2666 t3_slow_intr_handler(adap);
2668 if (likely(map & 1))
2669 napi_schedule(&qs0->napi);
2672 napi_schedule(&adap->sge.qs[1].napi);
2674 spin_unlock(&q0->lock);
2679 * t3_intr_handler - select the top-level interrupt handler
2680 * @adap: the adapter
2681 * @polling: whether using NAPI to service response queues
2683 * Selects the top-level interrupt handler based on the type of interrupts
2684 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the response queues.
2687 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2689 if (adap->flags & USING_MSIX)
2690 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2691 if (adap->flags & USING_MSI)
2692 return polling ? t3_intr_msi_napi : t3_intr_msi;
2693 if (adap->params.rev > 0)
2694 return polling ? t3b_intr_napi : t3b_intr;
2698 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2699 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2700 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2701 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2703 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2704 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2708 * t3_sge_err_intr_handler - SGE async event interrupt handler
2709 * @adapter: the adapter
2711 * Interrupt handler for SGE asynchronous (non-data) events.
2713 void t3_sge_err_intr_handler(struct adapter *adapter)
2715 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2717 if (status & SGE_PARERR)
2718 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2719 status & SGE_PARERR);
2720 if (status & SGE_FRAMINGERR)
2721 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2722 status & SGE_FRAMINGERR);
2724 if (status & F_RSPQCREDITOVERFOW)
2725 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2727 if (status & F_RSPQDISABLED) {
2728 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2731 "packet delivered to disabled response queue "
2732 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2735 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2736 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2737 status & F_HIPIODRBDROPERR ? "high" : "low");
2739 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2740 if (status & SGE_FATALERR)
2741 t3_fatal_err(adapter);
2745 * sge_timer_cb - perform periodic maintenance of an SGE qset
2746 * @data: the SGE queue set to maintain
2748 * Runs periodically from a timer to perform maintenance of an SGE queue
2749 * set. It performs two tasks:
2751 * a) Cleans up any completed Tx descriptors that may still be pending.
2752 * Normal descriptor cleanup happens when new packets are added to a Tx
2753 * queue so this timer is relatively infrequent and does any cleanup only
2754 * if the Tx queue has not seen any new packets in a while. We make a
2755 * best effort attempt to reclaim descriptors, in that we don't wait
2756 * around if we cannot get a queue's lock (which most likely is because
2757 * someone else is queueing new packets and so will also handle the clean
2758 * up). Since control queues use immediate data exclusively we don't
2759 * bother cleaning them up here.
2761 * b) Replenishes Rx queues that have run out due to memory shortage.
2762 * Normally new Rx buffers are added when existing ones are consumed but
2763 * when out of memory a queue can become empty. We try to add only a few
2764 * buffers here, the queue will be replenished fully as these new buffers
2765 * are used up if memory shortage has subsided.
2767 static void sge_timer_cb(unsigned long data)
2770 struct sge_qset *qs = (struct sge_qset *)data;
2771 struct adapter *adap = qs->adap;
2773 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2774 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2775 spin_unlock(&qs->txq[TXQ_ETH].lock);
2777 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2778 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2779 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2781 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2782 &adap->sge.qs[0].rspq.lock;
2783 if (spin_trylock_irq(lock)) {
2784 if (!napi_is_scheduled(&qs->napi)) {
2785 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2787 if (qs->fl[0].credits < qs->fl[0].size)
2788 __refill_fl(adap, &qs->fl[0]);
2789 if (qs->fl[1].credits < qs->fl[1].size)
2790 __refill_fl(adap, &qs->fl[1]);
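/* A set bit in the status register means the HW stopped this response
 * queue for lack of credits; return one credit and write the bit back
 * to restart it. */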
2792 if (status & (1 << qs->rspq.cntxt_id)) {
2794 if (qs->rspq.credits) {
2795 refill_rspq(adap, &qs->rspq, 1);
2797 qs->rspq.restarted++;
2798 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2799 1 << qs->rspq.cntxt_id);
2803 spin_unlock_irq(lock);
2805 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2809 * t3_update_qset_coalesce - update coalescing settings for a queue set
2810 * @qs: the SGE queue set
2811 * @p: new queue set parameters
2813 * Update the coalescing settings for an SGE queue set. Nothing is done
2814 * if the queue set is not initialized yet.
2816 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
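/* Holdoff timer values are in units of the SGE timer tick, which
 * t3_sge_init() programs to 0.1 us, hence the conversion from
 * microseconds below. */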
2818 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2819 qs->rspq.polling = p->polling;
2820 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2824 * t3_sge_alloc_qset - initialize an SGE queue set
2825 * @adapter: the adapter
2826 * @id: the queue set id
2827 * @nports: how many Ethernet ports will be using this queue set
2828 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2829 * @p: configuration parameters for this queue set
2830 * @ntxq: number of Tx queues for the queue set
2831 * @netdev: net device associated with this queue set
2832 * @netdevq: net device TX queue associated with this queue set
2834 * Allocate resources and initialize an SGE queue set. A queue set
2835 * comprises a response queue, two Rx free-buffer queues, and up to 3
2836 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2837 * queue, offload queue, and control queue.
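*
* A minimal usage sketch (the caller-side names are illustrative, not
* defined in this file): the top-level driver typically calls this once
* per queue set at interface bring-up, e.g.
*
*	err = t3_sge_alloc_qset(adapter, qs_idx, 1, irq_idx,
*				&adapter->params.sge.qset[qs_idx], ntxq,
*				dev, netdev_get_tx_queue(dev, 0));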
2839 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2840 int irq_vec_idx, const struct qset_params *p,
2841 int ntxq, struct net_device *dev,
2842 struct netdev_queue *netdevq)
2844 int i, avail, ret = -ENOMEM;
2845 struct sge_qset *q = &adapter->sge.qs[id];
2847 init_qset_cntxt(q, id);
2848 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
2850 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2851 sizeof(struct rx_desc),
2852 sizeof(struct rx_sw_desc),
2853 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2857 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2858 sizeof(struct rx_desc),
2859 sizeof(struct rx_sw_desc),
2860 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2864 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2865 sizeof(struct rsp_desc), 0,
2866 &q->rspq.phys_addr, NULL);
2870 for (i = 0; i < ntxq; ++i) {
2872 * The control queue always uses immediate data so does not
2873 * need to keep track of any sk_buffs.
2875 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2877 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2878 sizeof(struct tx_desc), sz,
2879 &q->txq[i].phys_addr,
2881 if (!q->txq[i].desc)
2885 q->txq[i].size = p->txq_size[i];
2886 spin_lock_init(&q->txq[i].lock);
2887 skb_queue_head_init(&q->txq[i].sendq);
2890 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2892 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2895 q->fl[0].gen = q->fl[1].gen = 1;
2896 q->fl[0].size = p->fl_size;
2897 q->fl[1].size = p->jumbo_size;
2900 q->rspq.size = p->rspq_size;
2901 spin_lock_init(&q->rspq.lock);
2902 skb_queue_head_init(&q->rspq.rx_queue);
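/* Stop the Ethernet Tx queue early enough that a worst-case packet
 * (MAX_SKB_FRAGS + 1 SGL entries plus the header flits) still fits for
 * each port sharing this queue set. */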
2904 q->txq[TXQ_ETH].stop_thres = nports *
2905 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2907 #if FL0_PG_CHUNK_SIZE > 0
2908 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2910 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2912 #if FL1_PG_CHUNK_SIZE > 0
2913 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2915 q->fl[1].buf_size = is_offload(adapter) ?
2916 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2917 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2920 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2921 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2922 q->fl[0].order = FL0_PG_ORDER;
2923 q->fl[1].order = FL1_PG_ORDER;
2925 spin_lock_irq(&adapter->sge.reg_lock);
2927 /* FL threshold comparison uses < */
2928 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2929 q->rspq.phys_addr, q->rspq.size,
2930 q->fl[0].buf_size, 1, 0);
2934 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2935 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2936 q->fl[i].phys_addr, q->fl[i].size,
2937 q->fl[i].buf_size, p->cong_thres, 1,
2943 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2944 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2945 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2951 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2952 USE_GTS, SGE_CNTXT_OFLD, id,
2953 q->txq[TXQ_OFLD].phys_addr,
2954 q->txq[TXQ_OFLD].size, 0, 1, 0);
2960 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2962 q->txq[TXQ_CTRL].phys_addr,
2963 q->txq[TXQ_CTRL].size,
2964 q->txq[TXQ_CTRL].token, 1, 0);
2969 spin_unlock_irq(&adapter->sge.reg_lock);
2974 t3_update_qset_coalesce(q, p);
2976 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2977 GFP_KERNEL | __GFP_COMP);
2979 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2982 if (avail < q->fl[0].size)
2983 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2986 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2987 GFP_KERNEL | __GFP_COMP);
2988 if (avail < q->fl[1].size)
2989 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2991 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2993 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2994 V_NEWTIMER(q->rspq.holdoff_tmr));
2996 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3000 spin_unlock_irq(&adapter->sge.reg_lock);
3002 t3_free_qset(adapter, q);
3007 * t3_stop_sge_timers - stop SGE timer call backs
3008 * @adap: the adapter
3010 * Stops each SGE queue set's timer callback.
3012 void t3_stop_sge_timers(struct adapter *adap)
3016 for (i = 0; i < SGE_QSETS; ++i) {
3017 struct sge_qset *q = &adap->sge.qs[i];
3019 if (q->tx_reclaim_timer.function)
3020 del_timer_sync(&q->tx_reclaim_timer);
3025 * t3_free_sge_resources - free SGE resources
3026 * @adap: the adapter
3028 * Frees resources used by the SGE queue sets.
3030 void t3_free_sge_resources(struct adapter *adap)
3034 for (i = 0; i < SGE_QSETS; ++i)
3035 t3_free_qset(adap, &adap->sge.qs[i]);
3039 * t3_sge_start - enable SGE
3040 * @adap: the adapter
3042 * Enables the SGE for DMAs. This is the last step in starting packet transfers.
3045 void t3_sge_start(struct adapter *adap)
3047 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3051 * t3_sge_stop - disable SGE operation
3052 * @adap: the adapter
3054 * Disables the DMA engine. This can be called in emergencies (e.g.,
3055 * from error interrupts) or from normal process context. In the latter
3056 * case it also disables any pending queue restart tasklets. Note that
3057 * if it is called in interrupt context it cannot disable the restart
3058 * tasklets as it cannot wait; however, the tasklets will have no effect
3059 * since the doorbells are disabled and the driver will call this again
3060 * later from process context, at which time the tasklets will be stopped
3061 * if they are still running.
3063 void t3_sge_stop(struct adapter *adap)
3065 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3066 if (!in_interrupt()) {
3069 for (i = 0; i < SGE_QSETS; ++i) {
3070 struct sge_qset *qs = &adap->sge.qs[i];
3072 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3073 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3079 * t3_sge_init - initialize SGE
3080 * @adap: the adapter
3081 * @p: the SGE parameters
3083 * Performs SGE initialization needed every time after a chip reset.
3084 * We do not initialize any of the queue sets here; instead, the driver's
3085 * top level must request those individually. We also do not enable DMA
3086 * here; that should be done after the queues have been set up.
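*
* Typical bring-up order, for reference: t3_sge_prep() once at probe
* time, t3_sge_init() after every chip reset, t3_sge_alloc_qset() for
* each queue set, then t3_sge_start() to enable DMA.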
3088 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3090 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3092 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3093 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3094 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3095 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3096 #if SGE_NUM_GENBITS == 1
3097 ctrl |= F_EGRGENCTRL;
3099 if (adap->params.rev > 0) {
3100 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3101 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3103 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3104 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3105 V_LORCQDRBTHRSH(512));
3106 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3107 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3108 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3109 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3110 adap->params.rev < T3_REV_C ? 1000 : 500);
3111 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3112 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3113 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3114 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3115 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3119 * t3_sge_prep - one-time SGE initialization
3120 * @adap: the associated adapter
3121 * @p: SGE parameters
3123 * Performs one-time initialization of SGE SW state. Includes determining
3124 * defaults for the assorted SGE parameters, which admins can change until
3125 * they are used to initialize the SGE.
3127 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3131 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3132 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3134 for (i = 0; i < SGE_QSETS; ++i) {
3135 struct qset_params *q = p->qset + i;
3137 q->polling = adap->params.rev > 0;
3138 q->coalesce_usecs = 5;
3139 q->rspq_size = 1024;
3141 q->jumbo_size = 512;
3142 q->txq_size[TXQ_ETH] = 1024;
3143 q->txq_size[TXQ_OFLD] = 1024;
3144 q->txq_size[TXQ_CTRL] = 256;
3148 spin_lock_init(&adap->sge.reg_lock);
3152 * t3_get_desc - dump an SGE descriptor for debugging purposes
3153 * @qs: the queue set
3154 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3155 * @idx: the descriptor index in the queue
3156 * @data: where to dump the descriptor contents
3158 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3159 * size of the descriptor.
3161 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3162 unsigned char *data)
3168 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3170 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3171 return sizeof(struct tx_desc);
3175 if (!qs->rspq.desc || idx >= qs->rspq.size)
3177 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3178 return sizeof(struct rsp_desc);
3182 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3184 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3185 return sizeof(struct rx_desc);