2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
44 #include "firmware_exports.h"
48 #define SGE_RX_SM_BUF_SIZE 1536
50 #define SGE_RX_COPY_THRES 256
51 #define SGE_RX_PULL_LEN 128
54 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
55 * It must be a divisor of PAGE_SIZE. If set to 0, FL0 will use sk_buffs directly.
58 #define FL0_PG_CHUNK_SIZE 2048
59 #define FL0_PG_ORDER 0
60 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
61 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
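/*
 * Derived from the macros above, as an illustration: with 4 KiB pages, FL0
 * carves two 2 KiB chunks out of each order-0 page and FL1 hands out one
 * 8 KiB chunk per order-1 allocation; with 64 KiB pages, FL1 instead splits
 * each order-0 page into four 16 KiB chunks.
 */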
63 #define SGE_RX_DROP_THRES 16
66 * Max number of Rx buffers we replenish at a time.
68 #define MAX_RX_REFILL 16U
70 * Period of the Tx buffer reclaim timer. This timer does not need to run
71 * frequently as Tx buffers are usually reclaimed by new Tx packets.
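 * (HZ / 4 works out to roughly one timer run every 250 ms.)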
73 #define TX_RECLAIM_PERIOD (HZ / 4)
75 /* WR size in bytes */
76 #define WR_LEN (WR_FLITS * 8)
79 * Types of Tx queues in each queue set. Order here matters, do not change.
81 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
83 /* Values for sge_txq.flags */
85 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
86 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
90 __be64 flit[TX_DESC_FLITS];
100 struct tx_sw_desc { /* SW state per Tx descriptor */
102 u8 eop; /* set if last descriptor for packet */
103 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u8 fragidx; /* first page fragment associated with descriptor */
105 s8 sflit; /* start flit of first SGL entry in descriptor */
108 struct rx_sw_desc { /* SW state per Rx descriptor */
111 struct fl_pg_chunk pg_chunk;
113 DECLARE_PCI_UNMAP_ADDR(dma_addr);
116 struct rsp_desc { /* response queue descriptor */
117 struct rss_header rss_hdr;
125 * Holds unmapping information for Tx packets that need deferred unmapping.
126 * This structure lives at skb->head and must be allocated by callers.
128 struct deferred_unmap_info {
129 struct pci_dev *pdev;
130 dma_addr_t addr[MAX_SKB_FRAGS + 1];
134 * Maps a number of flits to the number of Tx descriptors that can hold them.
137 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
139 * HW allows up to 4 descriptors to be combined into a WR.
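 *
 * For example, if WR_FLITS were 16, a work request needing 20 flits would
 * map to 1 + (20 - 2) / 15 = 2 descriptors. The actual WR_FLITS value comes
 * from the hardware headers.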
141 static u8 flit_desc_map[] = {
143 #if SGE_NUM_GENBITS == 1
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
148 #elif SGE_NUM_GENBITS == 2
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
154 # error "SGE_NUM_GENBITS must be 1 or 2"
158 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
160 return container_of(q, struct sge_qset, fl[qidx]);
163 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
165 return container_of(q, struct sge_qset, rspq);
168 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
170 return container_of(q, struct sge_qset, txq[qidx]);
174 * refill_rspq - replenish an SGE response queue
175 * @adapter: the adapter
176 * @q: the response queue to replenish
177 * @credits: how many new responses to make available
179 * Replenishes a response queue by making the supplied number of responses
182 static inline void refill_rspq(struct adapter *adapter,
183 const struct sge_rspq *q, unsigned int credits)
186 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
187 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
191 * need_skb_unmap - does the platform need unmapping of sk_buffs?
193 * Returns true if the platform needs sk_buff unmapping. The compiler
194 * optimizes away the unmapping code when this returns false (a compile-time constant).
196 static inline int need_skb_unmap(void)
199 * This structure is used to tell if the platform needs buffer
200 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
203 DECLARE_PCI_UNMAP_ADDR(addr);
206 return sizeof(struct dummy) != 0;
210 * unmap_skb - unmap a packet main body and its page fragments
212 * @q: the Tx queue containing Tx descriptors for the packet
213 * @cidx: index of Tx descriptor
214 * @pdev: the PCI device
216 * Unmap the main body of an sk_buff and its page fragments, if any.
217 * Because of the fairly complicated structure of our SGLs and the desire
218 * to conserve space for metadata, the information necessary to unmap an
219 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
220 * descriptors (the physical addresses of the various data buffers), and
221 * the SW descriptor state (assorted indices). The send functions
222 * initialize the indices for the first packet descriptor so we can unmap
223 * the buffers held in the first Tx descriptor here, and we have enough
224 * information at this point to set the state for the next Tx descriptor.
226 * Note that it is possible to clean up the first descriptor of a packet
227 * before the send routines have written the next descriptors, but this
228 * race does not cause any problem. We just end up writing the unmapping
229 * info for the descriptor first.
231 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
232 unsigned int cidx, struct pci_dev *pdev)
234 const struct sg_ent *sgp;
235 struct tx_sw_desc *d = &q->sdesc[cidx];
236 int nfrags, frag_idx, curflit, j = d->addr_idx;
238 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
239 frag_idx = d->fragidx;
241 if (frag_idx == 0 && skb_headlen(skb)) {
242 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
243 skb_headlen(skb), PCI_DMA_TODEVICE);
247 curflit = d->sflit + 1 + j;
248 nfrags = skb_shinfo(skb)->nr_frags;
250 while (frag_idx < nfrags && curflit < WR_FLITS) {
251 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
252 skb_shinfo(skb)->frags[frag_idx].size,
263 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
264 d = cidx + 1 == q->size ? q->sdesc : d + 1;
265 d->fragidx = frag_idx;
267 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
272 * free_tx_desc - reclaims Tx descriptors and their buffers
273 * @adapter: the adapter
274 * @q: the Tx queue to reclaim descriptors from
275 * @n: the number of descriptors to reclaim
277 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
278 * Tx buffers. Called with the Tx queue lock held.
280 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
283 struct tx_sw_desc *d;
284 struct pci_dev *pdev = adapter->pdev;
285 unsigned int cidx = q->cidx;
287 const int need_unmap = need_skb_unmap() &&
288 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
292 if (d->skb) { /* an SGL is present */
294 unmap_skb(d->skb, q, cidx, pdev);
299 if (++cidx == q->size) {
308 * reclaim_completed_tx - reclaims completed Tx descriptors
309 * @adapter: the adapter
310 * @q: the Tx queue to reclaim completed descriptors from
312 * Reclaims Tx descriptors that the SGE has indicated it has processed,
313 * and frees the associated buffers if possible. Called with the Tx
316 static inline void reclaim_completed_tx(struct adapter *adapter,
319 unsigned int reclaim = q->processed - q->cleaned;
322 free_tx_desc(adapter, q, reclaim);
323 q->cleaned += reclaim;
324 q->in_use -= reclaim;
329 * should_restart_tx - are there enough resources to restart a Tx queue?
332 * Checks if there are enough descriptors to restart a suspended Tx queue.
334 static inline int should_restart_tx(const struct sge_txq *q)
336 unsigned int r = q->processed - q->cleaned;
338 return q->in_use - r < (q->size >> 1);
341 static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
344 if (d->pg_chunk.page)
345 put_page(d->pg_chunk.page);
346 d->pg_chunk.page = NULL;
354 * free_rx_bufs - free the Rx buffers on an SGE free list
355 * @pdev: the PCI device associated with the adapter
356 * @rxq: the SGE free list to clean up
358 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
359 * this queue should be stopped before calling this function.
361 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
363 unsigned int cidx = q->cidx;
365 while (q->credits--) {
366 struct rx_sw_desc *d = &q->sdesc[cidx];
368 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
369 q->buf_size, PCI_DMA_FROMDEVICE);
371 if (++cidx == q->size)
375 if (q->pg_chunk.page) {
376 __free_pages(q->pg_chunk.page, q->order);
377 q->pg_chunk.page = NULL;
382 * add_one_rx_buf - add a packet buffer to a free-buffer list
383 * @va: buffer start VA
384 * @len: the buffer length
385 * @d: the HW Rx descriptor to write
386 * @sd: the SW Rx descriptor to write
387 * @gen: the generation bit value
388 * @pdev: the PCI device associated with the adapter
390 * Add a buffer of the given length to the supplied HW and SW Rx
393 static inline int add_one_rx_buf(void *va, unsigned int len,
394 struct rx_desc *d, struct rx_sw_desc *sd,
395 unsigned int gen, struct pci_dev *pdev)
399 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
400 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
403 pci_unmap_addr_set(sd, dma_addr, mapping);
405 d->addr_lo = cpu_to_be32(mapping);
406 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
408 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
409 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
413 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
416 if (!q->pg_chunk.page) {
417 q->pg_chunk.page = alloc_pages(gfp, order);
418 if (unlikely(!q->pg_chunk.page))
420 q->pg_chunk.va = page_address(q->pg_chunk.page);
421 q->pg_chunk.offset = 0;
423 sd->pg_chunk = q->pg_chunk;
425 q->pg_chunk.offset += q->buf_size;
426 if (q->pg_chunk.offset == (PAGE_SIZE << order))
427 q->pg_chunk.page = NULL;
429 q->pg_chunk.va += q->buf_size;
430 get_page(q->pg_chunk.page);
435 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
437 if (q->pend_cred >= q->credits / 4) {
439 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
444 * refill_fl - refill an SGE free-buffer list
445 * @adapter: the adapter
446 * @q: the free-list to refill
447 * @n: the number of new buffers to allocate
448 * @gfp: the gfp flags for allocating new buffers
450 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
451 * allocated with the supplied gfp flags. The caller must ensure that
452 * @n does not exceed the queue's capacity.
454 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
457 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
458 struct rx_desc *d = &q->desc[q->pidx];
459 unsigned int count = 0;
465 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
466 nomem: q->alloc_failed++;
469 buf_start = sd->pg_chunk.va;
471 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
477 buf_start = skb->data;
480 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
483 clear_rx_desc(q, sd);
489 if (++q->pidx == q->size) {
499 q->pend_cred += count;
505 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
507 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
508 GFP_ATOMIC | __GFP_COMP);
512 * recycle_rx_buf - recycle a receive buffer
513 * @adapter: the adapter
514 * @q: the SGE free list
515 * @idx: index of buffer to recycle
517 * Recycles the specified buffer on the given free list by adding it at
518 * the next available slot on the list.
520 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
523 struct rx_desc *from = &q->desc[idx];
524 struct rx_desc *to = &q->desc[q->pidx];
526 q->sdesc[q->pidx] = q->sdesc[idx];
527 to->addr_lo = from->addr_lo; /* already big endian */
528 to->addr_hi = from->addr_hi; /* likewise */
530 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
531 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
533 if (++q->pidx == q->size) {
544 * alloc_ring - allocate resources for an SGE descriptor ring
545 * @pdev: the PCI device
546 * @nelem: the number of descriptors
547 * @elem_size: the size of each descriptor
548 * @sw_size: the size of the SW state associated with each ring element
549 * @phys: the physical address of the allocated ring
550 * @metadata: address of the array holding the SW state for the ring
552 * Allocates resources for an SGE descriptor ring, such as Tx queues,
553 * free buffer lists, or response queues. Each SGE ring requires
554 * space for its HW descriptors plus, optionally, space for the SW state
555 * associated with each HW entry (the metadata). The function returns
556 * three values: the virtual address for the HW ring (the return value
557 * of the function), the physical address of the HW ring, and the address
560 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
561 size_t sw_size, dma_addr_t * phys, void *metadata)
563 size_t len = nelem * elem_size;
565 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
569 if (sw_size && metadata) {
570 s = kcalloc(nelem, sw_size, GFP_KERNEL);
573 dma_free_coherent(&pdev->dev, len, p, *phys);
576 *(void **)metadata = s;
583 * t3_reset_qset - reset an SGE qset
586 * Reset the qset structure.
587 * The NAPI structure is preserved in the event of
588 * the qset's reincarnation, for example during EEH recovery.
590 static void t3_reset_qset(struct sge_qset *q)
593 !(q->adap->flags & NAPI_INIT)) {
594 memset(q, 0, sizeof(*q));
599 memset(&q->rspq, 0, sizeof(q->rspq));
600 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
601 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
603 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
604 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
609 * free_qset - free the resources of an SGE queue set
610 * @adapter: the adapter owning the queue set
613 * Release the HW and SW resources associated with an SGE queue set, such
614 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
615 * queue set must be quiesced prior to calling this.
617 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
620 struct pci_dev *pdev = adapter->pdev;
622 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
624 spin_lock_irq(&adapter->sge.reg_lock);
625 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
626 spin_unlock_irq(&adapter->sge.reg_lock);
627 free_rx_bufs(pdev, &q->fl[i]);
628 kfree(q->fl[i].sdesc);
629 dma_free_coherent(&pdev->dev,
631 sizeof(struct rx_desc), q->fl[i].desc,
635 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
636 if (q->txq[i].desc) {
637 spin_lock_irq(&adapter->sge.reg_lock);
638 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
639 spin_unlock_irq(&adapter->sge.reg_lock);
640 if (q->txq[i].sdesc) {
641 free_tx_desc(adapter, &q->txq[i],
643 kfree(q->txq[i].sdesc);
645 dma_free_coherent(&pdev->dev,
647 sizeof(struct tx_desc),
648 q->txq[i].desc, q->txq[i].phys_addr);
649 __skb_queue_purge(&q->txq[i].sendq);
653 spin_lock_irq(&adapter->sge.reg_lock);
654 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
655 spin_unlock_irq(&adapter->sge.reg_lock);
656 dma_free_coherent(&pdev->dev,
657 q->rspq.size * sizeof(struct rsp_desc),
658 q->rspq.desc, q->rspq.phys_addr);
665 * init_qset_cntxt - initialize an SGE queue set context info
667 * @id: the queue set id
669 * Initializes the TIDs and context ids for the queues of a queue set.
671 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
673 qs->rspq.cntxt_id = id;
674 qs->fl[0].cntxt_id = 2 * id;
675 qs->fl[1].cntxt_id = 2 * id + 1;
676 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
677 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
678 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
679 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
680 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
684 * sgl_len - calculates the size of an SGL of the given capacity
685 * @n: the number of SGL entries
687 * Calculates the number of flits needed for a scatter/gather list that
688 * can hold the given number of entries.
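 * For example, 3 entries take (3 * 3) / 2 + 1 = 5 flits: one fully used
 * sg_ent (3 flits) plus a half-used one (2 flits).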
690 static inline unsigned int sgl_len(unsigned int n)
692 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
693 return (3 * n) / 2 + (n & 1);
697 * flits_to_desc - returns the num of Tx descriptors for the given flits
698 * @n: the number of flits
700 * Calculates the number of Tx descriptors needed for the supplied number
703 static inline unsigned int flits_to_desc(unsigned int n)
705 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
706 return flit_desc_map[n];
710 * get_packet - return the next ingress packet buffer from a free list
711 * @adap: the adapter that received the packet
712 * @fl: the SGE free list holding the packet
713 * @len: the packet length including any SGE padding
714 * @drop_thres: # of remaining buffers before we start dropping packets
716 * Get the next packet from a free list and complete setup of the
717 * sk_buff. If the packet is small we make a copy and recycle the
718 * original buffer, otherwise we use the original buffer itself. If a
719 * positive drop threshold is supplied packets are dropped and their
720 * buffers recycled if (a) the number of remaining buffers is under the
721 * threshold and the packet is too big to copy, or (b) the packet should
722 * be copied but there is no memory for the copy.
724 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
725 unsigned int len, unsigned int drop_thres)
727 struct sk_buff *skb = NULL;
728 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
730 prefetch(sd->skb->data);
733 if (len <= SGE_RX_COPY_THRES) {
734 skb = alloc_skb(len, GFP_ATOMIC);
735 if (likely(skb != NULL)) {
737 pci_dma_sync_single_for_cpu(adap->pdev,
738 pci_unmap_addr(sd, dma_addr), len,
740 memcpy(skb->data, sd->skb->data, len);
741 pci_dma_sync_single_for_device(adap->pdev,
742 pci_unmap_addr(sd, dma_addr), len,
744 } else if (!drop_thres)
747 recycle_rx_buf(adap, fl, fl->cidx);
751 if (unlikely(fl->credits < drop_thres) &&
752 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
753 GFP_ATOMIC | __GFP_COMP) == 0)
757 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
758 fl->buf_size, PCI_DMA_FROMDEVICE);
761 __refill_fl(adap, fl);
766 * get_packet_pg - return the next ingress packet buffer from a free list
767 * @adap: the adapter that received the packet
768 * @fl: the SGE free list holding the packet
769 * @len: the packet length including any SGE padding
770 * @drop_thres: # of remaining buffers before we start dropping packets
772 * Get the next packet from a free list populated with page chunks.
773 * If the packet is small we make a copy and recycle the original buffer,
774 * otherwise we attach the original buffer as a page fragment to a fresh
775 * sk_buff. If a positive drop threshold is supplied packets are dropped
776 * and their buffers recycled if (a) the number of remaining buffers is
777 * under the threshold and the packet is too big to copy, or (b) there's
780 * Note: this function is similar to @get_packet but deals with Rx buffers
781 * that are page chunks rather than sk_buffs.
783 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
784 struct sge_rspq *q, unsigned int len,
785 unsigned int drop_thres)
787 struct sk_buff *newskb, *skb;
788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
790 newskb = skb = q->pg_skb;
792 if (!skb && (len <= SGE_RX_COPY_THRES)) {
793 newskb = alloc_skb(len, GFP_ATOMIC);
794 if (likely(newskb != NULL)) {
795 __skb_put(newskb, len);
796 pci_dma_sync_single_for_cpu(adap->pdev,
797 pci_unmap_addr(sd, dma_addr), len,
799 memcpy(newskb->data, sd->pg_chunk.va, len);
800 pci_dma_sync_single_for_device(adap->pdev,
801 pci_unmap_addr(sd, dma_addr), len,
803 } else if (!drop_thres)
807 recycle_rx_buf(adap, fl, fl->cidx);
812 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
816 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
817 if (unlikely(!newskb)) {
823 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
824 fl->buf_size, PCI_DMA_FROMDEVICE);
826 __skb_put(newskb, SGE_RX_PULL_LEN);
827 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
828 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
829 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
830 len - SGE_RX_PULL_LEN);
832 newskb->data_len = len - SGE_RX_PULL_LEN;
834 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
836 sd->pg_chunk.offset, len);
838 newskb->data_len += len;
840 newskb->truesize += newskb->data_len;
844 * We do not refill FLs here, we let the caller do it to overlap a
851 * get_imm_packet - return the next ingress packet buffer from a response
852 * @resp: the response descriptor containing the packet data
854 * Return a packet containing the immediate data of the given response.
856 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
858 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
861 __skb_put(skb, IMMED_PKT_SIZE);
862 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
868 * calc_tx_descs - calculate the number of Tx descriptors for a packet
871 * Returns the number of Tx descriptors needed for the given Ethernet
872 * packet. Ethernet packets require addition of WR and CPL headers.
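 * For example, a non-GSO packet with linear data and two page fragments
 * needs sgl_len(3) + 2 = 7 flits, which fits in a single descriptor.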
874 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
878 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
881 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
882 if (skb_shinfo(skb)->gso_size)
884 return flits_to_desc(flits);
888 * make_sgl - populate a scatter/gather list for a packet
890 * @sgp: the SGL to populate
891 * @start: start address of skb main body data to include in the SGL
892 * @len: length of skb main body data to include in the SGL
893 * @pdev: the PCI device
895 * Generates a scatter/gather list for the buffers that make up a packet
896 * and returns the SGL size in 8-byte words. The caller must size the SGL
899 static inline unsigned int make_sgl(const struct sk_buff *skb,
900 struct sg_ent *sgp, unsigned char *start,
901 unsigned int len, struct pci_dev *pdev)
904 unsigned int i, j = 0, nfrags;
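/*
 * Each struct sg_ent packs two length/address pairs into three flits (one
 * flit of lengths, two of addresses); j selects the slot within the current
 * sg_ent as entries are filled in, and the value returned at the end equals
 * sgl_len() of the total entry count.
 */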
907 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
908 sgp->len[0] = cpu_to_be32(len);
909 sgp->addr[0] = cpu_to_be64(mapping);
913 nfrags = skb_shinfo(skb)->nr_frags;
914 for (i = 0; i < nfrags; i++) {
915 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
917 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
918 frag->size, PCI_DMA_TODEVICE);
919 sgp->len[j] = cpu_to_be32(frag->size);
920 sgp->addr[j] = cpu_to_be64(mapping);
927 return ((nfrags + (len != 0)) * 3) / 2 + j;
931 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
935 * Ring the doorbell if a Tx queue is asleep. There is a natural race
936 * where the HW may go to sleep just after we checked; in that case
937 * the interrupt handler will detect the outstanding TX packet
938 * and ring the doorbell for us.
940 * When GTS is disabled we unconditionally ring the doorbell.
942 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
945 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
946 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
947 set_bit(TXQ_LAST_PKT_DB, &q->flags);
948 t3_write_reg(adap, A_SG_KDOORBELL,
949 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
952 wmb(); /* write descriptors before telling HW */
953 t3_write_reg(adap, A_SG_KDOORBELL,
954 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
958 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
960 #if SGE_NUM_GENBITS == 2
961 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
966 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
967 * @ndesc: number of Tx descriptors spanned by the SGL
968 * @skb: the packet corresponding to the WR
969 * @d: first Tx descriptor to be written
970 * @pidx: index of above descriptors
971 * @q: the SGE Tx queue
973 * @flits: number of flits to the start of the SGL in the first descriptor
974 * @sgl_flits: the SGL size in flits
975 * @gen: the Tx descriptor generation
976 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
977 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
979 * Write a work request header and an associated SGL. If the SGL is
980 * small enough to fit into one Tx descriptor it has already been written
981 * and we just need to write the WR header. Otherwise we distribute the
982 * SGL across the number of descriptors it spans.
984 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
985 struct tx_desc *d, unsigned int pidx,
986 const struct sge_txq *q,
987 const struct sg_ent *sgl,
988 unsigned int flits, unsigned int sgl_flits,
989 unsigned int gen, __be32 wr_hi,
992 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
993 struct tx_sw_desc *sd = &q->sdesc[pidx];
996 if (need_skb_unmap()) {
1002 if (likely(ndesc == 1)) {
1004 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1005 V_WR_SGLSFLT(flits)) | wr_hi;
1007 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1008 V_WR_GEN(gen)) | wr_lo;
1011 unsigned int ogen = gen;
1012 const u64 *fp = (const u64 *)sgl;
1013 struct work_request_hdr *wp = wrp;
1015 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1016 V_WR_SGLSFLT(flits)) | wr_hi;
1019 unsigned int avail = WR_FLITS - flits;
1021 if (avail > sgl_flits)
1023 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1033 if (++pidx == q->size) {
1041 wrp = (struct work_request_hdr *)d;
1042 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1043 V_WR_SGLSFLT(1)) | wr_hi;
1044 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1046 V_WR_GEN(gen)) | wr_lo;
1051 wrp->wr_hi |= htonl(F_WR_EOP);
1053 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1054 wr_gen2((struct tx_desc *)wp, ogen);
1055 WARN_ON(ndesc != 0);
1060 * write_tx_pkt_wr - write a TX_PKT work request
1061 * @adap: the adapter
1062 * @skb: the packet to send
1063 * @pi: the egress interface
1064 * @pidx: index of the first Tx descriptor to write
1065 * @gen: the generation value to use
1067 * @ndesc: number of descriptors the packet will occupy
1068 * @compl: the value of the COMPL bit to use
1070 * Generate a TX_PKT work request to send the supplied packet.
1072 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1073 const struct port_info *pi,
1074 unsigned int pidx, unsigned int gen,
1075 struct sge_txq *q, unsigned int ndesc,
1078 unsigned int flits, sgl_flits, cntrl, tso_info;
1079 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1080 struct tx_desc *d = &q->desc[pidx];
1081 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1083 cpl->len = htonl(skb->len | 0x80000000);
1084 cntrl = V_TXPKT_INTF(pi->port_id);
1086 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1087 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1089 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1092 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1095 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1096 hdr->cntrl = htonl(cntrl);
1097 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1098 CPL_ETH_II : CPL_ETH_II_VLAN;
1099 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1100 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1101 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1102 hdr->lso_info = htonl(tso_info);
1105 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1106 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1107 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1108 cpl->cntrl = htonl(cntrl);
1110 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1111 q->sdesc[pidx].skb = NULL;
1113 skb_copy_from_linear_data(skb, &d->flit[2],
1116 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1118 flits = (skb->len + 7) / 8 + 2;
1119 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1120 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1121 | F_WR_SOP | F_WR_EOP | compl);
1123 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1124 V_WR_TID(q->token));
1133 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1134 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1136 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1137 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1138 htonl(V_WR_TID(q->token)));
1141 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1142 struct sge_qset *qs, struct sge_txq *q)
1144 netif_tx_stop_queue(txq);
1145 set_bit(TXQ_ETH, &qs->txq_stopped);
1150 * eth_xmit - add a packet to the Ethernet Tx queue
1152 * @dev: the egress net device
1154 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1156 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1159 unsigned int ndesc, pidx, credits, gen, compl;
1160 const struct port_info *pi = netdev_priv(dev);
1161 struct adapter *adap = pi->adapter;
1162 struct netdev_queue *txq;
1163 struct sge_qset *qs;
1167 * The chip's minimum packet length is 9 octets, but play it safe and reject
1168 * anything shorter than an Ethernet header.
1170 if (unlikely(skb->len < ETH_HLEN)) {
1172 return NETDEV_TX_OK;
1175 qidx = skb_get_queue_mapping(skb);
1177 q = &qs->txq[TXQ_ETH];
1178 txq = netdev_get_tx_queue(dev, qidx);
1180 spin_lock(&q->lock);
1181 reclaim_completed_tx(adap, q);
1183 credits = q->size - q->in_use;
1184 ndesc = calc_tx_descs(skb);
1186 if (unlikely(credits < ndesc)) {
1187 t3_stop_tx_queue(txq, qs, q);
1188 dev_err(&adap->pdev->dev,
1189 "%s: Tx ring %u full while queue awake!\n",
1190 dev->name, q->cntxt_id & 7);
1191 spin_unlock(&q->lock);
1192 return NETDEV_TX_BUSY;
1196 if (unlikely(credits - ndesc < q->stop_thres)) {
1197 t3_stop_tx_queue(txq, qs, q);
1199 if (should_restart_tx(q) &&
1200 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1202 netif_tx_wake_queue(txq);
1207 q->unacked += ndesc;
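/* Request a WR completion periodically: bit 3 of the running unacked
 * descriptor count is shifted up into the WR_COMPL bit position. */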
1208 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1212 if (q->pidx >= q->size) {
1217 /* update port statistics */
1218 if (skb->ip_summed == CHECKSUM_COMPLETE)
1219 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1220 if (skb_shinfo(skb)->gso_size)
1221 qs->port_stats[SGE_PSTAT_TSO]++;
1222 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1223 qs->port_stats[SGE_PSTAT_VLANINS]++;
1225 dev->trans_start = jiffies;
1226 spin_unlock(&q->lock);
1229 * We do not use Tx completion interrupts to free DMAd Tx packets.
1230 * This is good for performance but means that we rely on new Tx
1231 * packets arriving to run the destructors of completed packets,
1232 * which open up space in their sockets' send queues. Sometimes
1233 * we do not get such new packets causing Tx to stall. A single
1234 * UDP transmitter is a good example of this situation. We have
1235 * a clean up timer that periodically reclaims completed packets
1236 * but it doesn't run often enough (nor do we want it to) to prevent
1237 * lengthy stalls. A solution to this problem is to run the
1238 * destructor early, after the packet is queued but before it's DMAd.
1239 * A drawback is that we lie to socket memory accounting, but the amount
1240 * of extra memory is reasonable (limited by the number of Tx
1241 * descriptors), the packets do actually get freed quickly by new
1242 * packets almost always, and for protocols like TCP that wait for
1243 * acks to really free up the data the extra memory is even less.
1244 * On the positive side we run the destructors on the sending CPU
1245 * rather than on a potentially different completing CPU, usually a
1246 * good thing. We also run them without holding our Tx queue lock,
1247 * unlike what reclaim_completed_tx() would otherwise do.
1249 * Run the destructor before telling the DMA engine about the packet
1250 * to make sure it doesn't complete and get freed prematurely.
1252 if (likely(!skb_shared(skb)))
1255 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1256 check_ring_tx_db(adap, q);
1257 return NETDEV_TX_OK;
1261 * write_imm - write a packet into a Tx descriptor as immediate data
1262 * @d: the Tx descriptor to write
1264 * @len: the length of packet data to write as immediate data
1265 * @gen: the generation bit value to write
1267 * Writes a packet as immediate data into a Tx descriptor. The packet
1268 * contains a work request at its beginning. We must write the packet
1269 * carefully so the SGE doesn't read it accidentally before it's written
1272 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1273 unsigned int len, unsigned int gen)
1275 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1276 struct work_request_hdr *to = (struct work_request_hdr *)d;
1278 if (likely(!skb->data_len))
1279 memcpy(&to[1], &from[1], len - sizeof(*from));
1281 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1283 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1284 V_WR_BCNTLFLT(len & 7));
1286 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1287 V_WR_LEN((len + 7) / 8));
1293 * check_desc_avail - check descriptor availability on a send queue
1294 * @adap: the adapter
1295 * @q: the send queue
1296 * @skb: the packet needing the descriptors
1297 * @ndesc: the number of Tx descriptors needed
1298 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1300 * Checks if the requested number of Tx descriptors is available on an
1301 * SGE send queue. If the queue is already suspended or not enough
1302 * descriptors are available the packet is queued for later transmission.
1303 * Must be called with the Tx queue locked.
1305 * Returns 0 if enough descriptors are available, 1 if there aren't
1306 * enough descriptors and the packet has been queued, and 2 if the caller
1307 * needs to retry because there weren't enough descriptors at the
1308 * beginning of the call but some freed up in the mean time.
1310 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1311 struct sk_buff *skb, unsigned int ndesc,
1314 if (unlikely(!skb_queue_empty(&q->sendq))) {
1315 addq_exit:__skb_queue_tail(&q->sendq, skb);
1318 if (unlikely(q->size - q->in_use < ndesc)) {
1319 struct sge_qset *qs = txq_to_qset(q, qid);
1321 set_bit(qid, &qs->txq_stopped);
1322 smp_mb__after_clear_bit();
1324 if (should_restart_tx(q) &&
1325 test_and_clear_bit(qid, &qs->txq_stopped))
1335 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1336 * @q: the SGE control Tx queue
1338 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1339 * that send only immediate data (presently just the control queues) and
1340 * thus do not have any sk_buffs to release.
1342 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1344 unsigned int reclaim = q->processed - q->cleaned;
1346 q->in_use -= reclaim;
1347 q->cleaned += reclaim;
1350 static inline int immediate(const struct sk_buff *skb)
1352 return skb->len <= WR_LEN;
1356 * ctrl_xmit - send a packet through an SGE control Tx queue
1357 * @adap: the adapter
1358 * @q: the control queue
1361 * Send a packet through an SGE control Tx queue. Packets sent through
1362 * a control queue must fit entirely as immediate data in a single Tx
1363 * descriptor and have no page fragments.
1365 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1366 struct sk_buff *skb)
1369 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1371 if (unlikely(!immediate(skb))) {
1374 return NET_XMIT_SUCCESS;
1377 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1378 wrp->wr_lo = htonl(V_WR_TID(q->token));
1380 spin_lock(&q->lock);
1381 again:reclaim_completed_tx_imm(q);
1383 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1384 if (unlikely(ret)) {
1386 spin_unlock(&q->lock);
1392 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1395 if (++q->pidx >= q->size) {
1399 spin_unlock(&q->lock);
1401 t3_write_reg(adap, A_SG_KDOORBELL,
1402 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1403 return NET_XMIT_SUCCESS;
1407 * restart_ctrlq - restart a suspended control queue
1408 * @qs: the queue set containing the control queue
1410 * Resumes transmission on a suspended Tx control queue.
1412 static void restart_ctrlq(unsigned long data)
1414 struct sk_buff *skb;
1415 struct sge_qset *qs = (struct sge_qset *)data;
1416 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1418 spin_lock(&q->lock);
1419 again:reclaim_completed_tx_imm(q);
1421 while (q->in_use < q->size &&
1422 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1424 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1426 if (++q->pidx >= q->size) {
1433 if (!skb_queue_empty(&q->sendq)) {
1434 set_bit(TXQ_CTRL, &qs->txq_stopped);
1435 smp_mb__after_clear_bit();
1437 if (should_restart_tx(q) &&
1438 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1443 spin_unlock(&q->lock);
1445 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1446 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1450 * Send a management message through control queue 0
1452 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1456 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1463 * deferred_unmap_destructor - unmap a packet when it is freed
1466 * This is the packet destructor used for Tx packets that need to remain
1467 * mapped until they are freed rather than until their Tx descriptors are
1470 static void deferred_unmap_destructor(struct sk_buff *skb)
1473 const dma_addr_t *p;
1474 const struct skb_shared_info *si;
1475 const struct deferred_unmap_info *dui;
1477 dui = (struct deferred_unmap_info *)skb->head;
1480 if (skb->tail - skb->transport_header)
1481 pci_unmap_single(dui->pdev, *p++,
1482 skb->tail - skb->transport_header,
1485 si = skb_shinfo(skb);
1486 for (i = 0; i < si->nr_frags; i++)
1487 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1491 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1492 const struct sg_ent *sgl, int sgl_flits)
1495 struct deferred_unmap_info *dui;
1497 dui = (struct deferred_unmap_info *)skb->head;
1499 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1500 *p++ = be64_to_cpu(sgl->addr[0]);
1501 *p++ = be64_to_cpu(sgl->addr[1]);
1504 *p = be64_to_cpu(sgl->addr[0]);
1508 * write_ofld_wr - write an offload work request
1509 * @adap: the adapter
1510 * @skb: the packet to send
1512 * @pidx: index of the first Tx descriptor to write
1513 * @gen: the generation value to use
1514 * @ndesc: number of descriptors the packet will occupy
1516 * Write an offload work request to send the supplied packet. The packet
1517 * data already carry the work request with most fields populated.
1519 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1520 struct sge_txq *q, unsigned int pidx,
1521 unsigned int gen, unsigned int ndesc)
1523 unsigned int sgl_flits, flits;
1524 struct work_request_hdr *from;
1525 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1526 struct tx_desc *d = &q->desc[pidx];
1528 if (immediate(skb)) {
1529 q->sdesc[pidx].skb = NULL;
1530 write_imm(d, skb, skb->len, gen);
1534 /* Only TX_DATA builds SGLs */
1536 from = (struct work_request_hdr *)skb->data;
1537 memcpy(&d->flit[1], &from[1],
1538 skb_transport_offset(skb) - sizeof(*from));
1540 flits = skb_transport_offset(skb) / 8;
1541 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1542 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1543 skb->tail - skb->transport_header,
1545 if (need_skb_unmap()) {
1546 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1547 skb->destructor = deferred_unmap_destructor;
1550 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1551 gen, from->wr_hi, from->wr_lo);
1555 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1558 * Returns the number of Tx descriptors needed for the given offload
1559 * packet. These packets are already fully constructed.
1561 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1563 unsigned int flits, cnt;
1565 if (skb->len <= WR_LEN)
1566 return 1; /* packet fits as immediate data */
1568 flits = skb_transport_offset(skb) / 8; /* headers */
1569 cnt = skb_shinfo(skb)->nr_frags;
1570 if (skb->tail != skb->transport_header)
1572 return flits_to_desc(flits + sgl_len(cnt));
1576 * ofld_xmit - send a packet through an offload queue
1577 * @adap: the adapter
1578 * @q: the Tx offload queue
1581 * Send an offload packet through an SGE offload queue.
1583 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1584 struct sk_buff *skb)
1587 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1589 spin_lock(&q->lock);
1590 again:reclaim_completed_tx(adap, q);
1592 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1593 if (unlikely(ret)) {
1595 skb->priority = ndesc; /* save for restart */
1596 spin_unlock(&q->lock);
1606 if (q->pidx >= q->size) {
1610 spin_unlock(&q->lock);
1612 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1613 check_ring_tx_db(adap, q);
1614 return NET_XMIT_SUCCESS;
1618 * restart_offloadq - restart a suspended offload queue
1619 * @qs: the queue set containing the offload queue
1621 * Resumes transmission on a suspended Tx offload queue.
1623 static void restart_offloadq(unsigned long data)
1625 struct sk_buff *skb;
1626 struct sge_qset *qs = (struct sge_qset *)data;
1627 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1628 const struct port_info *pi = netdev_priv(qs->netdev);
1629 struct adapter *adap = pi->adapter;
1631 spin_lock(&q->lock);
1632 again:reclaim_completed_tx(adap, q);
1634 while ((skb = skb_peek(&q->sendq)) != NULL) {
1635 unsigned int gen, pidx;
1636 unsigned int ndesc = skb->priority;
1638 if (unlikely(q->size - q->in_use < ndesc)) {
1639 set_bit(TXQ_OFLD, &qs->txq_stopped);
1640 smp_mb__after_clear_bit();
1642 if (should_restart_tx(q) &&
1643 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1653 if (q->pidx >= q->size) {
1657 __skb_unlink(skb, &q->sendq);
1658 spin_unlock(&q->lock);
1660 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1661 spin_lock(&q->lock);
1663 spin_unlock(&q->lock);
1666 set_bit(TXQ_RUNNING, &q->flags);
1667 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1670 t3_write_reg(adap, A_SG_KDOORBELL,
1671 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1675 * queue_set - return the queue set a packet should use
1678 * Maps a packet to the SGE queue set it should use. The desired queue
1679 * set is carried in bits 1-3 in the packet's priority.
1681 static inline int queue_set(const struct sk_buff *skb)
1683 return skb->priority >> 1;
1687 * is_ctrl_pkt - return whether an offload packet is a control packet
1690 * Determines whether an offload packet should use an OFLD or a CTRL
1691 * Tx queue. This is indicated by bit 0 in the packet's priority.
1693 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1695 return skb->priority & 1;
1699 * t3_offload_tx - send an offload packet
1700 * @tdev: the offload device to send to
1703 * Sends an offload packet. We use the packet priority to select the
1704 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1705 * should be sent as regular or control, bits 1-3 select the queue set.
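 *
 * Purely as an illustration (the names are hypothetical, not part of the
 * driver), a caller would encode its choice as
 *	skb->priority = (qset_idx << 1) | use_ctrl_queue;
 * before handing the skb to this function.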
1707 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1709 struct adapter *adap = tdev2adap(tdev);
1710 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1712 if (unlikely(is_ctrl_pkt(skb)))
1713 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1715 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1719 * offload_enqueue - add an offload packet to an SGE offload receive queue
1720 * @q: the SGE response queue
1723 * Add a new offload packet to an SGE response queue's offload packet
1724 * queue. If the packet is the first on the queue it schedules the RX
1725 * softirq to process the queue.
1727 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1729 int was_empty = skb_queue_empty(&q->rx_queue);
1731 __skb_queue_tail(&q->rx_queue, skb);
1734 struct sge_qset *qs = rspq_to_qset(q);
1736 napi_schedule(&qs->napi);
1741 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1742 * @tdev: the offload device that will be receiving the packets
1743 * @q: the SGE response queue that assembled the bundle
1744 * @skbs: the partial bundle
1745 * @n: the number of packets in the bundle
1747 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1749 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1751 struct sk_buff *skbs[], int n)
1754 q->offload_bundles++;
1755 tdev->recv(tdev, skbs, n);
1760 * ofld_poll - NAPI handler for offload packets in interrupt mode
1761 * @napi: the NAPI instance doing the polling
1762 * @budget: polling budget
1764 * The NAPI handler for offload packets when a response queue is serviced
1765 * by the hard interrupt handler, i.e., when it's operating in non-polling
1766 * mode. Creates small packet batches and sends them through the offload
1767 * receive handler. Batches need to be of modest size as we do prefetches
1768 * on the packets in each.
1770 static int ofld_poll(struct napi_struct *napi, int budget)
1772 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1773 struct sge_rspq *q = &qs->rspq;
1774 struct adapter *adapter = qs->adap;
1777 while (work_done < budget) {
1778 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1779 struct sk_buff_head queue;
1782 spin_lock_irq(&q->lock);
1783 __skb_queue_head_init(&queue);
1784 skb_queue_splice_init(&q->rx_queue, &queue);
1785 if (skb_queue_empty(&queue)) {
1786 napi_complete(napi);
1787 spin_unlock_irq(&q->lock);
1790 spin_unlock_irq(&q->lock);
1793 skb_queue_walk_safe(&queue, skb, tmp) {
1794 if (work_done >= budget)
1798 __skb_unlink(skb, &queue);
1799 prefetch(skb->data);
1800 skbs[ngathered] = skb;
1801 if (++ngathered == RX_BUNDLE_SIZE) {
1802 q->offload_bundles++;
1803 adapter->tdev.recv(&adapter->tdev, skbs,
1808 if (!skb_queue_empty(&queue)) {
1809 /* splice remaining packets back onto Rx queue */
1810 spin_lock_irq(&q->lock);
1811 skb_queue_splice(&queue, &q->rx_queue);
1812 spin_unlock_irq(&q->lock);
1814 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1821 * rx_offload - process a received offload packet
1822 * @tdev: the offload device receiving the packet
1823 * @rq: the response queue that received the packet
1825 * @rx_gather: a gather list of packets if we are building a bundle
1826 * @gather_idx: index of the next available slot in the bundle
1828 * Process an ingress offload packet and add it to the offload ingress
1829 * queue. Returns the index of the next available slot in the bundle.
1831 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1832 struct sk_buff *skb, struct sk_buff *rx_gather[],
1833 unsigned int gather_idx)
1835 skb_reset_mac_header(skb);
1836 skb_reset_network_header(skb);
1837 skb_reset_transport_header(skb);
1840 rx_gather[gather_idx++] = skb;
1841 if (gather_idx == RX_BUNDLE_SIZE) {
1842 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1844 rq->offload_bundles++;
1847 offload_enqueue(rq, skb);
1853 * restart_tx - check whether to restart suspended Tx queues
1854 * @qs: the queue set to resume
1856 * Restarts suspended Tx queues of an SGE queue set if they have enough
1857 * free resources to resume operation.
1859 static void restart_tx(struct sge_qset *qs)
1861 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1862 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1863 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1864 qs->txq[TXQ_ETH].restarts++;
1865 if (netif_running(qs->netdev))
1866 netif_tx_wake_queue(qs->tx_q);
1869 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1870 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1871 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1872 qs->txq[TXQ_OFLD].restarts++;
1873 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1875 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1876 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1877 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1878 qs->txq[TXQ_CTRL].restarts++;
1879 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1884 * cxgb3_arp_process - process an ARP request probing a private IP address
1885 * @adapter: the adapter
1886 * @skb: the skbuff containing the ARP request
1888 * Check if the ARP request is probing the private IP address
1889 * dedicated to iSCSI, generate an ARP reply if so.
1891 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1893 struct net_device *dev = skb->dev;
1894 struct port_info *pi;
1896 unsigned char *arp_ptr;
1903 skb_reset_network_header(skb);
1906 if (arp->ar_op != htons(ARPOP_REQUEST))
1909 arp_ptr = (unsigned char *)(arp + 1);
1911 arp_ptr += dev->addr_len;
1912 memcpy(&sip, arp_ptr, sizeof(sip));
1913 arp_ptr += sizeof(sip);
1914 arp_ptr += dev->addr_len;
1915 memcpy(&tip, arp_ptr, sizeof(tip));
1917 pi = netdev_priv(dev);
1918 if (tip != pi->iscsi_ipv4addr)
1921 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1922 dev->dev_addr, sha);
1926 static inline int is_arp(struct sk_buff *skb)
1928 return skb->protocol == htons(ETH_P_ARP);
1932 * rx_eth - process an ingress ethernet packet
1933 * @adap: the adapter
1934 * @rq: the response queue that received the packet
1936 * @pad: amount of padding at the start of the buffer
1938 * Process an ingress Ethernet packet and deliver it to the stack.
1939 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1940 * if it was immediate data in a response.
1942 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1943 struct sk_buff *skb, int pad, int lro)
1945 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1946 struct sge_qset *qs = rspq_to_qset(rq);
1947 struct port_info *pi;
1949 skb_pull(skb, sizeof(*p) + pad);
1950 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1951 pi = netdev_priv(skb->dev);
1952 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
1954 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1955 skb->ip_summed = CHECKSUM_UNNECESSARY;
1957 skb->ip_summed = CHECKSUM_NONE;
1958 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
1960 if (unlikely(p->vlan_valid)) {
1961 struct vlan_group *grp = pi->vlan_grp;
1963 qs->port_stats[SGE_PSTAT_VLANEX]++;
1966 vlan_gro_receive(&qs->napi, grp,
1967 ntohs(p->vlan), skb);
1969 if (unlikely(pi->iscsi_ipv4addr &&
1971 unsigned short vtag = ntohs(p->vlan) &
1973 skb->dev = vlan_group_get_device(grp,
1975 cxgb3_arp_process(adap, skb);
1977 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1981 dev_kfree_skb_any(skb);
1982 } else if (rq->polling) {
1984 napi_gro_receive(&qs->napi, skb);
1986 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1987 cxgb3_arp_process(adap, skb);
1988 netif_receive_skb(skb);
1994 static inline int is_eth_tcp(u32 rss)
1996 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2000 * lro_add_page - add a page chunk to an LRO session
2001 * @adap: the adapter
2002 * @qs: the associated queue set
2003 * @fl: the free list containing the page chunk to add
2004 * @len: packet length
2005 * @complete: Indicates the last fragment of a frame
2007 * Add a received packet contained in a page chunk to an existing LRO
2010 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2011 struct sge_fl *fl, int len, int complete)
2013 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2014 struct cpl_rx_pkt *cpl;
2015 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2016 int nr_frags = qs->lro_frag_tbl.nr_frags;
2017 int frag_len = qs->lro_frag_tbl.len;
2021 offset = 2 + sizeof(struct cpl_rx_pkt);
2022 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2028 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2029 fl->buf_size, PCI_DMA_FROMDEVICE);
2031 rx_frag += nr_frags;
2032 rx_frag->page = sd->pg_chunk.page;
2033 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2034 rx_frag->size = len;
2036 qs->lro_frag_tbl.nr_frags++;
2037 qs->lro_frag_tbl.len = frag_len;
2042 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2045 if (unlikely(cpl->vlan_valid)) {
2046 struct net_device *dev = qs->netdev;
2047 struct port_info *pi = netdev_priv(dev);
2048 struct vlan_group *grp = pi->vlan_grp;
2050 if (likely(grp != NULL)) {
2051 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2056 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2059 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2063 * handle_rsp_cntrl_info - handles control information in a response
2064 * @qs: the queue set corresponding to the response
2065 * @flags: the response control flags
2067 * Handles the control information of an SGE response, such as GTS
2068 * indications and completion credits for the queue set's Tx queues.
2069 * HW coalesces credits; we don't do any extra SW coalescing.
2071 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2073 unsigned int credits;
2076 if (flags & F_RSPD_TXQ0_GTS)
2077 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2080 credits = G_RSPD_TXQ0_CR(flags);
2082 qs->txq[TXQ_ETH].processed += credits;
2084 credits = G_RSPD_TXQ2_CR(flags);
2086 qs->txq[TXQ_CTRL].processed += credits;
2089 if (flags & F_RSPD_TXQ1_GTS)
2090 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2092 credits = G_RSPD_TXQ1_CR(flags);
2094 qs->txq[TXQ_OFLD].processed += credits;
2098 * check_ring_db - check if we need to ring any doorbells
2099 * @adapter: the adapter
2100 * @qs: the queue set whose Tx queues are to be examined
2101 * @sleeping: indicates which Tx queue sent GTS
2103 * Checks if some of a queue set's Tx queues need to ring their doorbells
2104 * to resume transmission after idling while they still have unprocessed
2107 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2108 unsigned int sleeping)
2110 if (sleeping & F_RSPD_TXQ0_GTS) {
2111 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2113 if (txq->cleaned + txq->in_use != txq->processed &&
2114 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2115 set_bit(TXQ_RUNNING, &txq->flags);
2116 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2117 V_EGRCNTX(txq->cntxt_id));
2121 if (sleeping & F_RSPD_TXQ1_GTS) {
2122 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2124 if (txq->cleaned + txq->in_use != txq->processed &&
2125 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2126 set_bit(TXQ_RUNNING, &txq->flags);
2127 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2128 V_EGRCNTX(txq->cntxt_id));
2134 * is_new_response - check if a response is newly written
2135 * @r: the response descriptor
2136 * @q: the response queue
2138 * Returns true if a response descriptor contains a yet unprocessed
2141 static inline int is_new_response(const struct rsp_desc *r,
2142 const struct sge_rspq *q)
2144 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2147 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2150 q->rx_recycle_buf = 0;
2153 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2154 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2155 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2156 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2157 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2159 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
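/* (2500 * 0.1 us = 250 us) */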
2160 #define NOMEM_INTR_DELAY 2500
2163 * process_responses - process responses from an SGE response queue
2164 * @adap: the adapter
2165 * @qs: the queue set to which the response queue belongs
2166 * @budget: how many responses can be processed in this round
2168 * Process responses from an SGE response queue up to the supplied budget.
2169 * Responses include received packets as well as credits and other events
2170 * for the queues that belong to the response queue's queue set.
2171 * A negative budget is effectively unlimited.
2173 * Additionally choose the interrupt holdoff time for the next interrupt
2174 * on this queue. If the system is under memory shortage use a fairly
2175 * long delay to help recovery.
2177 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2180 struct sge_rspq *q = &qs->rspq;
2181 struct rsp_desc *r = &q->desc[q->cidx];
2182 int budget_left = budget;
2183 unsigned int sleeping = 0;
2184 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2187 q->next_holdoff = q->holdoff_tmr;
2189 while (likely(budget_left && is_new_response(r, q))) {
2190 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2191 struct sk_buff *skb = NULL;
2192 u32 len, flags = ntohl(r->flags);
2193 __be32 rss_hi = *(const __be32 *)r,
2194 rss_lo = r->rss_hdr.rss_hash_val;
2196 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2198 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2199 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2203 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2204 skb->data[0] = CPL_ASYNC_NOTIF;
2205 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2207 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2208 skb = get_imm_packet(r);
2209 if (unlikely(!skb)) {
2211 q->next_holdoff = NOMEM_INTR_DELAY;
2213 /* consume one credit since we tried */
2219 } else if ((len = ntohl(r->len_cq)) != 0) {
2222 lro &= eth && is_eth_tcp(rss_hi);
2224 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2225 if (fl->use_pages) {
2226 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2229 #if L1_CACHE_BYTES < 128
2230 prefetch(addr + L1_CACHE_BYTES);
2232 __refill_fl(adap, fl);
2234 lro_add_page(adap, qs, fl,
2236 flags & F_RSPD_EOP);
2240 skb = get_packet_pg(adap, fl, q,
2243 SGE_RX_DROP_THRES : 0);
2246 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2247 eth ? SGE_RX_DROP_THRES : 0);
2248 if (unlikely(!skb)) {
2252 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2255 if (++fl->cidx == fl->size)
2260 if (flags & RSPD_CTRL_MASK) {
2261 sleeping |= flags & RSPD_GTS_MASK;
2262 handle_rsp_cntrl_info(qs, flags);
2266 if (unlikely(++q->cidx == q->size)) {
2273 if (++q->credits >= (q->size / 4)) {
2274 refill_rspq(adap, q, q->credits);
2278 packet_complete = flags &
2279 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2280 F_RSPD_ASYNC_NOTIF);
2282 if (skb != NULL && packet_complete) {
2284 rx_eth(adap, q, skb, ethpad, lro);
2287 /* Preserve the RSS info in csum & priority */
2289 skb->priority = rss_lo;
2290 ngathered = rx_offload(&adap->tdev, q, skb,
2295 if (flags & F_RSPD_EOP)
2296 clear_rspq_bufstate(q);
2301 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2304 check_ring_db(adap, qs, sleeping);
2306 smp_mb(); /* commit Tx queue .processed updates */
2307 if (unlikely(qs->txq_stopped != 0))
2310 budget -= budget_left;
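/*
 * A response is "pure" when it carries no data: neither an async
 * notification, nor immediate data, nor a free-list payload (len_cq == 0).
 */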
2314 static inline int is_pure_response(const struct rsp_desc *r)
2316 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2318 return (n | r->len_cq) == 0;
2322 * napi_rx_handler - the NAPI handler for Rx processing
2323 * @napi: the napi instance
2324 * @budget: how many packets we can process in this round
2326 * Handler for new data events when using NAPI.
2328 static int napi_rx_handler(struct napi_struct *napi, int budget)
2330 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2331 struct adapter *adap = qs->adap;
2332 int work_done = process_responses(adap, qs, budget);
2334 if (likely(work_done < budget)) {
2335 napi_complete(napi);
2338 * Because we don't atomically flush the following
2339 * write it is possible that in very rare cases it can
2340 * reach the device in a way that races with a new
2341 * response being written plus an error interrupt
2342 * causing the NAPI interrupt handler below to return
2343 * unhandled status to the OS. To protect against
2344 * this would require flushing the write and doing
2345 * both the write and the flush with interrupts off.
2346 * Way too expensive and unjustifiable given the
2347 * rarity of the race.
2349 * The race cannot happen at all with MSI-X.
2351 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2352 V_NEWTIMER(qs->rspq.next_holdoff) |
2353 V_NEWINDEX(qs->rspq.cidx));
2359 * Returns true if the device is already scheduled for polling.
2361 static inline int napi_is_scheduled(struct napi_struct *napi)
2363 return test_bit(NAPI_STATE_SCHED, &napi->state);
2367 * process_pure_responses - process pure responses from a response queue
2368 * @adap: the adapter
2369 * @qs: the queue set owning the response queue
2370 * @r: the first pure response to process
2372 * A simpler version of process_responses() that handles only pure (i.e.,
2373 * non data-carrying) responses. Such responses are too lightweight to
2374 * justify calling a softirq under NAPI, so we handle them specially in
2375 * the interrupt handler. The function is called with a pointer to a
2376 * response, which the caller must ensure is a valid pure response.
2378 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2380 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2383 struct sge_rspq *q = &qs->rspq;
2384 unsigned int sleeping = 0;
2387 u32 flags = ntohl(r->flags);
2390 if (unlikely(++q->cidx == q->size)) {
2397 if (flags & RSPD_CTRL_MASK) {
2398 sleeping |= flags & RSPD_GTS_MASK;
2399 handle_rsp_cntrl_info(qs, flags);
2403 if (++q->credits >= (q->size / 4)) {
2404 refill_rspq(adap, q, q->credits);
2407 } while (is_new_response(r, q) && is_pure_response(r));
2410 check_ring_db(adap, qs, sleeping);
2412 smp_mb(); /* commit Tx queue .processed updates */
2413 if (unlikely(qs->txq_stopped != 0))
2416 return is_new_response(r, q);
2420 * handle_responses - decide what to do with new responses in NAPI mode
2421 * @adap: the adapter
2422 * @q: the response queue
2424 * This is used by the NAPI interrupt handlers to decide what to do with
2425 * new SGE responses. If there are no new responses it returns -1. If
2426 * there are new responses and they are pure (i.e., non-data carrying)
2427 * it handles them straight in hard interrupt context as they are very
2428 * cheap and don't deliver any packets. Finally, if there are any data
2429 * signaling responses it schedules the NAPI handler. Returns 1 if it
2430 * schedules NAPI, 0 if all new responses were pure.
2432 * The caller must ensure that NAPI is not already running.
2434 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2436 struct sge_qset *qs = rspq_to_qset(q);
2437 struct rsp_desc *r = &q->desc[q->cidx];
2439 if (!is_new_response(r, q))
2441 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2442 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2443 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2446 napi_schedule(&qs->napi);
2451 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2452 * (i.e., response queue serviced in hard interrupt).
2454 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2456 struct sge_qset *qs = cookie;
2457 struct adapter *adap = qs->adap;
2458 struct sge_rspq *q = &qs->rspq;
2460 spin_lock(&q->lock);
2461 if (process_responses(adap, qs, -1) == 0)
2462 q->unhandled_irqs++;
2463 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2464 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2465 spin_unlock(&q->lock);
2470 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2471 * (i.e., response queue serviced by NAPI polling).
2473 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2475 struct sge_qset *qs = cookie;
2476 struct sge_rspq *q = &qs->rspq;
2478 spin_lock(&q->lock);
2480 if (handle_responses(qs->adap, q) < 0)
2481 q->unhandled_irqs++;
2482 spin_unlock(&q->lock);
2487 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2488 * SGE response queues as well as error and other async events as they all use
2489 * the same MSI vector. We use one SGE response queue per port in this mode
2490 * and protect all response queues with queue 0's lock.
2492 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2494 int new_packets = 0;
2495 struct adapter *adap = cookie;
2496 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2498 spin_lock(&q->lock);
2500 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2501 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2502 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2506 if (adap->params.nports == 2 &&
2507 process_responses(adap, &adap->sge.qs[1], -1)) {
2508 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2510 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2511 V_NEWTIMER(q1->next_holdoff) |
2512 V_NEWINDEX(q1->cidx));
2516 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2517 q->unhandled_irqs++;
2519 spin_unlock(&q->lock);
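/*
 * Schedule NAPI for a queue set if it is not already scheduled and its
 * response queue contains new entries.
 */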
2523 static int rspq_check_napi(struct sge_qset *qs)
2525 struct sge_rspq *q = &qs->rspq;
2527 if (!napi_is_scheduled(&qs->napi) &&
2528 is_new_response(&q->desc[q->cidx], q)) {
2529 napi_schedule(&qs->napi);
2536 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2537 * by NAPI polling). Handles data events from SGE response queues as well as
2538 * error and other async events as they all use the same MSI vector. We use
2539 * one SGE response queue per port in this mode and protect all response
2540 * queues with queue 0's lock.
2542 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2545 struct adapter *adap = cookie;
2546 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2548 spin_lock(&q->lock);
2550 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2551 if (adap->params.nports == 2)
2552 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2553 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2554 q->unhandled_irqs++;
2556 spin_unlock(&q->lock);
2561 * A helper function that processes responses and issues GTS.
2563 static inline int process_responses_gts(struct adapter *adap,
2564 struct sge_rspq *rq)
2568 work = process_responses(adap, rspq_to_qset(rq), -1);
2569 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2570 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2575 * The legacy INTx interrupt handler. This needs to handle data events from
2576 * SGE response queues as well as error and other async events as they all use
2577 * the same interrupt pin. We use one SGE response queue per port in this mode
2578 * and protect all response queues with queue 0's lock.
2580 static irqreturn_t t3_intr(int irq, void *cookie)
2582 int work_done, w0, w1;
2583 struct adapter *adap = cookie;
2584 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2585 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2587 spin_lock(&q0->lock);
2589 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2590 w1 = adap->params.nports == 2 &&
2591 is_new_response(&q1->desc[q1->cidx], q1);
2593 if (likely(w0 | w1)) {
2594 t3_write_reg(adap, A_PL_CLI, 0);
2595 t3_read_reg(adap, A_PL_CLI); /* flush */
2598 process_responses_gts(adap, q0);
2601 process_responses_gts(adap, q1);
2603 work_done = w0 | w1;
2605 work_done = t3_slow_intr_handler(adap);
2607 spin_unlock(&q0->lock);
2608 return IRQ_RETVAL(work_done != 0);
2612 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2613 * Handles data events from SGE response queues as well as error and other
2614 * async events as they all use the same interrupt pin. We use one SGE
2615 * response queue per port in this mode and protect all response queues with queue 0's lock.
2618 static irqreturn_t t3b_intr(int irq, void *cookie)
2621 struct adapter *adap = cookie;
2622 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2624 t3_write_reg(adap, A_PL_CLI, 0);
2625 map = t3_read_reg(adap, A_SG_DATA_INTR);
2627 if (unlikely(!map)) /* shared interrupt, most likely */
2630 spin_lock(&q0->lock);
2632 if (unlikely(map & F_ERRINTR))
2633 t3_slow_intr_handler(adap);
2635 if (likely(map & 1))
2636 process_responses_gts(adap, q0);
2639 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2641 spin_unlock(&q0->lock);
2646 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2647 * Handles data events from SGE response queues as well as error and other
2648 * async events as they all use the same interrupt pin. We use one SGE
2649 * response queue per port in this mode and protect all response queues with queue 0's lock.
2652 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2655 struct adapter *adap = cookie;
2656 struct sge_qset *qs0 = &adap->sge.qs[0];
2657 struct sge_rspq *q0 = &qs0->rspq;
2659 t3_write_reg(adap, A_PL_CLI, 0);
2660 map = t3_read_reg(adap, A_SG_DATA_INTR);
2662 if (unlikely(!map)) /* shared interrupt, most likely */
2665 spin_lock(&q0->lock);
2667 if (unlikely(map & F_ERRINTR))
2668 t3_slow_intr_handler(adap);
2670 if (likely(map & 1))
2671 napi_schedule(&qs0->napi);
2674 napi_schedule(&adap->sge.qs[1].napi);
2676 spin_unlock(&q0->lock);
2681 * t3_intr_handler - select the top-level interrupt handler
2682 * @adap: the adapter
2683 * @polling: whether using NAPI to service response queues
2685 * Selects the top-level interrupt handler based on the type of interrupts
2686 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the response queues.
2689 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2691 if (adap->flags & USING_MSIX)
2692 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2693 if (adap->flags & USING_MSI)
2694 return polling ? t3_intr_msi_napi : t3_intr_msi;
2695 if (adap->params.rev > 0)
2696 return polling ? t3b_intr_napi : t3b_intr;
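/*
 * Illustrative sketch only, not part of this file: the probe code would
 * typically hand the handler selected here to request_irq(), e.g.
 *
 *	err = request_irq(adap->pdev->irq, t3_intr_handler(adap, 1),
 *			  IRQF_SHARED, adap->name, adap);
 *
 * with flags, name and cookie chosen to match the interrupt mode in use.
 */

/* SGE interrupt-cause bits classed as parity, framing and fatal errors. */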
2700 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2701 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2702 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2703 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2705 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2706 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2710 * t3_sge_err_intr_handler - SGE async event interrupt handler
2711 * @adapter: the adapter
2713 * Interrupt handler for SGE asynchronous (non-data) events.
2715 void t3_sge_err_intr_handler(struct adapter *adapter)
2717 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2719 if (status & SGE_PARERR)
2720 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2721 status & SGE_PARERR);
2722 if (status & SGE_FRAMINGERR)
2723 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2724 status & SGE_FRAMINGERR);
2726 if (status & F_RSPQCREDITOVERFOW)
2727 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2729 if (status & F_RSPQDISABLED) {
2730 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2733 "packet delivered to disabled response queue "
2734 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2737 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2738 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2739 status & F_HIPIODRBDROPERR ? "high" : "low");
2741 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2742 if (status & SGE_FATALERR)
2743 t3_fatal_err(adapter);
2747 * sge_timer_cb - perform periodic maintenance of an SGE qset
2748 * @data: the SGE queue set to maintain
2750 * Runs periodically from a timer to perform maintenance of an SGE queue
2751 * set. It performs two tasks:
2753 * a) Cleans up any completed Tx descriptors that may still be pending.
2754 * Normal descriptor cleanup happens when new packets are added to a Tx
2755 * queue so this timer is relatively infrequent and does any cleanup only
2756 * if the Tx queue has not seen any new packets in a while. We make a
2757 * best-effort attempt to reclaim descriptors, in that we don't wait
2758 * around if we cannot get a queue's lock (which most likely is because
2759 * someone else is queueing new packets and so will also handle the
2760 * cleanup). Since control queues use immediate data exclusively we don't
2761 * bother cleaning them up here.
2763 * b) Replenishes Rx queues that have run out due to memory shortage.
2764 * Normally new Rx buffers are added when existing ones are consumed but
2765 * when out of memory a queue can become empty. We try to add only a few
2766 * buffers here; the queue will be replenished fully as these new buffers
2767 * are used up if memory shortage has subsided.
2769 static void sge_timer_cb(unsigned long data)
2772 struct sge_qset *qs = (struct sge_qset *)data;
2773 struct adapter *adap = qs->adap;
2775 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2776 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2777 spin_unlock(&qs->txq[TXQ_ETH].lock);
2779 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2780 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2781 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2783 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2784 &adap->sge.qs[0].rspq.lock;
2785 if (spin_trylock_irq(lock)) {
2786 if (!napi_is_scheduled(&qs->napi)) {
2787 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2789 if (qs->fl[0].credits < qs->fl[0].size)
2790 __refill_fl(adap, &qs->fl[0]);
2791 if (qs->fl[1].credits < qs->fl[1].size)
2792 __refill_fl(adap, &qs->fl[1]);
2794 if (status & (1 << qs->rspq.cntxt_id)) {
2796 if (qs->rspq.credits) {
2797 refill_rspq(adap, &qs->rspq, 1);
2799 qs->rspq.restarted++;
2800 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2801 1 << qs->rspq.cntxt_id);
2805 spin_unlock_irq(lock);
2807 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2811 * t3_update_qset_coalesce - update coalescing settings for a queue set
2812 * @qs: the SGE queue set
2813 * @p: new queue set parameters
2815 * Update the coalescing settings for an SGE queue set. Nothing is done
2816 * if the queue set is not initialized yet.
2818 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
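/* holdoff_tmr is in 0.1us SGE timer ticks, hence the factor of 10. */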
2820 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2821 qs->rspq.polling = p->polling;
2822 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2826 * t3_sge_alloc_qset - initialize an SGE queue set
2827 * @adapter: the adapter
2828 * @id: the queue set id
2829 * @nports: how many Ethernet ports will be using this queue set
2830 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2831 * @p: configuration parameters for this queue set
2832 * @ntxq: number of Tx queues for the queue set
2833 * @dev: net device associated with this queue set
2834 * @netdevq: net device TX queue associated with this queue set
2836 * Allocate resources and initialize an SGE queue set. A queue set
2837 * comprises a response queue, two Rx free-buffer queues, and up to 3
2838 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2839 * queue, offload queue, and control queue.
2841 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2842 int irq_vec_idx, const struct qset_params *p,
2843 int ntxq, struct net_device *dev,
2844 struct netdev_queue *netdevq)
2846 int i, avail, ret = -ENOMEM;
2847 struct sge_qset *q = &adapter->sge.qs[id];
2849 init_qset_cntxt(q, id);
2850 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
2852 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2853 sizeof(struct rx_desc),
2854 sizeof(struct rx_sw_desc),
2855 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2859 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2860 sizeof(struct rx_desc),
2861 sizeof(struct rx_sw_desc),
2862 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2866 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2867 sizeof(struct rsp_desc), 0,
2868 &q->rspq.phys_addr, NULL);
2872 for (i = 0; i < ntxq; ++i) {
2874 * The control queue always uses immediate data, so it does not
2875 * need to keep track of any sk_buffs.
2877 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2879 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2880 sizeof(struct tx_desc), sz,
2881 &q->txq[i].phys_addr,
2883 if (!q->txq[i].desc)
2887 q->txq[i].size = p->txq_size[i];
2888 spin_lock_init(&q->txq[i].lock);
2889 skb_queue_head_init(&q->txq[i].sendq);
2892 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2894 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2897 q->fl[0].gen = q->fl[1].gen = 1;
2898 q->fl[0].size = p->fl_size;
2899 q->fl[1].size = p->jumbo_size;
2902 q->rspq.size = p->rspq_size;
2903 spin_lock_init(&q->rspq.lock);
2904 skb_queue_head_init(&q->rspq.rx_queue);
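/*
 * Stop the Ethernet Tx queue when fewer descriptors remain than one
 * maximally fragmented packet per port could consume.
 */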
2906 q->txq[TXQ_ETH].stop_thres = nports *
2907 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
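/*
 * q->fl[0] holds small Rx buffers and q->fl[1] jumbo buffers; use_pages
 * selects page-chunk backing whenever the corresponding chunk size is
 * non-zero, otherwise plain sk_buffs are used.
 */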
2909 #if FL0_PG_CHUNK_SIZE > 0
2910 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2912 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2914 #if FL1_PG_CHUNK_SIZE > 0
2915 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2917 q->fl[1].buf_size = is_offload(adapter) ?
2918 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2919 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2922 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2923 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2924 q->fl[0].order = FL0_PG_ORDER;
2925 q->fl[1].order = FL1_PG_ORDER;
2927 spin_lock_irq(&adapter->sge.reg_lock);
2929 /* FL threshold comparison uses < */
2930 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2931 q->rspq.phys_addr, q->rspq.size,
2932 q->fl[0].buf_size, 1, 0);
2936 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2937 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2938 q->fl[i].phys_addr, q->fl[i].size,
2939 q->fl[i].buf_size, p->cong_thres, 1,
2945 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2946 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2947 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2953 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2954 USE_GTS, SGE_CNTXT_OFLD, id,
2955 q->txq[TXQ_OFLD].phys_addr,
2956 q->txq[TXQ_OFLD].size, 0, 1, 0);
2962 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2964 q->txq[TXQ_CTRL].phys_addr,
2965 q->txq[TXQ_CTRL].size,
2966 q->txq[TXQ_CTRL].token, 1, 0);
2971 spin_unlock_irq(&adapter->sge.reg_lock);
2976 t3_update_qset_coalesce(q, p);
2978 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2979 GFP_KERNEL | __GFP_COMP);
2981 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2984 if (avail < q->fl[0].size)
2985 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2988 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2989 GFP_KERNEL | __GFP_COMP);
2990 if (avail < q->fl[1].size)
2991 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2993 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2995 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2996 V_NEWTIMER(q->rspq.holdoff_tmr));
2998 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3002 spin_unlock_irq(&adapter->sge.reg_lock);
3004 t3_free_qset(adapter, q);
3009 * t3_stop_sge_timers - stop SGE timer callbacks
3010 * @adap: the adapter
3012 * Stops each SGE queue set's timer callback.
3014 void t3_stop_sge_timers(struct adapter *adap)
3018 for (i = 0; i < SGE_QSETS; ++i) {
3019 struct sge_qset *q = &adap->sge.qs[i];
3021 if (q->tx_reclaim_timer.function)
3022 del_timer_sync(&q->tx_reclaim_timer);
3027 * t3_free_sge_resources - free SGE resources
3028 * @adap: the adapter
3030 * Frees resources used by the SGE queue sets.
3032 void t3_free_sge_resources(struct adapter *adap)
3036 for (i = 0; i < SGE_QSETS; ++i)
3037 t3_free_qset(adap, &adap->sge.qs[i]);
3041 * t3_sge_start - enable SGE
3042 * @adap: the adapter
3044 * Enables the SGE for DMAs. This is the last step in starting packet transfers.
3047 void t3_sge_start(struct adapter *adap)
3049 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3053 * t3_sge_stop - disable SGE operation
3054 * @adap: the adapter
3056 * Disables the DMA engine. This can be called in emergencies (e.g.,
3057 * from error interrupts) or from normal process context. In the latter
3058 * case it also disables any pending queue restart tasklets. Note that
3059 * if it is called in interrupt context it cannot disable the restart
3060 * tasklets as it cannot wait; however, the tasklets will have no effect
3061 * since the doorbells are disabled, and the driver will call this again
3062 * later from process context, at which time the tasklets will be stopped
3063 * if they are still running.
3065 void t3_sge_stop(struct adapter *adap)
3067 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3068 if (!in_interrupt()) {
3071 for (i = 0; i < SGE_QSETS; ++i) {
3072 struct sge_qset *qs = &adap->sge.qs[i];
3074 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3075 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3081 * t3_sge_init - initialize SGE
3082 * @adap: the adapter
3083 * @p: the SGE parameters
3085 * Performs SGE initialization needed every time after a chip reset.
3086 * We do not initialize any of the queue sets here; instead, the driver
3087 * top-level must request those individually. We also do not enable DMA
3088 * here, that should be done after the queues have been set up.
3090 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3092 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
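/*
 * Assumed interpretation: ups encodes the size of the doorbell BAR (BAR 2)
 * in 4KB pages and feeds V_USERSPACESIZE below.
 */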
3094 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3095 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3096 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3097 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3098 #if SGE_NUM_GENBITS == 1
3099 ctrl |= F_EGRGENCTRL;
3101 if (adap->params.rev > 0) {
3102 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3103 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3105 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3106 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3107 V_LORCQDRBTHRSH(512));
3108 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3109 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3110 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3111 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3112 adap->params.rev < T3_REV_C ? 1000 : 500);
3113 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3114 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3115 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3116 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3117 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3121 * t3_sge_prep - one-time SGE initialization
3122 * @adap: the associated adapter
3123 * @p: SGE parameters
3125 * Performs one-time initialization of SGE SW state. Includes determining
3126 * defaults for the assorted SGE parameters, which admins can change until
3127 * they are used to initialize the SGE.
3129 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
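/*
 * Largest Rx payload that still fits in a 16KB buffer alongside the CPL
 * header and the skb_shared_info at the end of the buffer.
 */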
3133 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3134 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3136 for (i = 0; i < SGE_QSETS; ++i) {
3137 struct qset_params *q = p->qset + i;
3139 q->polling = adap->params.rev > 0;
3140 q->coalesce_usecs = 5;
3141 q->rspq_size = 1024;
3143 q->jumbo_size = 512;
3144 q->txq_size[TXQ_ETH] = 1024;
3145 q->txq_size[TXQ_OFLD] = 1024;
3146 q->txq_size[TXQ_CTRL] = 256;
3150 spin_lock_init(&adap->sge.reg_lock);
3154 * t3_get_desc - dump an SGE descriptor for debugging purposes
3155 * @qs: the queue set
3156 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3157 * @idx: the descriptor index in the queue
3158 * @data: where to dump the descriptor contents
3160 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3161 * size of the descriptor.
3163 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3164 unsigned char *data)
3170 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3172 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3173 return sizeof(struct tx_desc);
3177 if (!qs->rspq.desc || idx >= qs->rspq.size)
3179 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3180 return sizeof(struct rsp_desc);
3184 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3186 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3187 return sizeof(struct rx_desc);