2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
44 #include "firmware_exports.h"
48 #define SGE_RX_SM_BUF_SIZE 1536
50 #define SGE_RX_COPY_THRES 256
51 #define SGE_RX_PULL_LEN 128
54 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
55 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs instead.
58 #define FL0_PG_CHUNK_SIZE 2048
59 #define FL0_PG_ORDER 0
60 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
61 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
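/*
 * Illustrative example (not part of the driver): with 4KB pages, FL0 carves
 * two 2KB chunks out of each order-0 page, while FL1 hands out one 8KB chunk
 * per order-1 allocation.  On a 64KB-page platform FL1 instead takes four
 * 16KB chunks from each order-0 page.
 */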
63 #define SGE_RX_DROP_THRES 16
66 * Max number of Rx buffers we replenish at a time.
68 #define MAX_RX_REFILL 16U
70 * Period of the Tx buffer reclaim timer. This timer does not need to run
71 * frequently as Tx buffers are usually reclaimed by new Tx packets.
73 #define TX_RECLAIM_PERIOD (HZ / 4)
75 /* WR size in bytes */
76 #define WR_LEN (WR_FLITS * 8)
79 * Types of Tx queues in each queue set. Order here matters, do not change.
81 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
83 /* Values for sge_txq.flags */
85 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
86 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
90 __be64 flit[TX_DESC_FLITS];
100 struct tx_sw_desc { /* SW state per Tx descriptor */
102 u8 eop; /* set if last descriptor for packet */
103 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u8 fragidx; /* first page fragment associated with descriptor */
105 s8 sflit; /* start flit of first SGL entry in descriptor */
108 struct rx_sw_desc { /* SW state per Rx descriptor */
111 struct fl_pg_chunk pg_chunk;
113 DECLARE_PCI_UNMAP_ADDR(dma_addr);
116 struct rsp_desc { /* response queue descriptor */
117 struct rss_header rss_hdr;
125 * Holds unmapping information for Tx packets that need deferred unmapping.
126 * This structure lives at skb->head and must be allocated by callers.
128 struct deferred_unmap_info {
129 struct pci_dev *pdev;
130 dma_addr_t addr[MAX_SKB_FRAGS + 1];
134 * Maps a number of flits to the number of Tx descriptors that can hold them.
137 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
139 * HW allows up to 4 descriptors to be combined into a WR.
141 static u8 flit_desc_map[] = {
143 #if SGE_NUM_GENBITS == 1
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
148 #elif SGE_NUM_GENBITS == 2
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
154 # error "SGE_NUM_GENBITS must be 1 or 2"
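/*
 * Worked example (sketch): assuming, for illustration, WR_FLITS were 15, a
 * 16-flit request would need 1 + (16 - 2) / 14 = 2 descriptors; the tables
 * above simply precompute desc = 1 + (flits - 2) / (WR_FLITS - 1) for every
 * possible flit count, capped at the 4-descriptor WR limit.
 */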
158 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
160 return container_of(q, struct sge_qset, fl[qidx]);
163 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
165 return container_of(q, struct sge_qset, rspq);
168 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
170 return container_of(q, struct sge_qset, txq[qidx]);
174 * refill_rspq - replenish an SGE response queue
175 * @adapter: the adapter
176 * @q: the response queue to replenish
177 * @credits: how many new responses to make available
179 * Replenishes a response queue by making the supplied number of responses
182 static inline void refill_rspq(struct adapter *adapter,
183 const struct sge_rspq *q, unsigned int credits)
186 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
187 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
191 * need_skb_unmap - does the platform need unmapping of sk_buffs?
193 * Returns true if the platform needs sk_buff unmapping. The compiler
194 * optimizes away unnecessary code if this returns false.
196 static inline int need_skb_unmap(void)
199 * This structure is used to tell if the platform needs buffer
200 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
203 DECLARE_PCI_UNMAP_ADDR(addr);
206 return sizeof(struct dummy) != 0;
210 * unmap_skb - unmap a packet main body and its page fragments
212 * @q: the Tx queue containing Tx descriptors for the packet
213 * @cidx: index of Tx descriptor
214 * @pdev: the PCI device
216 * Unmap the main body of an sk_buff and its page fragments, if any.
217 * Because of the fairly complicated structure of our SGLs and the desire
218 * to conserve space for metadata, the information necessary to unmap an
219 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
220 * descriptors (the physical addresses of the various data buffers), and
221 * the SW descriptor state (assorted indices). The send functions
222 * initialize the indices for the first packet descriptor so we can unmap
223 * the buffers held in the first Tx descriptor here, and we have enough
224 * information at this point to set the state for the next Tx descriptor.
226 * Note that it is possible to clean up the first descriptor of a packet
227 * before the send routines have written the next descriptors, but this
228 * race does not cause any problem. We just end up writing the unmapping
229 * info for the descriptor first.
231 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
232 unsigned int cidx, struct pci_dev *pdev)
234 const struct sg_ent *sgp;
235 struct tx_sw_desc *d = &q->sdesc[cidx];
236 int nfrags, frag_idx, curflit, j = d->addr_idx;
238 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
239 frag_idx = d->fragidx;
241 if (frag_idx == 0 && skb_headlen(skb)) {
242 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
243 skb_headlen(skb), PCI_DMA_TODEVICE);
247 curflit = d->sflit + 1 + j;
248 nfrags = skb_shinfo(skb)->nr_frags;
250 while (frag_idx < nfrags && curflit < WR_FLITS) {
251 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
252 skb_shinfo(skb)->frags[frag_idx].size,
263 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
264 d = cidx + 1 == q->size ? q->sdesc : d + 1;
265 d->fragidx = frag_idx;
267 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
272 * free_tx_desc - reclaims Tx descriptors and their buffers
273 * @adapter: the adapter
274 * @q: the Tx queue to reclaim descriptors from
275 * @n: the number of descriptors to reclaim
277 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
278 * Tx buffers. Called with the Tx queue lock held.
280 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
283 struct tx_sw_desc *d;
284 struct pci_dev *pdev = adapter->pdev;
285 unsigned int cidx = q->cidx;
287 const int need_unmap = need_skb_unmap() &&
288 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
292 if (d->skb) { /* an SGL is present */
294 unmap_skb(d->skb, q, cidx, pdev);
299 if (++cidx == q->size) {
308 * reclaim_completed_tx - reclaims completed Tx descriptors
309 * @adapter: the adapter
310 * @q: the Tx queue to reclaim completed descriptors from
312 * Reclaims Tx descriptors that the SGE has indicated it has processed,
313 * and frees the associated buffers if possible. Called with the Tx queue locked.
316 static inline void reclaim_completed_tx(struct adapter *adapter,
319 unsigned int reclaim = q->processed - q->cleaned;
322 free_tx_desc(adapter, q, reclaim);
323 q->cleaned += reclaim;
324 q->in_use -= reclaim;
329 * should_restart_tx - are there enough resources to restart a Tx queue?
332 * Checks if there are enough descriptors to restart a suspended Tx queue.
334 static inline int should_restart_tx(const struct sge_txq *q)
336 unsigned int r = q->processed - q->cleaned;
338 return q->in_use - r < (q->size >> 1);
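/*
 * Example (sketch): for a 1024-entry queue with 700 descriptors in use, of
 * which 300 have already been processed by the SGE, in_use - r == 400 < 512,
 * so the queue has enough headroom to be restarted.
 */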
341 static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
344 if (d->pg_chunk.page)
345 put_page(d->pg_chunk.page);
346 d->pg_chunk.page = NULL;
354 * free_rx_bufs - free the Rx buffers on an SGE free list
355 * @pdev: the PCI device associated with the adapter
356 * @rxq: the SGE free list to clean up
358 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
359 * this queue should be stopped before calling this function.
361 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
363 unsigned int cidx = q->cidx;
365 while (q->credits--) {
366 struct rx_sw_desc *d = &q->sdesc[cidx];
368 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
369 q->buf_size, PCI_DMA_FROMDEVICE);
371 if (++cidx == q->size)
375 if (q->pg_chunk.page) {
376 __free_pages(q->pg_chunk.page, q->order);
377 q->pg_chunk.page = NULL;
382 * add_one_rx_buf - add a packet buffer to a free-buffer list
383 * @va: buffer start VA
384 * @len: the buffer length
385 * @d: the HW Rx descriptor to write
386 * @sd: the SW Rx descriptor to write
387 * @gen: the generation bit value
388 * @pdev: the PCI device associated with the adapter
390 * Add a buffer of the given length to the supplied HW and SW Rx descriptors.
393 static inline int add_one_rx_buf(void *va, unsigned int len,
394 struct rx_desc *d, struct rx_sw_desc *sd,
395 unsigned int gen, struct pci_dev *pdev)
399 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
400 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
403 pci_unmap_addr_set(sd, dma_addr, mapping);
405 d->addr_lo = cpu_to_be32(mapping);
406 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
408 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
409 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
413 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
416 if (!q->pg_chunk.page) {
417 q->pg_chunk.page = alloc_pages(gfp, order);
418 if (unlikely(!q->pg_chunk.page))
420 q->pg_chunk.va = page_address(q->pg_chunk.page);
421 q->pg_chunk.offset = 0;
423 sd->pg_chunk = q->pg_chunk;
425 q->pg_chunk.offset += q->buf_size;
426 if (q->pg_chunk.offset == (PAGE_SIZE << order))
427 q->pg_chunk.page = NULL;
429 q->pg_chunk.va += q->buf_size;
430 get_page(q->pg_chunk.page);
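/*
 * Illustrative note (not part of the driver): with buf_size == 2048, order 0
 * and 4KB pages, the first call hands out the chunk at offset 0 and takes an
 * extra page reference because the free list keeps carving from the page;
 * the second call hands out offset 2048, sees the offset reach PAGE_SIZE and
 * simply forgets the page.  The page is freed once both Rx chunks drop their
 * references (e.g. via clear_rx_desc() or the skb that absorbed the chunk).
 */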
435 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
437 if (q->pend_cred >= q->credits / 4) {
439 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
444 * refill_fl - refill an SGE free-buffer list
445 * @adapter: the adapter
446 * @q: the free-list to refill
447 * @n: the number of new buffers to allocate
448 * @gfp: the gfp flags for allocating new buffers
450 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
451 * allocated with the supplied gfp flags. The caller must ensure that
452 * @n does not exceed the queue's capacity.
454 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
457 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
458 struct rx_desc *d = &q->desc[q->pidx];
459 unsigned int count = 0;
465 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
466 nomem: q->alloc_failed++;
469 buf_start = sd->pg_chunk.va;
471 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
477 buf_start = skb->data;
480 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
483 clear_rx_desc(q, sd);
489 if (++q->pidx == q->size) {
499 q->pend_cred += count;
505 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
507 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
508 GFP_ATOMIC | __GFP_COMP);
512 * recycle_rx_buf - recycle a receive buffer
513 * @adapter: the adapter
514 * @q: the SGE free list
515 * @idx: index of buffer to recycle
517 * Recycles the specified buffer on the given free list by adding it at
518 * the next available slot on the list.
520 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
523 struct rx_desc *from = &q->desc[idx];
524 struct rx_desc *to = &q->desc[q->pidx];
526 q->sdesc[q->pidx] = q->sdesc[idx];
527 to->addr_lo = from->addr_lo; /* already big endian */
528 to->addr_hi = from->addr_hi; /* likewise */
530 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
531 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
533 if (++q->pidx == q->size) {
544 * alloc_ring - allocate resources for an SGE descriptor ring
545 * @pdev: the PCI device
546 * @nelem: the number of descriptors
547 * @elem_size: the size of each descriptor
548 * @sw_size: the size of the SW state associated with each ring element
549 * @phys: the physical address of the allocated ring
550 * @metadata: address of the array holding the SW state for the ring
552 * Allocates resources for an SGE descriptor ring, such as Tx queues,
553 * free buffer lists, or response queues. Each SGE ring requires
554 * space for its HW descriptors plus, optionally, space for the SW state
555 * associated with each HW entry (the metadata). The function returns
556 * three values: the virtual address for the HW ring (the return value
557 * of the function), the physical address of the HW ring, and the address
560 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
561 size_t sw_size, dma_addr_t * phys, void *metadata)
563 size_t len = nelem * elem_size;
565 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
569 if (sw_size && metadata) {
570 s = kcalloc(nelem, sw_size, GFP_KERNEL);
573 dma_free_coherent(&pdev->dev, len, p, *phys);
576 *(void **)metadata = s;
583 * t3_reset_qset - reset a sge qset
586 * Reset the qset structure.
587 * The NAPI structure is preserved in the event of
588 * the qset's reincarnation, for example during EEH recovery.
590 static void t3_reset_qset(struct sge_qset *q)
593 !(q->adap->flags & NAPI_INIT)) {
594 memset(q, 0, sizeof(*q));
599 memset(&q->rspq, 0, sizeof(q->rspq));
600 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
601 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
603 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
604 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
609 * free_qset - free the resources of an SGE queue set
610 * @adapter: the adapter owning the queue set
613 * Release the HW and SW resources associated with an SGE queue set, such
614 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
615 * queue set must be quiesced prior to calling this.
617 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
620 struct pci_dev *pdev = adapter->pdev;
622 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
624 spin_lock_irq(&adapter->sge.reg_lock);
625 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
626 spin_unlock_irq(&adapter->sge.reg_lock);
627 free_rx_bufs(pdev, &q->fl[i]);
628 kfree(q->fl[i].sdesc);
629 dma_free_coherent(&pdev->dev,
631 sizeof(struct rx_desc), q->fl[i].desc,
635 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
636 if (q->txq[i].desc) {
637 spin_lock_irq(&adapter->sge.reg_lock);
638 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
639 spin_unlock_irq(&adapter->sge.reg_lock);
640 if (q->txq[i].sdesc) {
641 free_tx_desc(adapter, &q->txq[i],
643 kfree(q->txq[i].sdesc);
645 dma_free_coherent(&pdev->dev,
647 sizeof(struct tx_desc),
648 q->txq[i].desc, q->txq[i].phys_addr);
649 __skb_queue_purge(&q->txq[i].sendq);
653 spin_lock_irq(&adapter->sge.reg_lock);
654 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
655 spin_unlock_irq(&adapter->sge.reg_lock);
656 dma_free_coherent(&pdev->dev,
657 q->rspq.size * sizeof(struct rsp_desc),
658 q->rspq.desc, q->rspq.phys_addr);
665 * init_qset_cntxt - initialize an SGE queue set context info
667 * @id: the queue set id
669 * Initializes the TIDs and context ids for the queues of a queue set.
671 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
673 qs->rspq.cntxt_id = id;
674 qs->fl[0].cntxt_id = 2 * id;
675 qs->fl[1].cntxt_id = 2 * id + 1;
676 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
677 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
678 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
679 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
680 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
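/*
 * Example (sketch): for queue set id 1 this yields response queue context 1,
 * free-list contexts 2 and 3, and Tx contexts FW_TUNNEL_SGEEC_START + 1,
 * FW_OFLD_SGEEC_START + 1 and FW_CTRL_SGEEC_START + 1, i.e. every context id
 * is derived directly from the queue set id.
 */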
684 * sgl_len - calculates the size of an SGL of the given capacity
685 * @n: the number of SGL entries
687 * Calculates the number of flits needed for a scatter/gather list that
688 * can hold the given number of entries.
690 static inline unsigned int sgl_len(unsigned int n)
692 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
693 return (3 * n) / 2 + (n & 1);
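/*
 * Worked example: each sg_ent packs two address/length pairs into 3 flits,
 * so sgl_len(2) == 3, sgl_len(1) == 2 and sgl_len(3) == 5 (the odd final
 * entry uses 2 flits of a half-filled sg_ent).
 */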
697 * flits_to_desc - returns the num of Tx descriptors for the given flits
698 * @n: the number of flits
700 * Calculates the number of Tx descriptors needed for the supplied number
703 static inline unsigned int flits_to_desc(unsigned int n)
705 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
706 return flit_desc_map[n];
710 * get_packet - return the next ingress packet buffer from a free list
711 * @adap: the adapter that received the packet
712 * @fl: the SGE free list holding the packet
713 * @len: the packet length including any SGE padding
714 * @drop_thres: # of remaining buffers before we start dropping packets
716 * Get the next packet from a free list and complete setup of the
717 * sk_buff. If the packet is small we make a copy and recycle the
718 * original buffer, otherwise we use the original buffer itself. If a
719 * positive drop threshold is supplied packets are dropped and their
720 * buffers recycled if (a) the number of remaining buffers is under the
721 * threshold and the packet is too big to copy, or (b) the packet should
722 * be copied but there is no memory for the copy.
724 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
725 unsigned int len, unsigned int drop_thres)
727 struct sk_buff *skb = NULL;
728 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
730 prefetch(sd->skb->data);
733 if (len <= SGE_RX_COPY_THRES) {
734 skb = alloc_skb(len, GFP_ATOMIC);
735 if (likely(skb != NULL)) {
737 pci_dma_sync_single_for_cpu(adap->pdev,
738 pci_unmap_addr(sd, dma_addr), len,
740 memcpy(skb->data, sd->skb->data, len);
741 pci_dma_sync_single_for_device(adap->pdev,
742 pci_unmap_addr(sd, dma_addr), len,
744 } else if (!drop_thres)
747 recycle_rx_buf(adap, fl, fl->cidx);
751 if (unlikely(fl->credits < drop_thres) &&
752 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
753 GFP_ATOMIC | __GFP_COMP) == 0)
757 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
758 fl->buf_size, PCI_DMA_FROMDEVICE);
761 __refill_fl(adap, fl);
766 * get_packet_pg - return the next ingress packet buffer from a free list
767 * @adap: the adapter that received the packet
768 * @fl: the SGE free list holding the packet
769 * @len: the packet length including any SGE padding
770 * @drop_thres: # of remaining buffers before we start dropping packets
772 * Get the next packet from a free list populated with page chunks.
773 * If the packet is small we make a copy and recycle the original buffer,
774 * otherwise we attach the original buffer as a page fragment to a fresh
775 * sk_buff. If a positive drop threshold is supplied packets are dropped
776 * and their buffers recycled if (a) the number of remaining buffers is
777 * under the threshold and the packet is too big to copy, or (b) there's no memory.
780 * Note: this function is similar to @get_packet but deals with Rx buffers
781 * that are page chunks rather than sk_buffs.
783 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
784 struct sge_rspq *q, unsigned int len,
785 unsigned int drop_thres)
787 struct sk_buff *newskb, *skb;
788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
790 newskb = skb = q->pg_skb;
792 if (!skb && (len <= SGE_RX_COPY_THRES)) {
793 newskb = alloc_skb(len, GFP_ATOMIC);
794 if (likely(newskb != NULL)) {
795 __skb_put(newskb, len);
796 pci_dma_sync_single_for_cpu(adap->pdev,
797 pci_unmap_addr(sd, dma_addr), len,
799 memcpy(newskb->data, sd->pg_chunk.va, len);
800 pci_dma_sync_single_for_device(adap->pdev,
801 pci_unmap_addr(sd, dma_addr), len,
803 } else if (!drop_thres)
807 recycle_rx_buf(adap, fl, fl->cidx);
812 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
816 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
817 if (unlikely(!newskb)) {
823 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
824 fl->buf_size, PCI_DMA_FROMDEVICE);
826 __skb_put(newskb, SGE_RX_PULL_LEN);
827 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
828 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
829 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
830 len - SGE_RX_PULL_LEN);
832 newskb->data_len = len - SGE_RX_PULL_LEN;
833 newskb->truesize += newskb->data_len;
835 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
837 sd->pg_chunk.offset, len);
839 newskb->data_len += len;
840 newskb->truesize += len;
845 * We do not refill FLs here, we let the caller do it to overlap a
852 * get_imm_packet - return the next ingress packet buffer from a response
853 * @resp: the response descriptor containing the packet data
855 * Return a packet containing the immediate data of the given response.
857 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
859 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
862 __skb_put(skb, IMMED_PKT_SIZE);
863 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
869 * calc_tx_descs - calculate the number of Tx descriptors for a packet
872 * Returns the number of Tx descriptors needed for the given Ethernet
873 * packet. Ethernet packets require addition of WR and CPL headers.
875 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
879 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
882 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
883 if (skb_shinfo(skb)->gso_size)
885 return flits_to_desc(flits);
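/*
 * Example (sketch): a non-TSO packet with a linear head and 2 page fragments
 * needs a 3-entry SGL, i.e. sgl_len(3) + 2 = 7 flits including the WR/CPL
 * headers, which flits_to_desc() then turns into a descriptor count.
 */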
889 * make_sgl - populate a scatter/gather list for a packet
891 * @sgp: the SGL to populate
892 * @start: start address of skb main body data to include in the SGL
893 * @len: length of skb main body data to include in the SGL
894 * @pdev: the PCI device
896 * Generates a scatter/gather list for the buffers that make up a packet
897 * and returns the SGL size in 8-byte words. The caller must size the SGL appropriately.
900 static inline unsigned int make_sgl(const struct sk_buff *skb,
901 struct sg_ent *sgp, unsigned char *start,
902 unsigned int len, struct pci_dev *pdev)
905 unsigned int i, j = 0, nfrags;
908 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
909 sgp->len[0] = cpu_to_be32(len);
910 sgp->addr[0] = cpu_to_be64(mapping);
914 nfrags = skb_shinfo(skb)->nr_frags;
915 for (i = 0; i < nfrags; i++) {
916 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
918 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
919 frag->size, PCI_DMA_TODEVICE);
920 sgp->len[j] = cpu_to_be32(frag->size);
921 sgp->addr[j] = cpu_to_be64(mapping);
928 return ((nfrags + (len != 0)) * 3) / 2 + j;
932 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
936 * Ring the doorbell if a Tx queue is asleep. There is a natural race
937 * where the HW may go to sleep just after we check; in that case
938 * the interrupt handler will detect the outstanding TX packet
939 * and ring the doorbell for us.
941 * When GTS is disabled we unconditionally ring the doorbell.
943 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
946 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
947 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
948 set_bit(TXQ_LAST_PKT_DB, &q->flags);
949 t3_write_reg(adap, A_SG_KDOORBELL,
950 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
953 wmb(); /* write descriptors before telling HW */
954 t3_write_reg(adap, A_SG_KDOORBELL,
955 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
959 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
961 #if SGE_NUM_GENBITS == 2
962 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
967 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
968 * @ndesc: number of Tx descriptors spanned by the SGL
969 * @skb: the packet corresponding to the WR
970 * @d: first Tx descriptor to be written
971 * @pidx: index of above descriptors
972 * @q: the SGE Tx queue
974 * @flits: number of flits to the start of the SGL in the first descriptor
975 * @sgl_flits: the SGL size in flits
976 * @gen: the Tx descriptor generation
977 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
978 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
980 * Write a work request header and an associated SGL. If the SGL is
981 * small enough to fit into one Tx descriptor it has already been written
982 * and we just need to write the WR header. Otherwise we distribute the
983 * SGL across the number of descriptors it spans.
985 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
986 struct tx_desc *d, unsigned int pidx,
987 const struct sge_txq *q,
988 const struct sg_ent *sgl,
989 unsigned int flits, unsigned int sgl_flits,
990 unsigned int gen, __be32 wr_hi,
993 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
994 struct tx_sw_desc *sd = &q->sdesc[pidx];
997 if (need_skb_unmap()) {
1003 if (likely(ndesc == 1)) {
1005 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1006 V_WR_SGLSFLT(flits)) | wr_hi;
1008 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1009 V_WR_GEN(gen)) | wr_lo;
1012 unsigned int ogen = gen;
1013 const u64 *fp = (const u64 *)sgl;
1014 struct work_request_hdr *wp = wrp;
1016 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1017 V_WR_SGLSFLT(flits)) | wr_hi;
1020 unsigned int avail = WR_FLITS - flits;
1022 if (avail > sgl_flits)
1024 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1034 if (++pidx == q->size) {
1042 wrp = (struct work_request_hdr *)d;
1043 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1044 V_WR_SGLSFLT(1)) | wr_hi;
1045 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1047 V_WR_GEN(gen)) | wr_lo;
1052 wrp->wr_hi |= htonl(F_WR_EOP);
1054 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1055 wr_gen2((struct tx_desc *)wp, ogen);
1056 WARN_ON(ndesc != 0);
1061 * write_tx_pkt_wr - write a TX_PKT work request
1062 * @adap: the adapter
1063 * @skb: the packet to send
1064 * @pi: the egress interface
1065 * @pidx: index of the first Tx descriptor to write
1066 * @gen: the generation value to use
1068 * @ndesc: number of descriptors the packet will occupy
1069 * @compl: the value of the COMPL bit to use
1071 * Generate a TX_PKT work request to send the supplied packet.
1073 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1074 const struct port_info *pi,
1075 unsigned int pidx, unsigned int gen,
1076 struct sge_txq *q, unsigned int ndesc,
1079 unsigned int flits, sgl_flits, cntrl, tso_info;
1080 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1081 struct tx_desc *d = &q->desc[pidx];
1082 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1084 cpl->len = htonl(skb->len | 0x80000000);
1085 cntrl = V_TXPKT_INTF(pi->port_id);
1087 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1088 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1090 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1093 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1096 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1097 hdr->cntrl = htonl(cntrl);
1098 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1099 CPL_ETH_II : CPL_ETH_II_VLAN;
1100 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1101 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1102 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1103 hdr->lso_info = htonl(tso_info);
1106 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1107 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1108 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1109 cpl->cntrl = htonl(cntrl);
1111 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1112 q->sdesc[pidx].skb = NULL;
1114 skb_copy_from_linear_data(skb, &d->flit[2],
1117 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1119 flits = (skb->len + 7) / 8 + 2;
1120 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1121 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1122 | F_WR_SOP | F_WR_EOP | compl);
1124 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1125 V_WR_TID(q->token));
1134 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1135 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1137 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1138 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1139 htonl(V_WR_TID(q->token)));
1142 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1143 struct sge_qset *qs, struct sge_txq *q)
1145 netif_tx_stop_queue(txq);
1146 set_bit(TXQ_ETH, &qs->txq_stopped);
1151 * eth_xmit - add a packet to the Ethernet Tx queue
1153 * @dev: the egress net device
1155 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1157 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1160 unsigned int ndesc, pidx, credits, gen, compl;
1161 const struct port_info *pi = netdev_priv(dev);
1162 struct adapter *adap = pi->adapter;
1163 struct netdev_queue *txq;
1164 struct sge_qset *qs;
1168 * The chip min packet length is 9 octets but play safe and reject
1169 * anything shorter than an Ethernet header.
1171 if (unlikely(skb->len < ETH_HLEN)) {
1173 return NETDEV_TX_OK;
1176 qidx = skb_get_queue_mapping(skb);
1178 q = &qs->txq[TXQ_ETH];
1179 txq = netdev_get_tx_queue(dev, qidx);
1181 spin_lock(&q->lock);
1182 reclaim_completed_tx(adap, q);
1184 credits = q->size - q->in_use;
1185 ndesc = calc_tx_descs(skb);
1187 if (unlikely(credits < ndesc)) {
1188 t3_stop_tx_queue(txq, qs, q);
1189 dev_err(&adap->pdev->dev,
1190 "%s: Tx ring %u full while queue awake!\n",
1191 dev->name, q->cntxt_id & 7);
1192 spin_unlock(&q->lock);
1193 return NETDEV_TX_BUSY;
1197 if (unlikely(credits - ndesc < q->stop_thres)) {
1198 t3_stop_tx_queue(txq, qs, q);
1200 if (should_restart_tx(q) &&
1201 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1203 netif_tx_wake_queue(txq);
1208 q->unacked += ndesc;
1209 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1213 if (q->pidx >= q->size) {
1218 /* update port statistics */
1219 if (skb->ip_summed == CHECKSUM_COMPLETE)
1220 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1221 if (skb_shinfo(skb)->gso_size)
1222 qs->port_stats[SGE_PSTAT_TSO]++;
1223 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1224 qs->port_stats[SGE_PSTAT_VLANINS]++;
1226 dev->trans_start = jiffies;
1227 spin_unlock(&q->lock);
1230 * We do not use Tx completion interrupts to free DMAd Tx packets.
1231 * This is good for performance but means that we rely on new Tx
1232 * packets arriving to run the destructors of completed packets,
1233 * which open up space in their sockets' send queues. Sometimes
1234 * we do not get such new packets causing Tx to stall. A single
1235 * UDP transmitter is a good example of this situation. We have
1236 * a clean up timer that periodically reclaims completed packets
1237 * but it doesn't run often enough (nor do we want it to) to prevent
1238 * lengthy stalls. A solution to this problem is to run the
1239 * destructor early, after the packet is queued but before it's DMAd.
1240 * A cons is that we lie to socket memory accounting, but the amount
1241 * of extra memory is reasonable (limited by the number of Tx
1242 * descriptors), the packets do actually get freed quickly by new
1243 * packets almost always, and for protocols like TCP that wait for
1244 * acks to really free up the data the extra memory is even less.
1245 * On the positive side we run the destructors on the sending CPU
1246 * rather than on a potentially different completing CPU, usually a
1247 * good thing. We also run them without holding our Tx queue lock,
1248 * unlike what reclaim_completed_tx() would otherwise do.
1250 * Run the destructor before telling the DMA engine about the packet
1251 * to make sure it doesn't complete and get freed prematurely.
1253 if (likely(!skb_shared(skb)))
1256 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1257 check_ring_tx_db(adap, q);
1258 return NETDEV_TX_OK;
1262 * write_imm - write a packet into a Tx descriptor as immediate data
1263 * @d: the Tx descriptor to write
1265 * @len: the length of packet data to write as immediate data
1266 * @gen: the generation bit value to write
1268 * Writes a packet as immediate data into a Tx descriptor. The packet
1269 * contains a work request at its beginning. We must write the packet
1270 * carefully so the SGE doesn't read it accidentally before it's written in its entirety.
1273 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1274 unsigned int len, unsigned int gen)
1276 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1277 struct work_request_hdr *to = (struct work_request_hdr *)d;
1279 if (likely(!skb->data_len))
1280 memcpy(&to[1], &from[1], len - sizeof(*from));
1282 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1284 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1285 V_WR_BCNTLFLT(len & 7));
1287 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1288 V_WR_LEN((len + 7) / 8));
1294 * check_desc_avail - check descriptor availability on a send queue
1295 * @adap: the adapter
1296 * @q: the send queue
1297 * @skb: the packet needing the descriptors
1298 * @ndesc: the number of Tx descriptors needed
1299 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1301 * Checks if the requested number of Tx descriptors is available on an
1302 * SGE send queue. If the queue is already suspended or not enough
1303 * descriptors are available the packet is queued for later transmission.
1304 * Must be called with the Tx queue locked.
1306 * Returns 0 if enough descriptors are available, 1 if there aren't
1307 * enough descriptors and the packet has been queued, and 2 if the caller
1308 * needs to retry because there weren't enough descriptors at the
1309 * beginning of the call but some freed up in the meantime.
1311 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1312 struct sk_buff *skb, unsigned int ndesc,
1315 if (unlikely(!skb_queue_empty(&q->sendq))) {
1316 addq_exit:__skb_queue_tail(&q->sendq, skb);
1319 if (unlikely(q->size - q->in_use < ndesc)) {
1320 struct sge_qset *qs = txq_to_qset(q, qid);
1322 set_bit(qid, &qs->txq_stopped);
1323 smp_mb__after_clear_bit();
1325 if (should_restart_tx(q) &&
1326 test_and_clear_bit(qid, &qs->txq_stopped))
1336 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1337 * @q: the SGE control Tx queue
1339 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1340 * that send only immediate data (presently just the control queues) and
1341 * thus do not have any sk_buffs to release.
1343 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1345 unsigned int reclaim = q->processed - q->cleaned;
1347 q->in_use -= reclaim;
1348 q->cleaned += reclaim;
1351 static inline int immediate(const struct sk_buff *skb)
1353 return skb->len <= WR_LEN;
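/*
 * Example: control-queue WRs must satisfy immediate() (see ctrl_xmit()), i.e.
 * fit entirely within the WR_LEN == WR_FLITS * 8 bytes of immediate data a
 * single Tx descriptor can carry.
 */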
1357 * ctrl_xmit - send a packet through an SGE control Tx queue
1358 * @adap: the adapter
1359 * @q: the control queue
1362 * Send a packet through an SGE control Tx queue. Packets sent through
1363 * a control queue must fit entirely as immediate data in a single Tx
1364 * descriptor and have no page fragments.
1366 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1367 struct sk_buff *skb)
1370 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1372 if (unlikely(!immediate(skb))) {
1375 return NET_XMIT_SUCCESS;
1378 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1379 wrp->wr_lo = htonl(V_WR_TID(q->token));
1381 spin_lock(&q->lock);
1382 again:reclaim_completed_tx_imm(q);
1384 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1385 if (unlikely(ret)) {
1387 spin_unlock(&q->lock);
1393 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1396 if (++q->pidx >= q->size) {
1400 spin_unlock(&q->lock);
1402 t3_write_reg(adap, A_SG_KDOORBELL,
1403 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1404 return NET_XMIT_SUCCESS;
1408 * restart_ctrlq - restart a suspended control queue
1409 * @qs: the queue set containing the control queue
1411 * Resumes transmission on a suspended Tx control queue.
1413 static void restart_ctrlq(unsigned long data)
1415 struct sk_buff *skb;
1416 struct sge_qset *qs = (struct sge_qset *)data;
1417 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1419 spin_lock(&q->lock);
1420 again:reclaim_completed_tx_imm(q);
1422 while (q->in_use < q->size &&
1423 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1425 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1427 if (++q->pidx >= q->size) {
1434 if (!skb_queue_empty(&q->sendq)) {
1435 set_bit(TXQ_CTRL, &qs->txq_stopped);
1436 smp_mb__after_clear_bit();
1438 if (should_restart_tx(q) &&
1439 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1444 spin_unlock(&q->lock);
1446 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1447 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1451 * Send a management message through control queue 0
1453 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1457 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1464 * deferred_unmap_destructor - unmap a packet when it is freed
1467 * This is the packet destructor used for Tx packets that need to remain
1468 * mapped until they are freed rather than until their Tx descriptors are freed.
1471 static void deferred_unmap_destructor(struct sk_buff *skb)
1474 const dma_addr_t *p;
1475 const struct skb_shared_info *si;
1476 const struct deferred_unmap_info *dui;
1478 dui = (struct deferred_unmap_info *)skb->head;
1481 if (skb->tail - skb->transport_header)
1482 pci_unmap_single(dui->pdev, *p++,
1483 skb->tail - skb->transport_header,
1486 si = skb_shinfo(skb);
1487 for (i = 0; i < si->nr_frags; i++)
1488 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1492 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1493 const struct sg_ent *sgl, int sgl_flits)
1496 struct deferred_unmap_info *dui;
1498 dui = (struct deferred_unmap_info *)skb->head;
1500 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1501 *p++ = be64_to_cpu(sgl->addr[0]);
1502 *p++ = be64_to_cpu(sgl->addr[1]);
1505 *p = be64_to_cpu(sgl->addr[0]);
1509 * write_ofld_wr - write an offload work request
1510 * @adap: the adapter
1511 * @skb: the packet to send
1513 * @pidx: index of the first Tx descriptor to write
1514 * @gen: the generation value to use
1515 * @ndesc: number of descriptors the packet will occupy
1517 * Write an offload work request to send the supplied packet. The packet
1518 * data already carry the work request with most fields populated.
1520 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1521 struct sge_txq *q, unsigned int pidx,
1522 unsigned int gen, unsigned int ndesc)
1524 unsigned int sgl_flits, flits;
1525 struct work_request_hdr *from;
1526 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1527 struct tx_desc *d = &q->desc[pidx];
1529 if (immediate(skb)) {
1530 q->sdesc[pidx].skb = NULL;
1531 write_imm(d, skb, skb->len, gen);
1535 /* Only TX_DATA builds SGLs */
1537 from = (struct work_request_hdr *)skb->data;
1538 memcpy(&d->flit[1], &from[1],
1539 skb_transport_offset(skb) - sizeof(*from));
1541 flits = skb_transport_offset(skb) / 8;
1542 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1543 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1544 skb->tail - skb->transport_header,
1546 if (need_skb_unmap()) {
1547 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1548 skb->destructor = deferred_unmap_destructor;
1551 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1552 gen, from->wr_hi, from->wr_lo);
1556 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1559 * Returns the number of Tx descriptors needed for the given offload
1560 * packet. These packets are already fully constructed.
1562 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1564 unsigned int flits, cnt;
1566 if (skb->len <= WR_LEN)
1567 return 1; /* packet fits as immediate data */
1569 flits = skb_transport_offset(skb) / 8; /* headers */
1570 cnt = skb_shinfo(skb)->nr_frags;
1571 if (skb->tail != skb->transport_header)
1573 return flits_to_desc(flits + sgl_len(cnt));
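/*
 * Example (sketch): an offload WR with 40 bytes of headers (5 flits), extra
 * payload in the linear area and 2 page fragments needs
 * flits_to_desc(5 + sgl_len(3)) = flits_to_desc(10) = 1 descriptor.
 */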
1577 * ofld_xmit - send a packet through an offload queue
1578 * @adap: the adapter
1579 * @q: the Tx offload queue
1582 * Send an offload packet through an SGE offload queue.
1584 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1585 struct sk_buff *skb)
1588 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1590 spin_lock(&q->lock);
1591 again:reclaim_completed_tx(adap, q);
1593 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1594 if (unlikely(ret)) {
1596 skb->priority = ndesc; /* save for restart */
1597 spin_unlock(&q->lock);
1607 if (q->pidx >= q->size) {
1611 spin_unlock(&q->lock);
1613 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1614 check_ring_tx_db(adap, q);
1615 return NET_XMIT_SUCCESS;
1619 * restart_offloadq - restart a suspended offload queue
1620 * @qs: the queue set containing the offload queue
1622 * Resumes transmission on a suspended Tx offload queue.
1624 static void restart_offloadq(unsigned long data)
1626 struct sk_buff *skb;
1627 struct sge_qset *qs = (struct sge_qset *)data;
1628 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1629 const struct port_info *pi = netdev_priv(qs->netdev);
1630 struct adapter *adap = pi->adapter;
1632 spin_lock(&q->lock);
1633 again:reclaim_completed_tx(adap, q);
1635 while ((skb = skb_peek(&q->sendq)) != NULL) {
1636 unsigned int gen, pidx;
1637 unsigned int ndesc = skb->priority;
1639 if (unlikely(q->size - q->in_use < ndesc)) {
1640 set_bit(TXQ_OFLD, &qs->txq_stopped);
1641 smp_mb__after_clear_bit();
1643 if (should_restart_tx(q) &&
1644 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1654 if (q->pidx >= q->size) {
1658 __skb_unlink(skb, &q->sendq);
1659 spin_unlock(&q->lock);
1661 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1662 spin_lock(&q->lock);
1664 spin_unlock(&q->lock);
1667 set_bit(TXQ_RUNNING, &q->flags);
1668 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1671 t3_write_reg(adap, A_SG_KDOORBELL,
1672 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1676 * queue_set - return the queue set a packet should use
1679 * Maps a packet to the SGE queue set it should use. The desired queue
1680 * set is carried in bits 1-3 in the packet's priority.
1682 static inline int queue_set(const struct sk_buff *skb)
1684 return skb->priority >> 1;
1688 * is_ctrl_pkt - return whether an offload packet is a control packet
1691 * Determines whether an offload packet should use an OFLD or a CTRL
1692 * Tx queue. This is indicated by bit 0 in the packet's priority.
1694 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1696 return skb->priority & 1;
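/*
 * Illustrative sketch (not part of the driver): an offload sender would
 * encode its choice as
 *
 *	skb->priority = (qset_idx << 1) | want_ctrl_queue;
 *
 * so, e.g., priority 5 selects queue set 2 and the control queue.
 */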
1700 * t3_offload_tx - send an offload packet
1701 * @tdev: the offload device to send to
1704 * Sends an offload packet. We use the packet priority to select the
1705 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1706 * should be sent as regular or control, bits 1-3 select the queue set.
1708 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1710 struct adapter *adap = tdev2adap(tdev);
1711 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1713 if (unlikely(is_ctrl_pkt(skb)))
1714 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1716 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1720 * offload_enqueue - add an offload packet to an SGE offload receive queue
1721 * @q: the SGE response queue
1724 * Add a new offload packet to an SGE response queue's offload packet
1725 * queue. If the packet is the first on the queue it schedules the RX
1726 * softirq to process the queue.
1728 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1730 int was_empty = skb_queue_empty(&q->rx_queue);
1732 __skb_queue_tail(&q->rx_queue, skb);
1735 struct sge_qset *qs = rspq_to_qset(q);
1737 napi_schedule(&qs->napi);
1742 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1743 * @tdev: the offload device that will be receiving the packets
1744 * @q: the SGE response queue that assembled the bundle
1745 * @skbs: the partial bundle
1746 * @n: the number of packets in the bundle
1748 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1750 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1752 struct sk_buff *skbs[], int n)
1755 q->offload_bundles++;
1756 tdev->recv(tdev, skbs, n);
1761 * ofld_poll - NAPI handler for offload packets in interrupt mode
1762 * @dev: the network device doing the polling
1763 * @budget: polling budget
1765 * The NAPI handler for offload packets when a response queue is serviced
1766 * by the hard interrupt handler, i.e., when it's operating in non-polling
1767 * mode. Creates small packet batches and sends them through the offload
1768 * receive handler. Batches need to be of modest size as we do prefetches
1769 * on the packets in each.
1771 static int ofld_poll(struct napi_struct *napi, int budget)
1773 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1774 struct sge_rspq *q = &qs->rspq;
1775 struct adapter *adapter = qs->adap;
1778 while (work_done < budget) {
1779 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1780 struct sk_buff_head queue;
1783 spin_lock_irq(&q->lock);
1784 __skb_queue_head_init(&queue);
1785 skb_queue_splice_init(&q->rx_queue, &queue);
1786 if (skb_queue_empty(&queue)) {
1787 napi_complete(napi);
1788 spin_unlock_irq(&q->lock);
1791 spin_unlock_irq(&q->lock);
1794 skb_queue_walk_safe(&queue, skb, tmp) {
1795 if (work_done >= budget)
1799 __skb_unlink(skb, &queue);
1800 prefetch(skb->data);
1801 skbs[ngathered] = skb;
1802 if (++ngathered == RX_BUNDLE_SIZE) {
1803 q->offload_bundles++;
1804 adapter->tdev.recv(&adapter->tdev, skbs,
1809 if (!skb_queue_empty(&queue)) {
1810 /* splice remaining packets back onto Rx queue */
1811 spin_lock_irq(&q->lock);
1812 skb_queue_splice(&queue, &q->rx_queue);
1813 spin_unlock_irq(&q->lock);
1815 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1822 * rx_offload - process a received offload packet
1823 * @tdev: the offload device receiving the packet
1824 * @rq: the response queue that received the packet
1826 * @rx_gather: a gather list of packets if we are building a bundle
1827 * @gather_idx: index of the next available slot in the bundle
1829 * Process an ingress offload packet and add it to the offload ingress
1830 * queue. Returns the index of the next available slot in the bundle.
1832 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1833 struct sk_buff *skb, struct sk_buff *rx_gather[],
1834 unsigned int gather_idx)
1836 skb_reset_mac_header(skb);
1837 skb_reset_network_header(skb);
1838 skb_reset_transport_header(skb);
1841 rx_gather[gather_idx++] = skb;
1842 if (gather_idx == RX_BUNDLE_SIZE) {
1843 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1845 rq->offload_bundles++;
1848 offload_enqueue(rq, skb);
1854 * restart_tx - check whether to restart suspended Tx queues
1855 * @qs: the queue set to resume
1857 * Restarts suspended Tx queues of an SGE queue set if they have enough
1858 * free resources to resume operation.
1860 static void restart_tx(struct sge_qset *qs)
1862 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1863 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1864 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1865 qs->txq[TXQ_ETH].restarts++;
1866 if (netif_running(qs->netdev))
1867 netif_tx_wake_queue(qs->tx_q);
1870 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1871 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1872 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1873 qs->txq[TXQ_OFLD].restarts++;
1874 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1876 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1877 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1878 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1879 qs->txq[TXQ_CTRL].restarts++;
1880 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1885 * cxgb3_arp_process - process an ARP request probing a private IP address
1886 * @adapter: the adapter
1887 * @skb: the skbuff containing the ARP request
1889 * Check if the ARP request is probing the private IP address
1890 * dedicated to iSCSI, generate an ARP reply if so.
1892 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1894 struct net_device *dev = skb->dev;
1895 struct port_info *pi;
1897 unsigned char *arp_ptr;
1904 skb_reset_network_header(skb);
1907 if (arp->ar_op != htons(ARPOP_REQUEST))
1910 arp_ptr = (unsigned char *)(arp + 1);
1912 arp_ptr += dev->addr_len;
1913 memcpy(&sip, arp_ptr, sizeof(sip));
1914 arp_ptr += sizeof(sip);
1915 arp_ptr += dev->addr_len;
1916 memcpy(&tip, arp_ptr, sizeof(tip));
1918 pi = netdev_priv(dev);
1919 if (tip != pi->iscsi_ipv4addr)
1922 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1923 dev->dev_addr, sha);
1927 static inline int is_arp(struct sk_buff *skb)
1929 return skb->protocol == htons(ETH_P_ARP);
1933 * rx_eth - process an ingress ethernet packet
1934 * @adap: the adapter
1935 * @rq: the response queue that received the packet
1937 * @pad: amount of padding at the start of the buffer
1939 * Process an ingress ethernet packet and deliver it to the stack.
1940 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1941 * if it was immediate data in a response.
1943 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1944 struct sk_buff *skb, int pad, int lro)
1946 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1947 struct sge_qset *qs = rspq_to_qset(rq);
1948 struct port_info *pi;
1950 skb_pull(skb, sizeof(*p) + pad);
1951 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1952 pi = netdev_priv(skb->dev);
1953 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
1955 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1956 skb->ip_summed = CHECKSUM_UNNECESSARY;
1958 skb->ip_summed = CHECKSUM_NONE;
1959 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
1961 if (unlikely(p->vlan_valid)) {
1962 struct vlan_group *grp = pi->vlan_grp;
1964 qs->port_stats[SGE_PSTAT_VLANEX]++;
1967 vlan_gro_receive(&qs->napi, grp,
1968 ntohs(p->vlan), skb);
1970 if (unlikely(pi->iscsi_ipv4addr &&
1972 unsigned short vtag = ntohs(p->vlan) &
1974 skb->dev = vlan_group_get_device(grp,
1976 cxgb3_arp_process(adap, skb);
1978 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1982 dev_kfree_skb_any(skb);
1983 } else if (rq->polling) {
1985 napi_gro_receive(&qs->napi, skb);
1987 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1988 cxgb3_arp_process(adap, skb);
1989 netif_receive_skb(skb);
1995 static inline int is_eth_tcp(u32 rss)
1997 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2001 * lro_add_page - add a page chunk to an LRO session
2002 * @adap: the adapter
2003 * @qs: the associated queue set
2004 * @fl: the free list containing the page chunk to add
2005 * @len: packet length
2006 * @complete: Indicates the last fragment of a frame
2008 * Add a received packet contained in a page chunk to an existing LRO session.
2011 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2012 struct sge_fl *fl, int len, int complete)
2014 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2015 struct cpl_rx_pkt *cpl;
2016 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2017 int nr_frags = qs->lro_frag_tbl.nr_frags;
2018 int frag_len = qs->lro_frag_tbl.len;
2022 offset = 2 + sizeof(struct cpl_rx_pkt);
2023 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2029 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2030 fl->buf_size, PCI_DMA_FROMDEVICE);
2032 prefetch(&qs->lro_frag_tbl);
2034 rx_frag += nr_frags;
2035 rx_frag->page = sd->pg_chunk.page;
2036 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2037 rx_frag->size = len;
2039 qs->lro_frag_tbl.nr_frags++;
2040 qs->lro_frag_tbl.len = frag_len;
2045 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2048 if (unlikely(cpl->vlan_valid)) {
2049 struct net_device *dev = qs->netdev;
2050 struct port_info *pi = netdev_priv(dev);
2051 struct vlan_group *grp = pi->vlan_grp;
2053 if (likely(grp != NULL)) {
2054 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2059 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2062 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2066 * handle_rsp_cntrl_info - handles control information in a response
2067 * @qs: the queue set corresponding to the response
2068 * @flags: the response control flags
2070 * Handles the control information of an SGE response, such as GTS
2071 * indications and completion credits for the queue set's Tx queues.
2072 * HW coalesces credits, we don't do any extra SW coalescing.
2074 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2076 unsigned int credits;
2079 if (flags & F_RSPD_TXQ0_GTS)
2080 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2083 credits = G_RSPD_TXQ0_CR(flags);
2085 qs->txq[TXQ_ETH].processed += credits;
2087 credits = G_RSPD_TXQ2_CR(flags);
2089 qs->txq[TXQ_CTRL].processed += credits;
2092 if (flags & F_RSPD_TXQ1_GTS)
2093 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2095 credits = G_RSPD_TXQ1_CR(flags);
2097 qs->txq[TXQ_OFLD].processed += credits;
2101 * check_ring_db - check if we need to ring any doorbells
2102 * @adapter: the adapter
2103 * @qs: the queue set whose Tx queues are to be examined
2104 * @sleeping: indicates which Tx queue sent GTS
2106 * Checks if some of a queue set's Tx queues need to ring their doorbells
2107 * to resume transmission after idling while they still have unprocessed descriptors.
2110 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2111 unsigned int sleeping)
2113 if (sleeping & F_RSPD_TXQ0_GTS) {
2114 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2116 if (txq->cleaned + txq->in_use != txq->processed &&
2117 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2118 set_bit(TXQ_RUNNING, &txq->flags);
2119 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2120 V_EGRCNTX(txq->cntxt_id));
2124 if (sleeping & F_RSPD_TXQ1_GTS) {
2125 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2127 if (txq->cleaned + txq->in_use != txq->processed &&
2128 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2129 set_bit(TXQ_RUNNING, &txq->flags);
2130 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2131 V_EGRCNTX(txq->cntxt_id));
2137 * is_new_response - check if a response is newly written
2138 * @r: the response descriptor
2139 * @q: the response queue
2141 * Returns true if a response descriptor contains a yet unprocessed response.
2144 static inline int is_new_response(const struct rsp_desc *r,
2145 const struct sge_rspq *q)
2147 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
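/*
 * Note (sketch): SW flips q->gen each time the response ring wraps, so
 * descriptors left over from the previous pass carry the old generation bit
 * and fail this test until HW overwrites them with fresh responses.
 */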
2150 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2153 q->rx_recycle_buf = 0;
2156 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2157 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2158 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2159 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2160 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2162 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2163 #define NOMEM_INTR_DELAY 2500
2166 * process_responses - process responses from an SGE response queue
2167 * @adap: the adapter
2168 * @qs: the queue set to which the response queue belongs
2169 * @budget: how many responses can be processed in this round
2171 * Process responses from an SGE response queue up to the supplied budget.
2172 * Responses include received packets as well as credits and other events
2173 * for the queues that belong to the response queue's queue set.
2174 * A negative budget is effectively unlimited.
2176 * Additionally choose the interrupt holdoff time for the next interrupt
2177 * on this queue. If the system is under memory shortage use a fairly
2178 * long delay to help recovery.
2180 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2183 struct sge_rspq *q = &qs->rspq;
2184 struct rsp_desc *r = &q->desc[q->cidx];
2185 int budget_left = budget;
2186 unsigned int sleeping = 0;
2187 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2190 q->next_holdoff = q->holdoff_tmr;
2192 while (likely(budget_left && is_new_response(r, q))) {
2193 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2194 struct sk_buff *skb = NULL;
2195 u32 len, flags = ntohl(r->flags);
2196 __be32 rss_hi = *(const __be32 *)r,
2197 rss_lo = r->rss_hdr.rss_hash_val;
2199 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2201 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2202 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2206 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2207 skb->data[0] = CPL_ASYNC_NOTIF;
2208 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2210 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2211 skb = get_imm_packet(r);
2212 if (unlikely(!skb)) {
2214 q->next_holdoff = NOMEM_INTR_DELAY;
2216 /* consume one credit since we tried */
2222 } else if ((len = ntohl(r->len_cq)) != 0) {
2225 lro &= eth && is_eth_tcp(rss_hi);
2227 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2228 if (fl->use_pages) {
2229 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2232 #if L1_CACHE_BYTES < 128
2233 prefetch(addr + L1_CACHE_BYTES);
2235 __refill_fl(adap, fl);
2237 lro_add_page(adap, qs, fl,
2239 flags & F_RSPD_EOP);
2243 skb = get_packet_pg(adap, fl, q,
2246 SGE_RX_DROP_THRES : 0);
2249 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2250 eth ? SGE_RX_DROP_THRES : 0);
2251 if (unlikely(!skb)) {
2255 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2258 if (++fl->cidx == fl->size)
2263 if (flags & RSPD_CTRL_MASK) {
2264 sleeping |= flags & RSPD_GTS_MASK;
2265 handle_rsp_cntrl_info(qs, flags);
2269 if (unlikely(++q->cidx == q->size)) {
2276 if (++q->credits >= (q->size / 4)) {
2277 refill_rspq(adap, q, q->credits);
2281 packet_complete = flags &
2282 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2283 F_RSPD_ASYNC_NOTIF);
2285 if (skb != NULL && packet_complete) {
2287 rx_eth(adap, q, skb, ethpad, lro);
2290 /* Preserve the RSS info in csum & priority */
2292 skb->priority = rss_lo;
2293 ngathered = rx_offload(&adap->tdev, q, skb,
2298 if (flags & F_RSPD_EOP)
2299 clear_rspq_bufstate(q);
2304 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2307 check_ring_db(adap, qs, sleeping);
2309 smp_mb(); /* commit Tx queue .processed updates */
2310 if (unlikely(qs->txq_stopped != 0))
2313 budget -= budget_left;
2317 static inline int is_pure_response(const struct rsp_desc *r)
2319 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2321 return (n | r->len_cq) == 0;
2325 * napi_rx_handler - the NAPI handler for Rx processing
2326 * @napi: the napi instance
2327 * @budget: how many packets we can process in this round
2329 * Handler for new data events when using NAPI.
2331 static int napi_rx_handler(struct napi_struct *napi, int budget)
2333 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2334 struct adapter *adap = qs->adap;
2335 int work_done = process_responses(adap, qs, budget);
2337 if (likely(work_done < budget)) {
2338 napi_complete(napi);
2341 * Because we don't atomically flush the following
2342 * write it is possible that in very rare cases it can
2343 * reach the device in a way that races with a new
2344 * response being written plus an error interrupt
2345 * causing the NAPI interrupt handler below to return
2346 * unhandled status to the OS. To protect against
2347 * this would require flushing the write and doing
2348 * both the write and the flush with interrupts off.
2349 * Way too expensive and unjustifiable given the
2350 * rarity of the race.
2352 * The race cannot happen at all with MSI-X.
2354 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2355 V_NEWTIMER(qs->rspq.next_holdoff) |
2356 V_NEWINDEX(qs->rspq.cidx));
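/*
 * Illustrative sketch, generic and not part of the driver: the NAPI contract
 * napi_rx_handler() follows.  A poll callback handles at most @budget packets
 * and returns how many it handled; only when it stays under budget does it
 * call napi_complete() and re-enable the queue's interrupt (done above by
 * writing the GTS register).
 */
static int example_napi_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* ... process up to @budget responses here, counting them ... */

	if (work_done < budget) {
		napi_complete(napi);
		/* re-arm the interrupt for this queue here */
	}
	return work_done;
}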
2362 * Returns true if the device is already scheduled for polling.
2364 static inline int napi_is_scheduled(struct napi_struct *napi)
2366 return test_bit(NAPI_STATE_SCHED, &napi->state);
2370 * process_pure_responses - process pure responses from a response queue
2371 * @adap: the adapter
2372 * @qs: the queue set owning the response queue
2373 * @r: the first pure response to process
2375 * A simpler version of process_responses() that handles only pure (i.e.,
2376 * non data-carrying) responses. Such responses are too lightweight to
2377 * justify calling a softirq under NAPI, so we handle them specially in
2378 * the interrupt handler. The function is called with a pointer to a
2379 * response, which the caller must ensure is a valid pure response.
2381 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2383 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2386 struct sge_rspq *q = &qs->rspq;
2387 unsigned int sleeping = 0;
2390 u32 flags = ntohl(r->flags);
2393 if (unlikely(++q->cidx == q->size)) {
2400 if (flags & RSPD_CTRL_MASK) {
2401 sleeping |= flags & RSPD_GTS_MASK;
2402 handle_rsp_cntrl_info(qs, flags);
2406 if (++q->credits >= (q->size / 4)) {
2407 refill_rspq(adap, q, q->credits);
2410 } while (is_new_response(r, q) && is_pure_response(r));
2413 check_ring_db(adap, qs, sleeping);
2415 smp_mb(); /* commit Tx queue .processed updates */
2416 if (unlikely(qs->txq_stopped != 0))
2419 return is_new_response(r, q);
2423 * handle_responses - decide what to do with new responses in NAPI mode
2424 * @adap: the adapter
2425 * @q: the response queue
2427 * This is used by the NAPI interrupt handlers to decide what to do with
2428 * new SGE responses. If there are no new responses it returns -1. If
2429 * there are new responses and they are pure (i.e., non-data carrying)
2430 * it handles them straight in hard interrupt context as they are very
2431 * cheap and don't deliver any packets. Finally, if there are any data
2432 * signaling responses it schedules the NAPI handler. Returns 1 if it
2433 * schedules NAPI, 0 if all new responses were pure.
2435 * The caller must ascertain NAPI is not already running.
2437 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2439 struct sge_qset *qs = rspq_to_qset(q);
2440 struct rsp_desc *r = &q->desc[q->cidx];
2442 if (!is_new_response(r, q))
2444 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2445 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2446 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2449 napi_schedule(&qs->napi);
2454 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2455 * (i.e., response queue serviced in hard interrupt).
2457 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2459 struct sge_qset *qs = cookie;
2460 struct adapter *adap = qs->adap;
2461 struct sge_rspq *q = &qs->rspq;
2463 spin_lock(&q->lock);
2464 if (process_responses(adap, qs, -1) == 0)
2465 q->unhandled_irqs++;
2466 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2467 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2468 spin_unlock(&q->lock);
2473 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2474 * (i.e., response queue serviced by NAPI polling).
2476 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2478 struct sge_qset *qs = cookie;
2479 struct sge_rspq *q = &qs->rspq;
2481 spin_lock(&q->lock);
2483 if (handle_responses(qs->adap, q) < 0)
2484 q->unhandled_irqs++;
2485 spin_unlock(&q->lock);
2490 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2491 * SGE response queues as well as error and other async events as they all use
2492 * the same MSI vector. We use one SGE response queue per port in this mode
2493 * and protect all response queues with queue 0's lock.
2495 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2497 int new_packets = 0;
2498 struct adapter *adap = cookie;
2499 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2501 spin_lock(&q->lock);
2503 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2504 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2505 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2509 if (adap->params.nports == 2 &&
2510 process_responses(adap, &adap->sge.qs[1], -1)) {
2511 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2513 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2514 V_NEWTIMER(q1->next_holdoff) |
2515 V_NEWINDEX(q1->cidx));
2519 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2520 q->unhandled_irqs++;
2522 spin_unlock(&q->lock);
2526 static int rspq_check_napi(struct sge_qset *qs)
2528 struct sge_rspq *q = &qs->rspq;
2530 if (!napi_is_scheduled(&qs->napi) &&
2531 is_new_response(&q->desc[q->cidx], q)) {
2532 napi_schedule(&qs->napi);
2539 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2540 * by NAPI polling). Handles data events from SGE response queues as well as
2541 * error and other async events as they all use the same MSI vector. We use
2542 * one SGE response queue per port in this mode and protect all response
2543 * queues with queue 0's lock.
2545 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2548 struct adapter *adap = cookie;
2549 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2551 spin_lock(&q->lock);
2553 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2554 if (adap->params.nports == 2)
2555 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2556 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2557 q->unhandled_irqs++;
2559 spin_unlock(&q->lock);
2564 * A helper function that processes responses and issues GTS.
2566 static inline int process_responses_gts(struct adapter *adap,
2567 struct sge_rspq *rq)
2571 work = process_responses(adap, rspq_to_qset(rq), -1);
2572 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2573 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2578 * The legacy INTx interrupt handler. This needs to handle data events from
2579 * SGE response queues as well as error and other async events as they all use
2580 * the same interrupt pin. We use one SGE response queue per port in this mode
2581 * and protect all response queues with queue 0's lock.
2583 static irqreturn_t t3_intr(int irq, void *cookie)
2585 int work_done, w0, w1;
2586 struct adapter *adap = cookie;
2587 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2588 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2590 spin_lock(&q0->lock);
2592 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2593 w1 = adap->params.nports == 2 &&
2594 is_new_response(&q1->desc[q1->cidx], q1);
2596 if (likely(w0 | w1)) {
2597 t3_write_reg(adap, A_PL_CLI, 0);
2598 t3_read_reg(adap, A_PL_CLI); /* flush */
2601 process_responses_gts(adap, q0);
2604 process_responses_gts(adap, q1);
2606 work_done = w0 | w1;
2608 work_done = t3_slow_intr_handler(adap);
2610 spin_unlock(&q0->lock);
2611 return IRQ_RETVAL(work_done != 0);
2615 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2616 * Handles data events from SGE response queues as well as error and other
2617 * async events as they all use the same interrupt pin. We use one SGE
2618 * response queue per port in this mode and protect all response queues with queue 0's lock.
2621 static irqreturn_t t3b_intr(int irq, void *cookie)
2624 struct adapter *adap = cookie;
2625 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2627 t3_write_reg(adap, A_PL_CLI, 0);
2628 map = t3_read_reg(adap, A_SG_DATA_INTR);
2630 if (unlikely(!map)) /* shared interrupt, most likely */
2633 spin_lock(&q0->lock);
2635 if (unlikely(map & F_ERRINTR))
2636 t3_slow_intr_handler(adap);
2638 if (likely(map & 1))
2639 process_responses_gts(adap, q0);
2642 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2644 spin_unlock(&q0->lock);
2649 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2650 * Handles data events from SGE response queues as well as error and other
2651 * async events as they all use the same interrupt pin. We use one SGE
2652 * response queue per port in this mode and protect all response queues with queue 0's lock.
2655 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2658 struct adapter *adap = cookie;
2659 struct sge_qset *qs0 = &adap->sge.qs[0];
2660 struct sge_rspq *q0 = &qs0->rspq;
2662 t3_write_reg(adap, A_PL_CLI, 0);
2663 map = t3_read_reg(adap, A_SG_DATA_INTR);
2665 if (unlikely(!map)) /* shared interrupt, most likely */
2668 spin_lock(&q0->lock);
2670 if (unlikely(map & F_ERRINTR))
2671 t3_slow_intr_handler(adap);
2673 if (likely(map & 1))
2674 napi_schedule(&qs0->napi);
2677 napi_schedule(&adap->sge.qs[1].napi);
2679 spin_unlock(&q0->lock);
2684 * t3_intr_handler - select the top-level interrupt handler
2685 * @adap: the adapter
2686 * @polling: whether using NAPI to service response queues
2688 * Selects the top-level interrupt handler based on the type of interrupts
2689 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the response queues.
2692 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2694 if (adap->flags & USING_MSIX)
2695 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2696 if (adap->flags & USING_MSI)
2697 return polling ? t3_intr_msi_napi : t3_intr_msi;
2698 if (adap->params.rev > 0)
2699 return polling ? t3b_intr_napi : t3b_intr;
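/*
 * Illustrative sketch, hypothetical and not part of the driver: installing
 * the handler chosen above for the MSI/legacy-INTx case, assuming
 * <linux/interrupt.h> is available.  Note that the MSI-X handlers expect a
 * struct sge_qset cookie rather than the adapter.
 */
static int example_register_intr(struct adapter *adap, int polling)
{
	return request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
			   IRQF_SHARED, "cxgb3-example", adap);
}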
2703 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2704 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2705 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2706 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2708 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2709 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2713 * t3_sge_err_intr_handler - SGE async event interrupt handler
2714 * @adapter: the adapter
2716 * Interrupt handler for SGE asynchronous (non-data) events.
2718 void t3_sge_err_intr_handler(struct adapter *adapter)
2720 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2722 if (status & SGE_PARERR)
2723 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2724 status & SGE_PARERR);
2725 if (status & SGE_FRAMINGERR)
2726 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2727 status & SGE_FRAMINGERR);
2729 if (status & F_RSPQCREDITOVERFOW)
2730 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2732 if (status & F_RSPQDISABLED) {
2733 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2736 "packet delivered to disabled response queue "
2737 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2740 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2741 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2742 status & F_HIPIODRBDROPERR ? "high" : "low");
2744 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2745 if (status & SGE_FATALERR)
2746 t3_fatal_err(adapter);
2750 * sge_timer_cb - perform periodic maintenance of an SGE qset
2751 * @data: the SGE queue set to maintain
2753 * Runs periodically from a timer to perform maintenance of an SGE queue
2754 * set. It performs two tasks:
2756 * a) Cleans up any completed Tx descriptors that may still be pending.
2757 * Normal descriptor cleanup happens when new packets are added to a Tx
2758 * queue so this timer is relatively infrequent and does any cleanup only
2759 * if the Tx queue has not seen any new packets in a while. We make a
2760 * best effort attempt to reclaim descriptors, in that we don't wait
2761 * around if we cannot get a queue's lock (which most likely is because
2762 * someone else is queueing new packets and so will also handle the clean
2763 * up). Since control queues use immediate data exclusively we don't
2764 * bother cleaning them up here.
2766 * b) Replenishes Rx queues that have run out due to memory shortage.
2767 * Normally new Rx buffers are added when existing ones are consumed but
2768 * when out of memory a queue can become empty. We try to add only a few
2769 * buffers here; the queue will be replenished fully as these new buffers
2770 * are used up, provided the memory shortage has subsided.
2772 static void sge_timer_cb(unsigned long data)
2775 struct sge_qset *qs = (struct sge_qset *)data;
2776 struct adapter *adap = qs->adap;
2778 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2779 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2780 spin_unlock(&qs->txq[TXQ_ETH].lock);
2782 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2783 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2784 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2786 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2787 &adap->sge.qs[0].rspq.lock;
2788 if (spin_trylock_irq(lock)) {
2789 if (!napi_is_scheduled(&qs->napi)) {
2790 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2792 if (qs->fl[0].credits < qs->fl[0].size)
2793 __refill_fl(adap, &qs->fl[0]);
2794 if (qs->fl[1].credits < qs->fl[1].size)
2795 __refill_fl(adap, &qs->fl[1]);
2797 if (status & (1 << qs->rspq.cntxt_id)) {
2799 if (qs->rspq.credits) {
2800 refill_rspq(adap, &qs->rspq, 1);
2802 qs->rspq.restarted++;
2803 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2804 1 << qs->rspq.cntxt_id);
2808 spin_unlock_irq(lock);
2810 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
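/*
 * Illustrative sketch, generic and not part of the driver: the best-effort
 * locking pattern the timer above relies on.  Maintenance is simply skipped
 * when the lock is contended, because whoever holds it will do the same
 * cleanup as part of its own work.
 */
static inline void example_best_effort_maintain(spinlock_t *lock,
						void (*do_work)(void *arg),
						void *arg)
{
	if (spin_trylock(lock)) {
		do_work(arg);
		spin_unlock(lock);
	}
}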
2814 * t3_update_qset_coalesce - update coalescing settings for a queue set
2815 * @qs: the SGE queue set
2816 * @p: new queue set parameters
2818 * Update the coalescing settings for an SGE queue set. Nothing is done
2819 * if the queue set is not initialized yet.
2821 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2823 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2824 qs->rspq.polling = p->polling;
2825 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
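/*
 * Illustrative sketch, hypothetical helper, not part of the driver: the
 * holdoff timer set above is stored in hardware units of 0.1 us (with a
 * minimum of 1), so converting it back to microseconds for reporting is a
 * divide by 10.
 */
static inline unsigned int example_holdoff_to_usecs(const struct sge_rspq *q)
{
	return q->holdoff_tmr / 10;
}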
2829 * t3_sge_alloc_qset - initialize an SGE queue set
2830 * @adapter: the adapter
2831 * @id: the queue set id
2832 * @nports: how many Ethernet ports will be using this queue set
2833 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2834 * @p: configuration parameters for this queue set
2835 * @ntxq: number of Tx queues for the queue set
2836 * @netdev: net device associated with this queue set
2837 * @netdevq: net device TX queue associated with this queue set
2839 * Allocate resources and initialize an SGE queue set. A queue set
2840 * comprises a response queue, two Rx free-buffer queues, and up to 3
2841 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2842 * queue, offload queue, and control queue.
2844 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2845 int irq_vec_idx, const struct qset_params *p,
2846 int ntxq, struct net_device *dev,
2847 struct netdev_queue *netdevq)
2849 int i, avail, ret = -ENOMEM;
2850 struct sge_qset *q = &adapter->sge.qs[id];
2852 init_qset_cntxt(q, id);
2853 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
2855 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2856 sizeof(struct rx_desc),
2857 sizeof(struct rx_sw_desc),
2858 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2862 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2863 sizeof(struct rx_desc),
2864 sizeof(struct rx_sw_desc),
2865 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2869 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2870 sizeof(struct rsp_desc), 0,
2871 &q->rspq.phys_addr, NULL);
2875 for (i = 0; i < ntxq; ++i) {
2877 * The control queue always uses immediate data so does not
2878 * need to keep track of any sk_buffs.
2880 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2882 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2883 sizeof(struct tx_desc), sz,
2884 &q->txq[i].phys_addr,
2886 if (!q->txq[i].desc)
2890 q->txq[i].size = p->txq_size[i];
2891 spin_lock_init(&q->txq[i].lock);
2892 skb_queue_head_init(&q->txq[i].sendq);
2895 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2897 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2900 q->fl[0].gen = q->fl[1].gen = 1;
2901 q->fl[0].size = p->fl_size;
2902 q->fl[1].size = p->jumbo_size;
2905 q->rspq.size = p->rspq_size;
2906 spin_lock_init(&q->rspq.lock);
2907 skb_queue_head_init(&q->rspq.rx_queue);
2909 q->txq[TXQ_ETH].stop_thres = nports *
2910 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2912 #if FL0_PG_CHUNK_SIZE > 0
2913 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2915 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2917 #if FL1_PG_CHUNK_SIZE > 0
2918 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2920 q->fl[1].buf_size = is_offload(adapter) ?
2921 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2922 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2925 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2926 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2927 q->fl[0].order = FL0_PG_ORDER;
2928 q->fl[1].order = FL1_PG_ORDER;
2930 spin_lock_irq(&adapter->sge.reg_lock);
2932 /* FL threshold comparison uses < */
2933 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2934 q->rspq.phys_addr, q->rspq.size,
2935 q->fl[0].buf_size, 1, 0);
2939 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2940 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2941 q->fl[i].phys_addr, q->fl[i].size,
2942 q->fl[i].buf_size, p->cong_thres, 1,
2948 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2949 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2950 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2956 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2957 USE_GTS, SGE_CNTXT_OFLD, id,
2958 q->txq[TXQ_OFLD].phys_addr,
2959 q->txq[TXQ_OFLD].size, 0, 1, 0);
2965 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2967 q->txq[TXQ_CTRL].phys_addr,
2968 q->txq[TXQ_CTRL].size,
2969 q->txq[TXQ_CTRL].token, 1, 0);
2974 spin_unlock_irq(&adapter->sge.reg_lock);
2979 t3_update_qset_coalesce(q, p);
2981 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2982 GFP_KERNEL | __GFP_COMP);
2984 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2987 if (avail < q->fl[0].size)
2988 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2991 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2992 GFP_KERNEL | __GFP_COMP);
2993 if (avail < q->fl[1].size)
2994 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2996 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2998 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2999 V_NEWTIMER(q->rspq.holdoff_tmr));
3001 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3006 spin_unlock_irq(&adapter->sge.reg_lock);
3008 t3_free_qset(adapter, q);
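/*
 * Illustrative sketch, hypothetical values, not part of the driver: one way
 * t3_sge_alloc_qset() could be invoked for a port's first queue set, using
 * parameters prepared by t3_sge_prep().  The real driver derives the id,
 * IRQ vector index and netdev Tx queue from its port configuration.
 */
static int example_alloc_first_qset(struct adapter *adap, struct net_device *dev,
				    const struct qset_params *p)
{
	/* qset id 0, one port, IRQ vector 0, all 3 Tx queues, netdev Tx queue 0 */
	return t3_sge_alloc_qset(adap, 0, 1, 0, p, 3, dev,
				 netdev_get_tx_queue(dev, 0));
}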
3013 * t3_stop_sge_timers - stop SGE timer callbacks
3014 * @adap: the adapter
3016 * Stops each SGE queue set's timer callback.
3018 void t3_stop_sge_timers(struct adapter *adap)
3022 for (i = 0; i < SGE_QSETS; ++i) {
3023 struct sge_qset *q = &adap->sge.qs[i];
3025 if (q->tx_reclaim_timer.function)
3026 del_timer_sync(&q->tx_reclaim_timer);
3031 * t3_free_sge_resources - free SGE resources
3032 * @adap: the adapter
3034 * Frees resources used by the SGE queue sets.
3036 void t3_free_sge_resources(struct adapter *adap)
3040 for (i = 0; i < SGE_QSETS; ++i)
3041 t3_free_qset(adap, &adap->sge.qs[i]);
3045 * t3_sge_start - enable SGE
3046 * @adap: the adapter
3048 * Enables the SGE for DMAs. This is the last step in starting packet transfers.
3051 void t3_sge_start(struct adapter *adap)
3053 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3057 * t3_sge_stop - disable SGE operation
3058 * @adap: the adapter
3060 * Disables the DMA engine. This can be called in emergencies (e.g.,
3061 * from error interrupts) or from normal process context. In the latter
3062 * case it also disables any pending queue restart tasklets. Note that
3063 * if it is called in interrupt context it cannot disable the restart
3064 * tasklets as it cannot wait; however, the tasklets will have no effect
3065 * since the doorbells are disabled and the driver will call this again
3066 * later from process context, at which time the tasklets will be stopped
3067 * if they are still running.
3069 void t3_sge_stop(struct adapter *adap)
3071 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3072 if (!in_interrupt()) {
3075 for (i = 0; i < SGE_QSETS; ++i) {
3076 struct sge_qset *qs = &adap->sge.qs[i];
3078 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3079 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3085 * t3_sge_init - initialize SGE
3086 * @adap: the adapter
3087 * @p: the SGE parameters
3089 * Performs SGE initialization needed every time after a chip reset.
3090 * We do not initialize any of the queue sets here; instead, the driver
3091 * top-level must request those individually. We also do not enable DMA
3092 * here; that should be done after the queues have been set up.
3094 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3096 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3098 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3099 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3100 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3101 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3102 #if SGE_NUM_GENBITS == 1
3103 ctrl |= F_EGRGENCTRL;
3105 if (adap->params.rev > 0) {
3106 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3107 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3109 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3110 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3111 V_LORCQDRBTHRSH(512));
3112 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3113 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3114 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3115 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3116 adap->params.rev < T3_REV_C ? 1000 : 500);
3117 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3118 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3119 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3120 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3121 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3125 * t3_sge_prep - one-time SGE initialization
3126 * @adap: the associated adapter
3127 * @p: SGE parameters
3129 * Performs one-time initialization of SGE SW state. Includes determining
3130 * defaults for the assorted SGE parameters, which admins can change until
3131 * they are used to initialize the SGE.
3133 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3137 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3138 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3140 for (i = 0; i < SGE_QSETS; ++i) {
3141 struct qset_params *q = p->qset + i;
3143 q->polling = adap->params.rev > 0;
3144 q->coalesce_usecs = 5;
3145 q->rspq_size = 1024;
3147 q->jumbo_size = 512;
3148 q->txq_size[TXQ_ETH] = 1024;
3149 q->txq_size[TXQ_OFLD] = 1024;
3150 q->txq_size[TXQ_CTRL] = 256;
3154 spin_lock_init(&adap->sge.reg_lock);
3158 * t3_get_desc - dump an SGE descriptor for debugging purposes
3159 * @qs: the queue set
3160 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3161 * @idx: the descriptor index in the queue
3162 * @data: where to dump the descriptor contents
3164 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3165 * size of the descriptor.
3167 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3168 unsigned char *data)
3174 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3176 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3177 return sizeof(struct tx_desc);
3181 if (!qs->rspq.desc || idx >= qs->rspq.size)
3183 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3184 return sizeof(struct rsp_desc);
3188 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3190 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3191 return sizeof(struct rx_desc);
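/*
 * Illustrative sketch, hypothetical buffer size, not part of the driver:
 * dumping a single descriptor with t3_get_desc().  The caller provides a
 * buffer large enough for the largest descriptor type and uses the returned
 * length, or the negative error code for an invalid queue or index.
 */
static int example_dump_desc(const struct sge_qset *qs, unsigned int qnum,
			     unsigned int idx)
{
	unsigned char buf[128];		/* assumed to cover all descriptor types */
	int len = t3_get_desc(qs, qnum, idx, buf);

	if (len < 0)
		return len;
	print_hex_dump(KERN_DEBUG, "sge desc: ", DUMP_PREFIX_OFFSET, 16, 1,
		       buf, len, 0);
	return 0;
}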