/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
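
/*
 * Usage example (illustrative, not from the original source): on SR-IOV
 * capable 82576 hardware, loading the driver with "modprobe igb max_vfs=7"
 * requests seven virtual functions per port; igb_probe_vfs() below caps
 * the value at 7 in any case.
 */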

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read.  For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
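
/*
 * Illustrative note (assuming IGB_82580_TSYNC_SHIFT == 24): on the 82580
 * the 64-bit stamp assembled above is laid out as
 *   bits  0..23  SYSTIMR >> 8
 *   bits 24..55  SYSTIML
 *   bits 56..63  SYSTIMH (low byte)
 * whereas on parts with shift == 0 it is simply SYSTIMH:SYSTIML.
 */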

/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev->name;
}

#ifdef DEBUG
/**
 * igb_get_time_str - format current NIC and system time as string
 **/
static char *igb_get_time_str(struct igb_adapter *adapter,
			      char buffer[160])
{
	cycle_t hw = adapter->cycles.read(&adapter->cycles);
	struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
	struct timespec sys;
	struct timespec delta;
	getnstimeofday(&sys);

	delta = timespec_sub(nic, sys);

	sprintf(buffer,
		"HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
		hw,
		(long)nic.tv_sec, nic.tv_nsec,
		(long)sys.tv_sec, sys.tv_nsec,
		(long)delta.tv_sec, delta.tv_nsec);

	return buffer;
}
#endif

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i].reg_idx = rbase_offset +
				                              Q_IDX_82576(i);
			for (; j < adapter->rss_queues; j++)
				adapter->tx_ring[j].reg_idx = rbase_offset +
				                              Q_IDX_82576(j);
		}
	case e1000_82575:
	case e1000_82580:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i].reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j].reg_idx = rbase_offset + j;
		break;
	}
}
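
/*
 * Worked example for the 82576 case above (hypothetical configuration, not
 * from the original source): Q_IDX_82576(i) = ((i & 0x1) << 3) + (i >> 1),
 * so i = 0, 1, 2, 3 maps to 0, 8, 1, 9.  With vfs_allocated_count = 7 the
 * VFs own hardware queues 0-6 and 8-14, and two PF RSS queues land on
 * 7 + Q_IDX_82576(0) = 7 and 7 + Q_IDX_82576(1) = 15 -- the first free
 * slot in each bank.
 */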

static void igb_free_queues(struct igb_adapter *adapter)
{
	kfree(adapter->tx_ring);
	kfree(adapter->rx_ring);

	adapter->tx_ring = NULL;
	adapter->rx_ring = NULL;

	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	int i;

	adapter->tx_ring = kcalloc(adapter->num_tx_queues,
				   sizeof(struct igb_ring), GFP_KERNEL);
	if (!adapter->tx_ring)
		goto err;

	adapter->rx_ring = kcalloc(adapter->num_rx_queues,
				   sizeof(struct igb_ring), GFP_KERNEL);
	if (!adapter->rx_ring)
		goto err;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = &(adapter->tx_ring[i]);
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = &(adapter->rx_ring[i]);
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
		/* 82580 uses the same table-based approach as 82576 but has
		   fewer entries; as a result we carry over for queues greater
		   than 4. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
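
/*
 * Byte layout of an IVAR0 entry as manipulated above, summarized for
 * reference (derived from the masks/shifts in the code itself):
 *   82576, index = queue & 0x7:  byte 0 = RX queue n,   byte 1 = TX queue n,
 *                                byte 2 = RX queue n+8, byte 3 = TX queue n+8
 *   82580, index = queue >> 1:   byte 0 = RX even,      byte 1 = TX even,
 *                                byte 2 = RX odd,       byte 3 = TX odd
 * Each byte holds the MSI-X vector number OR'd with E1000_IVAR_VALID.
 */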

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support */
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
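
/*
 * Note on vector numbering (follows from the vector++ sequencing above):
 * vector 0 is the "other" interrupt (link changes, mailbox) and vectors
 * 1..num_q_vectors service the queue q_vectors, matching the request_irq()
 * ordering in igb_request_msix() below.
 */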

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
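
/*
 * Example of the resulting IRQ naming (hypothetical "eth0" with two paired
 * queue vectors): /proc/interrupts would list "eth0" for the misc vector
 * plus "eth0-TxRx-0" and "eth0-TxRx-1", per the sprintf() calls above.
 */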

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced Tx Queue count. */
	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
	return;
}
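
/*
 * Vector accounting example (hypothetical, follows the arithmetic above):
 * 4 RSS queues without IGB_FLAG_QUEUE_PAIRS need 4 rx + 4 tx = 8 queue
 * vectors plus 1 link-status vector, i.e. 9 MSI-X entries; with queue
 * pairing the same queues fit in 4 + 1 = 5 entries.
 */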

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}

	return 0;

err_out:
	while (v_idx) {
		v_idx--;
		q_vector = adapter->q_vector[v_idx];
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
		adapter->q_vector[v_idx] = NULL;
	}
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector;

	q_vector = adapter->q_vector[v_idx];
	q_vector->rx_ring = &adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector;

	q_vector = adapter->q_vector[v_idx];
	q_vector->tx_ring = &adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
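
/*
 * Mapping example (hypothetical): with 4 rx and 4 tx queues but only 4
 * q_vectors (queue pairs), the else-branch above pairs tx[i] and rx[i] on
 * vector i; with 8 or more q_vectors every ring gets its own vector.
 */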

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	igb_set_interrupt_capability(adapter);

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);
	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	synchronize_irq(adapter->pdev->irq);
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = &adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}

	adapter->tx_queue_len = netdev->tx_queue_len;
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 **/
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netdev->tx_queue_len = adapter->tx_queue_len;
	netif_carrier_off(netdev);

	/* record the stats before reset*/
	igb_update_stats(adapter);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
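
	/*
	 * Worked example (hypothetical 82575 values): pba = E1000_PBA_34K
	 * (34 KB) and max_frame_size = 1522 give
	 *   hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
	 *       = min(31334, 31772) = 31334,
	 * so high_water = 31334 & 0xFFF0 = 31328 and low_water = 31312.
	 */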

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;

		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags = 0;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_reset_adaptive(hw);
	igb_get_phy_info(hw);
}

static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats		= igb_get_stats,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_register	= igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
};

/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u32 part_num;

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (!err) {
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			err = pci_set_consistent_dma_mask(pdev,
							  DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
	                           IGB_ABS_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;
	hw->mac.adaptive_ifs = true;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	netdev->features = NETIF_F_SG |
			   NETIF_F_IP_CSUM |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->features |= NETIF_F_IPV6_CSUM;
	netdev->features |= NETIF_F_TSO;
	netdev->features |= NETIF_F_TSO6;
	netdev->features |= NETIF_F_GRO;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	if (hw->mac.type >= e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (igb_validate_nvm_checksum(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type == e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
		                 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}
#endif

	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
		  "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		   "unknown"),
		 netdev->dev_addr);

	igb_read_part_num(hw, &part_num);
	dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
		 (part_num >> 8), (part_num & 0xff));

	dev_info(&pdev->dev,
		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		adapter->msix_entries ? "MSI-X" :
		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		adapter->num_rx_queues, adapter->num_tx_queues);

	return 0;

err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!igb_check_reset_block(hw))
		igb_reset_phy(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}

/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* flush_scheduled_work() may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef CONFIG_IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));

	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}

/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 **/
static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
{
#ifdef CONFIG_PCI_IOV
	struct pci_dev *pdev = adapter->pdev;

	if (adapter->vfs_allocated_count > 7)
		adapter->vfs_allocated_count = 7;

	if (adapter->vfs_allocated_count) {
		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
					   sizeof(struct vf_data_storage),
					   GFP_KERNEL);
		/* if allocation failed then we do not support SR-IOV */
		if (!adapter->vf_data) {
			adapter->vfs_allocated_count = 0;
			dev_err(&pdev->dev, "Unable to allocate memory for VF "
			        "Data Storage\n");
		}
	}

	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
#endif /* CONFIG_PCI_IOV */
		adapter->vfs_allocated_count = 0;
#ifdef CONFIG_PCI_IOV
	} else {
		unsigned char mac_addr[ETH_ALEN];
		int i;

		dev_info(&pdev->dev, "%d vfs allocated\n",
			 adapter->vfs_allocated_count);
		for (i = 0; i < adapter->vfs_allocated_count; i++) {
			random_ether_addr(mac_addr);
			igb_set_vf_mac(adapter, i, mac_addr);
		}
	}
#endif /* CONFIG_PCI_IOV */
}

/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_82580:
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * The 82580 timesync updates the system timer every 8ns by 8ns
		 * and the value cannot be shifted.  Instead we need to shift
		 * the registers to generate a 64bit timer value.  As a result
		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
		 * 24 in order to generate a larger value for synchronization.
		 */
		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
		/* disable system timer temporarily by setting bit 31 */
		wr32(E1000_TSAUXC, 0x80000000);
		wrfl();

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIMR, 0x00000000);
		wr32(E1000_SYSTIML, 0x80000000);
		wr32(E1000_SYSTIMH, 0x000000FF);
		wrfl();

		/* enable system timer by clearing bit 31 */
		wr32(E1000_TSAUXC, 0x0);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * subtracted at each clock tick. The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		wr32(E1000_TIMINCA,
		     (1 << E1000_TIMINCA_16NS_SHIFT) |
		     (16 << IGB_82576_TSYNC_SHIFT));
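
		/*
		 * Worked example (assuming E1000_TIMINCA_16NS_SHIFT == 24 and
		 * IGB_82576_TSYNC_SHIFT == 19): the value written is
		 * (1 << 24) | (16 << 19) = 0x01800000, i.e. every 16 ns period
		 * SYSTIM advances by 16 << 19, so a raw cycle count shifted
		 * right by 19 yields nanoseconds, matching the
		 * adapter->cycles.shift set above.
		 */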

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIML, 0x00000000);
		wr32(E1000_SYSTIMH, 0xFF800000);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
	default:
		break;
	}
}

/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

#ifdef CONFIG_PCI_IOV
	if (hw->mac.type == e1000_82576)
		adapter->vfs_allocated_count = max_vfs;

#endif /* CONFIG_PCI_IOV */
	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());

	/*
	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
	 * then we should combine the queues into a queue pair in order to
	 * conserve interrupts due to limited supply
	 */
	if ((adapter->rss_queues > 4) ||
	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
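
	/*
	 * Example (hypothetical): on an 8-CPU system rss_queues is 8, which
	 * exceeds 4, so queue pairing is enabled and the 8 rx/tx queue pairs
	 * can be serviced by 8 + 1 MSI-X vectors instead of 17.
	 */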

	/* This call may decrease the number of queues */
	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_init_hw_timer(adapter);
	igb_probe_vfs(adapter);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}
1987 * igb_open - Called when a network interface is made active
1988 * @netdev: network interface device structure
1990 * Returns 0 on success, negative value on failure
1992 * The open entry point is called when a network interface is made
1993 * active by the system (IFF_UP). At this point all resources needed
1994 * for transmit and receive operations are allocated, the interrupt
1995 * handler is registered with the OS, the watchdog timer is started,
1996 * and the stack is notified that the interface is ready.
1998 static int igb_open(struct net_device *netdev)
2000 struct igb_adapter *adapter = netdev_priv(netdev);
2001 struct e1000_hw *hw = &adapter->hw;
2005 /* disallow open during test */
2006 if (test_bit(__IGB_TESTING, &adapter->state))
2009 netif_carrier_off(netdev);
2011 /* allocate transmit descriptors */
2012 err = igb_setup_all_tx_resources(adapter);
2016 /* allocate receive descriptors */
2017 err = igb_setup_all_rx_resources(adapter);
2021 igb_power_up_link(adapter);
2023 /* before we allocate an interrupt, we must be ready to handle it.
2024 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2025 * as soon as we call request_irq, so we have to set up our
2026 * clean_rx handler before we do so. */
2027 igb_configure(adapter);
2029 err = igb_request_irq(adapter);
2033 /* From here on the code is the same as igb_up() */
2034 clear_bit(__IGB_DOWN, &adapter->state);
2036 for (i = 0; i < adapter->num_q_vectors; i++) {
2037 struct igb_q_vector *q_vector = adapter->q_vector[i];
2038 napi_enable(&q_vector->napi);
2041 /* Clear any pending interrupts. */
2044 igb_irq_enable(adapter);
2046 /* notify VFs that reset has been completed */
2047 if (adapter->vfs_allocated_count) {
2048 u32 reg_data = rd32(E1000_CTRL_EXT);
2049 reg_data |= E1000_CTRL_EXT_PFRSTD;
2050 wr32(E1000_CTRL_EXT, reg_data);
2053 netif_tx_start_all_queues(netdev);
2055 /* start the watchdog. */
2056 hw->mac.get_link_status = 1;
2057 schedule_work(&adapter->watchdog_task);
2062 igb_release_hw_control(adapter);
2063 igb_power_down_link(adapter);
2064 igb_free_all_rx_resources(adapter);
2066 igb_free_all_tx_resources(adapter);
2074 * igb_close - Disables a network interface
2075 * @netdev: network interface device structure
2077 * Returns 0, this is not allowed to fail
2079 * The close entry point is called when an interface is de-activated
2080 * by the OS. The hardware is still under the driver's control, but
2081 * needs to be disabled. A global MAC reset is issued to stop the
2082 * hardware, and all transmit and receive resources are freed.
2084 static int igb_close(struct net_device *netdev)
2086 struct igb_adapter *adapter = netdev_priv(netdev);
2088 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2091 igb_free_irq(adapter);
2093 igb_free_all_tx_resources(adapter);
2094 igb_free_all_rx_resources(adapter);
2100 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2101 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2103 * Return 0 on success, negative on failure
2105 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2107 struct pci_dev *pdev = tx_ring->pdev;
2110 size = sizeof(struct igb_buffer) * tx_ring->count;
2111 tx_ring->buffer_info = vmalloc(size);
2112 if (!tx_ring->buffer_info)
2114 memset(tx_ring->buffer_info, 0, size);
2116 /* round up to nearest 4K */
2117 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2118 tx_ring->size = ALIGN(tx_ring->size, 4096);
2120 tx_ring->desc = pci_alloc_consistent(pdev,
2121 tx_ring->size,
2122 &tx_ring->dma);
2124 if (!tx_ring->desc)
2125 goto err;
2127 tx_ring->next_to_use = 0;
2128 tx_ring->next_to_clean = 0;
2132 vfree(tx_ring->buffer_info);
2134 "Unable to allocate memory for the transmit descriptor ring\n");
2139 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2140 * (Descriptors) for all queues
2141 * @adapter: board private structure
2143 * Return 0 on success, negative on failure
2145 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2147 struct pci_dev *pdev = adapter->pdev;
2150 for (i = 0; i < adapter->num_tx_queues; i++) {
2151 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2154 "Allocation for Tx Queue %u failed\n", i);
2155 for (i--; i >= 0; i--)
2156 igb_free_tx_resources(&adapter->tx_ring[i]);
2161 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2162 int r_idx = i % adapter->num_tx_queues;
2163 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
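/* Editor's illustration: the table is filled round-robin, so with
 * num_tx_queues = 4 entries 0..3 map to rings 0..3, entry 4 wraps back
 * to ring 0, and so on for every absolute queue index. */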
2169 * igb_setup_tctl - configure the transmit control registers
2170 * @adapter: Board private structure
2172 void igb_setup_tctl(struct igb_adapter *adapter)
2174 struct e1000_hw *hw = &adapter->hw;
2177 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2178 wr32(E1000_TXDCTL(0), 0);
2180 /* Program the Transmit Control Register */
2181 tctl = rd32(E1000_TCTL);
2182 tctl &= ~E1000_TCTL_CT;
2183 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2184 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2186 igb_config_collision_dist(hw);
2188 /* Enable transmits */
2189 tctl |= E1000_TCTL_EN;
2191 wr32(E1000_TCTL, tctl);
2195 * igb_configure_tx_ring - Configure transmit ring after Reset
2196 * @adapter: board private structure
2197 * @ring: tx ring to configure
2199 * Configure a transmit ring after a reset.
2201 void igb_configure_tx_ring(struct igb_adapter *adapter,
2202 struct igb_ring *ring)
2204 struct e1000_hw *hw = &adapter->hw;
2206 u64 tdba = ring->dma;
2207 int reg_idx = ring->reg_idx;
2209 /* disable the queue */
2210 txdctl = rd32(E1000_TXDCTL(reg_idx));
2211 wr32(E1000_TXDCTL(reg_idx),
2212 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2216 wr32(E1000_TDLEN(reg_idx),
2217 ring->count * sizeof(union e1000_adv_tx_desc));
2218 wr32(E1000_TDBAL(reg_idx),
2219 tdba & 0x00000000ffffffffULL);
2220 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2222 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2223 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2224 writel(0, ring->head);
2225 writel(0, ring->tail);
2227 txdctl |= IGB_TX_PTHRESH;
2228 txdctl |= IGB_TX_HTHRESH << 8;
2229 txdctl |= IGB_TX_WTHRESH << 16;
2231 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2232 wr32(E1000_TXDCTL(reg_idx), txdctl);
2236 * igb_configure_tx - Configure transmit Unit after Reset
2237 * @adapter: board private structure
2239 * Configure the Tx unit of the MAC after a reset.
2241 static void igb_configure_tx(struct igb_adapter *adapter)
2245 for (i = 0; i < adapter->num_tx_queues; i++)
2246 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2250 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2251 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2253 * Returns 0 on success, negative on failure
2255 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2257 struct pci_dev *pdev = rx_ring->pdev;
2260 size = sizeof(struct igb_buffer) * rx_ring->count;
2261 rx_ring->buffer_info = vmalloc(size);
2262 if (!rx_ring->buffer_info)
2264 memset(rx_ring->buffer_info, 0, size);
2266 desc_len = sizeof(union e1000_adv_rx_desc);
2268 /* Round up to nearest 4K */
2269 rx_ring->size = rx_ring->count * desc_len;
2270 rx_ring->size = ALIGN(rx_ring->size, 4096);
2272 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2273 &rx_ring->dma);
2275 if (!rx_ring->desc)
2276 goto err;
2278 rx_ring->next_to_clean = 0;
2279 rx_ring->next_to_use = 0;
2284 vfree(rx_ring->buffer_info);
2285 rx_ring->buffer_info = NULL;
2286 dev_err(&pdev->dev, "Unable to allocate memory for "
2287 "the receive descriptor ring\n");
2292 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2293 * (Descriptors) for all queues
2294 * @adapter: board private structure
2296 * Return 0 on success, negative on failure
2298 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2300 struct pci_dev *pdev = adapter->pdev;
2303 for (i = 0; i < adapter->num_rx_queues; i++) {
2304 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2307 "Allocation for Rx Queue %u failed\n", i);
2308 for (i--; i >= 0; i--)
2309 igb_free_rx_resources(&adapter->rx_ring[i]);
2318 * igb_setup_mrqc - configure the multiple receive queue control registers
2319 * @adapter: Board private structure
2321 static void igb_setup_mrqc(struct igb_adapter *adapter)
2323 struct e1000_hw *hw = &adapter->hw;
2325 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2330 static const u8 rsshash[40] = {
2331 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2332 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2333 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2334 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2336 /* Fill out hash function seeds */
2337 for (j = 0; j < 10; j++) {
2338 u32 rsskey = rsshash[(j * 4)];
2339 rsskey |= rsshash[(j * 4) + 1] << 8;
2340 rsskey |= rsshash[(j * 4) + 2] << 16;
2341 rsskey |= rsshash[(j * 4) + 3] << 24;
2342 array_wr32(E1000_RSSRK(0), j, rsskey);
2345 num_rx_queues = adapter->rss_queues;
2347 if (adapter->vfs_allocated_count) {
2348 /* 82575 and 82576 support 2 RSS queues for VMDq */
2349 switch (hw->mac.type) {
2365 if (hw->mac.type == e1000_82575)
2366 shift = 6;
2369 for (j = 0; j < (32 * 4); j++) {
2370 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2371 if (shift2)
2372 reta.bytes[j & 3] |= num_rx_queues << shift2;
2373 if ((j & 3) == 3)
2374 wr32(E1000_RETA(j >> 2), reta.dword);
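/* Editor's illustration: each 32-bit RETA register packs four one-byte
 * entries, hence one register write per four table entries. With
 * num_rx_queues = 2 the entries alternate 0,1,0,1,..., each shifted
 * left so the queue index lands in the bits this MAC type reads. */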
2378 * Disable raw packet checksumming so that RSS hash is placed in
2379 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2380 * offloads as they are enabled by default
2382 rxcsum = rd32(E1000_RXCSUM);
2383 rxcsum |= E1000_RXCSUM_PCSD;
2385 if (adapter->hw.mac.type >= e1000_82576)
2386 /* Enable Receive Checksum Offload for SCTP */
2387 rxcsum |= E1000_RXCSUM_CRCOFL;
2389 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2390 wr32(E1000_RXCSUM, rxcsum);
2392 /* If VMDq is enabled then we set the appropriate mode for that, else
2393 * we default to RSS so that an RSS hash is calculated per packet even
2394 * if we are only using one queue */
2395 if (adapter->vfs_allocated_count) {
2396 if (hw->mac.type > e1000_82575) {
2397 /* Set the default pool for the PF's first queue */
2398 u32 vtctl = rd32(E1000_VT_CTL);
2399 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2400 E1000_VT_CTL_DISABLE_DEF_POOL);
2401 vtctl |= adapter->vfs_allocated_count <<
2402 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2403 wr32(E1000_VT_CTL, vtctl);
2405 if (adapter->rss_queues > 1)
2406 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2407 else
2408 mrqc = E1000_MRQC_ENABLE_VMDQ;
2409 } else {
2410 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2411 }
2412 igb_vmm_control(adapter);
2414 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2415 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2416 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2417 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2418 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2419 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2420 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2421 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2423 wr32(E1000_MRQC, mrqc);
2427 * igb_setup_rctl - configure the receive control registers
2428 * @adapter: Board private structure
2430 void igb_setup_rctl(struct igb_adapter *adapter)
2432 struct e1000_hw *hw = &adapter->hw;
2435 rctl = rd32(E1000_RCTL);
2437 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2438 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2440 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2441 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2444 * enable stripping of CRC. It's unlikely this will break BMC
2445 * redirection as it did with e1000. Newer features require
2446 * that the HW strips the CRC.
2448 rctl |= E1000_RCTL_SECRC;
2450 /* disable store bad packets and clear size bits. */
2451 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2453 /* enable LPE to prevent packets larger than max_frame_size */
2454 rctl |= E1000_RCTL_LPE;
2456 /* disable queue 0 to prevent tail write w/o re-config */
2457 wr32(E1000_RXDCTL(0), 0);
2459 /* Attention!!! For SR-IOV PF driver operations you must enable
2460 * queue drop for all VF and PF queues to prevent head-of-line blocking
2461 * if an untrusted VF does not provide descriptors to hardware.
2463 if (adapter->vfs_allocated_count) {
2464 /* set all queue drop enable bits */
2465 wr32(E1000_QDE, ALL_QUEUES);
2468 wr32(E1000_RCTL, rctl);
2471 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2474 struct e1000_hw *hw = &adapter->hw;
2477 /* if it isn't the PF, check to see if VFs are enabled and
2478 * increase the size to support VLAN tags */
2479 if (vfn < adapter->vfs_allocated_count &&
2480 adapter->vf_data[vfn].vlans_enabled)
2481 size += VLAN_TAG_SIZE;
2483 vmolr = rd32(E1000_VMOLR(vfn));
2484 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2485 vmolr |= size | E1000_VMOLR_LPE;
2486 wr32(E1000_VMOLR(vfn), vmolr);
2492 * igb_rlpml_set - set maximum receive packet size
2493 * @adapter: board private structure
2495 * Configure maximum receivable packet size.
2497 static void igb_rlpml_set(struct igb_adapter *adapter)
2499 u32 max_frame_size = adapter->max_frame_size;
2500 struct e1000_hw *hw = &adapter->hw;
2501 u16 pf_id = adapter->vfs_allocated_count;
2503 if (adapter->vlgrp)
2504 max_frame_size += VLAN_TAG_SIZE;
2506 /* if VFs are enabled, we set RLPML to the largest possible request
2507 * size and set the VMOLR RLPML to the size we need */
2508 if (pf_id) {
2509 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2510 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2511 }
2513 wr32(E1000_RLPML, max_frame_size);
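/* Worked example (editor's note): with a 1500-byte MTU,
 * max_frame_size is 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) = 1518
 * bytes, or 1522 once the VLAN tag allowance above is added. */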
2516 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2519 struct e1000_hw *hw = &adapter->hw;
2523 * This register exists only on 82576 and newer, so on older
2524 * hardware we exit and do nothing.
2526 if (hw->mac.type < e1000_82576)
2529 vmolr = rd32(E1000_VMOLR(vfn));
2530 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2531 if (aupe)
2532 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2533 else
2534 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2536 /* clear all bits that might not be set */
2537 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2539 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2540 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2542 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2543 * multicast traffic
2545 if (vfn <= adapter->vfs_allocated_count)
2546 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2548 wr32(E1000_VMOLR(vfn), vmolr);
2552 * igb_configure_rx_ring - Configure a receive ring after Reset
2553 * @adapter: board private structure
2554 * @ring: receive ring to be configured
2556 * Configure the Rx unit of the MAC after a reset.
2558 void igb_configure_rx_ring(struct igb_adapter *adapter,
2559 struct igb_ring *ring)
2561 struct e1000_hw *hw = &adapter->hw;
2562 u64 rdba = ring->dma;
2563 int reg_idx = ring->reg_idx;
2566 /* disable the queue */
2567 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2568 wr32(E1000_RXDCTL(reg_idx),
2569 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2571 /* Set DMA base address registers */
2572 wr32(E1000_RDBAL(reg_idx),
2573 rdba & 0x00000000ffffffffULL);
2574 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2575 wr32(E1000_RDLEN(reg_idx),
2576 ring->count * sizeof(union e1000_adv_rx_desc));
2578 /* initialize head and tail */
2579 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2580 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2581 writel(0, ring->head);
2582 writel(0, ring->tail);
2584 /* set descriptor configuration */
2585 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2586 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2587 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2588 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2589 srrctl |= IGB_RXBUFFER_16384 >>
2590 E1000_SRRCTL_BSIZEPKT_SHIFT;
2591 #else
2592 srrctl |= (PAGE_SIZE / 2) >>
2593 E1000_SRRCTL_BSIZEPKT_SHIFT;
2594 #endif
2595 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2597 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2598 E1000_SRRCTL_BSIZEPKT_SHIFT;
2599 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2602 wr32(E1000_SRRCTL(reg_idx), srrctl);
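/* Editor's illustration, assuming the 1 KB packet-buffer granularity
 * implied by E1000_SRRCTL_BSIZEPKT_SHIFT: a 2048-byte rx_buffer_len
 * takes the one-buffer path and programs ALIGN(2048, 1024) >> 10 = 2,
 * i.e. two 1 KB units, into the SRRCTL packet size field. */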
2604 /* set filtering for VMDQ pools */
2605 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2607 /* enable receive descriptor fetching */
2608 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2610 rxdctl &= 0xFFF00000;
2611 rxdctl |= IGB_RX_PTHRESH;
2612 rxdctl |= IGB_RX_HTHRESH << 8;
2613 rxdctl |= IGB_RX_WTHRESH << 16;
2614 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2618 * igb_configure_rx - Configure receive Unit after Reset
2619 * @adapter: board private structure
2621 * Configure the Rx unit of the MAC after a reset.
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2627 /* set UTA to appropriate mode */
2628 igb_set_uta(adapter);
2630 /* set the correct pool for the PF default MAC address in entry 0 */
2631 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632 adapter->vfs_allocated_count);
2634 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635 * the Base and Length of the Rx Descriptor Ring */
2636 for (i = 0; i < adapter->num_rx_queues; i++)
2637 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2641 * igb_free_tx_resources - Free Tx Resources per Queue
2642 * @tx_ring: Tx descriptor ring for a specific queue
2644 * Free all transmit software resources
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2648 igb_clean_tx_ring(tx_ring);
2650 vfree(tx_ring->buffer_info);
2651 tx_ring->buffer_info = NULL;
2653 /* if not set, then don't free */
2657 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658 tx_ring->desc, tx_ring->dma);
2660 tx_ring->desc = NULL;
2664 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665 * @adapter: board private structure
2667 * Free all transmit software resources
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2673 for (i = 0; i < adapter->num_tx_queues; i++)
2674 igb_free_tx_resources(&adapter->tx_ring[i]);
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678 struct igb_buffer *buffer_info)
2680 if (buffer_info->dma) {
2681 if (buffer_info->mapped_as_page)
2682 pci_unmap_page(tx_ring->pdev,
2684 buffer_info->length,
2687 pci_unmap_single(tx_ring->pdev,
2689 buffer_info->length,
2691 buffer_info->dma = 0;
2693 if (buffer_info->skb) {
2694 dev_kfree_skb_any(buffer_info->skb);
2695 buffer_info->skb = NULL;
2697 buffer_info->time_stamp = 0;
2698 buffer_info->length = 0;
2699 buffer_info->next_to_watch = 0;
2700 buffer_info->mapped_as_page = false;
2704 * igb_clean_tx_ring - Free Tx Buffers
2705 * @tx_ring: ring to be cleaned
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2709 struct igb_buffer *buffer_info;
2713 if (!tx_ring->buffer_info)
2715 /* Free all the Tx ring sk_buffs */
2717 for (i = 0; i < tx_ring->count; i++) {
2718 buffer_info = &tx_ring->buffer_info[i];
2719 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2722 size = sizeof(struct igb_buffer) * tx_ring->count;
2723 memset(tx_ring->buffer_info, 0, size);
2725 /* Zero out the descriptor ring */
2726 memset(tx_ring->desc, 0, tx_ring->size);
2728 tx_ring->next_to_use = 0;
2729 tx_ring->next_to_clean = 0;
2733 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734 * @adapter: board private structure
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2740 for (i = 0; i < adapter->num_tx_queues; i++)
2741 igb_clean_tx_ring(&adapter->tx_ring[i]);
2745 * igb_free_rx_resources - Free Rx Resources
2746 * @rx_ring: ring to clean the resources from
2748 * Free all receive software resources
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2752 igb_clean_rx_ring(rx_ring);
2754 vfree(rx_ring->buffer_info);
2755 rx_ring->buffer_info = NULL;
2757 /* if not set, then don't free */
2761 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762 rx_ring->desc, rx_ring->dma);
2764 rx_ring->desc = NULL;
2768 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769 * @adapter: board private structure
2771 * Free all receive software resources
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2777 for (i = 0; i < adapter->num_rx_queues; i++)
2778 igb_free_rx_resources(&adapter->rx_ring[i]);
2782 * igb_clean_rx_ring - Free Rx Buffers per Queue
2783 * @rx_ring: ring to free buffers from
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2787 struct igb_buffer *buffer_info;
2791 if (!rx_ring->buffer_info)
2794 /* Free all the Rx ring sk_buffs */
2795 for (i = 0; i < rx_ring->count; i++) {
2796 buffer_info = &rx_ring->buffer_info[i];
2797 if (buffer_info->dma) {
2798 pci_unmap_single(rx_ring->pdev,
2800 rx_ring->rx_buffer_len,
2801 PCI_DMA_FROMDEVICE);
2802 buffer_info->dma = 0;
2805 if (buffer_info->skb) {
2806 dev_kfree_skb(buffer_info->skb);
2807 buffer_info->skb = NULL;
2809 if (buffer_info->page_dma) {
2810 pci_unmap_page(rx_ring->pdev,
2811 buffer_info->page_dma,
2813 PCI_DMA_FROMDEVICE);
2814 buffer_info->page_dma = 0;
2816 if (buffer_info->page) {
2817 put_page(buffer_info->page);
2818 buffer_info->page = NULL;
2819 buffer_info->page_offset = 0;
2823 size = sizeof(struct igb_buffer) * rx_ring->count;
2824 memset(rx_ring->buffer_info, 0, size);
2826 /* Zero out the descriptor ring */
2827 memset(rx_ring->desc, 0, rx_ring->size);
2829 rx_ring->next_to_clean = 0;
2830 rx_ring->next_to_use = 0;
2834 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835 * @adapter: board private structure
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2841 for (i = 0; i < adapter->num_rx_queues; i++)
2842 igb_clean_rx_ring(&adapter->rx_ring[i]);
2846 * igb_set_mac - Change the Ethernet Address of the NIC
2847 * @netdev: network interface device structure
2848 * @p: pointer to an address structure
2850 * Returns 0 on success, negative on failure
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2854 struct igb_adapter *adapter = netdev_priv(netdev);
2855 struct e1000_hw *hw = &adapter->hw;
2856 struct sockaddr *addr = p;
2858 if (!is_valid_ether_addr(addr->sa_data))
2859 return -EADDRNOTAVAIL;
2861 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2864 /* set the correct pool for the new PF MAC address in entry 0 */
2865 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866 adapter->vfs_allocated_count);
2872 * igb_write_mc_addr_list - write multicast addresses to MTA
2873 * @netdev: network interface device structure
2875 * Writes multicast address list to the MTA hash table.
2876 * Returns: -ENOMEM on failure
2877 * 0 on no addresses written
2878 * X on writing X addresses to MTA
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2882 struct igb_adapter *adapter = netdev_priv(netdev);
2883 struct e1000_hw *hw = &adapter->hw;
2884 struct dev_mc_list *mc_ptr = netdev->mc_list;
2889 if (netdev_mc_empty(netdev)) {
2890 /* nothing to program, so clear mc list */
2891 igb_update_mc_addr_list(hw, NULL, 0);
2892 igb_restore_vf_multicasts(adapter);
2896 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2900 /* set vmolr receive overflow multicast bit */
2901 vmolr |= E1000_VMOLR_ROMPE;
2903 /* The shared function expects a packed array of only addresses. */
2904 mc_ptr = netdev->mc_list;
2906 for (i = 0; i < netdev_mc_count(netdev); i++) {
2909 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2910 mc_ptr = mc_ptr->next;
2912 igb_update_mc_addr_list(hw, mta_list, i);
2915 return netdev_mc_count(netdev);
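/* Editor's note: igb_update_mc_addr_list() expects a flat byte array;
 * address i occupies the ETH_ALEN bytes starting at
 * mta_list + i * ETH_ALEN, with no per-entry framing. */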
2919 * igb_write_uc_addr_list - write unicast addresses to RAR table
2920 * @netdev: network interface device structure
2922 * Writes unicast address list to the RAR table.
2923 * Returns: -ENOMEM on failure/insufficient address space
2924 * 0 on no addresses written
2925 * X on writing X addresses to the RAR table
2927 static int igb_write_uc_addr_list(struct net_device *netdev)
2929 struct igb_adapter *adapter = netdev_priv(netdev);
2930 struct e1000_hw *hw = &adapter->hw;
2931 unsigned int vfn = adapter->vfs_allocated_count;
2932 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2935 /* return ENOMEM indicating insufficient memory for addresses */
2936 if (netdev_uc_count(netdev) > rar_entries)
2937 return -ENOMEM;
2939 if (!netdev_uc_empty(netdev) && rar_entries) {
2940 struct netdev_hw_addr *ha;
2942 netdev_for_each_uc_addr(ha, netdev) {
2945 igb_rar_set_qsel(adapter, ha->addr,
2951 /* write the addresses in reverse order to avoid write combining */
2952 for (; rar_entries > 0 ; rar_entries--) {
2953 wr32(E1000_RAH(rar_entries), 0);
2954 wr32(E1000_RAL(rar_entries), 0);
2962 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2963 * @netdev: network interface device structure
2965 * The set_rx_mode entry point is called whenever the unicast or multicast
2966 * address lists or the network interface flags are updated. This routine is
2967 * responsible for configuring the hardware for proper unicast, multicast,
2968 * promiscuous mode, and all-multi behavior.
2970 static void igb_set_rx_mode(struct net_device *netdev)
2972 struct igb_adapter *adapter = netdev_priv(netdev);
2973 struct e1000_hw *hw = &adapter->hw;
2974 unsigned int vfn = adapter->vfs_allocated_count;
2975 u32 rctl, vmolr = 0;
2978 /* Check for Promiscuous and All Multicast modes */
2979 rctl = rd32(E1000_RCTL);
2981 /* clear the affected bits */
2982 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2984 if (netdev->flags & IFF_PROMISC) {
2985 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2986 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2987 } else {
2988 if (netdev->flags & IFF_ALLMULTI) {
2989 rctl |= E1000_RCTL_MPE;
2990 vmolr |= E1000_VMOLR_MPME;
2993 * Write addresses to the MTA; if the attempt fails
2994 * then we should just turn on promiscuous mode so
2995 * that we can at least receive multicast traffic
2997 count = igb_write_mc_addr_list(netdev);
2998 if (count < 0) {
2999 rctl |= E1000_RCTL_MPE;
3000 vmolr |= E1000_VMOLR_MPME;
3001 } else if (count) {
3002 vmolr |= E1000_VMOLR_ROMPE;
3003 }
3006 * Write addresses to available RAR registers; if there is not
3007 * sufficient space to store all the addresses then enable
3008 * unicast promiscuous mode
3010 count = igb_write_uc_addr_list(netdev);
3011 if (count < 0) {
3012 rctl |= E1000_RCTL_UPE;
3013 vmolr |= E1000_VMOLR_ROPE;
3015 rctl |= E1000_RCTL_VFE;
3017 wr32(E1000_RCTL, rctl);
3020 * In order to support SR-IOV and eventually VMDq it is necessary to set
3021 * the VMOLR to enable the appropriate modes. Without this workaround
3022 * VLAN tags would not be stripped from frames that arrive only because
3023 * we are the default pool.
3025 if (hw->mac.type < e1000_82576)
3028 vmolr |= rd32(E1000_VMOLR(vfn)) &
3029 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3030 wr32(E1000_VMOLR(vfn), vmolr);
3031 igb_restore_vf_multicasts(adapter);
3034 /* Need to wait a few seconds after link up to get diagnostic information
3035 * from the phy */
3036 static void igb_update_phy_info(unsigned long data)
3038 struct igb_adapter *adapter = (struct igb_adapter *) data;
3039 igb_get_phy_info(&adapter->hw);
3043 * igb_has_link - check shared code for link and determine up/down
3044 * @adapter: pointer to driver private info
3046 bool igb_has_link(struct igb_adapter *adapter)
3048 struct e1000_hw *hw = &adapter->hw;
3049 bool link_active = false;
3052 /* get_link_status is set on LSC (link status) interrupt or
3053 * rx sequence error interrupt. link_active will stay
3054 * false until check_for_link establishes link
3055 * for copper adapters ONLY
3057 switch (hw->phy.media_type) {
3058 case e1000_media_type_copper:
3059 if (hw->mac.get_link_status) {
3060 ret_val = hw->mac.ops.check_for_link(hw);
3061 link_active = !hw->mac.get_link_status;
3066 case e1000_media_type_internal_serdes:
3067 ret_val = hw->mac.ops.check_for_link(hw);
3068 link_active = hw->mac.serdes_has_link;
3071 case e1000_media_type_unknown:
3079 * igb_watchdog - Timer Call-back
3080 * @data: pointer to adapter cast into an unsigned long
3082 static void igb_watchdog(unsigned long data)
3084 struct igb_adapter *adapter = (struct igb_adapter *)data;
3085 /* Do the rest outside of interrupt context */
3086 schedule_work(&adapter->watchdog_task);
3089 static void igb_watchdog_task(struct work_struct *work)
3091 struct igb_adapter *adapter = container_of(work,
3092 struct igb_adapter,
3093 watchdog_task);
3094 struct e1000_hw *hw = &adapter->hw;
3095 struct net_device *netdev = adapter->netdev;
3099 link = igb_has_link(adapter);
3101 if (!netif_carrier_ok(netdev)) {
3103 hw->mac.ops.get_speed_and_duplex(hw,
3104 &adapter->link_speed,
3105 &adapter->link_duplex);
3107 ctrl = rd32(E1000_CTRL);
3108 /* Link status message must follow this format */
3109 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3110 "Flow Control: %s\n",
3111 netdev->name,
3112 adapter->link_speed,
3113 adapter->link_duplex == FULL_DUPLEX ?
3114 "Full Duplex" : "Half Duplex",
3115 ((ctrl & E1000_CTRL_TFCE) &&
3116 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3117 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3118 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3120 /* tweak tx_queue_len according to speed/duplex and
3121 * adjust the timeout factor */
3122 netdev->tx_queue_len = adapter->tx_queue_len;
3123 adapter->tx_timeout_factor = 1;
3124 switch (adapter->link_speed) {
3126 netdev->tx_queue_len = 10;
3127 adapter->tx_timeout_factor = 14;
3130 netdev->tx_queue_len = 100;
3131 /* maybe add some timeout factor ? */
3135 netif_carrier_on(netdev);
3137 igb_ping_all_vfs(adapter);
3139 /* link state has changed, schedule phy info update */
3140 if (!test_bit(__IGB_DOWN, &adapter->state))
3141 mod_timer(&adapter->phy_info_timer,
3142 round_jiffies(jiffies + 2 * HZ));
3145 if (netif_carrier_ok(netdev)) {
3146 adapter->link_speed = 0;
3147 adapter->link_duplex = 0;
3148 /* Link status message must follow this format */
3149 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3150 netdev->name);
3151 netif_carrier_off(netdev);
3153 igb_ping_all_vfs(adapter);
3155 /* link state has changed, schedule phy info update */
3156 if (!test_bit(__IGB_DOWN, &adapter->state))
3157 mod_timer(&adapter->phy_info_timer,
3158 round_jiffies(jiffies + 2 * HZ));
3162 igb_update_stats(adapter);
3163 igb_update_adaptive(hw);
3165 for (i = 0; i < adapter->num_tx_queues; i++) {
3166 struct igb_ring *tx_ring = &adapter->tx_ring[i];
3167 if (!netif_carrier_ok(netdev)) {
3168 /* We've lost link, so the controller stops DMA,
3169 * but we've got queued Tx work that's never going
3170 * to get done, so reset controller to flush Tx.
3171 * (Do the reset outside of interrupt context). */
3172 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3173 adapter->tx_timeout_count++;
3174 schedule_work(&adapter->reset_task);
3175 /* return immediately since reset is imminent */
3180 /* Force detection of hung controller every watchdog period */
3181 tx_ring->detect_tx_hung = true;
3184 /* Cause software interrupt to ensure rx ring is cleaned */
3185 if (adapter->msix_entries) {
3187 for (i = 0; i < adapter->num_q_vectors; i++) {
3188 struct igb_q_vector *q_vector = adapter->q_vector[i];
3189 eics |= q_vector->eims_value;
3191 wr32(E1000_EICS, eics);
3192 } else {
3193 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3194 }
3196 /* Reset the timer */
3197 if (!test_bit(__IGB_DOWN, &adapter->state))
3198 mod_timer(&adapter->watchdog_timer,
3199 round_jiffies(jiffies + 2 * HZ));
3202 enum latency_range {
3203 lowest_latency = 0,
3204 low_latency = 1,
3205 bulk_latency = 2,
3206 latency_invalid = 255
3210 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3212 * Stores a new ITR value based strictly on packet size. This
3213 * algorithm is less sophisticated than that used in igb_update_itr,
3214 * due to the difficulty of synchronizing statistics across multiple
3215 * receive rings. The divisors and thresholds used by this function
3216 * were determined based on theoretical maximum wire speed and testing
3217 * data, in order to minimize response time while increasing bulk
3218 * throughput.
3219 * This functionality is controlled by the InterruptThrottleRate module
3220 * parameter (see igb_param.c)
3221 * NOTE: This function is called only when operating in a multiqueue
3222 * receive environment.
3223 * @q_vector: pointer to q_vector
3225 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3227 int new_val = q_vector->itr_val;
3228 int avg_wire_size = 0;
3229 struct igb_adapter *adapter = q_vector->adapter;
3231 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3232 * ints/sec - ITR timer value of 120 ticks.
3234 if (adapter->link_speed != SPEED_1000) {
3239 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3240 struct igb_ring *ring = q_vector->rx_ring;
3241 avg_wire_size = ring->total_bytes / ring->total_packets;
3244 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3245 struct igb_ring *ring = q_vector->tx_ring;
3246 avg_wire_size = max_t(u32, avg_wire_size,
3247 (ring->total_bytes /
3248 ring->total_packets));
3251 /* if avg_wire_size isn't set no work was done */
3255 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3256 avg_wire_size += 24;
3258 /* Don't starve jumbo frames */
3259 avg_wire_size = min(avg_wire_size, 3000);
3261 /* Give a little boost to mid-size frames */
3262 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3263 new_val = avg_wire_size / 3;
3265 new_val = avg_wire_size / 2;
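/* Editor's illustration: a flow averaging 576 bytes per packet becomes
 * avg_wire_size = 600 after the 24-byte overhead, falls in the
 * mid-size band, and yields new_val = 600 / 3 = 200; a 1500-byte flow
 * (1524 with overhead) takes the other branch and yields 762. */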
3268 if (new_val != q_vector->itr_val) {
3269 q_vector->itr_val = new_val;
3270 q_vector->set_itr = 1;
3273 if (q_vector->rx_ring) {
3274 q_vector->rx_ring->total_bytes = 0;
3275 q_vector->rx_ring->total_packets = 0;
3277 if (q_vector->tx_ring) {
3278 q_vector->tx_ring->total_bytes = 0;
3279 q_vector->tx_ring->total_packets = 0;
3284 * igb_update_itr - update the dynamic ITR value based on statistics
3285 * Stores a new ITR value based on packets and byte
3286 * counts during the last interrupt. The advantage of per interrupt
3287 * computation is faster updates and more accurate ITR for the current
3288 * traffic pattern. Constants in this function were computed
3289 * based on theoretical maximum wire speed and thresholds were set based
3290 * on testing data as well as attempting to minimize response time
3291 * while increasing bulk throughput.
3292 * This functionality is controlled by the InterruptThrottleRate module
3293 * parameter (see igb_param.c)
3294 * NOTE: These calculations are only valid when operating in a single-
3295 * queue environment.
3296 * @adapter: pointer to adapter
3297 * @itr_setting: current q_vector->itr_val
3298 * @packets: the number of packets during this measurement interval
3299 * @bytes: the number of bytes during this measurement interval
3301 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3302 int packets, int bytes)
3304 unsigned int retval = itr_setting;
3307 goto update_itr_done;
3309 switch (itr_setting) {
3310 case lowest_latency:
3311 /* handle TSO and jumbo frames */
3312 if (bytes/packets > 8000)
3313 retval = bulk_latency;
3314 else if ((packets < 5) && (bytes > 512))
3315 retval = low_latency;
3317 case low_latency: /* 50 usec aka 20000 ints/s */
3318 if (bytes > 10000) {
3319 /* this if handles the TSO accounting */
3320 if (bytes/packets > 8000) {
3321 retval = bulk_latency;
3322 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3323 retval = bulk_latency;
3324 } else if ((packets > 35)) {
3325 retval = lowest_latency;
3327 } else if (bytes/packets > 2000) {
3328 retval = bulk_latency;
3329 } else if (packets <= 2 && bytes < 512) {
3330 retval = lowest_latency;
3333 case bulk_latency: /* 250 usec aka 4000 ints/s */
3334 if (bytes > 25000) {
3336 retval = low_latency;
3337 } else if (bytes < 1500) {
3338 retval = low_latency;
3347 static void igb_set_itr(struct igb_adapter *adapter)
3349 struct igb_q_vector *q_vector = adapter->q_vector[0];
3351 u32 new_itr = q_vector->itr_val;
3353 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3354 if (adapter->link_speed != SPEED_1000) {
3360 adapter->rx_itr = igb_update_itr(adapter,
3362 adapter->rx_ring->total_packets,
3363 adapter->rx_ring->total_bytes);
3365 adapter->tx_itr = igb_update_itr(adapter,
3367 adapter->tx_ring->total_packets,
3368 adapter->tx_ring->total_bytes);
3369 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3371 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3372 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3373 current_itr = low_latency;
3375 switch (current_itr) {
3376 /* counts and packets in update_itr are dependent on these numbers */
3377 case lowest_latency:
3378 new_itr = 56; /* aka 70,000 ints/sec */
3381 new_itr = 196; /* aka 20,000 ints/sec */
3384 new_itr = 980; /* aka 4,000 ints/sec */
3391 adapter->rx_ring->total_bytes = 0;
3392 adapter->rx_ring->total_packets = 0;
3393 adapter->tx_ring->total_bytes = 0;
3394 adapter->tx_ring->total_packets = 0;
3396 if (new_itr != q_vector->itr_val) {
3397 /* this attempts to bias the interrupt rate towards Bulk
3398 * by adding intermediate steps when the interrupt rate is
3399 * increasing */
3400 new_itr = new_itr > q_vector->itr_val ?
3401 max((new_itr * q_vector->itr_val) /
3402 (new_itr + (q_vector->itr_val >> 2)),
3403 new_itr) :
3404 new_itr;
3405 /* Don't write the value here; it resets the adapter's
3406 * internal timer, and causes us to delay far longer than
3407 * we should between interrupts. Instead, we write the ITR
3408 * value at the beginning of the next interrupt so the timing
3409 * ends up being correct.
3411 q_vector->itr_val = new_itr;
3412 q_vector->set_itr = 1;
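/* Editor's note: the constants above imply roughly 256 ns per ITR
 * unit; e.g. 980 * 256 ns is about 251 us between interrupts, matching
 * the quoted ~4,000 ints/sec. */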
3418 #define IGB_TX_FLAGS_CSUM 0x00000001
3419 #define IGB_TX_FLAGS_VLAN 0x00000002
3420 #define IGB_TX_FLAGS_TSO 0x00000004
3421 #define IGB_TX_FLAGS_IPV4 0x00000008
3422 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3423 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3424 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3426 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3427 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3429 struct e1000_adv_tx_context_desc *context_desc;
3432 struct igb_buffer *buffer_info;
3433 u32 info = 0, tu_cmd = 0;
3434 u32 mss_l4len_idx, l4len;
3437 if (skb_header_cloned(skb)) {
3438 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3443 l4len = tcp_hdrlen(skb);
3446 if (skb->protocol == htons(ETH_P_IP)) {
3447 struct iphdr *iph = ip_hdr(skb);
3450 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3454 } else if (skb_is_gso_v6(skb)) {
3455 ipv6_hdr(skb)->payload_len = 0;
3456 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3457 &ipv6_hdr(skb)->daddr,
3461 i = tx_ring->next_to_use;
3463 buffer_info = &tx_ring->buffer_info[i];
3464 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3465 /* VLAN MACLEN IPLEN */
3466 if (tx_flags & IGB_TX_FLAGS_VLAN)
3467 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3468 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3469 *hdr_len += skb_network_offset(skb);
3470 info |= skb_network_header_len(skb);
3471 *hdr_len += skb_network_header_len(skb);
3472 context_desc->vlan_macip_lens = cpu_to_le32(info);
3474 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3475 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3477 if (skb->protocol == htons(ETH_P_IP))
3478 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3479 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3481 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3484 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3485 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3487 /* For 82575, context index must be unique per ring. */
3488 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3489 mss_l4len_idx |= tx_ring->reg_idx << 4;
3491 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3492 context_desc->seqnum_seed = 0;
3494 buffer_info->time_stamp = jiffies;
3495 buffer_info->next_to_watch = i;
3496 buffer_info->dma = 0;
3498 if (i == tx_ring->count)
3501 tx_ring->next_to_use = i;
3506 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3507 struct sk_buff *skb, u32 tx_flags)
3509 struct e1000_adv_tx_context_desc *context_desc;
3510 struct pci_dev *pdev = tx_ring->pdev;
3511 struct igb_buffer *buffer_info;
3512 u32 info = 0, tu_cmd = 0;
3515 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3516 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3517 i = tx_ring->next_to_use;
3518 buffer_info = &tx_ring->buffer_info[i];
3519 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3521 if (tx_flags & IGB_TX_FLAGS_VLAN)
3522 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3524 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3525 if (skb->ip_summed == CHECKSUM_PARTIAL)
3526 info |= skb_network_header_len(skb);
3528 context_desc->vlan_macip_lens = cpu_to_le32(info);
3530 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3532 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3535 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3536 const struct vlan_ethhdr *vhdr =
3537 (const struct vlan_ethhdr*)skb->data;
3539 protocol = vhdr->h_vlan_encapsulated_proto;
3541 protocol = skb->protocol;
3545 case cpu_to_be16(ETH_P_IP):
3546 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3547 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3548 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3549 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3550 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3552 case cpu_to_be16(ETH_P_IPV6):
3553 /* XXX what about other V6 headers?? */
3554 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3555 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3556 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3557 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3560 if (unlikely(net_ratelimit()))
3561 dev_warn(&pdev->dev,
3562 "partial checksum but proto=%x!\n",
3568 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3569 context_desc->seqnum_seed = 0;
3570 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3571 context_desc->mss_l4len_idx =
3572 cpu_to_le32(tx_ring->reg_idx << 4);
3574 buffer_info->time_stamp = jiffies;
3575 buffer_info->next_to_watch = i;
3576 buffer_info->dma = 0;
3579 if (i == tx_ring->count)
3581 tx_ring->next_to_use = i;
3588 #define IGB_MAX_TXD_PWR 16
3589 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
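/* Editor's note: IGB_MAX_DATA_PER_TXD is 1 << 16 = 65536 bytes, so
 * even a 9018-byte jumbo frame head fits in one data descriptor; the
 * BUG_ON() checks below enforce this per-descriptor limit. */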
3591 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3594 struct igb_buffer *buffer_info;
3595 struct pci_dev *pdev = tx_ring->pdev;
3596 unsigned int len = skb_headlen(skb);
3597 unsigned int count = 0, i;
3600 i = tx_ring->next_to_use;
3602 buffer_info = &tx_ring->buffer_info[i];
3603 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3604 buffer_info->length = len;
3605 /* set time_stamp *before* dma to help avoid a possible race */
3606 buffer_info->time_stamp = jiffies;
3607 buffer_info->next_to_watch = i;
3608 buffer_info->dma = pci_map_single(pdev, skb->data, len,
3609 PCI_DMA_TODEVICE);
3610 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3613 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3614 struct skb_frag_struct *frag;
3618 if (i == tx_ring->count)
3621 frag = &skb_shinfo(skb)->frags[f];
3624 buffer_info = &tx_ring->buffer_info[i];
3625 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3626 buffer_info->length = len;
3627 buffer_info->time_stamp = jiffies;
3628 buffer_info->next_to_watch = i;
3629 buffer_info->mapped_as_page = true;
3630 buffer_info->dma = pci_map_page(pdev,
3631 frag->page,
3632 frag->page_offset,
3633 len,
3634 PCI_DMA_TODEVICE);
3635 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3640 tx_ring->buffer_info[i].skb = skb;
3641 tx_ring->buffer_info[first].next_to_watch = i;
3646 dev_err(&pdev->dev, "TX DMA map failed\n");
3648 /* clear timestamp and dma mappings for failed buffer_info mapping */
3649 buffer_info->dma = 0;
3650 buffer_info->time_stamp = 0;
3651 buffer_info->length = 0;
3652 buffer_info->next_to_watch = 0;
3653 buffer_info->mapped_as_page = false;
3656 /* clear timestamp and dma mappings for remaining portion of packet */
3657 while (count >= 0) {
3658 count--;
3659 i--;
3660 if (i < 0)
3661 i += tx_ring->count;
3662 buffer_info = &tx_ring->buffer_info[i];
3663 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3669 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3670 int tx_flags, int count, u32 paylen,
3673 union e1000_adv_tx_desc *tx_desc;
3674 struct igb_buffer *buffer_info;
3675 u32 olinfo_status = 0, cmd_type_len;
3676 unsigned int i = tx_ring->next_to_use;
3678 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3679 E1000_ADVTXD_DCMD_DEXT);
3681 if (tx_flags & IGB_TX_FLAGS_VLAN)
3682 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3684 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3685 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3687 if (tx_flags & IGB_TX_FLAGS_TSO) {
3688 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3690 /* insert tcp checksum */
3691 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3693 /* insert ip checksum */
3694 if (tx_flags & IGB_TX_FLAGS_IPV4)
3695 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3697 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3698 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3701 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3702 (tx_flags & (IGB_TX_FLAGS_CSUM |
3704 IGB_TX_FLAGS_VLAN)))
3705 olinfo_status |= tx_ring->reg_idx << 4;
3707 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3710 buffer_info = &tx_ring->buffer_info[i];
3711 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3712 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3713 tx_desc->read.cmd_type_len =
3714 cpu_to_le32(cmd_type_len | buffer_info->length);
3715 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3718 if (i == tx_ring->count)
3720 } while (count > 0);
3722 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3723 /* Force memory writes to complete before letting h/w
3724 * know there are new descriptors to fetch. (Only
3725 * applicable for weak-ordered memory model archs,
3726 * such as IA-64). */
3729 tx_ring->next_to_use = i;
3730 writel(i, tx_ring->tail);
3731 /* we need this if more than one processor can write to our tail
3732 * at a time; it synchronizes IO on IA64/Altix systems */
3736 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3738 struct net_device *netdev = tx_ring->netdev;
3740 netif_stop_subqueue(netdev, tx_ring->queue_index);
3742 /* Herbert's original patch had:
3743 * smp_mb__after_netif_stop_queue();
3744 * but since that doesn't exist yet, just open code it. */
3747 /* We need to check again in case another CPU has just
3748 * made room available. */
3749 if (igb_desc_unused(tx_ring) < size)
3753 netif_wake_subqueue(netdev, tx_ring->queue_index);
3754 tx_ring->tx_stats.restart_queue++;
3758 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3760 if (igb_desc_unused(tx_ring) >= size)
3762 return __igb_maybe_stop_tx(tx_ring, size);
3765 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3766 struct igb_ring *tx_ring)
3768 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3770 unsigned int tx_flags = 0;
3773 union skb_shared_tx *shtx = skb_tx(skb);
3775 /* need: 1 descriptor per page,
3776 * + 2 desc gap to keep tail from touching head,
3777 * + 1 desc for skb->data,
3778 * + 1 desc for context descriptor,
3779 * otherwise try next time */
3780 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3781 /* this is a hard error */
3782 return NETDEV_TX_BUSY;
3785 if (unlikely(shtx->hardware)) {
3786 shtx->in_progress = 1;
3787 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3790 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3791 tx_flags |= IGB_TX_FLAGS_VLAN;
3792 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3795 if (skb->protocol == htons(ETH_P_IP))
3796 tx_flags |= IGB_TX_FLAGS_IPV4;
3798 first = tx_ring->next_to_use;
3799 if (skb_is_gso(skb)) {
3800 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3803 dev_kfree_skb_any(skb);
3804 return NETDEV_TX_OK;
3809 tx_flags |= IGB_TX_FLAGS_TSO;
3810 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3811 (skb->ip_summed == CHECKSUM_PARTIAL))
3812 tx_flags |= IGB_TX_FLAGS_CSUM;
3815 * count reflects descriptors mapped; if 0 or less then a mapping error
3816 * has occurred and we need to rewind the descriptor queue
3818 count = igb_tx_map_adv(tx_ring, skb, first);
3820 dev_kfree_skb_any(skb);
3821 tx_ring->buffer_info[first].time_stamp = 0;
3822 tx_ring->next_to_use = first;
3823 return NETDEV_TX_OK;
3826 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3828 /* Make sure there is space in the ring for the next send. */
3829 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
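/* Editor's illustration of the descriptor budget: one descriptor per
 * page fragment, plus one for skb->data, one for the context
 * descriptor, and a two-descriptor gap to keep tail from touching
 * head; an skb with 3 fragments therefore needs 3 + 4 = 7 free
 * descriptors before transmission is attempted. */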
3831 return NETDEV_TX_OK;
3834 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3835 struct net_device *netdev)
3837 struct igb_adapter *adapter = netdev_priv(netdev);
3838 struct igb_ring *tx_ring;
3841 if (test_bit(__IGB_DOWN, &adapter->state)) {
3842 dev_kfree_skb_any(skb);
3843 return NETDEV_TX_OK;
3846 if (skb->len <= 0) {
3847 dev_kfree_skb_any(skb);
3848 return NETDEV_TX_OK;
3851 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3852 tx_ring = adapter->multi_tx_table[r_idx];
3854 /* This goes back to the question of how to logically map a tx queue
3855 * to a flow. Right now, performance is impacted slightly negatively
3856 * if using multiple tx queues. If the stack breaks away from a
3857 * single qdisc implementation, we can look at this again. */
3858 return igb_xmit_frame_ring_adv(skb, tx_ring);
3862 * igb_tx_timeout - Respond to a Tx Hang
3863 * @netdev: network interface device structure
3865 static void igb_tx_timeout(struct net_device *netdev)
3867 struct igb_adapter *adapter = netdev_priv(netdev);
3868 struct e1000_hw *hw = &adapter->hw;
3870 /* Do the reset outside of interrupt context */
3871 adapter->tx_timeout_count++;
3873 if (hw->mac.type == e1000_82580)
3874 hw->dev_spec._82575.global_device_reset = true;
3876 schedule_work(&adapter->reset_task);
3877 wr32(E1000_EICS,
3878 (adapter->eims_enable_mask & ~adapter->eims_other));
3881 static void igb_reset_task(struct work_struct *work)
3883 struct igb_adapter *adapter;
3884 adapter = container_of(work, struct igb_adapter, reset_task);
3886 igb_reinit_locked(adapter);
3890 * igb_get_stats - Get System Network Statistics
3891 * @netdev: network interface device structure
3893 * Returns the address of the device statistics structure.
3894 * The statistics are actually updated from the timer callback.
3896 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3898 /* only return the current stats */
3899 return &netdev->stats;
3903 * igb_change_mtu - Change the Maximum Transfer Unit
3904 * @netdev: network interface device structure
3905 * @new_mtu: new value for maximum frame size
3907 * Returns 0 on success, negative on failure
3909 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3911 struct igb_adapter *adapter = netdev_priv(netdev);
3912 struct pci_dev *pdev = adapter->pdev;
3913 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3914 u32 rx_buffer_len, i;
3916 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3917 dev_err(&pdev->dev, "Invalid MTU setting\n");
3921 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3922 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3926 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3929 /* igb_down has a dependency on max_frame_size */
3930 adapter->max_frame_size = max_frame;
3932 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3933 * means we reserve 2 more; this pushes us to allocate from the next
3934 * larger slab size,
3935 * i.e. RXBUFFER_2048 --> size-4096 slab
3938 if (max_frame <= IGB_RXBUFFER_1024)
3939 rx_buffer_len = IGB_RXBUFFER_1024;
3940 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3941 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3943 rx_buffer_len = IGB_RXBUFFER_128;
3945 if (netif_running(netdev))
3948 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3949 netdev->mtu, new_mtu);
3950 netdev->mtu = new_mtu;
3952 for (i = 0; i < adapter->num_rx_queues; i++)
3953 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3955 if (netif_running(netdev))
3960 clear_bit(__IGB_RESETTING, &adapter->state);
3966 * igb_update_stats - Update the board statistics counters
3967 * @adapter: board private structure
3970 void igb_update_stats(struct igb_adapter *adapter)
3972 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3973 struct e1000_hw *hw = &adapter->hw;
3974 struct pci_dev *pdev = adapter->pdev;
3980 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3983 * Prevent stats update while adapter is being reset, or if the pci
3984 * connection is down.
3986 if (adapter->link_speed == 0)
3988 if (pci_channel_offline(pdev))
3993 for (i = 0; i < adapter->num_rx_queues; i++) {
3994 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3995 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3996 net_stats->rx_fifo_errors += rqdpc_tmp;
3997 bytes += adapter->rx_ring[i].rx_stats.bytes;
3998 packets += adapter->rx_ring[i].rx_stats.packets;
4001 net_stats->rx_bytes = bytes;
4002 net_stats->rx_packets = packets;
4006 for (i = 0; i < adapter->num_tx_queues; i++) {
4007 bytes += adapter->tx_ring[i].tx_stats.bytes;
4008 packets += adapter->tx_ring[i].tx_stats.packets;
4010 net_stats->tx_bytes = bytes;
4011 net_stats->tx_packets = packets;
4013 /* read stats registers */
4014 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4015 adapter->stats.gprc += rd32(E1000_GPRC);
4016 adapter->stats.gorc += rd32(E1000_GORCL);
4017 rd32(E1000_GORCH); /* clear GORCL */
4018 adapter->stats.bprc += rd32(E1000_BPRC);
4019 adapter->stats.mprc += rd32(E1000_MPRC);
4020 adapter->stats.roc += rd32(E1000_ROC);
4022 adapter->stats.prc64 += rd32(E1000_PRC64);
4023 adapter->stats.prc127 += rd32(E1000_PRC127);
4024 adapter->stats.prc255 += rd32(E1000_PRC255);
4025 adapter->stats.prc511 += rd32(E1000_PRC511);
4026 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4027 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4028 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4029 adapter->stats.sec += rd32(E1000_SEC);
4031 adapter->stats.mpc += rd32(E1000_MPC);
4032 adapter->stats.scc += rd32(E1000_SCC);
4033 adapter->stats.ecol += rd32(E1000_ECOL);
4034 adapter->stats.mcc += rd32(E1000_MCC);
4035 adapter->stats.latecol += rd32(E1000_LATECOL);
4036 adapter->stats.dc += rd32(E1000_DC);
4037 adapter->stats.rlec += rd32(E1000_RLEC);
4038 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4039 adapter->stats.xontxc += rd32(E1000_XONTXC);
4040 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4041 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4042 adapter->stats.fcruc += rd32(E1000_FCRUC);
4043 adapter->stats.gptc += rd32(E1000_GPTC);
4044 adapter->stats.gotc += rd32(E1000_GOTCL);
4045 rd32(E1000_GOTCH); /* clear GOTCL */
4046 rnbc = rd32(E1000_RNBC);
4047 adapter->stats.rnbc += rnbc;
4048 net_stats->rx_fifo_errors += rnbc;
4049 adapter->stats.ruc += rd32(E1000_RUC);
4050 adapter->stats.rfc += rd32(E1000_RFC);
4051 adapter->stats.rjc += rd32(E1000_RJC);
4052 adapter->stats.tor += rd32(E1000_TORH);
4053 adapter->stats.tot += rd32(E1000_TOTH);
4054 adapter->stats.tpr += rd32(E1000_TPR);
4056 adapter->stats.ptc64 += rd32(E1000_PTC64);
4057 adapter->stats.ptc127 += rd32(E1000_PTC127);
4058 adapter->stats.ptc255 += rd32(E1000_PTC255);
4059 adapter->stats.ptc511 += rd32(E1000_PTC511);
4060 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4061 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4063 adapter->stats.mptc += rd32(E1000_MPTC);
4064 adapter->stats.bptc += rd32(E1000_BPTC);
4066 /* used for adaptive IFS */
4067 hw->mac.tx_packet_delta = rd32(E1000_TPT);
4068 adapter->stats.tpt += hw->mac.tx_packet_delta;
4069 hw->mac.collision_delta = rd32(E1000_COLC);
4070 adapter->stats.colc += hw->mac.collision_delta;
4072 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4073 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4074 adapter->stats.tncrs += rd32(E1000_TNCRS);
4075 adapter->stats.tsctc += rd32(E1000_TSCTC);
4076 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4078 adapter->stats.iac += rd32(E1000_IAC);
4079 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4080 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4081 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4082 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4083 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4084 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4085 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4086 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4088 /* Fill out the OS statistics structure */
4089 net_stats->multicast = adapter->stats.mprc;
4090 net_stats->collisions = adapter->stats.colc;
4094 /* RLEC on some newer hardware can be incorrect, so build
4095 * our own version based on RUC and ROC */
4096 net_stats->rx_errors = adapter->stats.rxerrc +
4097 adapter->stats.crcerrs + adapter->stats.algnerrc +
4098 adapter->stats.ruc + adapter->stats.roc +
4099 adapter->stats.cexterr;
4100 net_stats->rx_length_errors = adapter->stats.ruc +
4102 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4103 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4104 net_stats->rx_missed_errors = adapter->stats.mpc;
4107 net_stats->tx_errors = adapter->stats.ecol +
4108 adapter->stats.latecol;
4109 net_stats->tx_aborted_errors = adapter->stats.ecol;
4110 net_stats->tx_window_errors = adapter->stats.latecol;
4111 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4113 /* Tx Dropped needs to be maintained elsewhere */
4116 if (hw->phy.media_type == e1000_media_type_copper) {
4117 if ((adapter->link_speed == SPEED_1000) &&
4118 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4119 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4120 adapter->phy_stats.idle_errors += phy_tmp;
4124 /* Management Stats */
4125 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4126 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4127 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4130 static irqreturn_t igb_msix_other(int irq, void *data)
4132 struct igb_adapter *adapter = data;
4133 struct e1000_hw *hw = &adapter->hw;
4134 u32 icr = rd32(E1000_ICR);
4135 /* reading ICR causes bit 31 of EICR to be cleared */
4137 if (icr & E1000_ICR_DRSTA)
4138 schedule_work(&adapter->reset_task);
4140 if (icr & E1000_ICR_DOUTSYNC) {
4141 /* HW is reporting DMA is out of sync */
4142 adapter->stats.doosync++;
4145 /* Check for a mailbox event */
4146 if (icr & E1000_ICR_VMMB)
4147 igb_msg_task(adapter);
4149 if (icr & E1000_ICR_LSC) {
4150 hw->mac.get_link_status = 1;
4151 /* guard against interrupt when we're going down */
4152 if (!test_bit(__IGB_DOWN, &adapter->state))
4153 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4156 if (adapter->vfs_allocated_count)
4157 wr32(E1000_IMS, E1000_IMS_LSC |
	     E1000_IMS_VMMB |
	     E1000_IMS_DOUTSYNC);
else
	wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
wr32(E1000_EIMS, adapter->eims_other);

return IRQ_HANDLED;
4167 static void igb_write_itr(struct igb_q_vector *q_vector)
4169 struct igb_adapter *adapter = q_vector->adapter;
4170 u32 itr_val = q_vector->itr_val & 0x7FFC;
if (!q_vector->set_itr)
	return;

if (!itr_val)
	itr_val = 0x4;
4178 if (adapter->hw.mac.type == e1000_82575)
4179 itr_val |= itr_val << 16;
else
	itr_val |= 0x8000000;
4183 writel(itr_val, q_vector->itr_register);
4184 q_vector->set_itr = 0;
4187 static irqreturn_t igb_msix_ring(int irq, void *data)
4189 struct igb_q_vector *q_vector = data;
4191 /* Write the ITR value calculated from the previous interrupt. */
4192 igb_write_itr(q_vector);
napi_schedule(&q_vector->napi);

return IRQ_HANDLED;
4199 #ifdef CONFIG_IGB_DCA
4200 static void igb_update_dca(struct igb_q_vector *q_vector)
4202 struct igb_adapter *adapter = q_vector->adapter;
4203 struct e1000_hw *hw = &adapter->hw;
4204 int cpu = get_cpu();
if (q_vector->cpu == cpu)
	goto out_no_update;
4209 if (q_vector->tx_ring) {
4210 int q = q_vector->tx_ring->reg_idx;
4211 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4212 if (hw->mac.type == e1000_82575) {
4213 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4214 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4216 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4217 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4218 E1000_DCA_TXCTRL_CPUID_SHIFT;
4220 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4221 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4223 if (q_vector->rx_ring) {
4224 int q = q_vector->rx_ring->reg_idx;
4225 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4226 if (hw->mac.type == e1000_82575) {
4227 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4228 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4230 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4231 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4232 E1000_DCA_RXCTRL_CPUID_SHIFT;
4234 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4235 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4236 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4237 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
q_vector->cpu = cpu;
out_no_update:
	put_cpu();
4244 static void igb_setup_dca(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
int i;
if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
	return;
4252 /* Always use CB2 mode, difference is masked in the CB driver. */
4253 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4255 for (i = 0; i < adapter->num_q_vectors; i++) {
4256 adapter->q_vector[i]->cpu = -1;
4257 igb_update_dca(adapter->q_vector[i]);
4261 static int __igb_notify_dca(struct device *dev, void *data)
4263 struct net_device *netdev = dev_get_drvdata(dev);
4264 struct igb_adapter *adapter = netdev_priv(netdev);
4265 struct pci_dev *pdev = adapter->pdev;
4266 struct e1000_hw *hw = &adapter->hw;
4267 unsigned long event = *(unsigned long *)data;
switch (event) {
case DCA_PROVIDER_ADD:
4271 /* if already enabled, don't do it again */
	if (adapter->flags & IGB_FLAG_DCA_ENABLED)
		break;
4274 if (dca_add_requester(dev) == 0) {
4275 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4276 dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
		break;
	}
4280 /* Fall Through since DCA is disabled. */
4281 case DCA_PROVIDER_REMOVE:
4282 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4283 /* without this a class_device is left
4284 * hanging around in the sysfs model */
4285 dca_remove_requester(dev);
4286 dev_info(&pdev->dev, "DCA disabled\n");
4287 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
	break;
}

return 0;
}
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
                          void *p)
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
	                                 __igb_notify_dca);
4304 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4306 #endif /* CONFIG_IGB_DCA */
4308 static void igb_ping_all_vfs(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
u32 ping;
int i;
4314 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4315 ping = E1000_PF_CONTROL_MSG;
4316 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4317 ping |= E1000_VT_MSGTYPE_CTS;
4318 igb_write_mbx(hw, &ping, 1, i);
4322 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4324 struct e1000_hw *hw = &adapter->hw;
4325 u32 vmolr = rd32(E1000_VMOLR(vf));
4326 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
		    IGB_VF_FLAG_MULTI_PROMISC);
4330 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4332 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4333 vmolr |= E1000_VMOLR_MPME;
4334 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
} else {
	/*
	 * if we have hashes and we are clearing a multicast promisc
4338 * flag we need to write the hashes to the MTA as this step
4339 * was previously skipped
4341 if (vf_data->num_vf_mc_hashes > 30) {
4342 vmolr |= E1000_VMOLR_MPME;
4343 } else if (vf_data->num_vf_mc_hashes) {
	int j;
	vmolr |= E1000_VMOLR_ROMPE;
4346 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4347 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4351 wr32(E1000_VMOLR(vf), vmolr);
4353 /* there are flags left unprocessed, likely not supported */
if (*msgbuf & E1000_VT_MSGINFO_MASK)
	return -EINVAL;

return 0;
4361 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4362 u32 *msgbuf, u32 vf)
4364 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4365 u16 *hash_list = (u16 *)&msgbuf[1];
struct vf_data_storage *vf_data = &adapter->vf_data[vf];
int i;
4369 /* salt away the number of multicast addresses assigned
 * to this VF for later use to restore when the PF multicast
 * list changes
 */
4373 vf_data->num_vf_mc_hashes = n;
/* only up to 30 hash values supported */
if (n > 30)
	n = 30;
4379 /* store the hashes for later use */
4380 for (i = 0; i < n; i++)
4381 vf_data->vf_mc_hashes[i] = hash_list[i];
4383 /* Flush and reset the mta with the new values */
igb_set_rx_mode(adapter->netdev);

return 0;
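/*
 * Illustrative only: a hypothetical sketch of how the VF side would
 * build the multicast message consumed above -- the hash count rides in
 * the MSGINFO field of word 0 and the 16-bit hash values are packed two
 * per word starting at word 1. The helper name is an assumption, not
 * part of either driver.
 */
static inline void igbvf_build_mc_msg_sketch(u32 *msgbuf,
					     const u16 *hashes, int n)
{
	u16 *list = (u16 *)&msgbuf[1];
	int i;

	msgbuf[0] = E1000_VF_SET_MULTICAST |
		    (n << E1000_VT_MSGINFO_SHIFT);
	for (i = 0; i < n; i++)
		list[i] = hashes[i];
}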
4389 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4391 struct e1000_hw *hw = &adapter->hw;
struct vf_data_storage *vf_data;
int i, j;
4395 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4396 u32 vmolr = rd32(E1000_VMOLR(i));
4397 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4399 vf_data = &adapter->vf_data[i];
4401 if ((vf_data->num_vf_mc_hashes > 30) ||
4402 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4403 vmolr |= E1000_VMOLR_MPME;
4404 } else if (vf_data->num_vf_mc_hashes) {
4405 vmolr |= E1000_VMOLR_ROMPE;
4406 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4407 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4409 wr32(E1000_VMOLR(i), vmolr);
4413 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4415 struct e1000_hw *hw = &adapter->hw;
u32 pool_mask, reg, vid;
int i;
4419 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4421 /* Find the vlan filter for this id */
4422 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4423 reg = rd32(E1000_VLVF(i));
/* remove the vf from the pool */
reg &= ~pool_mask;
4428 /* if pool is empty then remove entry from vfta */
4429 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4430 (reg & E1000_VLVF_VLANID_ENABLE)) {
4432 vid = reg & E1000_VLVF_VLANID_MASK;
4433 igb_vfta_set(hw, vid, false);
4436 wr32(E1000_VLVF(i), reg);
4439 adapter->vf_data[vf].vlans_enabled = 0;
4442 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
struct e1000_hw *hw = &adapter->hw;
u32 reg, i;
4447 /* The vlvf table only exists on 82576 hardware and newer */
if (hw->mac.type < e1000_82576)
	return -1;
4451 /* we only need to do this if VMDq is enabled */
if (!adapter->vfs_allocated_count)
	return -1;
4455 /* Find the vlan filter for this id */
4456 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4457 reg = rd32(E1000_VLVF(i));
4458 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
	    vid == (reg & E1000_VLVF_VLANID_MASK))
		break;
}

if (add) {
4464 if (i == E1000_VLVF_ARRAY_SIZE) {
4465 /* Did not find a matching VLAN ID entry that was
4466 * enabled. Search for a free filter entry, i.e.
4467 * one without the enable bit set
4469 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4470 reg = rd32(E1000_VLVF(i));
4471 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4475 if (i < E1000_VLVF_ARRAY_SIZE) {
4476 /* Found an enabled/available entry */
4477 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4479 /* if !enabled we need to set this up in vfta */
4480 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4481 /* add VID to filter table */
4482 igb_vfta_set(hw, vid, true);
4483 reg |= E1000_VLVF_VLANID_ENABLE;
reg &= ~E1000_VLVF_VLANID_MASK;
reg |= vid;
4487 wr32(E1000_VLVF(i), reg);
4489 /* do not modify RLPML for PF devices */
if (vf >= adapter->vfs_allocated_count)
	return 0;
4493 if (!adapter->vf_data[vf].vlans_enabled) {
	u32 size;
	reg = rd32(E1000_VMOLR(vf));
4496 size = reg & E1000_VMOLR_RLPML_MASK;
size += 4;
reg &= ~E1000_VMOLR_RLPML_MASK;
reg |= size;
4500 wr32(E1000_VMOLR(vf), reg);
		adapter->vf_data[vf].vlans_enabled++;
		return 0;
	}
} else {
4507 if (i < E1000_VLVF_ARRAY_SIZE) {
4508 /* remove vf from the pool */
4509 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4510 /* if pool is empty then remove entry from vfta */
4511 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
	reg = 0;
	igb_vfta_set(hw, vid, false);
}
4515 wr32(E1000_VLVF(i), reg);
4517 /* do not modify RLPML for PF devices */
if (vf >= adapter->vfs_allocated_count)
	return 0;
4521 adapter->vf_data[vf].vlans_enabled--;
4522 if (!adapter->vf_data[vf].vlans_enabled) {
	u32 size;
	reg = rd32(E1000_VMOLR(vf));
4525 size = reg & E1000_VMOLR_RLPML_MASK;
size -= 4;
reg &= ~E1000_VMOLR_RLPML_MASK;
reg |= size;
			wr32(E1000_VMOLR(vf), reg);
		}

		return 0;
	}
}
return -1;
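/*
 * Illustrative only: a hypothetical helper showing how a VLVF entry is
 * encoded, matching the masks used above -- the VLAN id occupies the
 * low bits, one pool-select bit per pool starts at
 * E1000_VLVF_POOLSEL_SHIFT, and E1000_VLVF_VLANID_ENABLE marks the
 * entry as in use.
 */
static inline u32 igb_vlvf_entry_sketch(u16 vid, u32 pool)
{
	return (vid & E1000_VLVF_VLANID_MASK) |
	       (1 << (E1000_VLVF_POOLSEL_SHIFT + pool)) |
	       E1000_VLVF_VLANID_ENABLE;
}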
4536 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4538 struct e1000_hw *hw = &adapter->hw;
if (vid)
	wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
else
	wr32(E1000_VMVIR(vf), 0);
4546 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4547 int vf, u16 vlan, u8 qos)
	int err = 0;
	struct igb_adapter *adapter = netdev_priv(netdev);
4552 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
	return -EINVAL;

if (vlan || qos) {
	err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
	if (err)
		goto out;
4558 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4559 igb_set_vmolr(adapter, vf, !vlan);
4560 adapter->vf_data[vf].pf_vlan = vlan;
4561 adapter->vf_data[vf].pf_qos = qos;
4562 dev_info(&adapter->pdev->dev,
4563 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4564 if (test_bit(__IGB_DOWN, &adapter->state)) {
4565 dev_warn(&adapter->pdev->dev,
4566 "The VF VLAN has been set,"
4567 " but the PF device is not up.\n");
4568 dev_warn(&adapter->pdev->dev,
4569 "Bring the PF device up before"
4570 " attempting to use the VF device.\n");
} else {
	igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
		     false, vf);
4575 igb_set_vmvir(adapter, vlan, vf);
4576 igb_set_vmolr(adapter, vf, true);
4577 adapter->vf_data[vf].pf_vlan = 0;
	adapter->vf_data[vf].pf_qos = 0;
}
out:
return err;
4584 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4586 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4587 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4589 return igb_vlvf_set(adapter, vid, add, vf);
4592 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4595 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4596 adapter->vf_data[vf].last_nack = jiffies;
4598 /* reset offloads to defaults */
4599 igb_set_vmolr(adapter, vf, true);
4601 /* reset vlans for device */
4602 igb_clear_vf_vfta(adapter, vf);
4603 if (adapter->vf_data[vf].pf_vlan)
4604 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4605 adapter->vf_data[vf].pf_vlan,
4606 adapter->vf_data[vf].pf_qos);
else
	igb_clear_vf_vfta(adapter, vf);
4610 /* reset multicast table array for vf */
4611 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4613 /* Flush and reset the mta with the new values */
4614 igb_set_rx_mode(adapter->netdev);
4617 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4619 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4621 /* generate a new mac address as we were hotplug removed/added */
4622 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4623 random_ether_addr(vf_mac);
4625 /* process remaining reset events */
4626 igb_vf_reset(adapter, vf);
4629 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4631 struct e1000_hw *hw = &adapter->hw;
4632 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4633 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
u32 reg, msgbuf[3];
u8 *addr = (u8 *)(&msgbuf[1]);
4637 /* process all the same items cleared in a function level reset */
4638 igb_vf_reset(adapter, vf);
4640 /* set vf mac address */
4641 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4643 /* enable transmit and receive for vf */
4644 reg = rd32(E1000_VFTE);
4645 wr32(E1000_VFTE, reg | (1 << vf));
4646 reg = rd32(E1000_VFRE);
4647 wr32(E1000_VFRE, reg | (1 << vf));
4649 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4651 /* reply to reset with ack and vf mac address */
4652 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4653 memcpy(addr, vf_mac, 6);
4654 igb_write_mbx(hw, msgbuf, 3, vf);
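/*
 * Illustrative only: on the VF side, the reset reply written above
 * would be unpacked roughly like this hypothetical sketch -- word 0
 * carries E1000_VF_RESET | E1000_VT_MSGTYPE_ACK and words 1-2 carry the
 * 6-byte MAC address the PF assigned.
 */
static inline void igbvf_parse_reset_reply_sketch(const u32 *msgbuf,
						  u8 *mac_out)
{
	memcpy(mac_out, (const u8 *)&msgbuf[1], 6);
}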
4657 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
unsigned char *addr = (char *)&msg[1];
int err = -1;
4662 if (is_valid_ether_addr(addr))
	err = igb_set_vf_mac(adapter, vf, addr);

return err;
4668 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4670 struct e1000_hw *hw = &adapter->hw;
4671 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4672 u32 msg = E1000_VT_MSGTYPE_NACK;
4674 /* if device isn't clear to send it shouldn't be reading either */
4675 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4676 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4677 igb_write_mbx(hw, &msg, 1, vf);
4678 vf_data->last_nack = jiffies;
4682 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4684 struct pci_dev *pdev = adapter->pdev;
4685 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4686 struct e1000_hw *hw = &adapter->hw;
struct vf_data_storage *vf_data = &adapter->vf_data[vf];
s32 retval;
4690 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
if (retval) {
	/* if receive failed revoke VF CTS stats and restart init */
4694 dev_err(&pdev->dev, "Error receiving message from VF\n");
4695 vf_data->flags &= ~IGB_VF_FLAG_CTS;
	if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
		return;
}
4701 /* this is a message we already processed, do nothing */
if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
	return;
4706 * until the vf completes a reset it should not be
4707 * allowed to start any configuration.
4710 if (msgbuf[0] == E1000_VF_RESET) {
	igb_vf_reset_msg(adapter, vf);
	return;
}
4715 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
	if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
		return;
	retval = -1;
	goto out;
}
switch ((msgbuf[0] & 0xFFFF)) {
case E1000_VF_SET_MAC_ADDR:
	retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
	break;
case E1000_VF_SET_PROMISC:
	retval = igb_set_vf_promisc(adapter, msgbuf, vf);
	break;
case E1000_VF_SET_MULTICAST:
	retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
	break;
case E1000_VF_SET_LPE:
	retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
	break;
case E1000_VF_SET_VLAN:
	if (adapter->vf_data[vf].pf_vlan)
		retval = -1;
	else
		retval = igb_set_vf_vlan(adapter, msgbuf, vf);
	break;
default:
	dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
	retval = -1;
	break;
}
4747 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
/* notify the VF of the results of what it sent us */
if (retval)
	msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
else
	msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4755 igb_write_mbx(hw, msgbuf, 1, vf);
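/*
 * For reference, the mailbox words handled above follow this layout (a
 * sketch inferred from the parsing code, not a normative definition):
 *
 *   msgbuf[0], low 16 bits  - message type (E1000_VF_SET_MAC_ADDR, ...)
 *   msgbuf[0], MSGINFO bits - type-specific count or flags
 *   msgbuf[0], top bits     - E1000_VT_MSGTYPE_ACK/NACK/CTS status
 *   msgbuf[1..]             - type-specific payload (MAC address,
 *                             hash list, VLAN id, ...)
 */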
4758 static void igb_msg_task(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
u32 vf;
4763 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4764 /* process any reset requests */
4765 if (!igb_check_for_rst(hw, vf))
4766 igb_vf_reset_event(adapter, vf);
4768 /* process any messages pending */
4769 if (!igb_check_for_msg(hw, vf))
4770 igb_rcv_msg_from_vf(adapter, vf);
4772 /* process any acks */
4773 if (!igb_check_for_ack(hw, vf))
4774 igb_rcv_ack_from_vf(adapter, vf);
4779 * igb_set_uta - Set unicast filter table address
4780 * @adapter: board private structure
4782 * The unicast table address is a register array of 32-bit registers.
4783 * The table is meant to be used in a way similar to how the MTA is used
4784 * however due to certain limitations in the hardware it is necessary to
 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
4788 static void igb_set_uta(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
int i;
4793 /* The UTA table only exists on 82576 hardware and newer */
if (hw->mac.type < e1000_82576)
	return;
4797 /* we only need to do this if VMDq is enabled */
if (!adapter->vfs_allocated_count)
	return;
4801 for (i = 0; i < hw->mac.uta_reg_count; i++)
4802 array_wr32(E1000_UTA, i, ~0);
4806 * igb_intr_msi - Interrupt Handler
4807 * @irq: interrupt number
4808 * @data: pointer to a network interface device structure
4810 static irqreturn_t igb_intr_msi(int irq, void *data)
4812 struct igb_adapter *adapter = data;
4813 struct igb_q_vector *q_vector = adapter->q_vector[0];
4814 struct e1000_hw *hw = &adapter->hw;
4815 /* read ICR disables interrupts using IAM */
4816 u32 icr = rd32(E1000_ICR);
4818 igb_write_itr(q_vector);
4820 if (icr & E1000_ICR_DRSTA)
4821 schedule_work(&adapter->reset_task);
4823 if (icr & E1000_ICR_DOUTSYNC) {
4824 /* HW is reporting DMA is out of sync */
4825 adapter->stats.doosync++;
4828 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4829 hw->mac.get_link_status = 1;
4830 if (!test_bit(__IGB_DOWN, &adapter->state))
4831 mod_timer(&adapter->watchdog_timer, jiffies + 1);
napi_schedule(&q_vector->napi);

return IRQ_HANDLED;
4840 * igb_intr - Legacy Interrupt Handler
4841 * @irq: interrupt number
4842 * @data: pointer to a network interface device structure
4844 static irqreturn_t igb_intr(int irq, void *data)
4846 struct igb_adapter *adapter = data;
4847 struct igb_q_vector *q_vector = adapter->q_vector[0];
4848 struct e1000_hw *hw = &adapter->hw;
4849 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4850 * need for the IMC write */
4851 u32 icr = rd32(E1000_ICR);
if (!icr)
	return IRQ_NONE; /* Not our interrupt */
4855 igb_write_itr(q_vector);
4857 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4858 * not set, then the adapter didn't send an interrupt */
if (!(icr & E1000_ICR_INT_ASSERTED))
	return IRQ_NONE;
4862 if (icr & E1000_ICR_DRSTA)
4863 schedule_work(&adapter->reset_task);
4865 if (icr & E1000_ICR_DOUTSYNC) {
4866 /* HW is reporting DMA is out of sync */
4867 adapter->stats.doosync++;
4870 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4871 hw->mac.get_link_status = 1;
4872 /* guard against interrupt when we're going down */
4873 if (!test_bit(__IGB_DOWN, &adapter->state))
4874 mod_timer(&adapter->watchdog_timer, jiffies + 1);
napi_schedule(&q_vector->napi);

return IRQ_HANDLED;
4882 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4884 struct igb_adapter *adapter = q_vector->adapter;
4885 struct e1000_hw *hw = &adapter->hw;
4887 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4888 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4889 if (!adapter->msix_entries)
	igb_set_itr(adapter);
else
	igb_update_ring_itr(q_vector);
4895 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4896 if (adapter->msix_entries)
	wr32(E1000_EIMS, q_vector->eims_value);
else
	igb_irq_enable(adapter);
4904 * igb_poll - NAPI Rx polling callback
4905 * @napi: napi polling structure
4906 * @budget: count of how many packets we should handle
4908 static int igb_poll(struct napi_struct *napi, int budget)
4910 struct igb_q_vector *q_vector = container_of(napi,
                                             struct igb_q_vector,
                                             napi);
4913 int tx_clean_complete = 1, work_done = 0;
4915 #ifdef CONFIG_IGB_DCA
4916 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4917 igb_update_dca(q_vector);
4919 if (q_vector->tx_ring)
4920 tx_clean_complete = igb_clean_tx_irq(q_vector);
4922 if (q_vector->rx_ring)
4923 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
if (!tx_clean_complete)
	work_done = budget;
4928 /* If not enough Rx work done, exit the polling mode */
4929 if (work_done < budget) {
4930 napi_complete(napi);
	igb_ring_irq_enable(q_vector);
}

return work_done;
4938 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4939 * @adapter: board private structure
4940 * @shhwtstamps: timestamp structure to update
4941 * @regval: unsigned 64bit system time value.
4943 * We need to convert the system time value stored in the RX/TXSTMP registers
4944 * into a hwtstamp which can be used by the upper level timestamping functions
4946 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
                                   struct skb_shared_hwtstamps *shhwtstamps,
                                   u64 regval)
{
	u64 ns;

	/*
4953 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4954 * 24 to match clock shift we setup earlier.
4956 if (adapter->hw.mac.type == e1000_82580)
4957 regval <<= IGB_82580_TSYNC_SHIFT;
4959 ns = timecounter_cyc2time(&adapter->clock, regval);
4960 timecompare_update(&adapter->compare, ns);
4961 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4962 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4963 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
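/*
 * Illustrative only: a sketch of the 82580 scaling performed above. The
 * 82580 timestamp registers count whole nanoseconds from bit 0, so a
 * raw value must be shifted up into the 24-bit fixed-point cycle format
 * the timecounter was configured with before conversion.
 */
static inline u64 igb_82580_raw_to_cycles_sketch(u64 raw_ns)
{
	return raw_ns << IGB_82580_TSYNC_SHIFT;	/* e.g. 1000 ns -> 1000 << 24 */
}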
4967 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4968 * @q_vector: pointer to q_vector containing needed info
4969 * @skb: packet that was just sent
4971 * If we were asked to do hardware stamping and such a time stamp is
4972 * available, then it must have been for this skb here because we only
 * allow one such packet into the queue.
4975 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4977 struct igb_adapter *adapter = q_vector->adapter;
4978 union skb_shared_tx *shtx = skb_tx(skb);
4979 struct e1000_hw *hw = &adapter->hw;
struct skb_shared_hwtstamps shhwtstamps;
u64 regval;
4983 /* if skb does not support hw timestamp or TX stamp not valid exit */
4984 if (likely(!shtx->hardware) ||
    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
	return;
4988 regval = rd32(E1000_TXSTMPL);
4989 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4991 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4992 skb_tstamp_tx(skb, &shhwtstamps);
4996 * igb_clean_tx_irq - Reclaim resources after transmit completes
4997 * @q_vector: pointer to q_vector containing needed info
4998 * returns true if ring is completely cleaned
5000 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5002 struct igb_adapter *adapter = q_vector->adapter;
5003 struct igb_ring *tx_ring = q_vector->tx_ring;
5004 struct net_device *netdev = tx_ring->netdev;
5005 struct e1000_hw *hw = &adapter->hw;
5006 struct igb_buffer *buffer_info;
5007 struct sk_buff *skb;
5008 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5009 unsigned int total_bytes = 0, total_packets = 0;
5010 unsigned int i, eop, count = 0;
5011 bool cleaned = false;
5013 i = tx_ring->next_to_clean;
5014 eop = tx_ring->buffer_info[i].next_to_watch;
5015 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5017 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5018 (count < tx_ring->count)) {
5019 for (cleaned = false; !cleaned; count++) {
5020 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5021 buffer_info = &tx_ring->buffer_info[i];
5022 cleaned = (i == eop);
5023 skb = buffer_info->skb;
if (skb) {
	unsigned int segs, bytecount;
5027 /* gso_segs is currently only valid for tcp */
5028 segs = skb_shinfo(skb)->gso_segs ?: 1;
5029 /* multiply data chunks by size of headers */
	bytecount = ((segs - 1) * skb_headlen(skb)) +
		    skb->len;
	total_packets += segs;
5033 total_bytes += bytecount;
5035 igb_tx_hwtstamp(q_vector, skb);
5038 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5039 tx_desc->wb.status = 0;
i++;
if (i == tx_ring->count)
	i = 0;
5045 eop = tx_ring->buffer_info[i].next_to_watch;
5046 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5049 tx_ring->next_to_clean = i;
5051 if (unlikely(count &&
5052 netif_carrier_ok(netdev) &&
5053 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5054 /* Make sure that anybody stopping the queue after this
 * sees the new next_to_clean.
 */
smp_mb();
5058 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5059 !(test_bit(__IGB_DOWN, &adapter->state))) {
5060 netif_wake_subqueue(netdev, tx_ring->queue_index);
5061 tx_ring->tx_stats.restart_queue++;
5065 if (tx_ring->detect_tx_hung) {
5066 /* Detect a transmit hang in hardware, this serializes the
5067 * check with the clearing of time_stamp and movement of i */
5068 tx_ring->detect_tx_hung = false;
5069 if (tx_ring->buffer_info[i].time_stamp &&
5070 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5071 (adapter->tx_timeout_factor * HZ)) &&
5072 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5074 /* detected Tx unit hang */
dev_err(&tx_ring->pdev->dev,
	"Detected Tx Unit Hang\n"
	"  Tx Queue             <%d>\n"
	"  TDH                  <%x>\n"
	"  TDT                  <%x>\n"
	"  next_to_use          <%x>\n"
	"  next_to_clean        <%x>\n"
	"buffer_info[next_to_clean]\n"
	"  time_stamp           <%lx>\n"
	"  next_to_watch        <%x>\n"
	"  jiffies              <%lx>\n"
	"  desc.status          <%x>\n",
	tx_ring->queue_index,
	readl(tx_ring->head),
	readl(tx_ring->tail),
	tx_ring->next_to_use,
	tx_ring->next_to_clean,
	tx_ring->buffer_info[eop].time_stamp,
	eop,
	jiffies,
	eop_desc->wb.status);
5096 netif_stop_subqueue(netdev, tx_ring->queue_index);
5099 tx_ring->total_bytes += total_bytes;
5100 tx_ring->total_packets += total_packets;
5101 tx_ring->tx_stats.bytes += total_bytes;
5102 tx_ring->tx_stats.packets += total_packets;
5103 return (count < tx_ring->count);
5107 * igb_receive_skb - helper function to handle rx indications
5108 * @q_vector: structure containing interrupt and ring information
5109 * @skb: packet to send up
5110 * @vlan_tag: vlan tag for packet
5112 static void igb_receive_skb(struct igb_q_vector *q_vector,
5113 struct sk_buff *skb,
5116 struct igb_adapter *adapter = q_vector->adapter;
if (vlan_tag && adapter->vlgrp)
	vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
			 vlan_tag, skb);
else
	napi_gro_receive(&q_vector->napi, skb);
5125 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5126 u32 status_err, struct sk_buff *skb)
5128 skb->ip_summed = CHECKSUM_NONE;
5130 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5131 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
    (status_err & E1000_RXD_STAT_IXSM))
	return;
5135 /* TCP/UDP checksum error bit is set */
if (status_err &
    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5139 * work around errata with sctp packets where the TCPE aka
5140 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5141 * packets, (aka let the stack check the crc32c)
5143 if ((skb->len == 60) &&
5144 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5145 ring->rx_stats.csum_err++;
	/* let the stack verify checksum errors */
	return;
}
5150 /* It must be a TCP or UDP packet with a valid checksum */
5151 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5152 skb->ip_summed = CHECKSUM_UNNECESSARY;
5154 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5157 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5158 struct sk_buff *skb)
5160 struct igb_adapter *adapter = q_vector->adapter;
5161 struct e1000_hw *hw = &adapter->hw;
5165 * If this bit is set, then the RX registers contain the time stamp. No
5166 * other packet will be time stamped until we read these registers, so
5167 * read the registers to make them available again. Because only one
5168 * packet can be time stamped at a time, we know that the register
5169 * values must belong to this one here and therefore we don't need to
5170 * compare any of the additional attributes stored for it.
5172 * If nothing went wrong, then it should have a skb_shared_tx that we
5173 * can turn into a skb_shared_hwtstamps.
if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
	return;
if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
	return;
5180 regval = rd32(E1000_RXSTMPL);
5181 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5183 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5185 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5186 union e1000_adv_rx_desc *rx_desc)
5188 /* HW will not DMA in data larger than the given buffer, even if it
5189 * parses the (NFS, of course) header to be larger. In that case, it
5190 * fills the header buffer and spills the rest into the page.
5192 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5193 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5194 if (hlen > rx_ring->rx_buffer_len)
	hlen = rx_ring->rx_buffer_len;
return hlen;
5199 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5200 int *work_done, int budget)
5202 struct igb_ring *rx_ring = q_vector->rx_ring;
5203 struct net_device *netdev = rx_ring->netdev;
5204 struct pci_dev *pdev = rx_ring->pdev;
5205 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5206 struct igb_buffer *buffer_info , *next_buffer;
5207 struct sk_buff *skb;
5208 bool cleaned = false;
5209 int cleaned_count = 0;
5210 int current_node = numa_node_id();
unsigned int total_bytes = 0, total_packets = 0;
unsigned int i;
u32 staterr;
u16 length;
u16 vlan_tag;
5217 i = rx_ring->next_to_clean;
5218 buffer_info = &rx_ring->buffer_info[i];
5219 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5220 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5222 while (staterr & E1000_RXD_STAT_DD) {
if (*work_done >= budget)
	break;
(*work_done)++;
5227 skb = buffer_info->skb;
5228 prefetch(skb->data - NET_IP_ALIGN);
5229 buffer_info->skb = NULL;
i++;
if (i == rx_ring->count)
	i = 0;
next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
prefetch(next_rxd);
5237 next_buffer = &rx_ring->buffer_info[i];
length = le16_to_cpu(rx_desc->wb.upper.length);
cleaned = true;
cleaned_count++;
5243 if (buffer_info->dma) {
5244 pci_unmap_single(pdev, buffer_info->dma,
5245 rx_ring->rx_buffer_len,
5246 PCI_DMA_FROMDEVICE);
5247 buffer_info->dma = 0;
5248 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
	skb_put(skb, length);
	goto send_up;
}
5252 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
if (length) {
	pci_unmap_page(pdev, buffer_info->page_dma,
5257 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5258 buffer_info->page_dma = 0;
5260 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
		   buffer_info->page,
		   buffer_info->page_offset,
		   length);
5265 if ((page_count(buffer_info->page) != 1) ||
5266 (page_to_nid(buffer_info->page) != current_node))
5267 buffer_info->page = NULL;
else
	get_page(buffer_info->page);
skb->len += length;
skb->data_len += length;
5273 skb->truesize += length;
5276 if (!(staterr & E1000_RXD_STAT_EOP)) {
5277 buffer_info->skb = next_buffer->skb;
5278 buffer_info->dma = next_buffer->dma;
5279 next_buffer->skb = skb;
5280 next_buffer->dma = 0;
	goto next_desc;
}
send_up:
if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
	dev_kfree_skb_irq(skb);
	goto next_desc;
}
5289 igb_rx_hwtstamp(q_vector, staterr, skb);
total_bytes += skb->len;
total_packets++;
5293 igb_rx_checksum_adv(rx_ring, staterr, skb);
5295 skb->protocol = eth_type_trans(skb, netdev);
5296 skb_record_rx_queue(skb, rx_ring->queue_index);
5298 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5299 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5301 igb_receive_skb(q_vector, skb, vlan_tag);
next_desc:
	rx_desc->wb.upper.status_error = 0;
5306 /* return some buffers to hardware, one at a time is too slow */
5307 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
	igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
	cleaned_count = 0;
}
/* use prefetched values */
rx_desc = next_rxd;
5314 buffer_info = next_buffer;
5315 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5318 rx_ring->next_to_clean = i;
5319 cleaned_count = igb_desc_unused(rx_ring);
5322 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5324 rx_ring->total_packets += total_packets;
5325 rx_ring->total_bytes += total_bytes;
5326 rx_ring->rx_stats.packets += total_packets;
rx_ring->rx_stats.bytes += total_bytes;

return cleaned;
5332 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5333 * @adapter: address of board private structure
5335 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5337 struct net_device *netdev = rx_ring->netdev;
5338 union e1000_adv_rx_desc *rx_desc;
5339 struct igb_buffer *buffer_info;
struct sk_buff *skb;
unsigned int i;
int bufsz;
5344 i = rx_ring->next_to_use;
5345 buffer_info = &rx_ring->buffer_info[i];
5347 bufsz = rx_ring->rx_buffer_len;
5349 while (cleaned_count--) {
5350 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5352 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5353 if (!buffer_info->page) {
5354 buffer_info->page = netdev_alloc_page(netdev);
5355 if (!buffer_info->page) {
	rx_ring->rx_stats.alloc_failed++;
	goto no_buffers;
}
5359 buffer_info->page_offset = 0;
} else {
	buffer_info->page_offset ^= PAGE_SIZE / 2;
}
5363 buffer_info->page_dma =
5364 pci_map_page(rx_ring->pdev, buffer_info->page,
5365 buffer_info->page_offset,
	       PAGE_SIZE / 2,
	       PCI_DMA_FROMDEVICE);
5368 if (pci_dma_mapping_error(rx_ring->pdev,
5369 buffer_info->page_dma)) {
5370 buffer_info->page_dma = 0;
	rx_ring->rx_stats.alloc_failed++;
	goto no_buffers;
}
5376 skb = buffer_info->skb;
if (!skb) {
	skb = netdev_alloc_skb_ip_align(netdev, bufsz);
	if (!skb) {
		rx_ring->rx_stats.alloc_failed++;
		goto no_buffers;
	}
5384 buffer_info->skb = skb;
5386 if (!buffer_info->dma) {
5387 buffer_info->dma = pci_map_single(rx_ring->pdev,
				  skb->data,
				  bufsz,
				  PCI_DMA_FROMDEVICE);
5391 if (pci_dma_mapping_error(rx_ring->pdev,
5392 buffer_info->dma)) {
5393 buffer_info->dma = 0;
	rx_ring->rx_stats.alloc_failed++;
	goto no_buffers;
}
5398 /* Refresh the desc even if buffer_addrs didn't change because
5399 * each write-back erases this info. */
5400 if (bufsz < IGB_RXBUFFER_1024) {
5401 rx_desc->read.pkt_addr =
5402 cpu_to_le64(buffer_info->page_dma);
5403 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5405 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5406 rx_desc->read.hdr_addr = 0;
i++;
if (i == rx_ring->count)
	i = 0;
5412 buffer_info = &rx_ring->buffer_info[i];
no_buffers:
if (rx_ring->next_to_use != i) {
5417 rx_ring->next_to_use = i;
	if (i == 0)
		i = (rx_ring->count - 1);
	else
		i--;
5423 /* Force memory writes to complete before letting h/w
5424 * know there are new descriptors to fetch. (Only
5425 * applicable for weak-ordered memory model archs,
5426 * such as IA-64). */
wmb();
writel(i, rx_ring->tail);
5438 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5440 struct igb_adapter *adapter = netdev_priv(netdev);
5441 struct mii_ioctl_data *data = if_mii(ifr);
if (adapter->hw.phy.media_type != e1000_media_type_copper)
	return -EOPNOTSUPP;

switch (cmd) {
case SIOCGMIIPHY:
	data->phy_id = adapter->hw.phy.addr;
	break;
case SIOCGMIIREG:
	if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
			     &data->val_out))
		return -EIO;
	break;
case SIOCSMIIREG:
default:
	return -EOPNOTSUPP;
}
return 0;
5463 * igb_hwtstamp_ioctl - control hardware time stamping
5468 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
5470 * when no packet needs it. At most one packet in the queue may be
5471 * marked for time stamping, otherwise it would be impossible to tell
5472 * for sure to which packet the hardware time stamp belongs.
5474 * Incoming time stamping has to be configured via the hardware
5475 * filters. Not all combinations are supported, in particular event
5476 * type has to be specified. Matching the kind of event packet is
5477 * not supported, with the exception of "all V2 events regardless of
5481 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5482 struct ifreq *ifr, int cmd)
5484 struct igb_adapter *adapter = netdev_priv(netdev);
5485 struct e1000_hw *hw = &adapter->hw;
5486 struct hwtstamp_config config;
5487 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5488 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
u32 tsync_rx_cfg = 0;
bool is_l4 = false;
bool is_l2 = false;
u32 regval;
if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
	return -EFAULT;
/* reserved for future extensions */
if (config.flags)
	return -EINVAL;
switch (config.tx_type) {
case HWTSTAMP_TX_OFF:
	tsync_tx_ctl = 0;
	break;
case HWTSTAMP_TX_ON:
	break;
default:
	return -ERANGE;
}
5510 switch (config.rx_filter) {
case HWTSTAMP_FILTER_NONE:
	tsync_rx_ctl = 0;
	break;
5514 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5515 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5516 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5517 case HWTSTAMP_FILTER_ALL:
5519 * register TSYNCRXCFG must be set, therefore it is not
5520 * possible to time stamp both Sync and Delay_Req messages
5521 * => fall back to time stamping all packets
5523 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
	config.rx_filter = HWTSTAMP_FILTER_ALL;
	break;
5526 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5527 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
	tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
	is_l4 = true;
	break;
5531 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5532 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
	tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
	is_l4 = true;
	break;
5536 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5537 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5538 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
	tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
	is_l2 = true;
	is_l4 = true;
	config.rx_filter = HWTSTAMP_FILTER_SOME;
	break;
5544 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5545 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5546 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
	tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
	is_l2 = true;
	is_l4 = true;
	config.rx_filter = HWTSTAMP_FILTER_SOME;
	break;
5552 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5553 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5554 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5555 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
	config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
	is_l2 = true;
	is_l4 = true;
	break;
default:
	return -ERANGE;
}
/* 82575 does not support timestamping */
if (hw->mac.type == e1000_82575) {
	if (tsync_rx_ctl | tsync_tx_ctl)
		return -EINVAL;
	return 0;
}
5569 /* enable/disable TX */
5570 regval = rd32(E1000_TSYNCTXCTL);
5571 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5572 regval |= tsync_tx_ctl;
5573 wr32(E1000_TSYNCTXCTL, regval);
5575 /* enable/disable RX */
5576 regval = rd32(E1000_TSYNCRXCTL);
5577 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5578 regval |= tsync_rx_ctl;
5579 wr32(E1000_TSYNCRXCTL, regval);
5581 /* define which PTP packets are time stamped */
5582 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
/* define ethertype filter for timestamped packets */
if (is_l2)
	wr32(E1000_ETQF(3),
	     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5588 E1000_ETQF_1588 | /* enable timestamping */
5589 ETH_P_1588)); /* 1588 eth protocol type */
else
	wr32(E1000_ETQF(3), 0);
5593 #define PTP_PORT 319
/* L4 Queue Filter[3]: filter by destination port and protocol */
if (is_l4) {
	u32 ftqf = (IPPROTO_UDP /* UDP */
5597 | E1000_FTQF_VF_BP /* VF not compared */
5598 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5599 | E1000_FTQF_MASK); /* mask all inputs */
5600 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5602 wr32(E1000_IMIR(3), htons(PTP_PORT));
5603 wr32(E1000_IMIREXT(3),
5604 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5605 if (hw->mac.type == e1000_82576) {
5606 /* enable source port check */
5607 wr32(E1000_SPQF(3), htons(PTP_PORT));
5608 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5610 wr32(E1000_FTQF(3), ftqf);
} else {
	wr32(E1000_FTQF(3), E1000_FTQF_MASK);
}
5616 adapter->hwtstamp_config = config;
5618 /* clear TX/RX time stamp registers, just to be sure */
5619 regval = rd32(E1000_TXSTMPH);
5620 regval = rd32(E1000_RXSTMPH);
return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
	-EFAULT : 0;
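#if 0
/*
 * Illustrative only: a minimal userspace sketch (not driver code) of
 * exercising the ioctl above. Error handling is omitted and the
 * interface name is an assumption.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

static int enable_ptp_timestamping(int sock_fd)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;

	memset(&cfg, 0, sizeof(cfg));
	memset(&ifr, 0, sizeof(ifr));
	cfg.tx_type = HWTSTAMP_TX_ON;
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	return ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
}
#endif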
5632 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
	return igb_mii_ioctl(netdev, ifr, cmd);
case SIOCSHWTSTAMP:
	return igb_hwtstamp_ioctl(netdev, ifr, cmd);
default:
	return -EOPNOTSUPP;
}
5646 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
struct igb_adapter *adapter = hw->back;
u16 cap_offset;
5651 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
if (!cap_offset)
	return -E1000_ERR_CONFIG;
pci_read_config_word(adapter->pdev, cap_offset + reg, value);

return 0;
5660 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
struct igb_adapter *adapter = hw->back;
u16 cap_offset;
5665 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
if (!cap_offset)
	return -E1000_ERR_CONFIG;
pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

return 0;
5674 static void igb_vlan_rx_register(struct net_device *netdev,
5675 struct vlan_group *grp)
5677 struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, rctl;
5681 igb_irq_disable(adapter);
5682 adapter->vlgrp = grp;
if (grp) {
	/* enable VLAN tag insert/strip */
5686 ctrl = rd32(E1000_CTRL);
5687 ctrl |= E1000_CTRL_VME;
5688 wr32(E1000_CTRL, ctrl);
5690 /* Disable CFI check */
5691 rctl = rd32(E1000_RCTL);
5692 rctl &= ~E1000_RCTL_CFIEN;
5693 wr32(E1000_RCTL, rctl);
} else {
	/* disable VLAN tag insert/strip */
5696 ctrl = rd32(E1000_CTRL);
5697 ctrl &= ~E1000_CTRL_VME;
5698 wr32(E1000_CTRL, ctrl);
}

igb_rlpml_set(adapter);
5703 if (!test_bit(__IGB_DOWN, &adapter->state))
5704 igb_irq_enable(adapter);
5707 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5709 struct igb_adapter *adapter = netdev_priv(netdev);
5710 struct e1000_hw *hw = &adapter->hw;
5711 int pf_id = adapter->vfs_allocated_count;
5713 /* attempt to add filter to vlvf array */
5714 igb_vlvf_set(adapter, vid, true, pf_id);
5716 /* add the filter since PF can receive vlans w/o entry in vlvf */
5717 igb_vfta_set(hw, vid, true);
5720 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5722 struct igb_adapter *adapter = netdev_priv(netdev);
5723 struct e1000_hw *hw = &adapter->hw;
int pf_id = adapter->vfs_allocated_count;
s32 err;
5727 igb_irq_disable(adapter);
5728 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5730 if (!test_bit(__IGB_DOWN, &adapter->state))
5731 igb_irq_enable(adapter);
5733 /* remove vlan from VLVF table array */
5734 err = igb_vlvf_set(adapter, vid, false, pf_id);
5736 /* if vid was not present in VLVF just remove it from table */
5738 igb_vfta_set(hw, vid, false);
5741 static void igb_restore_vlan(struct igb_adapter *adapter)
5743 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5745 if (adapter->vlgrp) {
	u16 vid;
	for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5748 if (!vlan_group_get_device(adapter->vlgrp, vid))
			continue;
		igb_vlan_rx_add_vid(adapter->netdev, vid);
5755 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5757 struct pci_dev *pdev = adapter->pdev;
5758 struct e1000_mac_info *mac = &adapter->hw.mac;
switch (spddplx) {
case SPEED_10 + DUPLEX_HALF:
	mac->forced_speed_duplex = ADVERTISE_10_HALF;
	break;
case SPEED_10 + DUPLEX_FULL:
	mac->forced_speed_duplex = ADVERTISE_10_FULL;
	break;
case SPEED_100 + DUPLEX_HALF:
	mac->forced_speed_duplex = ADVERTISE_100_HALF;
	break;
case SPEED_100 + DUPLEX_FULL:
	mac->forced_speed_duplex = ADVERTISE_100_FULL;
	break;
case SPEED_1000 + DUPLEX_FULL:
	mac->autoneg = 1;
	adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
	break;
case SPEED_1000 + DUPLEX_HALF: /* not supported */
default:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
return 0;
5787 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5789 struct net_device *netdev = pci_get_drvdata(pdev);
5790 struct igb_adapter *adapter = netdev_priv(netdev);
5791 struct e1000_hw *hw = &adapter->hw;
5792 u32 ctrl, rctl, status;
5793 u32 wufc = adapter->wol;
5798 netif_device_detach(netdev);
if (netif_running(netdev))
	igb_close(netdev);
5803 igb_clear_interrupt_scheme(adapter);
5806 retval = pci_save_state(pdev);
5811 status = rd32(E1000_STATUS);
5812 if (status & E1000_STATUS_LU)
5813 wufc &= ~E1000_WUFC_LNKC;
if (wufc) {
	igb_setup_rctl(adapter);
5817 igb_set_rx_mode(netdev);
5819 /* turn on all-multi mode if wake on multicast is enabled */
5820 if (wufc & E1000_WUFC_MC) {
5821 rctl = rd32(E1000_RCTL);
5822 rctl |= E1000_RCTL_MPE;
5823 wr32(E1000_RCTL, rctl);
5826 ctrl = rd32(E1000_CTRL);
5827 /* advertise wake from D3Cold */
5828 #define E1000_CTRL_ADVD3WUC 0x00100000
5829 /* phy power management enable */
5830 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5831 ctrl |= E1000_CTRL_ADVD3WUC;
5832 wr32(E1000_CTRL, ctrl);
5834 /* Allow time for pending master requests to run */
5835 igb_disable_pcie_master(hw);
5837 wr32(E1000_WUC, E1000_WUC_PME_EN);
5838 wr32(E1000_WUFC, wufc);
} else {
	wr32(E1000_WUC, 0);
	wr32(E1000_WUFC, 0);
}
5844 *enable_wake = wufc || adapter->en_mng_pt;
if (!*enable_wake)
	igb_power_down_link(adapter);
else
	igb_power_up_link(adapter);
5850 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5851 * would have already happened in close and is redundant. */
5852 igb_release_hw_control(adapter);
5854 pci_disable_device(pdev);
5860 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
retval = __igb_shutdown(pdev, &wake);
if (retval)
	return retval;

if (wake) {
	pci_prepare_to_sleep(pdev);
} else {
	pci_wake_from_d3(pdev, false);
	pci_set_power_state(pdev, PCI_D3hot);
}

return 0;
5879 static int igb_resume(struct pci_dev *pdev)
5881 struct net_device *netdev = pci_get_drvdata(pdev);
5882 struct igb_adapter *adapter = netdev_priv(netdev);
5883 struct e1000_hw *hw = &adapter->hw;
5886 pci_set_power_state(pdev, PCI_D0);
5887 pci_restore_state(pdev);
5888 pci_save_state(pdev);
5890 err = pci_enable_device_mem(pdev);
5893 "igb: Cannot enable PCI device from suspend\n");
5896 pci_set_master(pdev);
5898 pci_enable_wake(pdev, PCI_D3hot, 0);
5899 pci_enable_wake(pdev, PCI_D3cold, 0);
5901 if (igb_init_interrupt_scheme(adapter)) {
5902 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
/* let the f/w know that the h/w is now under the control of the
 * driver. */
5910 igb_get_hw_control(adapter);
5912 wr32(E1000_WUS, ~0);
5914 if (netif_running(netdev)) {
	err = igb_open(netdev);
	if (err)
		return err;
}
5920 netif_device_attach(netdev);
5926 static void igb_shutdown(struct pci_dev *pdev)
5930 __igb_shutdown(pdev, &wake);
5932 if (system_state == SYSTEM_POWER_OFF) {
5933 pci_wake_from_d3(pdev, wake);
5934 pci_set_power_state(pdev, PCI_D3hot);
5938 #ifdef CONFIG_NET_POLL_CONTROLLER
5940 * Polling 'interrupt' - used by things like netconsole to send skbs
5941 * without having to re-enable interrupts. It's not called while
5942 * the interrupt routine is executing.
5944 static void igb_netpoll(struct net_device *netdev)
5946 struct igb_adapter *adapter = netdev_priv(netdev);
5947 struct e1000_hw *hw = &adapter->hw;
5950 if (!adapter->msix_entries) {
5951 struct igb_q_vector *q_vector = adapter->q_vector[0];
5952 igb_irq_disable(adapter);
	napi_schedule(&q_vector->napi);
	return;
}
5957 for (i = 0; i < adapter->num_q_vectors; i++) {
5958 struct igb_q_vector *q_vector = adapter->q_vector[i];
5959 wr32(E1000_EIMC, q_vector->eims_value);
5960 napi_schedule(&q_vector->napi);
5963 #endif /* CONFIG_NET_POLL_CONTROLLER */
5966 * igb_io_error_detected - called when PCI error is detected
5967 * @pdev: Pointer to PCI device
5968 * @state: The current pci connection state
5970 * This function is called after a PCI bus error affecting
5971 * this device has been detected.
5973 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5974 pci_channel_state_t state)
5976 struct net_device *netdev = pci_get_drvdata(pdev);
5977 struct igb_adapter *adapter = netdev_priv(netdev);
5979 netif_device_detach(netdev);
5981 if (state == pci_channel_io_perm_failure)
5982 return PCI_ERS_RESULT_DISCONNECT;
if (netif_running(netdev))
	igb_down(adapter);
5986 pci_disable_device(pdev);
/* Request a slot reset. */
5989 return PCI_ERS_RESULT_NEED_RESET;
5993 * igb_io_slot_reset - called after the pci bus has been reset.
5994 * @pdev: Pointer to PCI device
5996 * Restart the card from scratch, as if from a cold-boot. Implementation
5997 * resembles the first-half of the igb_resume routine.
5999 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6001 struct net_device *netdev = pci_get_drvdata(pdev);
6002 struct igb_adapter *adapter = netdev_priv(netdev);
6003 struct e1000_hw *hw = &adapter->hw;
6004 pci_ers_result_t result;
6007 if (pci_enable_device_mem(pdev)) {
6009 "Cannot re-enable PCI device after reset.\n");
6010 result = PCI_ERS_RESULT_DISCONNECT;
} else {
	pci_set_master(pdev);
6013 pci_restore_state(pdev);
6014 pci_save_state(pdev);
6016 pci_enable_wake(pdev, PCI_D3hot, 0);
6017 pci_enable_wake(pdev, PCI_D3cold, 0);
	igb_reset(adapter);
	wr32(E1000_WUS, ~0);
6021 result = PCI_ERS_RESULT_RECOVERED;
6024 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6026 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6027 "failed 0x%0x\n", err);
6028 /* non-fatal, continue */
6035 * igb_io_resume - called when traffic can start flowing again.
6036 * @pdev: Pointer to PCI device
6038 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
6040 * second-half of the igb_resume routine.
6042 static void igb_io_resume(struct pci_dev *pdev)
6044 struct net_device *netdev = pci_get_drvdata(pdev);
6045 struct igb_adapter *adapter = netdev_priv(netdev);
6047 if (netif_running(netdev)) {
6048 if (igb_up(adapter)) {
6049 dev_err(&pdev->dev, "igb_up failed after reset\n");
6054 netif_device_attach(netdev);
/* let the f/w know that the h/w is now under the control of the
 * driver. */
6058 igb_get_hw_control(adapter);
6061 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6064 u32 rar_low, rar_high;
6065 struct e1000_hw *hw = &adapter->hw;
6067 /* HW expects these in little endian so we reverse the byte order
6068 * from network order (big endian) to little endian
6070 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6071 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6072 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6074 /* Indicate to hardware the Address is Valid. */
6075 rar_high |= E1000_RAH_AV;
6077 if (hw->mac.type == e1000_82575)
6078 rar_high |= E1000_RAH_POOL_1 * qsel;
6080 rar_high |= E1000_RAH_POOL_1 << qsel;
6082 wr32(E1000_RAL(index), rar_low);
wrfl();
wr32(E1000_RAH(index), rar_high);
wrfl();
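/*
 * Worked example of the packing above: for MAC 00:1b:21:aa:bb:cc,
 * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the AV and pool
 * bits are OR'ed in, i.e. byte 0 of the address lands in the least
 * significant byte of RAL.
 */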
6088 static int igb_set_vf_mac(struct igb_adapter *adapter,
6089 int vf, unsigned char *mac_addr)
6091 struct e1000_hw *hw = &adapter->hw;
/* VF MAC addresses start at the end of the receive addresses and move
 * towards the first, so a collision should not be possible */
6094 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6096 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6098 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6103 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6105 struct igb_adapter *adapter = netdev_priv(netdev);
if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
	return -EINVAL;
6108 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6109 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6110 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6111 " change effective.");
6112 if (test_bit(__IGB_DOWN, &adapter->state)) {
6113 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6114 " but the PF device is not up.\n");
6115 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6116 " attempting to use the VF device.\n");
6118 return igb_set_vf_mac(adapter, vf, mac);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	return -EOPNOTSUPP;
}
6126 static int igb_ndo_get_vf_config(struct net_device *netdev,
6127 int vf, struct ifla_vf_info *ivi)
6129 struct igb_adapter *adapter = netdev_priv(netdev);
if (vf >= adapter->vfs_allocated_count)
	return -EINVAL;
ivi->vf = vf;
memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
ivi->tx_rate = 0;
6135 ivi->vlan = adapter->vf_data[vf].pf_vlan;
ivi->qos = adapter->vf_data[vf].pf_qos;
return 0;
6140 static void igb_vmm_control(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
u32 reg;
6145 /* replication is not supported for 82575 */
if (hw->mac.type == e1000_82575)
	return;
6149 /* enable replication vlan tag stripping */
6150 reg = rd32(E1000_RPLOLR);
6151 reg |= E1000_RPLOLR_STRVLAN;
6152 wr32(E1000_RPLOLR, reg);
6154 /* notify HW that the MAC is adding vlan tags */
6155 reg = rd32(E1000_DTXCTL);
6156 reg |= E1000_DTXCTL_VLAN_ADDED;
6157 wr32(E1000_DTXCTL, reg);
6159 if (adapter->vfs_allocated_count) {
6160 igb_vmdq_set_loopback_pf(hw, true);
6161 igb_vmdq_set_replication_pf(hw, true);
6163 igb_vmdq_set_loopback_pf(hw, false);
6164 igb_vmdq_set_replication_pf(hw, false);