/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION "1.3.16-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static struct pci_device_id igb_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif

#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev->name;
}
/**
 * igb_get_time_str - format current NIC and system time as string
 **/
static char *igb_get_time_str(struct igb_adapter *adapter,
			      char buffer[160])
{
	cycle_t hw = adapter->cycles.read(&adapter->cycles);
	struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
	struct timespec sys;
	struct timespec delta;

	getnstimeofday(&sys);
	delta = timespec_sub(nic, sys);

	sprintf(buffer,
		"HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
		(unsigned long long)hw,
		(long)nic.tv_sec, nic.tv_nsec,
		(long)sys.tv_sec, sys.tv_nsec,
		(long)delta.tv_sec, delta.tv_nsec);

	return buffer;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
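/*
 * Editor's illustration (not in the original source): Q_IDX_82576()
 * interleaves indices across the 82576's two queue groups, e.g.
 *   i:              0  1  2  3  4  5 ...
 *   Q_IDX_82576(i): 0  8  1  9  2 10 ...
 * so VF 0 ends up with queues 0 and 8, VF 1 with 1 and 9, and so on,
 * matching the comment in igb_cache_ring_register() below.
 */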
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->num_rx_queues; i++)
				adapter->rx_ring[i].reg_idx = rbase_offset +
				                              Q_IDX_82576(i);
			for (; j < adapter->num_tx_queues; j++)
				adapter->tx_ring[j].reg_idx = rbase_offset +
				                              Q_IDX_82576(j);
			break;
		}
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i].reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j].reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	kfree(adapter->tx_ring);
	kfree(adapter->rx_ring);

	adapter->tx_ring = NULL;
	adapter->rx_ring = NULL;

	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	int i;

	adapter->tx_ring = kcalloc(adapter->num_tx_queues,
				   sizeof(struct igb_ring), GFP_KERNEL);
	if (!adapter->tx_ring)
		goto err;

	adapter->rx_ring = kcalloc(adapter->num_rx_queues,
				   sizeof(struct igb_ring), GFP_KERNEL);
	if (!adapter->rx_ring)
		goto err;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = &(adapter->tx_ring[i]);
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = &(adapter->rx_ring[i]);
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 msixbm = 0, ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}
}
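/*
 * Editor's illustration (assumption, not in the original source): each
 * IVAR0 entry packs four 8-bit vector allocations, so for table index N
 *   bits  7:0  - RX queue N        bits 15:8  - TX queue N
 *   bits 23:16 - RX queue N+8      bits 31:24 - TX queue N+8
 * which is the "somewhat counterintuitive" layout the 82576 case above
 * works around with its byte masks and shifts.
 */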
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		igb_assign_vector(q_vector, vector++);
		adapter->eims_enable_mask |= q_vector->eims_value;
	}

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  &igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  &igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 **/
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
	adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced Tx Queue count. */
	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
	return;
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		q_vector->set_itr = 1;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}

	return 0;

err_out:
	while (v_idx) {
		v_idx--;
		q_vector = adapter->q_vector[v_idx];
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
		adapter->q_vector[v_idx] = NULL;
	}
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector;

	q_vector = adapter->q_vector[v_idx];
	q_vector->rx_ring = &adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector;

	q_vector = adapter->q_vector[v_idx];
	q_vector->tx_ring = &adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	igb_set_interrupt_capability(adapter);

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		switch (hw->mac.type) {
		case e1000_82575:
			wr32(E1000_MSIXBM(0),
			     (E1000_EICR_RX_QUEUE0 |
			      E1000_EICR_TX_QUEUE0 |
			      E1000_EIMS_OTHER));
			break;
		case e1000_82576:
			wr32(E1000_IVAR0, E1000_IVAR_VALID);
			break;
		default:
			break;
		}
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	synchronize_irq(adapter->pdev->irq);
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count)
			wr32(E1000_MBVFIMR, 0xFF);
		wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
				 E1000_IMS_DOUTSYNC));
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK);
		wr32(E1000_IAM, IMS_ENABLE_MASK);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = &adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}

	adapter->tx_queue_len = netdev->tx_queue_len;
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* Fire a link change interrupt to start the watchdog. */
	wr32(E1000_ICS, E1000_ICS_LSC);
	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netdev->tx_queue_len = adapter->tx_queue_len;
	netif_carrier_off(netdev);

	/* record the stats before reset */
	igb_update_stats(adapter);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition PBA for greater than 9k MTU.
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
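		/*
		 * Editor's worked example (not in the original source): for a
		 * 9018-byte jumbo frame, min_tx_space = ALIGN((9018 + 16 - 4)
		 * * 2, 1024) >> 10 = 18 KB and min_rx_space = ALIGN(9018,
		 * 1024) >> 10 = 9 KB, where 16 is sizeof(union
		 * e1000_adv_tx_desc) and 4 is ETH_FCS_LEN.
		 */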
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));
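	/*
	 * Editor's worked example (not in the original source): with the
	 * 82575's 34 KB Rx PBA and a 1522-byte max frame, hwm =
	 * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
	 * which the 8-byte mask below rounds down to 31328.
	 */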
	if (mac->type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags = 0;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	adapter->hw.mac.ops.reset_hw(&adapter->hw);
	wr32(E1000_WUC, 0);

	if (adapter->hw.mac.ops.init_hw(&adapter->hw))
		dev_err(&adapter->pdev->dev, "Hardware Error\n");

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_reset_adaptive(&adapter->hw);
	igb_get_phy_info(&adapter->hw);
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats		= igb_get_stats,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_register	= igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u32 part_num;
	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (!err) {
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_ABS_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;
	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* setup the private structure */
	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);
	hw->phy.autoneg_wait_to_complete = false;
	hw->mac.adaptive_ifs = true;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	netdev->features = NETIF_F_SG |
			   NETIF_F_IP_CSUM |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->features |= NETIF_F_IPV6_CSUM;
	netdev->features |= NETIF_F_TSO;
	netdev->features |= NETIF_F_TSO6;

	netdev->features |= NETIF_F_GRO;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	if (adapter->hw.mac.type == e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);
	/* make sure the NVM is good */
	if (igb_validate_nvm_checksum(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);
	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}
#endif
	switch (hw->mac.type) {
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * subtracted at each clock tick. The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		wr32(E1000_TIMINCA,
		     (1 << E1000_TIMINCA_16NS_SHIFT) |
		     (16 << IGB_82576_TSYNC_SHIFT));
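		/*
		 * Editor's note (not in the original source): with
		 * IGB_82576_TSYNC_SHIFT == 19, the increment written above is
		 * 16 << 19 = 0x800000, which just fits the 24-bit increment
		 * field of TIMINCA as the comment requires.
		 */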
		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIML, 0x00000000);
		wr32(E1000_SYSTIMH, 0xFF800000);
		wrfl();

		timecounter_init(&adapter->clock,
		                 &adapter->cycles,
		                 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
	default:
		break;
	}

	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500)
		  ? "2.5Gb/s" : "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		  "unknown"),
		 netdev->dev_addr);

	igb_read_part_num(hw, &part_num);
	dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
		 (part_num >> 8), (part_num & 0xff));

	dev_info(&pdev->dev,
		 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		 adapter->msix_entries ? "MSI-X" :
		 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		 adapter->num_rx_queues, adapter->num_tx_queues);

	return 0;

err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!igb_check_reset_block(hw))
		igb_reset_phy(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev, pci_select_bars(pdev,
	                             IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* flush_scheduled_work() may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef CONFIG_IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	if (!igb_check_reset_block(&adapter->hw))
		igb_reset_phy(&adapter->hw);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev, pci_select_bars(pdev,
	                             IORESOURCE_MEM));

	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 **/
static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
{
#ifdef CONFIG_PCI_IOV
	struct pci_dev *pdev = adapter->pdev;

	if (adapter->vfs_allocated_count > 7)
		adapter->vfs_allocated_count = 7;

	if (adapter->vfs_allocated_count) {
		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
		                           sizeof(struct vf_data_storage),
		                           GFP_KERNEL);
		/* if allocation failed then we do not support SR-IOV */
		if (!adapter->vf_data) {
			adapter->vfs_allocated_count = 0;
			dev_err(&pdev->dev, "Unable to allocate memory for VF "
			        "Data Storage\n");
		}
	}

	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
#endif /* CONFIG_PCI_IOV */
		adapter->vfs_allocated_count = 0;
#ifdef CONFIG_PCI_IOV
	} else {
		unsigned char mac_addr[ETH_ALEN];
		int i;

		dev_info(&pdev->dev, "%d vfs allocated\n",
		         adapter->vfs_allocated_count);
		for (i = 0; i < adapter->vfs_allocated_count; i++) {
			random_ether_addr(mac_addr);
			igb_set_vf_mac(adapter, i, mac_addr);
		}
	}
#endif /* CONFIG_PCI_IOV */
}
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

#ifdef CONFIG_PCI_IOV
	if (hw->mac.type == e1000_82576)
		adapter->vfs_allocated_count = max_vfs;

#endif /* CONFIG_PCI_IOV */
	/* This call may decrease the number of queues */
	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_probe_vfs(adapter);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int igb_open(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;
	int i;

	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state))
		return -EBUSY;

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	/* e1000_power_up_phy(adapter); */

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so. */
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	/* Fire a link status change interrupt to start the watchdog. */
	wr32(E1000_ICS, E1000_ICS_LSC);

	return 0;

err_req_irq:
	igb_release_hw_control(adapter);
	/* e1000_power_down_phy(adapter); */
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);

	return err;
}
/**
 * igb_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 **/
static int igb_close(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
	igb_down(adapter);

	igb_free_irq(adapter);

	igb_free_all_tx_resources(adapter);
	igb_free_all_rx_resources(adapter);

	return 0;
}
/**
 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
int igb_setup_tx_resources(struct igb_ring *tx_ring)
{
	struct pci_dev *pdev = tx_ring->pdev;
	int size;

	size = sizeof(struct igb_buffer) * tx_ring->count;
	tx_ring->buffer_info = vmalloc(size);
	if (!tx_ring->buffer_info)
		goto err;
	memset(tx_ring->buffer_info, 0, size);

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);

	tx_ring->desc = pci_alloc_consistent(pdev,
	                                     tx_ring->size,
	                                     &tx_ring->dma);
	if (!tx_ring->desc)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	vfree(tx_ring->buffer_info);
	dev_err(&pdev->dev,
	        "Unable to allocate memory for the transmit descriptor ring\n");
	return -ENOMEM;
}
/**
 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
 *				  (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		err = igb_setup_tx_resources(&adapter->tx_ring[i]);
		if (err) {
			dev_err(&pdev->dev,
				"Allocation for Tx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_tx_resources(&adapter->tx_ring[i]);
			break;
		}
	}

	for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
		int r_idx = i % adapter->num_tx_queues;
		adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
	}
	return err;
}
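/*
 * Editor's note (not in the original source): multi_tx_table has
 * IGB_MAX_TX_QUEUES entries but only num_tx_queues real rings, so the
 * modulo above wraps the extra entries round-robin; e.g. with 2 rings
 * the table maps 0,1,0,1,... onto rings 0 and 1.
 */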
/**
 * igb_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 **/
void igb_setup_tctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which is enabled by default on 82575 and 82576 */
	wr32(E1000_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	igb_config_collision_dist(hw);

	/* Enable transmits */
	tctl |= E1000_TCTL_EN;

	wr32(E1000_TCTL, tctl);
}
/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 txdctl;
	u64 tdba = ring->dma;
	int reg_idx = ring->reg_idx;

	/* disable the queue */
	txdctl = rd32(E1000_TXDCTL(reg_idx));
	wr32(E1000_TXDCTL(reg_idx),
	     txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
	wrfl();
	mdelay(10);

	wr32(E1000_TDLEN(reg_idx),
	     ring->count * sizeof(union e1000_adv_tx_desc));
	wr32(E1000_TDBAL(reg_idx),
	     tdba & 0x00000000ffffffffULL);
	wr32(E1000_TDBAH(reg_idx), tdba >> 32);

	ring->head = hw->hw_addr + E1000_TDH(reg_idx);
	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
	writel(0, ring->head);
	writel(0, ring->tail);

	txdctl |= IGB_TX_PTHRESH;
	txdctl |= IGB_TX_HTHRESH << 8;
	txdctl |= IGB_TX_WTHRESH << 16;
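	/*
	 * Editor's note (not in the original source): the prefetch, host and
	 * write-back thresholds are packed into the low, second and third
	 * bytes of TXDCTL respectively, hence the 0-, 8- and 16-bit shifts
	 * above.
	 */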
2067 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2068 wr32(E1000_TXDCTL(reg_idx), txdctl);
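/*
 * Field packing implied by the shifts above: PTHRESH occupies the low byte
 * of TXDCTL, HTHRESH starts at bit 8 and WTHRESH at bit 16. As a worked
 * example with hypothetical values PTHRESH = 8, HTHRESH = 1, WTHRESH = 16,
 * the thresholds pack to 0x00100108 before QUEUE_ENABLE is OR'd in.
 */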
/**
 * igb_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
}
/**
 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
	struct pci_dev *pdev = rx_ring->pdev;
	int size, desc_len;

	size = sizeof(struct igb_buffer) * rx_ring->count;
	rx_ring->buffer_info = vmalloc(size);
	if (!rx_ring->buffer_info)
		goto err;
	memset(rx_ring->buffer_info, 0, size);

	desc_len = sizeof(union e1000_adv_rx_desc);

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * desc_len;
	rx_ring->size = ALIGN(rx_ring->size, 4096);

	rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
					     &rx_ring->dma);
	if (!rx_ring->desc)
		goto err;

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;

err:
	vfree(rx_ring->buffer_info);
	rx_ring->buffer_info = NULL;
	dev_err(&pdev->dev, "Unable to allocate memory for "
		"the receive descriptor ring\n");
	return -ENOMEM;
}
/**
 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *				  (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		err = igb_setup_rx_resources(&adapter->rx_ring[i]);
		if (err) {
			dev_err(&pdev->dev,
				"Allocation for Rx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_rx_resources(&adapter->rx_ring[i]);
			break;
		}
	}

	return err;
}
/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mrqc, rxcsum;
	u32 j, num_rx_queues, shift = 0, shift2 = 0;
	union e1000_reta {
		u32 dword;
		u8  bytes[4];
	} reta;
	static const u8 rsshash[40] = {
		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

	/* Fill out hash function seeds */
	for (j = 0; j < 10; j++) {
		u32 rsskey = rsshash[(j * 4)];
		rsskey |= rsshash[(j * 4) + 1] << 8;
		rsskey |= rsshash[(j * 4) + 2] << 16;
		rsskey |= rsshash[(j * 4) + 3] << 24;
		array_wr32(E1000_RSSRK(0), j, rsskey);
	}

	num_rx_queues = adapter->num_rx_queues;

	if (adapter->vfs_allocated_count) {
		/* 82575 and 82576 supports 2 RSS queues for VMDq */
		switch (hw->mac.type) {
		case e1000_82576:
			shift = 3;
			num_rx_queues = 2;
			break;
		case e1000_82575:
			shift = 2;
			shift2 = 6;
		default:
			break;
		}
	} else {
		if (hw->mac.type == e1000_82575)
			shift = 6;
	}

	for (j = 0; j < (32 * 4); j++) {
		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
		if (shift2)
			reta.bytes[j & 3] |= num_rx_queues << shift2;
		if ((j & 3) == 3)
			wr32(E1000_RETA(j >> 2), reta.dword);
	}

	/*
	 * Disable raw packet checksumming so that RSS hash is placed in
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	if (adapter->hw.mac.type >= e1000_82576)
		/* Enable Receive Checksum Offload for SCTP */
		rxcsum |= E1000_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(E1000_RXCSUM, rxcsum);

	/* If VMDq is enabled then we set the appropriate mode for that, else
	 * we default to RSS so that an RSS hash is calculated per packet even
	 * if we are only using one queue */
	if (adapter->vfs_allocated_count) {
		if (hw->mac.type > e1000_82575) {
			/* Set the default pool for the PF's first queue */
			u32 vtctl = rd32(E1000_VT_CTL);
			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
				   E1000_VT_CTL_DISABLE_DEF_POOL);
			vtctl |= adapter->vfs_allocated_count <<
				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
			wr32(E1000_VT_CTL, vtctl);
		}
		if (adapter->num_rx_queues > 1)
			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
		else
			mrqc = E1000_MRQC_ENABLE_VMDQ;
	} else {
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
	}
	igb_vmm_control(adapter);

	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		 E1000_MRQC_RSS_FIELD_IPV4_TCP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		 E1000_MRQC_RSS_FIELD_IPV6_TCP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		 E1000_MRQC_RSS_FIELD_IPV6_UDP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

	wr32(E1000_MRQC, mrqc);
}
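/*
 * Illustrative RETA walk-through (worked example, assuming the usual
 * 82575/82576 layout where the low bits of the RSS hash index the table):
 * the redirection table is 32 dwords = 128 one-byte entries. With
 * num_rx_queues = 4 and shift = 0, the loop above fills the repeating
 * pattern 0, 1, 2, 3, ... and flushes one dword (four entries) per write,
 * spreading flows evenly across the four receive queues.
 */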
/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(E1000_RCTL);

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/*
	 * enable stripping of CRC. It's unlikely this will break BMC
	 * redirection as it did with e1000. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= E1000_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

	/* enable LPE to prevent packets larger than max_frame_size */
	rctl |= E1000_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(E1000_RXDCTL(0), 0);

	/* Attention!!!  For SR-IOV PF driver operations you must enable
	 * queue drop for all VF and PF queues to prevent head of line blocking
	 * if an un-trusted VF does not provide descriptors to hardware.
	 */
	if (adapter->vfs_allocated_count) {
		/* set all queue drop enable bits */
		wr32(E1000_QDE, ALL_QUEUES);
	}

	wr32(E1000_RCTL, rctl);
}
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
				   int vfn)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/* if it isn't the PF check to see if VFs are enabled and
	 * increase the size to support vlan tags */
	if (vfn < adapter->vfs_allocated_count &&
	    adapter->vf_data[vfn].vlans_enabled)
		size += VLAN_TAG_SIZE;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr &= ~E1000_VMOLR_RLPML_MASK;
	vmolr |= size | E1000_VMOLR_LPE;
	wr32(E1000_VMOLR(vfn), vmolr);

	return 0;
}
/**
 * igb_rlpml_set - set maximum receive packet size
 * @adapter: board private structure
 *
 * Configure maximum receivable packet size.
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
{
	u32 max_frame_size = adapter->max_frame_size;
	struct e1000_hw *hw = &adapter->hw;
	u16 pf_id = adapter->vfs_allocated_count;

	if (adapter->vlgrp)
		max_frame_size += VLAN_TAG_SIZE;

	/* if vfs are enabled we set RLPML to the largest possible request
	 * size and set the VMOLR RLPML to the size we need */
	if (pf_id) {
		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
		max_frame_size = MAX_JUMBO_FRAME_SIZE;
	}

	wr32(E1000_RLPML, max_frame_size);
}
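/*
 * Worked example (illustrative): a standard 1500-byte MTU gives
 * max_frame_size = 1500 + 14 (Ethernet header) + 4 (FCS) = 1518, and a
 * registered VLAN group adds 4 tag bytes for 1522. When VFs are active
 * the shared RLPML is instead opened to the maximum jumbo size and each
 * pool is limited individually through its VMOLR RLPML field.
 */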
static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/*
	 * This register exists only on 82576 and newer so if we are older then
	 * we should exit and do nothing
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr |= E1000_VMOLR_AUPE |	/* Accept untagged packets */
		 E1000_VMOLR_STRVLAN;	/* Strip vlan tags */

	/* clear all bits that might not be set */
	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

	if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
	/*
	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
	 * multicast packets
	 */
	if (vfn <= adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */

	wr32(E1000_VMOLR(vfn), vmolr);
}
/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
			   struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u64 rdba = ring->dma;
	int reg_idx = ring->reg_idx;
	u32 srrctl, rxdctl;

	/* disable the queue */
	rxdctl = rd32(E1000_RXDCTL(reg_idx));
	wr32(E1000_RXDCTL(reg_idx),
	     rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);

	/* Set DMA base address registers */
	wr32(E1000_RDBAL(reg_idx),
	     rdba & 0x00000000ffffffffULL);
	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
	wr32(E1000_RDLEN(reg_idx),
	     ring->count * sizeof(union e1000_adv_rx_desc));

	/* initialize head and tail */
	ring->head = hw->hw_addr + E1000_RDH(reg_idx);
	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
	writel(0, ring->head);
	writel(0, ring->tail);

	/* set descriptor configuration */
	if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
		srrctl = ALIGN(ring->rx_buffer_len, 64) <<
			 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
		srrctl |= IGB_RXBUFFER_16384 >>
			  E1000_SRRCTL_BSIZEPKT_SHIFT;
#else
		srrctl |= (PAGE_SIZE / 2) >>
			  E1000_SRRCTL_BSIZEPKT_SHIFT;
#endif
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	} else {
		srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
			 E1000_SRRCTL_BSIZEPKT_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
	}

	wr32(E1000_SRRCTL(reg_idx), srrctl);

	/* set filtering for VMDQ pools */
	igb_set_vmolr(adapter, reg_idx & 0x7);

	/* enable receive descriptor fetching */
	rxdctl = rd32(E1000_RXDCTL(reg_idx));
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
	rxdctl &= 0xFFF00000;
	rxdctl |= IGB_RX_PTHRESH;
	rxdctl |= IGB_RX_HTHRESH << 8;
	rxdctl |= IGB_RX_WTHRESH << 16;
	wr32(E1000_RXDCTL(reg_idx), rxdctl);
}
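/*
 * Units assumed by the shifts above: SRRCTL's packet buffer size field is
 * programmed in 1 KB granularity (the divide via BSIZEPKT_SHIFT) and the
 * header buffer size field in 64-byte granularity, which is why the
 * header-split path aligns rx_buffer_len to 64 while the one-buffer path
 * aligns it to 1024 before shifting it into place.
 */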
/**
 * igb_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
{
	int i;

	/* set UTA to appropriate mode */
	igb_set_uta(adapter);

	/* set the correct pool for the PF default MAC address in entry 0 */
	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
			 adapter->vfs_allocated_count);

	/* Setup the HW Rx Head and Tail Descriptor Pointers and
	 * the Base and Length of the Rx Descriptor Ring */
	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
}
/**
 * igb_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void igb_free_tx_resources(struct igb_ring *tx_ring)
{
	igb_clean_tx_ring(tx_ring);

	vfree(tx_ring->buffer_info);
	tx_ring->buffer_info = NULL;

	/* if not set, then don't free */
	if (!tx_ring->desc)
		return;

	pci_free_consistent(tx_ring->pdev, tx_ring->size,
			    tx_ring->desc, tx_ring->dma);

	tx_ring->desc = NULL;
}
/**
 * igb_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_free_tx_resources(&adapter->tx_ring[i]);
}
void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
				    struct igb_buffer *buffer_info)
{
	buffer_info->dma = 0;
	if (buffer_info->skb) {
		skb_dma_unmap(&tx_ring->pdev->dev,
			      buffer_info->skb,
			      DMA_TO_DEVICE);
		dev_kfree_skb_any(buffer_info->skb);
		buffer_info->skb = NULL;
	}
	buffer_info->time_stamp = 0;
	/* buffer_info must be completely set up in the transmit path */
}
/**
 * igb_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
	struct igb_buffer *buffer_info;
	unsigned long size;
	unsigned int i;

	if (!tx_ring->buffer_info)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++) {
		buffer_info = &tx_ring->buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
	}

	size = sizeof(struct igb_buffer) * tx_ring->count;
	memset(tx_ring->buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
}
/**
 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_clean_tx_ring(&adapter->tx_ring[i]);
}
/**
 * igb_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void igb_free_rx_resources(struct igb_ring *rx_ring)
{
	igb_clean_rx_ring(rx_ring);

	vfree(rx_ring->buffer_info);
	rx_ring->buffer_info = NULL;

	/* if not set, then don't free */
	if (!rx_ring->desc)
		return;

	pci_free_consistent(rx_ring->pdev, rx_ring->size,
			    rx_ring->desc, rx_ring->dma);

	rx_ring->desc = NULL;
}
/**
 * igb_free_all_rx_resources - Free Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_free_rx_resources(&adapter->rx_ring[i]);
}
/**
 * igb_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
	struct igb_buffer *buffer_info;
	unsigned long size;
	unsigned int i;

	if (!rx_ring->buffer_info)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		buffer_info = &rx_ring->buffer_info[i];
		if (buffer_info->dma) {
			pci_unmap_single(rx_ring->pdev,
					 buffer_info->dma,
					 rx_ring->rx_buffer_len,
					 PCI_DMA_FROMDEVICE);
			buffer_info->dma = 0;
		}

		if (buffer_info->skb) {
			dev_kfree_skb(buffer_info->skb);
			buffer_info->skb = NULL;
		}
		if (buffer_info->page_dma) {
			pci_unmap_page(rx_ring->pdev,
				       buffer_info->page_dma,
				       PAGE_SIZE / 2,
				       PCI_DMA_FROMDEVICE);
			buffer_info->page_dma = 0;
		}
		if (buffer_info->page) {
			put_page(buffer_info->page);
			buffer_info->page = NULL;
			buffer_info->page_offset = 0;
		}
	}

	size = sizeof(struct igb_buffer) * rx_ring->count;
	memset(rx_ring->buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
/**
 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_clean_rx_ring(&adapter->rx_ring[i]);
}
/**
 * igb_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

	/* set the correct pool for the new PF MAC address in entry 0 */
	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
			 adapter->vfs_allocated_count);

	return 0;
}
/**
 * igb_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *                0 on no addresses written
 *                X on writing X addresses to MTA
 **/
static int igb_write_mc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct dev_mc_list *mc_ptr = netdev->mc_list;
	u8  *mta_list;
	u32 vmolr = 0;
	int i;

	if (!netdev->mc_count) {
		/* nothing to program, so clear mc list */
		igb_update_mc_addr_list(hw, NULL, 0);
		igb_restore_vf_multicasts(adapter);
		return 0;
	}

	mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
	if (!mta_list)
		return -ENOMEM;

	/* set vmolr receive overflow multicast bit */
	vmolr |= E1000_VMOLR_ROMPE;

	/* The shared function expects a packed array of only addresses. */
	mc_ptr = netdev->mc_list;

	for (i = 0; i < netdev->mc_count; i++) {
		if (!mc_ptr)
			break;
		memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
		mc_ptr = mc_ptr->next;
	}
	igb_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev->mc_count;
}
/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.
 * Returns: -ENOMEM on failure/insufficient address space
 *                0 on no addresses written
 *                X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
	int count = 0;

	/* return ENOMEM indicating insufficient memory for addresses */
	if (netdev->uc.count > rar_entries)
		return -ENOMEM;

	if (netdev->uc.count && rar_entries) {
		struct netdev_hw_addr *ha;
		list_for_each_entry(ha, &netdev->uc.list, list) {
			if (!rar_entries)
				break;
			igb_rar_set_qsel(adapter, ha->addr,
					 rar_entries--,
					 vfn);
			count++;
		}
	}
	/* write the addresses in reverse order to avoid write combining */
	for (; rar_entries > 0 ; rar_entries--) {
		wr32(E1000_RAH(rar_entries), 0);
		wr32(E1000_RAL(rar_entries), 0);
	}
	wrfl();

	return count;
}
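/*
 * RAR budgeting (illustrative): entry 0 holds the PF MAC address and the
 * top vfs_allocated_count entries are reserved for VF MAC addresses, which
 * is why only rar_entry_count - (vfn + 1) slots are usable here. E.g.
 * assuming a hypothetical 24-entry RAR table with 7 VFs, 16 slots remain
 * for secondary unicast addresses before unicast promiscuous mode is the
 * only fallback.
 */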
/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated.  This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	u32 rctl, vmolr = 0;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	rctl = rd32(E1000_RCTL);

	/* clear the affected bits */
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

	if (netdev->flags & IFF_PROMISC) {
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= E1000_RCTL_MPE;
			vmolr |= E1000_VMOLR_MPME;
		} else {
			/*
			 * Write addresses to the MTA, if the attempt fails
			 * then we should just turn on promiscuous mode so
			 * that we can at least receive multicast traffic
			 */
			count = igb_write_mc_addr_list(netdev);
			if (count < 0) {
				rctl |= E1000_RCTL_MPE;
				vmolr |= E1000_VMOLR_MPME;
			} else if (count) {
				vmolr |= E1000_VMOLR_ROMPE;
			}
		}
		/*
		 * Write addresses to available RAR registers, if there is not
		 * sufficient space to store all the addresses then enable
		 * unicast promiscuous mode
		 */
		count = igb_write_uc_addr_list(netdev);
		if (count < 0) {
			rctl |= E1000_RCTL_UPE;
			vmolr |= E1000_VMOLR_ROPE;
		}
		rctl |= E1000_RCTL_VFE;
	}
	wr32(E1000_RCTL, rctl);

	/*
	 * In order to support SR-IOV and eventually VMDq it is necessary to set
	 * the VMOLR to enable the appropriate modes.  Without this workaround
	 * we will have issues with VLAN tag stripping not being done for frames
	 * that are only arriving because we are the default pool
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr |= rd32(E1000_VMOLR(vfn)) &
		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
	wr32(E1000_VMOLR(vfn), vmolr);
	igb_restore_vf_multicasts(adapter);
}
/* Need to wait a few seconds after link up to get diagnostic information from
 * the phy */
static void igb_update_phy_info(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *) data;
	igb_get_phy_info(&adapter->hw);
}
/**
 * igb_has_link - check shared code for link and determine up/down
 * @adapter: pointer to driver private info
 **/
static bool igb_has_link(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	bool link_active = false;
	s32 ret_val = 0;

	/* get_link_status is set on LSC (link status) interrupt or
	 * rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link
	 * for copper adapters ONLY
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			ret_val = hw->mac.ops.check_for_link(hw);
			link_active = !hw->mac.get_link_status;
		} else {
			link_active = true;
		}
		break;
	case e1000_media_type_internal_serdes:
		ret_val = hw->mac.ops.check_for_link(hw);
		link_active = hw->mac.serdes_has_link;
		break;
	case e1000_media_type_unknown:
	default:
		break;
	}

	return link_active;
}
/**
 * igb_watchdog - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
static void igb_watchdog(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	/* Do the rest outside of interrupt context */
	schedule_work(&adapter->watchdog_task);
}
static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
						   struct igb_adapter,
						   watchdog_task);
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct igb_ring *tx_ring = adapter->tx_ring;
	u32 link;
	int i;

	link = igb_has_link(adapter);
	if ((netif_carrier_ok(netdev)) && link)
		goto link_up;

	if (link) {
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;
			hw->mac.ops.get_speed_and_duplex(&adapter->hw,
							 &adapter->link_speed,
							 &adapter->link_duplex);

			ctrl = rd32(E1000_CTRL);
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
				 "Flow Control: %s\n",
			         netdev->name,
				 adapter->link_speed,
				 adapter->link_duplex == FULL_DUPLEX ?
				 "Full Duplex" : "Half Duplex",
				 ((ctrl & E1000_CTRL_TFCE) && (ctrl &
				 E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
				 E1000_CTRL_RFCE) ? "RX" : ((ctrl &
				 E1000_CTRL_TFCE) ? "TX" : "None")));

			/* tweak tx_queue_len according to speed/duplex and
			 * adjust the timeout factor */
			netdev->tx_queue_len = adapter->tx_queue_len;
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				netdev->tx_queue_len = 10;
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				netdev->tx_queue_len = 100;
				/* maybe add some timeout factor ? */
				break;
			}

			netif_carrier_on(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Down\n",
			       netdev->name);
			netif_carrier_off(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	}

link_up:
	igb_update_stats(adapter);

	hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
	adapter->tpt_old = adapter->stats.tpt;
	hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
	adapter->colc_old = adapter->stats.colc;

	adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
	adapter->gorc_old = adapter->stats.gorc;
	adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
	adapter->gotc_old = adapter->stats.gotc;

	igb_update_adaptive(&adapter->hw);

	if (!netif_carrier_ok(netdev)) {
		if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context). */
			adapter->tx_timeout_count++;
			schedule_work(&adapter->reset_task);
			/* return immediately since reset is imminent */
			return;
		}
	}

	/* Force detection of hung controller every watchdog period */
	for (i = 0; i < adapter->num_tx_queues; i++)
		adapter->tx_ring[i].detect_tx_hung = true;

	/* Cause software interrupt to ensure rx ring is cleaned */
	if (adapter->msix_entries) {
		u32 eics = 0;
		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			eics |= q_vector->eims_value;
		}
		wr32(E1000_EICS, eics);
	} else {
		wr32(E1000_ICS, E1000_ICS_RXDMT0);
	}

	/* Reset the timer */
	if (!test_bit(__IGB_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer,
			  round_jiffies(jiffies + 2 * HZ));
}
enum latency_range {
	lowest_latency = 0,
	low_latency = 1,
	bulk_latency = 2,
	latency_invalid = 255
};
/**
 * igb_update_ring_itr - update the dynamic ITR value based on packet size
 *
 * Stores a new ITR value based strictly on packet size.  This
 * algorithm is less sophisticated than that used in igb_update_itr,
 * due to the difficulty of synchronizing statistics across multiple
 * receive rings.  The divisors and thresholds used by this function
 * were determined based on theoretical maximum wire speed and testing
 * data, in order to minimize response time while increasing bulk
 * throughput.
 * This functionality is controlled by the InterruptThrottleRate module
 * parameter (see igb_param.c)
 * NOTE:  This function is called only when operating in a multiqueue
 *        receive environment.
 * @q_vector: pointer to q_vector
 **/
static void igb_update_ring_itr(struct igb_q_vector *q_vector)
{
	int new_val = q_vector->itr_val;
	int avg_wire_size = 0;
	struct igb_adapter *adapter = q_vector->adapter;

	/* For non-gigabit speeds, just fix the interrupt rate at 4000
	 * ints/sec - ITR timer value of 120 ticks.
	 */
	if (adapter->link_speed != SPEED_1000) {
		new_val = 976;
		goto set_itr_val;
	}

	if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
		struct igb_ring *ring = q_vector->rx_ring;
		avg_wire_size = ring->total_bytes / ring->total_packets;
	}

	if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
		struct igb_ring *ring = q_vector->tx_ring;
		avg_wire_size = max_t(u32, avg_wire_size,
				      (ring->total_bytes /
				       ring->total_packets));
	}

	/* if avg_wire_size isn't set no work was done */
	if (!avg_wire_size)
		goto clear_counts;

	/* Add 24 bytes to size to account for CRC, preamble, and gap */
	avg_wire_size += 24;

	/* Don't starve jumbo frames */
	avg_wire_size = min(avg_wire_size, 3000);

	/* Give a little boost to mid-size frames */
	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
		new_val = avg_wire_size / 3;
	else
		new_val = avg_wire_size / 2;

set_itr_val:
	if (new_val != q_vector->itr_val) {
		q_vector->itr_val = new_val;
		q_vector->set_itr = 1;
	}
clear_counts:
	if (q_vector->rx_ring) {
		q_vector->rx_ring->total_bytes = 0;
		q_vector->rx_ring->total_packets = 0;
	}
	if (q_vector->tx_ring) {
		q_vector->tx_ring->total_bytes = 0;
		q_vector->tx_ring->total_packets = 0;
	}
}
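/*
 * Worked example (illustrative): a stream of 60-byte ACKs averages
 * 60 + 24 = 84 bytes on the wire, giving new_val = 84 / 2 = 42 (a very
 * fast interrupt rate), while 1500-byte bulk frames give
 * (1500 + 24) / 2 = 762, i.e. roughly 18x fewer interrupts; mid-size
 * frames around 900 bytes get the divide-by-3 boost instead.
 */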
/**
 * igb_update_itr - update the dynamic ITR value based on statistics
 *      Stores a new ITR value based on packets and byte
 *      counts during the last interrupt.  The advantage of per interrupt
 *      computation is faster updates and more accurate ITR for the current
 *      traffic pattern.  Constants in this function were computed
 *      based on theoretical maximum wire speed and thresholds were set based
 *      on testing data as well as attempting to minimize response time
 *      while increasing bulk throughput.
 *      this functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  These calculations are only valid when operating in a single-
 *             queue environment.
 * @adapter: pointer to adapter
 * @itr_setting: current q_vector->itr_val
 * @packets: the number of packets during this measurement interval
 * @bytes: the number of bytes during this measurement interval
 **/
static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
				   int packets, int bytes)
{
	unsigned int retval = itr_setting;

	if (packets == 0)
		goto update_itr_done;

	switch (itr_setting) {
	case lowest_latency:
		/* handle TSO and jumbo frames */
		if (bytes/packets > 8000)
			retval = bulk_latency;
		else if ((packets < 5) && (bytes > 512))
			retval = low_latency;
		break;
	case low_latency:  /* 50 usec aka 20000 ints/s */
		if (bytes > 10000) {
			/* this if handles the TSO accounting */
			if (bytes/packets > 8000) {
				retval = bulk_latency;
			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
				retval = bulk_latency;
			} else if ((packets > 35)) {
				retval = lowest_latency;
			}
		} else if (bytes/packets > 2000) {
			retval = bulk_latency;
		} else if (packets <= 2 && bytes < 512) {
			retval = lowest_latency;
		}
		break;
	case bulk_latency: /* 250 usec aka 4000 ints/s */
		if (bytes > 25000) {
			if (packets > 35)
				retval = low_latency;
		} else if (bytes < 1500) {
			retval = low_latency;
		}
		break;
	}

update_itr_done:
	return retval;
}
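/*
 * Illustrative transition through the state machine above: at low_latency,
 * an interval carrying 40 packets totalling 20000 bytes (bytes > 10000,
 * bytes/packets = 500, packets > 35) steps down to lowest_latency, while
 * the same byte count in 2 packets (bytes/packets = 10000 > 8000) is
 * treated as TSO/jumbo traffic and steps up to bulk_latency instead.
 */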
static void igb_set_itr(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	u16 current_itr;
	u32 new_itr = q_vector->itr_val;

	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
	if (adapter->link_speed != SPEED_1000) {
		current_itr = 0;
		new_itr = 4000;
		goto set_itr_now;
	}

	adapter->rx_itr = igb_update_itr(adapter,
				    adapter->rx_itr,
				    adapter->rx_ring->total_packets,
				    adapter->rx_ring->total_bytes);

	adapter->tx_itr = igb_update_itr(adapter,
				    adapter->tx_itr,
				    adapter->tx_ring->total_packets,
				    adapter->tx_ring->total_bytes);
	current_itr = max(adapter->rx_itr, adapter->tx_itr);

	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
		current_itr = low_latency;

	switch (current_itr) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
		new_itr = 56;  /* aka 70,000 ints/sec */
		break;
	case low_latency:
		new_itr = 196; /* aka 20,000 ints/sec */
		break;
	case bulk_latency:
		new_itr = 980; /* aka 4,000 ints/sec */
		break;
	default:
		break;
	}

	adapter->rx_ring->total_bytes = 0;
	adapter->rx_ring->total_packets = 0;
	adapter->tx_ring->total_bytes = 0;
	adapter->tx_ring->total_packets = 0;

set_itr_now:
	if (new_itr != q_vector->itr_val) {
		/* this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
		 * increasing */
		new_itr = new_itr > q_vector->itr_val ?
			  max((new_itr * q_vector->itr_val) /
			      (new_itr + (q_vector->itr_val >> 2)),
			      new_itr) :
			  new_itr;
		/* Don't write the value here; it resets the adapter's
		 * internal timer, and causes us to delay far longer than
		 * we should between interrupts.  Instead, we write the ITR
		 * value at the beginning of the next interrupt so the timing
		 * ends up being correct.
		 */
		q_vector->itr_val = new_itr;
		q_vector->set_itr = 1;
	}
}
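/*
 * Unit check (assuming the EITR interval field counts in 256 ns units,
 * which is consistent with the comments above): 56 * 256 ns ~= 14.3 us
 * between interrupts, i.e. ~70,000 ints/sec; 196 * 256 ns ~= 50 us, i.e.
 * ~20,000 ints/sec; and 980 * 256 ns ~= 251 us, i.e. ~4,000 ints/sec.
 */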
#define IGB_TX_FLAGS_CSUM		0x00000001
#define IGB_TX_FLAGS_VLAN		0x00000002
#define IGB_TX_FLAGS_TSO		0x00000004
#define IGB_TX_FLAGS_IPV4		0x00000008
#define IGB_TX_FLAGS_TSTAMP		0x00000010
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
static inline int igb_tso_adv(struct igb_ring *tx_ring,
			      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
{
	struct e1000_adv_tx_context_desc *context_desc;
	unsigned int i;
	int err;
	struct igb_buffer *buffer_info;
	u32 info = 0, tu_cmd = 0;
	u32 mss_l4len_idx, l4len;

	if (skb_header_cloned(skb)) {
		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (err)
			return err;
	}

	l4len = tcp_hdrlen(skb);
	*hdr_len += l4len;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
							 iph->daddr, 0,
							 IPPROTO_TCP,
							 0);
	} else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						       &ipv6_hdr(skb)->daddr,
						       0, IPPROTO_TCP, 0);
	}

	i = tx_ring->next_to_use;

	buffer_info = &tx_ring->buffer_info[i];
	context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
	/* VLAN MACLEN IPLEN */
	if (tx_flags & IGB_TX_FLAGS_VLAN)
		info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
	info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
	*hdr_len += skb_network_offset(skb);
	info |= skb_network_header_len(skb);
	*hdr_len += skb_network_header_len(skb);
	context_desc->vlan_macip_lens = cpu_to_le32(info);

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);

	if (skb->protocol == htons(ETH_P_IP))
		tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
	tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;

	context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);

	/* MSS L4LEN IDX */
	mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);

	/* For 82575, context index must be unique per ring. */
	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->seqnum_seed = 0;

	buffer_info->time_stamp = jiffies;
	buffer_info->next_to_watch = i;
	buffer_info->dma = 0;
	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	return true;
}
static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
				   struct sk_buff *skb, u32 tx_flags)
{
	struct e1000_adv_tx_context_desc *context_desc;
	struct pci_dev *pdev = tx_ring->pdev;
	struct igb_buffer *buffer_info;
	u32 info = 0, tu_cmd = 0;
	unsigned int i;

	if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
	    (tx_flags & IGB_TX_FLAGS_VLAN)) {
		i = tx_ring->next_to_use;
		buffer_info = &tx_ring->buffer_info[i];
		context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);

		if (tx_flags & IGB_TX_FLAGS_VLAN)
			info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
		info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			info |= skb_network_header_len(skb);

		context_desc->vlan_macip_lens = cpu_to_le32(info);

		tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			__be16 protocol;

			if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
				const struct vlan_ethhdr *vhdr =
				          (const struct vlan_ethhdr*)skb->data;

				protocol = vhdr->h_vlan_encapsulated_proto;
			} else {
				protocol = skb->protocol;
			}

			switch (protocol) {
			case cpu_to_be16(ETH_P_IP):
				tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
				else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
				break;
			case cpu_to_be16(ETH_P_IPV6):
				/* XXX what about other V6 headers?? */
				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
				else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
				break;
			default:
				if (unlikely(net_ratelimit()))
					dev_warn(&pdev->dev,
					    "partial checksum but proto=%x!\n",
					    skb->protocol);
				break;
			}
		}

		context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
		context_desc->seqnum_seed = 0;
		if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
			context_desc->mss_l4len_idx =
				cpu_to_le32(tx_ring->reg_idx << 4);

		buffer_info->time_stamp = jiffies;
		buffer_info->next_to_watch = i;
		buffer_info->dma = 0;

		i++;
		if (i == tx_ring->count)
			i = 0;
		tx_ring->next_to_use = i;

		return true;
	}
	return false;
}
#define IGB_MAX_TXD_PWR	16
#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
				 unsigned int first)
{
	struct igb_buffer *buffer_info;
	struct pci_dev *pdev = tx_ring->pdev;
	unsigned int len = skb_headlen(skb);
	unsigned int count = 0, i;
	unsigned int f;
	dma_addr_t *map;

	i = tx_ring->next_to_use;

	if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
		dev_err(&pdev->dev, "TX DMA map failed\n");
		return 0;
	}

	map = skb_shinfo(skb)->dma_maps;

	buffer_info = &tx_ring->buffer_info[i];
	BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
	buffer_info->length = len;
	/* set time_stamp *before* dma to help avoid a possible race */
	buffer_info->time_stamp = jiffies;
	buffer_info->next_to_watch = i;
	buffer_info->dma = skb_shinfo(skb)->dma_head;

	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
		struct skb_frag_struct *frag;

		i++;
		if (i == tx_ring->count)
			i = 0;

		frag = &skb_shinfo(skb)->frags[f];
		len = frag->size;

		buffer_info = &tx_ring->buffer_info[i];
		BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
		buffer_info->length = len;
		buffer_info->time_stamp = jiffies;
		buffer_info->next_to_watch = i;
		buffer_info->dma = map[count];
		count++;
	}

	tx_ring->buffer_info[i].skb = skb;
	tx_ring->buffer_info[first].next_to_watch = i;

	return ++count;
}
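/*
 * Contract assumed by the caller: the return value is the total number of
 * descriptors consumed (one for the head buffer plus one per page
 * fragment), and 0 signals a DMA mapping failure so that
 * igb_xmit_frame_ring_adv() can rewind next_to_use and drop the skb
 * without ever touching the hardware tail pointer.
 */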
static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
				    int tx_flags, int count, u32 paylen,
				    u8 hdr_len)
{
	union e1000_adv_tx_desc *tx_desc = NULL;
	struct igb_buffer *buffer_info;
	u32 olinfo_status = 0, cmd_type_len;
	unsigned int i;

	cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
			E1000_ADVTXD_DCMD_DEXT);

	if (tx_flags & IGB_TX_FLAGS_VLAN)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;

	if (tx_flags & IGB_TX_FLAGS_TSO) {
		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;

		/* insert tcp checksum */
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

		/* insert ip checksum */
		if (tx_flags & IGB_TX_FLAGS_IPV4)
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;

	} else if (tx_flags & IGB_TX_FLAGS_CSUM) {
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	}

	if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
	    (tx_flags & (IGB_TX_FLAGS_CSUM |
			 IGB_TX_FLAGS_TSO |
			 IGB_TX_FLAGS_VLAN)))
		olinfo_status |= tx_ring->reg_idx << 4;

	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);

	i = tx_ring->next_to_use;
	while (count--) {
		buffer_info = &tx_ring->buffer_info[i];
		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
		tx_desc->read.cmd_type_len =
			cpu_to_le32(cmd_type_len | buffer_info->length);
		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
		i++;
		if (i == tx_ring->count)
			i = 0;
	}

	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64). */
	wmb();

	tx_ring->next_to_use = i;
	writel(i, tx_ring->tail);
	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();
}
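/*
 * Ordering note: the wmb() above makes all descriptor writes visible to
 * the device before the writel() that advances the tail pointer; once the
 * tail write reaches the hardware it may begin DMA on the new descriptors
 * immediately, so nothing after that point may modify them.
 */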
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it. */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available. */
	if (igb_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);
	tx_ring->tx_stats.restart_queue++;
	return 0;
}
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) >= size)
		return 0;
	return __igb_maybe_stop_tx(tx_ring, size);
}
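/*
 * The stop-then-recheck sequence above closes a race: if the cleanup path
 * frees descriptors between the space check and netif_stop_subqueue(),
 * the queue could otherwise stall forever with room actually available.
 * The smp_mb() in __igb_maybe_stop_tx() pairs with the wake side so the
 * recheck observes the freshly cleaned ring state.
 */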
netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
				    struct igb_ring *tx_ring)
{
	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
	unsigned int first;
	unsigned int tx_flags = 0;
	u8 hdr_len = 0;
	int tso = 0;
	int count;
	union skb_shared_tx *shtx = skb_tx(skb);

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}

	if (unlikely(shtx->hardware)) {
		shtx->in_progress = 1;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;
	}

	if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
	}

	if (skb->protocol == htons(ETH_P_IP))
		tx_flags |= IGB_TX_FLAGS_IPV4;

	first = tx_ring->next_to_use;
	if (skb_is_gso(skb)) {
		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
		if (tso < 0) {
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}

	if (tso)
		tx_flags |= IGB_TX_FLAGS_TSO;
	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
		 (skb->ip_summed == CHECKSUM_PARTIAL))
		tx_flags |= IGB_TX_FLAGS_CSUM;

	/*
	 * count reflects descriptors mapped, if 0 then mapping error
	 * has occurred and we need to rewind the descriptor queue
	 */
	count = igb_tx_map_adv(tx_ring, skb, first);
	if (!count) {
		dev_kfree_skb_any(skb);
		tx_ring->buffer_info[first].time_stamp = 0;
		tx_ring->next_to_use = first;
		return NETDEV_TX_OK;
	}

	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;
}
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct igb_ring *tx_ring;
	int r_idx = 0;

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
	tx_ring = adapter->multi_tx_table[r_idx];

	/* This goes back to the question of how to logically map a tx queue
	 * to a flow.  Right now, performance is impacted slightly negatively
	 * if using multiple tx queues.  If the stack breaks away from a
	 * single qdisc implementation, we can look at this again. */
	return igb_xmit_frame_ring_adv(skb, tx_ring);
}
/**
 * igb_tx_timeout - Respond to a Tx Hang
 * @netdev: network interface device structure
 **/
static void igb_tx_timeout(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* Do the reset outside of interrupt context */
	adapter->tx_timeout_count++;

	schedule_work(&adapter->reset_task);
	wr32(E1000_EICS,
	     (adapter->eims_enable_mask & ~adapter->eims_other));
}
static void igb_reset_task(struct work_struct *work)
{
	struct igb_adapter *adapter;
	adapter = container_of(work, struct igb_adapter, reset_task);

	igb_reinit_locked(adapter);
}
/**
 * igb_get_stats - Get System Network Statistics
 * @netdev: network interface device structure
 *
 * Returns the address of the device statistics structure.
 * The statistics are actually updated from the timer callback.
 **/
static struct net_device_stats *igb_get_stats(struct net_device *netdev)
{
	/* only return the current stats */
	return &netdev->stats;
}
/**
 * igb_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
	u32 rx_buffer_len, i;

	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
	    (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
		return -EINVAL;
	}

	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
		return -EINVAL;
	}

	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);

	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;
	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
	 * means we reserve 2 more, this pushes us to allocate from the next
	 * larger slab size.
	 * i.e. RXBUFFER_2048 --> size-4096 slab
	 */

	if (max_frame <= IGB_RXBUFFER_1024)
		rx_buffer_len = IGB_RXBUFFER_1024;
	else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
		rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
	else
		rx_buffer_len = IGB_RXBUFFER_128;

	if (netif_running(netdev))
		igb_down(adapter);

	dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	for (i = 0; i < adapter->num_rx_queues; i++)
		adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;

	if (netif_running(netdev))
		igb_up(adapter);
	else
		igb_reset(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);

	return 0;
}
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 **/
void igb_update_stats(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 rnbc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
		netdev->stats.rx_fifo_errors += rqdpc_tmp;
		bytes += adapter->rx_ring[i].rx_stats.bytes;
		packets += adapter->rx_ring[i].rx_stats.packets;
	}

	netdev->stats.rx_bytes = bytes;
	netdev->stats.rx_packets = packets;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		bytes += adapter->tx_ring[i].tx_stats.bytes;
		packets += adapter->tx_ring[i].tx_stats.packets;
	}
	netdev->stats.tx_bytes = bytes;
	netdev->stats.tx_packets = packets;

	/* read stats registers */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	adapter->stats.mpc += rd32(E1000_MPC);
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	rnbc = rd32(E1000_RNBC);
	adapter->stats.rnbc += rnbc;
	netdev->stats.rx_fifo_errors += rnbc;
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	/* used for adaptive IFS */
	hw->mac.tx_packet_delta = rd32(E1000_TPT);
	adapter->stats.tpt += hw->mac.tx_packet_delta;
	hw->mac.collision_delta = rd32(E1000_COLC);
	adapter->stats.colc += hw->mac.collision_delta;

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	adapter->stats.rxerrc += rd32(E1000_RXERRC);
	adapter->stats.tncrs += rd32(E1000_TNCRS);
	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	netdev->stats.multicast = adapter->stats.mprc;
	netdev->stats.collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	netdev->stats.rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	netdev->stats.rx_length_errors = adapter->stats.ruc +
					 adapter->stats.roc;
	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
	netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
	netdev->stats.rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	netdev->stats.tx_errors = adapter->stats.ecol +
				  adapter->stats.latecol;
	netdev->stats.tx_aborted_errors = adapter->stats.ecol;
	netdev->stats.tx_window_errors = adapter->stats.latecol;
	netdev->stats.tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
}
static irqreturn_t igb_msix_other(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
	wr32(E1000_EIMS, adapter->eims_other);

	return IRQ_HANDLED;
}
static void igb_write_itr(struct igb_q_vector *q_vector)
{
	u32 itr_val = q_vector->itr_val & 0x7FFC;

	if (!q_vector->set_itr)
		return;

	if (!itr_val)
		itr_val = 0x4;

	if (q_vector->itr_shift)
		itr_val |= itr_val << q_vector->itr_shift;
	else
		itr_val |= 0x8000000;

	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;
}
static irqreturn_t igb_msix_ring(int irq, void *data)
{
	struct igb_q_vector *q_vector = data;

	/* Write the ITR value calculated from the previous interrupt. */
	igb_write_itr(q_vector);

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)
		goto out_no_update;

	if (q_vector->tx_ring) {
		int q = q_vector->tx_ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_TXCTRL_CPUID_SHIFT;
		}
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
	}
	if (q_vector->rx_ring) {
		int q = q_vector->rx_ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_RXCTRL_CPUID_SHIFT;
		}
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	}
	q_vector->cpu = cpu;
out_no_update:
	put_cpu();
}
static void igb_setup_dca(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
		return;

	/* Always use CB2 mode, difference is masked in the CB driver. */
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		q_vector->cpu = -1;
		igb_update_dca(q_vector);
	}
}
static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
			break;
		/* Always use CB2 mode, difference is masked
		 * in the CB driver. */
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&adapter->pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&adapter->pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
		}
		break;
	}

	return 0;
}

static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p)
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
					 __igb_notify_dca);

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
}
#endif /* CONFIG_IGB_DCA */
static void igb_ping_all_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ping;
	int i;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);
	}
}
static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
			    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
	} else {
		/*
		 * if we have hashes and we are clearing a multicast promisc
		 * flag we need to write the hashes to the MTA as this step
		 * was previously skipped
		 */
		if (vf_data->num_vf_mc_hashes > 30) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			int j;
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
	}

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	if (*msgbuf & E1000_VT_MSGINFO_MASK)
		return -EINVAL;

	return 0;
}
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				 u32 *msgbuf, u32 vf)
{
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multi cast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */
	if (n > 30)
		n = 30;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}
static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;
	int i, j;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		u32 vmolr = rd32(E1000_VMOLR(i));
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

		vf_data = &adapter->vf_data[i];

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
		wr32(E1000_VMOLR(i), vmolr);
	}
}
static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 pool_mask, reg, vid;
	int i;

	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));

		/* remove the vf from the pool */
		reg &= ~pool_mask;

		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			/* read the vid out before clearing the entry */
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);
			reg = 0;
		}

		wr32(E1000_VLVF(i), reg);
	}

	adapter->vf_data[vf].vlans_enabled = 0;
}
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
			return 0;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
			return 0;
		}
	}
	return -1;
}
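
/*
 * Worked example for the pool arithmetic above (assuming the usual
 * E1000_VLVF_POOLSEL_SHIFT of 12 from e1000_defines.h): VF 3 owns bit
 * 1 << (12 + 3) == 0x8000 of a VLVF entry, the PF owns the bit at
 * vfs_allocated_count, the VID lives in the low bits under
 * E1000_VLVF_VLANID_MASK, and E1000_VLVF_VLANID_ENABLE marks the
 * entry as in use.
 */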
static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);

	return igb_vlvf_set(adapter, vid, add, vf);
}
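
/*
 * Message layout illustration (derived from the decode above): a VF
 * joining VLAN 100 sends msgbuf[0] = E1000_VF_SET_VLAN with a non-zero
 * "add" value in the E1000_VT_MSGINFO field and msgbuf[1] = 100; the
 * same message with add == 0 asks for that VID to be removed.
 */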
static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
{
	/* clear all flags */
	adapter->vf_data[vf].flags = 0;
	adapter->vf_data[vf].last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf);

	/* reset vlans for device */
	igb_clear_vf_vfta(adapter, vf);

	/* reset multicast table array for vf */
	adapter->vf_data[vf].num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
}
static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
{
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;

	/* generate a new mac address as we were hotplug removed/added */
	random_ether_addr(vf_mac);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);
}
static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	u32 reg, msgbuf[3];
	u8 *addr = (u8 *)(&msgbuf[1]);

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
	memcpy(addr, vf_mac, 6);
	igb_write_mbx(hw, msgbuf, 3, vf);
}
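
/*
 * Reply layout used above: msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK
 * and the six MAC bytes occupy msgbuf[1..2], so a freshly reset VF can learn
 * its (possibly regenerated) station address from the ack itself.
 */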
static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
{
	unsigned char *addr = (char *)&msg[1];
	int err = -1;

	if (is_valid_ether_addr(addr))
		err = igb_set_vf_mac(adapter, vf, addr);

	return err;
}
static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u32 msg = E1000_VT_MSGTYPE_NACK;

	/* if device isn't clear to send it shouldn't be reading either */
	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
		igb_write_mbx(hw, &msg, 1, vf);
		vf_data->last_nack = jiffies;
	}
}
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval)
		dev_err(&pdev->dev, "Error receiving message from VF\n");

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */

	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		msgbuf[0] = E1000_VT_MSGTYPE_NACK;
		if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
			igb_write_mbx(hw, msgbuf, 1, vf);
			vf_data->last_nack = jiffies;
		}
		return;
	}

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;

	igb_write_mbx(hw, msgbuf, 1, vf);
}
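
/*
 * Result encoding, as built just above: every reply keeps the original
 * opcode in the low bits, OR's in E1000_VT_MSGTYPE_ACK (handler returned 0)
 * or E1000_VT_MSGTYPE_NACK (non-zero), and adds E1000_VT_MSGTYPE_CTS so
 * the VF knows it remains clear to send further requests.
 */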
static void igb_msg_task(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vf;

	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
		/* process any reset requests */
		if (!igb_check_for_rst(hw, vf))
			igb_vf_reset_event(adapter, vf);

		/* process any messages pending */
		if (!igb_check_for_msg(hw, vf))
			igb_rcv_msg_from_vf(adapter, vf);

		/* process any acks */
		if (!igb_check_for_ack(hw, vf))
			igb_rcv_ack_from_vf(adapter, vf);
	}
}
/**
 * igb_set_uta - Set unicast filter table address
 * @adapter: board private structure
 *
 * The unicast table address is a register array of 32-bit registers.
 * The table is meant to be used in a way similar to how the MTA is used
 * however due to certain limitations in the hardware it is necessary to
 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
 **/
static void igb_set_uta(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* The UTA table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return;

	for (i = 0; i < hw->mac.uta_reg_count; i++)
		array_wr32(E1000_UTA, i, ~0);
}
/**
 * igb_intr_msi - Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr_msi(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* read ICR disables interrupts using IAM */
	u32 icr = rd32(E1000_ICR);

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
/**
 * igb_intr - Legacy Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
	 * need for the IMC write */
	u32 icr = rd32(E1000_ICR);
	if (!icr)
		return IRQ_NONE;  /* Not our interrupt */

	igb_write_itr(q_vector);

	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
	 * not set, then the adapter didn't send an interrupt */
	if (!(icr & E1000_ICR_INT_ASSERTED))
		return IRQ_NONE;

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;

	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
		if (!adapter->msix_entries)
			igb_set_itr(adapter);
		else
			igb_update_ring_itr(q_vector);
	}

	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->msix_entries)
			wr32(E1000_EIMS, q_vector->eims_value);
		else
			igb_irq_enable(adapter);
	}
}
/**
 * igb_poll - NAPI Rx polling callback
 * @napi: napi polling structure
 * @budget: count of how many packets we should handle
 **/
static int igb_poll(struct napi_struct *napi, int budget)
{
	struct igb_q_vector *q_vector = container_of(napi,
	                                             struct igb_q_vector,
	                                             napi);
	int tx_clean_complete = 1, work_done = 0;

#ifdef CONFIG_IGB_DCA
	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);
#endif
	if (q_vector->tx_ring)
		tx_clean_complete = igb_clean_tx_irq(q_vector);

	if (q_vector->rx_ring)
		igb_clean_rx_irq_adv(q_vector, &work_done, budget);

	if (!tx_clean_complete)
		work_done = budget;

	/* If not enough Rx work done, exit the polling mode */
	if (work_done < budget) {
		napi_complete(napi);
		igb_ring_irq_enable(q_vector);
	}

	return work_done;
}
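
/*
 * NAPI contract as used here: returning a value less than @budget (after
 * napi_complete() and re-arming the interrupt via igb_ring_irq_enable())
 * removes this vector from the poll list; consuming the whole budget, or
 * leaving tx work unfinished, keeps it scheduled for another pass.
 */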
/**
 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
 * @adapter: board private structure
 * @shhwtstamps: timestamp structure to update
 * @regval: unsigned 64bit system time value.
 *
 * We need to convert the system time value stored in the RX/TXSTMP registers
 * into a hwtstamp which can be used by the upper level timestamping functions
 */
static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
                                   struct skb_shared_hwtstamps *shhwtstamps,
                                   u64 regval)
{
	u64 ns;

	ns = timecounter_cyc2time(&adapter->clock, regval);
	timecompare_update(&adapter->compare, ns);
	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
	shhwtstamps->hwtstamp = ns_to_ktime(ns);
	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
}
/**
 * igb_tx_hwtstamp - utility function which checks for TX time stamp
 * @q_vector: pointer to q_vector containing needed info
 * @skb: packet that was just sent
 *
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we only
 * allow one such packet into the queue.
 */
static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
{
	struct igb_adapter *adapter = q_vector->adapter;
	union skb_shared_tx *shtx = skb_tx(skb);
	struct e1000_hw *hw = &adapter->hw;
	struct skb_shared_hwtstamps shhwtstamps;
	u64 regval;

	/* if skb does not support hw timestamp or TX stamp not valid exit */
	if (likely(!shtx->hardware) ||
	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
		return;

	regval = rd32(E1000_TXSTMPL);
	regval |= (u64)rd32(E1000_TXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
	skb_tstamp_tx(skb, &shhwtstamps);
}
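
/*
 * The 64-bit SYSTIM sample above is assembled from two 32-bit reads, low
 * word first: e.g. TXSTMPL == 0x90000000 and TXSTMPH == 0x00000001 give
 * regval == 0x190000000, which igb_systim_to_hwtstamp() then converts
 * from cycle counts to nanoseconds.
 */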
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 * returns true if ring is completely cleaned
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx_ring;
	struct net_device *netdev = tx_ring->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_buffer *buffer_info;
	struct sk_buff *skb;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i, eop, count = 0;
	bool cleaned = false;

	i = tx_ring->next_to_clean;
	eop = tx_ring->buffer_info[i].next_to_watch;
	eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);

	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
	       (count < tx_ring->count)) {
		for (cleaned = false; !cleaned; count++) {
			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			cleaned = (i == eop);
			skb = buffer_info->skb;

			if (skb) {
				unsigned int segs, bytecount;
				/* gso_segs is currently only valid for tcp */
				segs = skb_shinfo(skb)->gso_segs ?: 1;
				/* multiply data chunks by size of headers */
				bytecount = ((segs - 1) * skb_headlen(skb)) +
				            skb->len;
				total_packets += segs;
				total_bytes += bytecount;

				igb_tx_hwtstamp(q_vector, skb);
			}

			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
			tx_desc->wb.status = 0;

			i++;
			if (i == tx_ring->count)
				i = 0;
		}
		eop = tx_ring->buffer_info[i].next_to_watch;
		eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
	}

	tx_ring->next_to_clean = i;

	if (unlikely(count &&
	             netif_carrier_ok(netdev) &&
	             igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(netdev, tx_ring->queue_index);
			tx_ring->tx_stats.restart_queue++;
		}
	}

	if (tx_ring->detect_tx_hung) {
		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		tx_ring->detect_tx_hung = false;
		if (tx_ring->buffer_info[i].time_stamp &&
		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
		               (adapter->tx_timeout_factor * HZ))
		    && !(rd32(E1000_STATUS) &
		         E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(&tx_ring->pdev->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%x>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				readl(tx_ring->head),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_ring->buffer_info[eop].time_stamp,
				eop,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(netdev, tx_ring->queue_index);
		}
	}
	tx_ring->total_bytes += total_bytes;
	tx_ring->total_packets += total_packets;
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	return (count < tx_ring->count);
}
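
/*
 * Byte-accounting example for the TSO math above: a skb with
 * skb->len == 60000 and skb_headlen() == 100 sent as gso_segs == 41
 * counts as 41 packets and (41 - 1) * 100 + 60000 == 64000 bytes,
 * i.e. the wire total including the headers the hardware replicates
 * per segment, not just the skb length alone.
 */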
/**
 * igb_receive_skb - helper function to handle rx indications
 * @q_vector: structure containing interrupt and ring information
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void igb_receive_skb(struct igb_q_vector *q_vector,
                            struct sk_buff *skb,
                            u16 vlan_tag)
{
	struct igb_adapter *adapter = q_vector->adapter;

	if (vlan_tag)
		vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
		                 vlan_tag, skb);
	else
		napi_gro_receive(&q_vector->napi, skb);
}
static inline void igb_rx_checksum_adv(struct igb_ring *ring,
                                       u32 status_err, struct sk_buff *skb)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
	     (status_err & E1000_RXD_STAT_IXSM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (status_err &
	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
		/*
		 * work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets, (aka let the stack check the crc32c)
		 */
		if (!((skb->len == 60) &&
		      (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)))
			ring->rx_stats.csum_err++;

		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
}
static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
                                   struct sk_buff *skb)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u64 regval;

	/*
	 * If this bit is set, then the RX registers contain the time stamp. No
	 * other packet will be time stamped until we read these registers, so
	 * read the registers to make them available again. Because only one
	 * packet can be time stamped at a time, we know that the register
	 * values must belong to this one here and therefore we don't need to
	 * compare any of the additional attributes stored for it.
	 *
	 * If nothing went wrong, then it should have a skb_shared_tx that we
	 * can turn into a skb_shared_hwtstamps.
	 */
	if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
		return;
	if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
		return;

	regval = rd32(E1000_RXSTMPL);
	regval |= (u64)rd32(E1000_RXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
}
static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
                               union e1000_adv_rx_desc *rx_desc)
{
	/* HW will not DMA in data larger than the given buffer, even if it
	 * parses the (NFS, of course) header to be larger.  In that case, it
	 * fills the header buffer and spills the rest into the page.
	 */
	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
	if (hlen > rx_ring->rx_buffer_len)
		hlen = rx_ring->rx_buffer_len;
	return hlen;
}
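
/*
 * Decode example: for a raw hdr_info of (84 << E1000_RXDADV_HDRBUFLEN_SHIFT)
 * the expression above yields hlen == 84, and the clamp to rx_buffer_len
 * keeps a malformed descriptor from claiming more header bytes than were
 * actually DMA'd into the buffer.
 */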
static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
                                 int *work_done, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx_ring;
	struct net_device *netdev = rx_ring->netdev;
	struct pci_dev *pdev = rx_ring->pdev;
	union e1000_adv_rx_desc *rx_desc, *next_rxd;
	struct igb_buffer *buffer_info, *next_buffer;
	struct sk_buff *skb;
	bool cleaned = false;
	int cleaned_count = 0;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i;
	u32 staterr;
	u16 length;
	u16 vlan_tag;

	i = rx_ring->next_to_clean;
	buffer_info = &rx_ring->buffer_info[i];
	rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

	while (staterr & E1000_RXD_STAT_DD) {
		if (*work_done >= budget)
			break;
		(*work_done)++;

		skb = buffer_info->skb;
		prefetch(skb->data - NET_IP_ALIGN);
		buffer_info->skb = NULL;

		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
		prefetch(next_rxd);
		next_buffer = &rx_ring->buffer_info[i];

		length = le16_to_cpu(rx_desc->wb.upper.length);
		cleaned = true;
		cleaned_count++;

		if (buffer_info->dma) {
			pci_unmap_single(pdev, buffer_info->dma,
			                 rx_ring->rx_buffer_len,
			                 PCI_DMA_FROMDEVICE);
			buffer_info->dma = 0;
			if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
				skb_put(skb, length);
				goto send_up;
			}
			skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
		}

		if (length) {
			pci_unmap_page(pdev, buffer_info->page_dma,
			               PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
			buffer_info->page_dma = 0;

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
			                   buffer_info->page,
			                   buffer_info->page_offset,
			                   length);

			if (page_count(buffer_info->page) != 1)
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += length;
		}

		if (!(staterr & E1000_RXD_STAT_EOP)) {
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}
send_up:
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			dev_kfree_skb_irq(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, staterr, skb);
		total_bytes += skb->len;
		total_packets++;

		igb_rx_checksum_adv(rx_ring, staterr, skb);

		skb->protocol = eth_type_trans(skb, netdev);
		skb_record_rx_queue(skb, rx_ring->queue_index);

		vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
		            le16_to_cpu(rx_desc->wb.upper.vlan) : 0);

		igb_receive_skb(q_vector, skb, vlan_tag);

next_desc:
		rx_desc->wb.upper.status_error = 0;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
		buffer_info = next_buffer;
		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
	}

	rx_ring->next_to_clean = i;
	cleaned_count = igb_desc_unused(rx_ring);

	if (cleaned_count)
		igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);

	rx_ring->total_packets += total_packets;
	rx_ring->total_bytes += total_bytes;
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	return cleaned;
}
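
/*
 * Recap of the split-buffer receive path above: with a small rx_buffer_len
 * the hardware writes packet headers to buffer_info->dma (skb->data) and
 * any overflow to a half-page at buffer_info->page_dma, which is attached
 * to the skb as a page fragment; non-EOP descriptors chain their data onto
 * one skb until the EOP descriptor lets the frame be handed up the stack.
 */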
/**
 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
 * @adapter: address of board private structure
 **/
void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
{
	struct net_device *netdev = rx_ring->netdev;
	union e1000_adv_rx_desc *rx_desc;
	struct igb_buffer *buffer_info;
	struct sk_buff *skb;
	unsigned int i;
	int bufsz;

	i = rx_ring->next_to_use;
	buffer_info = &rx_ring->buffer_info[i];

	bufsz = rx_ring->rx_buffer_len;

	while (cleaned_count--) {
		rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);

		if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
			if (!buffer_info->page) {
				buffer_info->page = alloc_page(GFP_ATOMIC);
				if (!buffer_info->page) {
					rx_ring->rx_stats.alloc_failed++;
					goto no_buffers;
				}
				buffer_info->page_offset = 0;
			} else {
				buffer_info->page_offset ^= PAGE_SIZE / 2;
			}
			buffer_info->page_dma =
				pci_map_page(rx_ring->pdev, buffer_info->page,
				             buffer_info->page_offset,
				             PAGE_SIZE / 2,
				             PCI_DMA_FROMDEVICE);
		}

		if (!buffer_info->skb) {
			skb = netdev_alloc_skb_ip_align(netdev, bufsz);
			if (!skb) {
				rx_ring->rx_stats.alloc_failed++;
				goto no_buffers;
			}

			buffer_info->skb = skb;
			buffer_info->dma = pci_map_single(rx_ring->pdev,
			                                  skb->data,
			                                  bufsz,
			                                  PCI_DMA_FROMDEVICE);
		}
		/* Refresh the desc even if buffer_addrs didn't change because
		 * each write-back erases this info. */
		if (bufsz < IGB_RXBUFFER_1024) {
			rx_desc->read.pkt_addr =
			     cpu_to_le64(buffer_info->page_dma);
			rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
		} else {
			rx_desc->read.pkt_addr =
			     cpu_to_le64(buffer_info->dma);
			rx_desc->read.hdr_addr = 0;
		}

		i++;
		if (i == rx_ring->count)
			i = 0;
		buffer_info = &rx_ring->buffer_info[i];
	}

no_buffers:
	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;
		if (i == 0)
			i = (rx_ring->count - 1);
		else
			i--;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64). */
		wmb();
		writel(i, rx_ring->tail);
	}
}
static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (adapter->hw.phy.media_type != e1000_media_type_copper)
		return -EOPNOTSUPP;

	switch (cmd) {
	case SIOCGMIIPHY:
		data->phy_id = adapter->hw.phy.addr;
		break;
	case SIOCGMIIREG:
		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
		                     &data->val_out))
			return -EIO;
		break;
	case SIOCSMIIREG:
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request data
 * @cmd: ioctl command
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 *
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
                              struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;
	bool is_l4 = false;
	bool is_l2 = false;
	u32 regval;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl = 0;
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = true;
		break;
	default:
		return -ERANGE;
	}

	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -EINVAL;
		return 0;
	}

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
	if (is_l2)
		wr32(E1000_ETQF(3),
		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
		      E1000_ETQF_1588 |          /* enable timestamping */
		      ETH_P_1588));              /* 1588 eth protocol type */
	else
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
	if (is_l4) {
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		}
		wr32(E1000_FTQF(3), ftqf);
	} else {
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	}

	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
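
/*
 * Userspace reaches the handler above through the generic SIOCSHWTSTAMP
 * ioctl; roughly (illustrative sketch, not driver code):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_ALL,
 *	};
 *	struct ifreq ifr;
 *	strcpy(ifr.ifr_name, "eth0");
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter reports what was actually enabled, since the
 * switch above may widen a request (e.g. an event filter falls back to
 * HWTSTAMP_FILTER_ALL).
 */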
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}

s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}
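
/*
 * Usage sketch (hedged): the shared e1000 hw layer calls these helpers to
 * reach PCIe capability registers by offset relative to the capability
 * base, e.g. reading the negotiated link width/speed word with
 * igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &lnksta).
 */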
static void igb_vlan_rx_register(struct net_device *netdev,
                                 struct vlan_group *grp)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;

	igb_irq_disable(adapter);
	adapter->vlgrp = grp;

	if (grp) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);
}
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);
}
static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	igb_irq_disable(adapter);
	vlan_group_set_device(adapter->vlgrp, vid, NULL);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);

	if (adapter->vlgrp) {
		u16 vid;
		for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
			if (!vlan_group_get_device(adapter->vlgrp, vid))
				continue;
			igb_vlan_rx_add_vid(adapter->netdev, vid);
		}
	}
}
int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
{
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	switch (spddplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		dev_err(&adapter->pdev->dev,
			"Unsupported Speed/Duplex configuration\n");
		return -EINVAL;
	}
	return 0;
}
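
/*
 * Example caller behavior: the ethtool forced-speed path passes a combined
 * value such as SPEED_100 + DUPLEX_FULL to force 100/full with autoneg off;
 * only SPEED_1000 + DUPLEX_FULL flips autoneg back on, since gigabit can
 * only be advertised, not forced, as the case above shows.
 */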
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(&adapter->hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_shutdown_serdes_link_82575(hw);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	/* e1000_power_up_phy(adapter); */

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
		        "failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
                             u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
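
/*
 * Byte-order example for the packing above: MAC 00:1b:21:aa:bb:cc becomes
 * rar_low == 0xaa211b00 and rar_high (before the valid/pool bits) ==
 * 0x0000ccbb, i.e. the address is stored little-endian across RAL/RAH.
 */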
static int igb_set_vf_mac(struct igb_adapter *adapter,
                          int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive addresses and move
	 * towards the first, so a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	/* replication is not supported for 82575 */
	if (hw->mac.type == e1000_82575)
		return;

	/* enable replication vlan tag stripping */
	reg = rd32(E1000_RPLOLR);
	reg |= E1000_RPLOLR_STRVLAN;
	wr32(E1000_RPLOLR, reg);

	/* notify HW that the MAC is adding vlan tags */
	reg = rd32(E1000_DTXCTL);
	reg |= E1000_DTXCTL_VLAN_ADDED;
	wr32(E1000_DTXCTL, reg);

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}

/* igb_main.c */