1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79         /* required last entry */
80         {0, }
81 };
82
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136
137 #ifdef CONFIG_PM
138 static int igb_suspend(struct pci_dev *, pm_message_t);
139 static int igb_resume(struct pci_dev *);
140 #endif
141 static void igb_shutdown(struct pci_dev *);
142 #ifdef CONFIG_IGB_DCA
143 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
144 static struct notifier_block dca_notifier = {
145         .notifier_call  = igb_notify_dca,
146         .next           = NULL,
147         .priority       = 0
148 };
149 #endif
150 #ifdef CONFIG_NET_POLL_CONTROLLER
151 /* for netdump / net console */
152 static void igb_netpoll(struct net_device *);
153 #endif
154 #ifdef CONFIG_PCI_IOV
155 static unsigned int max_vfs = 0;
156 module_param(max_vfs, uint, 0);
157 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
158                  "per physical function");
159 #endif /* CONFIG_PCI_IOV */
160
161 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
162                      pci_channel_state_t);
163 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
164 static void igb_io_resume(struct pci_dev *);
165
166 static struct pci_error_handlers igb_err_handler = {
167         .error_detected = igb_io_error_detected,
168         .slot_reset = igb_io_slot_reset,
169         .resume = igb_io_resume,
170 };
171
172
173 static struct pci_driver igb_driver = {
174         .name     = igb_driver_name,
175         .id_table = igb_pci_tbl,
176         .probe    = igb_probe,
177         .remove   = __devexit_p(igb_remove),
178 #ifdef CONFIG_PM
179         /* Power Management Hooks */
180         .suspend  = igb_suspend,
181         .resume   = igb_resume,
182 #endif
183         .shutdown = igb_shutdown,
184         .err_handler = &igb_err_handler
185 };
186
187 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
188 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
189 MODULE_LICENSE("GPL");
190 MODULE_VERSION(DRV_VERSION);
191
192 /**
193  * igb_read_clock - read raw cycle counter (to be used by time counter)
194  */
195 static cycle_t igb_read_clock(const struct cyclecounter *tc)
196 {
197         struct igb_adapter *adapter =
198                 container_of(tc, struct igb_adapter, cycles);
199         struct e1000_hw *hw = &adapter->hw;
200         u64 stamp = 0;
201         int shift = 0;
202
203         /*
204          * The timestamp latches on lowest register read. For the 82580
205          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
206          * adjusted TIMINCA, so SYSTIMR just reads as all 0s and can be ignored.
207          */
208         if (hw->mac.type == e1000_82580) {
209                 stamp = rd32(E1000_SYSTIMR) >> 8;
210                 shift = IGB_82580_TSYNC_SHIFT;
211         }
212
213         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
214         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
215         return stamp;
216 }
217
218 #ifdef DEBUG
219 /**
220  * igb_get_hw_dev_name - return device name string
221  * used by hardware layer to print debugging information
222  **/
223 char *igb_get_hw_dev_name(struct e1000_hw *hw)
224 {
225         struct igb_adapter *adapter = hw->back;
226         return adapter->netdev->name;
227 }
228
229 /**
230  * igb_get_time_str - format current NIC and system time as string
231  */
232 static char *igb_get_time_str(struct igb_adapter *adapter,
233                               char buffer[160])
234 {
235         cycle_t hw = adapter->cycles.read(&adapter->cycles);
236         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
237         struct timespec sys;
238         struct timespec delta;
239         getnstimeofday(&sys);
240
241         delta = timespec_sub(nic, sys);
242
243         sprintf(buffer,
244                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
245                 hw,
246                 (long)nic.tv_sec, nic.tv_nsec,
247                 (long)sys.tv_sec, sys.tv_nsec,
248                 (long)delta.tv_sec, delta.tv_nsec);
249
250         return buffer;
251 }
252 #endif
253
254 /**
255  * igb_init_module - Driver Registration Routine
256  *
257  * igb_init_module is the first routine called when the driver is
258  * loaded. All it does is register with the PCI subsystem.
259  **/
260 static int __init igb_init_module(void)
261 {
262         int ret;
263         printk(KERN_INFO "%s - version %s\n",
264                igb_driver_string, igb_driver_version);
265
266         printk(KERN_INFO "%s\n", igb_copyright);
267
268 #ifdef CONFIG_IGB_DCA
269         dca_register_notify(&dca_notifier);
270 #endif
271         ret = pci_register_driver(&igb_driver);
272         return ret;
273 }
274
275 module_init(igb_init_module);
276
277 /**
278  * igb_exit_module - Driver Exit Cleanup Routine
279  *
280  * igb_exit_module is called just before the driver is removed
281  * from memory.
282  **/
283 static void __exit igb_exit_module(void)
284 {
285 #ifdef CONFIG_IGB_DCA
286         dca_unregister_notify(&dca_notifier);
287 #endif
288         pci_unregister_driver(&igb_driver);
289 }
290
291 module_exit(igb_exit_module);
292
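/* Q_IDX_82576 maps ring index i onto the 82576 queue layout used with VFs:
 * even indices land in the low bank (queues 0-7) and odd indices in the
 * high bank (queues 8-15), matching the VF pairing of queue n with n+8. */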
293 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
294 /**
295  * igb_cache_ring_register - Descriptor ring to register mapping
296  * @adapter: board private structure to initialize
297  *
298  * Once we know the feature-set enabled for the device, we'll cache
299  * the register offset the descriptor ring is assigned to.
300  **/
301 static void igb_cache_ring_register(struct igb_adapter *adapter)
302 {
303         int i = 0, j = 0;
304         u32 rbase_offset = adapter->vfs_allocated_count;
305
306         switch (adapter->hw.mac.type) {
307         case e1000_82576:
308                 /* The queues are allocated for virtualization such that VF 0
309                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
310                  * In order to avoid collision we start at the first free queue
311                  * and continue consuming queues in the same sequence
312                  */
313                 if (adapter->vfs_allocated_count) {
314                         for (; i < adapter->rss_queues; i++)
315                                 adapter->rx_ring[i].reg_idx = rbase_offset +
316                                                               Q_IDX_82576(i);
317                         for (; j < adapter->rss_queues; j++)
318                                 adapter->tx_ring[j].reg_idx = rbase_offset +
319                                                               Q_IDX_82576(j);
320                 }
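                /* fall through - remaining rings use the default 1:1
                 * register mapping below */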
321         case e1000_82575:
322         case e1000_82580:
323         default:
324                 for (; i < adapter->num_rx_queues; i++)
325                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
326                 for (; j < adapter->num_tx_queues; j++)
327                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
328                 break;
329         }
330 }
331
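/* igb_free_queues - free the Tx/Rx ring arrays and reset the queue counts */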
332 static void igb_free_queues(struct igb_adapter *adapter)
333 {
334         kfree(adapter->tx_ring);
335         kfree(adapter->rx_ring);
336
337         adapter->tx_ring = NULL;
338         adapter->rx_ring = NULL;
339
340         adapter->num_rx_queues = 0;
341         adapter->num_tx_queues = 0;
342 }
343
344 /**
345  * igb_alloc_queues - Allocate memory for all rings
346  * @adapter: board private structure to initialize
347  *
348  * We allocate one ring per queue at run-time since we don't know the
349  * number of queues at compile-time.
350  **/
351 static int igb_alloc_queues(struct igb_adapter *adapter)
352 {
353         int i;
354
355         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
356                                    sizeof(struct igb_ring), GFP_KERNEL);
357         if (!adapter->tx_ring)
358                 goto err;
359
360         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
361                                    sizeof(struct igb_ring), GFP_KERNEL);
362         if (!adapter->rx_ring)
363                 goto err;
364
365         for (i = 0; i < adapter->num_tx_queues; i++) {
366                 struct igb_ring *ring = &(adapter->tx_ring[i]);
367                 ring->count = adapter->tx_ring_count;
368                 ring->queue_index = i;
369                 ring->pdev = adapter->pdev;
370                 ring->netdev = adapter->netdev;
371                 /* For 82575, context index must be unique per ring. */
372                 if (adapter->hw.mac.type == e1000_82575)
373                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
374         }
375
376         for (i = 0; i < adapter->num_rx_queues; i++) {
377                 struct igb_ring *ring = &(adapter->rx_ring[i]);
378                 ring->count = adapter->rx_ring_count;
379                 ring->queue_index = i;
380                 ring->pdev = adapter->pdev;
381                 ring->netdev = adapter->netdev;
382                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
383                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
384                 /* set flag indicating ring supports SCTP checksum offload */
385                 if (adapter->hw.mac.type >= e1000_82576)
386                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
387         }
388
389         igb_cache_ring_register(adapter);
390
391         return 0;
392
393 err:
394         igb_free_queues(adapter);
395
396         return -ENOMEM;
397 }
398
399 #define IGB_N0_QUEUE -1
400 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
401 {
402         u32 msixbm = 0;
403         struct igb_adapter *adapter = q_vector->adapter;
404         struct e1000_hw *hw = &adapter->hw;
405         u32 ivar, index;
406         int rx_queue = IGB_N0_QUEUE;
407         int tx_queue = IGB_N0_QUEUE;
408
409         if (q_vector->rx_ring)
410                 rx_queue = q_vector->rx_ring->reg_idx;
411         if (q_vector->tx_ring)
412                 tx_queue = q_vector->tx_ring->reg_idx;
413
414         switch (hw->mac.type) {
415         case e1000_82575:
416                 /* The 82575 assigns vectors using a bitmask, which matches the
417                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
418                    or more queues to a vector, we write the appropriate bits
419                    into the MSIXBM register for that vector. */
420                 if (rx_queue > IGB_N0_QUEUE)
421                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
422                 if (tx_queue > IGB_N0_QUEUE)
423                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
424                 if (!adapter->msix_entries && msix_vector == 0)
425                         msixbm |= E1000_EIMS_OTHER;
426                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
427                 q_vector->eims_value = msixbm;
428                 break;
429         case e1000_82576:
430                 /* 82576 uses a table-based method for assigning vectors.
431                    Each queue has a single entry in the table to which we write
432                    a vector number along with a "valid" bit.  Sadly, the layout
433                    of the table is somewhat counterintuitive. */
434                 if (rx_queue > IGB_N0_QUEUE) {
435                         index = (rx_queue & 0x7);
436                         ivar = array_rd32(E1000_IVAR0, index);
437                         if (rx_queue < 8) {
438                                 /* vector goes into low byte of register */
439                                 ivar = ivar & 0xFFFFFF00;
440                                 ivar |= msix_vector | E1000_IVAR_VALID;
441                         } else {
442                                 /* vector goes into third byte of register */
443                                 ivar = ivar & 0xFF00FFFF;
444                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
445                         }
446                         array_wr32(E1000_IVAR0, index, ivar);
447                 }
448                 if (tx_queue > IGB_N0_QUEUE) {
449                         index = (tx_queue & 0x7);
450                         ivar = array_rd32(E1000_IVAR0, index);
451                         if (tx_queue < 8) {
452                                 /* vector goes into second byte of register */
453                                 ivar = ivar & 0xFFFF00FF;
454                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
455                         } else {
456                                 /* vector goes into high byte of register */
457                                 ivar = ivar & 0x00FFFFFF;
458                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
459                         }
460                         array_wr32(E1000_IVAR0, index, ivar);
461                 }
462                 q_vector->eims_value = 1 << msix_vector;
463                 break;
464         case e1000_82580:
465                 /* 82580 uses the same table-based approach as 82576 but has fewer
466                    entries; as a result we carry over for queues greater than 4. */
467                 if (rx_queue > IGB_N0_QUEUE) {
468                         index = (rx_queue >> 1);
469                         ivar = array_rd32(E1000_IVAR0, index);
470                         if (rx_queue & 0x1) {
471                                 /* vector goes into third byte of register */
472                                 ivar = ivar & 0xFF00FFFF;
473                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
474                         } else {
475                                 /* vector goes into low byte of register */
476                                 ivar = ivar & 0xFFFFFF00;
477                                 ivar |= msix_vector | E1000_IVAR_VALID;
478                         }
479                         array_wr32(E1000_IVAR0, index, ivar);
480                 }
481                 if (tx_queue > IGB_N0_QUEUE) {
482                         index = (tx_queue >> 1);
483                         ivar = array_rd32(E1000_IVAR0, index);
484                         if (tx_queue & 0x1) {
485                                 /* vector goes into high byte of register */
486                                 ivar = ivar & 0x00FFFFFF;
487                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
488                         } else {
489                                 /* vector goes into second byte of register */
490                                 ivar = ivar & 0xFFFF00FF;
491                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
492                         }
493                         array_wr32(E1000_IVAR0, index, ivar);
494                 }
495                 q_vector->eims_value = 1 << msix_vector;
496                 break;
497         default:
498                 BUG();
499                 break;
500         }
501 }
502
503 /**
504  * igb_configure_msix - Configure MSI-X hardware
505  *
506  * igb_configure_msix sets up the hardware to properly
507  * generate MSI-X interrupts.
508  **/
509 static void igb_configure_msix(struct igb_adapter *adapter)
510 {
511         u32 tmp;
512         int i, vector = 0;
513         struct e1000_hw *hw = &adapter->hw;
514
515         adapter->eims_enable_mask = 0;
516
517         /* set vector for other causes, i.e. link changes */
518         switch (hw->mac.type) {
519         case e1000_82575:
520                 tmp = rd32(E1000_CTRL_EXT);
521                 /* enable MSI-X PBA support */
522                 tmp |= E1000_CTRL_EXT_PBA_CLR;
523
524                 /* Auto-Mask interrupts upon ICR read. */
525                 tmp |= E1000_CTRL_EXT_EIAME;
526                 tmp |= E1000_CTRL_EXT_IRCA;
527
528                 wr32(E1000_CTRL_EXT, tmp);
529
530                 /* enable msix_other interrupt */
531                 array_wr32(E1000_MSIXBM(0), vector++,
532                                       E1000_EIMS_OTHER);
533                 adapter->eims_other = E1000_EIMS_OTHER;
534
535                 break;
536
537         case e1000_82576:
538         case e1000_82580:
539                 /* Turn on MSI-X capability first, or our settings
540                  * won't stick.  And it will take days to debug. */
541                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
542                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
543                                 E1000_GPIE_NSICR);
544
545                 /* enable msix_other interrupt */
546                 adapter->eims_other = 1 << vector;
547                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
548
549                 wr32(E1000_IVAR_MISC, tmp);
550                 break;
551         default:
552                 /* do nothing, since nothing else supports MSI-X */
553                 break;
554         } /* switch (hw->mac.type) */
555
556         adapter->eims_enable_mask |= adapter->eims_other;
557
558         for (i = 0; i < adapter->num_q_vectors; i++) {
559                 struct igb_q_vector *q_vector = adapter->q_vector[i];
560                 igb_assign_vector(q_vector, vector++);
561                 adapter->eims_enable_mask |= q_vector->eims_value;
562         }
563
564         wrfl();
565 }
566
567 /**
568  * igb_request_msix - Initialize MSI-X interrupts
569  *
570  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
571  * kernel.
572  **/
573 static int igb_request_msix(struct igb_adapter *adapter)
574 {
575         struct net_device *netdev = adapter->netdev;
576         struct e1000_hw *hw = &adapter->hw;
577         int i, err = 0, vector = 0;
578
579         err = request_irq(adapter->msix_entries[vector].vector,
580                           igb_msix_other, 0, netdev->name, adapter);
581         if (err)
582                 goto out;
583         vector++;
584
585         for (i = 0; i < adapter->num_q_vectors; i++) {
586                 struct igb_q_vector *q_vector = adapter->q_vector[i];
587
588                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
589
590                 if (q_vector->rx_ring && q_vector->tx_ring)
591                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
592                                 q_vector->rx_ring->queue_index);
593                 else if (q_vector->tx_ring)
594                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
595                                 q_vector->tx_ring->queue_index);
596                 else if (q_vector->rx_ring)
597                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
598                                 q_vector->rx_ring->queue_index);
599                 else
600                         sprintf(q_vector->name, "%s-unused", netdev->name);
601
602                 err = request_irq(adapter->msix_entries[vector].vector,
603                                   igb_msix_ring, 0, q_vector->name,
604                                   q_vector);
605                 if (err)
606                         goto out;
607                 vector++;
608         }
609
610         igb_configure_msix(adapter);
611         return 0;
612 out:
613         return err;
614 }
615
616 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
617 {
618         if (adapter->msix_entries) {
619                 pci_disable_msix(adapter->pdev);
620                 kfree(adapter->msix_entries);
621                 adapter->msix_entries = NULL;
622         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
623                 pci_disable_msi(adapter->pdev);
624         }
625 }
626
627 /**
628  * igb_free_q_vectors - Free memory allocated for interrupt vectors
629  * @adapter: board private structure to initialize
630  *
631  * This function frees the memory allocated to the q_vectors.  In addition if
632  * NAPI is enabled it will delete any references to the NAPI struct prior
633  * to freeing the q_vector.
634  **/
635 static void igb_free_q_vectors(struct igb_adapter *adapter)
636 {
637         int v_idx;
638
639         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
640                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
641                 adapter->q_vector[v_idx] = NULL;
642                 netif_napi_del(&q_vector->napi);
643                 kfree(q_vector);
644         }
645         adapter->num_q_vectors = 0;
646 }
647
648 /**
649  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
650  *
651  * This function resets the device so that it has 0 rx queues, tx queues, and
652  * MSI-X interrupts allocated.
653  */
654 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
655 {
656         igb_free_queues(adapter);
657         igb_free_q_vectors(adapter);
658         igb_reset_interrupt_capability(adapter);
659 }
660
661 /**
662  * igb_set_interrupt_capability - set MSI or MSI-X if supported
663  *
664  * Attempt to configure interrupts using the best available
665  * capabilities of the hardware and kernel.
666  **/
667 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
668 {
669         int err;
670         int numvecs, i;
671
672         /* Number of supported queues. */
673         adapter->num_rx_queues = adapter->rss_queues;
674         adapter->num_tx_queues = adapter->rss_queues;
675
676         /* start with one vector for every rx queue */
677         numvecs = adapter->num_rx_queues;
678
679         /* if the tx handler is separate, add 1 for every tx queue */
680         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
681                 numvecs += adapter->num_tx_queues;
682
683         /* store the number of vectors reserved for queues */
684         adapter->num_q_vectors = numvecs;
685
686         /* add 1 vector for link status interrupts */
687         numvecs++;
688         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
689                                         GFP_KERNEL);
690         if (!adapter->msix_entries)
691                 goto msi_only;
692
693         for (i = 0; i < numvecs; i++)
694                 adapter->msix_entries[i].entry = i;
695
696         err = pci_enable_msix(adapter->pdev,
697                               adapter->msix_entries,
698                               numvecs);
699         if (err == 0)
700                 goto out;
701
702         igb_reset_interrupt_capability(adapter);
703
704         /* If we can't do MSI-X, try MSI */
705 msi_only:
706 #ifdef CONFIG_PCI_IOV
707         /* disable SR-IOV for non MSI-X configurations */
708         if (adapter->vf_data) {
709                 struct e1000_hw *hw = &adapter->hw;
710                 /* disable iov and allow time for transactions to clear */
711                 pci_disable_sriov(adapter->pdev);
712                 msleep(500);
713
714                 kfree(adapter->vf_data);
715                 adapter->vf_data = NULL;
716                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
717                 msleep(100);
718                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
719         }
720 #endif
721         adapter->vfs_allocated_count = 0;
722         adapter->rss_queues = 1;
723         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
724         adapter->num_rx_queues = 1;
725         adapter->num_tx_queues = 1;
726         adapter->num_q_vectors = 1;
727         if (!pci_enable_msi(adapter->pdev))
728                 adapter->flags |= IGB_FLAG_HAS_MSI;
729 out:
730         /* Notify the stack of the (possibly) reduced Tx Queue count. */
731         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
732         return;
733 }
734
735 /**
736  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
737  * @adapter: board private structure to initialize
738  *
739  * We allocate one q_vector per queue interrupt.  If allocation fails we
740  * return -ENOMEM.
741  **/
742 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
743 {
744         struct igb_q_vector *q_vector;
745         struct e1000_hw *hw = &adapter->hw;
746         int v_idx;
747
748         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
749                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
750                 if (!q_vector)
751                         goto err_out;
752                 q_vector->adapter = adapter;
753                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
754                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
755                 q_vector->itr_val = IGB_START_ITR;
756                 q_vector->set_itr = 1;
757                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
758                 adapter->q_vector[v_idx] = q_vector;
759         }
760         return 0;
761
762 err_out:
763         while (v_idx) {
764                 v_idx--;
765                 q_vector = adapter->q_vector[v_idx];
766                 netif_napi_del(&q_vector->napi);
767                 kfree(q_vector);
768                 adapter->q_vector[v_idx] = NULL;
769         }
770         return -ENOMEM;
771 }
772
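/* Link an Rx ring to its interrupt vector and seed the vector's ITR value
 * from the adapter's rx_itr_setting (dynamic settings of 1-3 fall back to
 * the default start ITR). */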
773 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
774                                       int ring_idx, int v_idx)
775 {
776         struct igb_q_vector *q_vector;
777
778         q_vector = adapter->q_vector[v_idx];
779         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
780         q_vector->rx_ring->q_vector = q_vector;
781         q_vector->itr_val = adapter->rx_itr_setting;
782         if (q_vector->itr_val && q_vector->itr_val <= 3)
783                 q_vector->itr_val = IGB_START_ITR;
784 }
785
786 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
787                                       int ring_idx, int v_idx)
788 {
789         struct igb_q_vector *q_vector;
790
791         q_vector = adapter->q_vector[v_idx];
792         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
793         q_vector->tx_ring->q_vector = q_vector;
794         q_vector->itr_val = adapter->tx_itr_setting;
795         if (q_vector->itr_val && q_vector->itr_val <= 3)
796                 q_vector->itr_val = IGB_START_ITR;
797 }
798
799 /**
800  * igb_map_ring_to_vector - maps allocated queues to vectors
801  *
802  * This function maps the recently allocated queues to vectors.
803  **/
804 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
805 {
806         int i;
807         int v_idx = 0;
808
809         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
810             (adapter->num_q_vectors < adapter->num_tx_queues))
811                 return -ENOMEM;
812
813         if (adapter->num_q_vectors >=
814             (adapter->num_rx_queues + adapter->num_tx_queues)) {
815                 for (i = 0; i < adapter->num_rx_queues; i++)
816                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
817                 for (i = 0; i < adapter->num_tx_queues; i++)
818                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
819         } else {
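                /* not enough vectors for one per ring: share a vector
                 * between each Tx/Rx ring pair, then map any leftover
                 * rings to their own vectors */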
820                 for (i = 0; i < adapter->num_rx_queues; i++) {
821                         if (i < adapter->num_tx_queues)
822                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
823                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
824                 }
825                 for (; i < adapter->num_tx_queues; i++)
826                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
827         }
828         return 0;
829 }
830
831 /**
832  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
833  *
834  * This function initializes the interrupts and allocates all of the queues.
835  **/
836 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
837 {
838         struct pci_dev *pdev = adapter->pdev;
839         int err;
840
841         igb_set_interrupt_capability(adapter);
842
843         err = igb_alloc_q_vectors(adapter);
844         if (err) {
845                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
846                 goto err_alloc_q_vectors;
847         }
848
849         err = igb_alloc_queues(adapter);
850         if (err) {
851                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
852                 goto err_alloc_queues;
853         }
854
855         err = igb_map_ring_to_vector(adapter);
856         if (err) {
857                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
858                 goto err_map_queues;
859         }
860
861
862         return 0;
863 err_map_queues:
864         igb_free_queues(adapter);
865 err_alloc_queues:
866         igb_free_q_vectors(adapter);
867 err_alloc_q_vectors:
868         igb_reset_interrupt_capability(adapter);
869         return err;
870 }
871
872 /**
873  * igb_request_irq - initialize interrupts
874  *
875  * Attempts to configure interrupts using the best available
876  * capabilities of the hardware and kernel.
877  **/
878 static int igb_request_irq(struct igb_adapter *adapter)
879 {
880         struct net_device *netdev = adapter->netdev;
881         struct pci_dev *pdev = adapter->pdev;
882         int err = 0;
883
884         if (adapter->msix_entries) {
885                 err = igb_request_msix(adapter);
886                 if (!err)
887                         goto request_done;
888                 /* fall back to MSI */
889                 igb_clear_interrupt_scheme(adapter);
890                 if (!pci_enable_msi(adapter->pdev))
891                         adapter->flags |= IGB_FLAG_HAS_MSI;
892                 igb_free_all_tx_resources(adapter);
893                 igb_free_all_rx_resources(adapter);
894                 adapter->num_tx_queues = 1;
895                 adapter->num_rx_queues = 1;
896                 adapter->num_q_vectors = 1;
897                 err = igb_alloc_q_vectors(adapter);
898                 if (err) {
899                         dev_err(&pdev->dev,
900                                 "Unable to allocate memory for vectors\n");
901                         goto request_done;
902                 }
903                 err = igb_alloc_queues(adapter);
904                 if (err) {
905                         dev_err(&pdev->dev,
906                                 "Unable to allocate memory for queues\n");
907                         igb_free_q_vectors(adapter);
908                         goto request_done;
909                 }
910                 igb_setup_all_tx_resources(adapter);
911                 igb_setup_all_rx_resources(adapter);
912         } else {
913                 igb_assign_vector(adapter->q_vector[0], 0);
914         }
915
916         if (adapter->flags & IGB_FLAG_HAS_MSI) {
917                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
918                                   netdev->name, adapter);
919                 if (!err)
920                         goto request_done;
921
922                 /* fall back to legacy interrupts */
923                 igb_reset_interrupt_capability(adapter);
924                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
925         }
926
927         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
928                           netdev->name, adapter);
929
930         if (err)
931                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
932                         err);
933
934 request_done:
935         return err;
936 }
937
938 static void igb_free_irq(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 int vector = 0, i;
942
943                 free_irq(adapter->msix_entries[vector++].vector, adapter);
944
945                 for (i = 0; i < adapter->num_q_vectors; i++) {
946                         struct igb_q_vector *q_vector = adapter->q_vector[i];
947                         free_irq(adapter->msix_entries[vector++].vector,
948                                  q_vector);
949                 }
950         } else {
951                 free_irq(adapter->pdev->irq, adapter);
952         }
953 }
954
955 /**
956  * igb_irq_disable - Mask off interrupt generation on the NIC
957  * @adapter: board private structure
958  **/
959 static void igb_irq_disable(struct igb_adapter *adapter)
960 {
961         struct e1000_hw *hw = &adapter->hw;
962
963         /*
964          * we need to be careful when disabling interrupts.  The VFs are also
965          * mapped into these registers and so clearing the bits can cause
966          * issues on the VF drivers so we only need to clear what we set
967          */
968         if (adapter->msix_entries) {
969                 u32 regval = rd32(E1000_EIAM);
970                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
971                 wr32(E1000_EIMC, adapter->eims_enable_mask);
972                 regval = rd32(E1000_EIAC);
973                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
974         }
975
976         wr32(E1000_IAM, 0);
977         wr32(E1000_IMC, ~0);
978         wrfl();
979         synchronize_irq(adapter->pdev->irq);
980 }
981
982 /**
983  * igb_irq_enable - Enable default interrupt generation settings
984  * @adapter: board private structure
985  **/
986 static void igb_irq_enable(struct igb_adapter *adapter)
987 {
988         struct e1000_hw *hw = &adapter->hw;
989
990         if (adapter->msix_entries) {
991                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
992                 u32 regval = rd32(E1000_EIAC);
993                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
994                 regval = rd32(E1000_EIAM);
995                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
996                 wr32(E1000_EIMS, adapter->eims_enable_mask);
997                 if (adapter->vfs_allocated_count) {
998                         wr32(E1000_MBVFIMR, 0xFF);
999                         ims |= E1000_IMS_VMMB;
1000                 }
1001                 if (adapter->hw.mac.type == e1000_82580)
1002                         ims |= E1000_IMS_DRSTA;
1003
1004                 wr32(E1000_IMS, ims);
1005         } else {
1006                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1007                                 E1000_IMS_DRSTA);
1008                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1009                                 E1000_IMS_DRSTA);
1010         }
1011 }
1012
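/* igb_update_mng_vlan - keep the manageability VLAN in sync with firmware.
 * Adds the current DHCP cookie VLAN to the VFTA filter table and removes
 * the old entry once it is no longer in use. */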
1013 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1014 {
1015         struct e1000_hw *hw = &adapter->hw;
1016         u16 vid = adapter->hw.mng_cookie.vlan_id;
1017         u16 old_vid = adapter->mng_vlan_id;
1018
1019         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1020                 /* add VID to filter table */
1021                 igb_vfta_set(hw, vid, true);
1022                 adapter->mng_vlan_id = vid;
1023         } else {
1024                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1025         }
1026
1027         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1028             (vid != old_vid) &&
1029             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1030                 /* remove VID from filter table */
1031                 igb_vfta_set(hw, old_vid, false);
1032         }
1033 }
1034
1035 /**
1036  * igb_release_hw_control - release control of the h/w to f/w
1037  * @adapter: address of board private structure
1038  *
1039  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1040  * For ASF and Pass Through versions of f/w this means that the
1041  * driver is no longer loaded.
1042  *
1043  **/
1044 static void igb_release_hw_control(struct igb_adapter *adapter)
1045 {
1046         struct e1000_hw *hw = &adapter->hw;
1047         u32 ctrl_ext;
1048
1049         /* Let firmware take over control of h/w */
1050         ctrl_ext = rd32(E1000_CTRL_EXT);
1051         wr32(E1000_CTRL_EXT,
1052                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1053 }
1054
1055 /**
1056  * igb_get_hw_control - get control of the h/w from f/w
1057  * @adapter: address of board private structure
1058  *
1059  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1060  * For ASF and Pass Through versions of f/w this means that
1061  * the driver is loaded.
1062  *
1063  **/
1064 static void igb_get_hw_control(struct igb_adapter *adapter)
1065 {
1066         struct e1000_hw *hw = &adapter->hw;
1067         u32 ctrl_ext;
1068
1069         /* Let firmware know the driver has taken over */
1070         ctrl_ext = rd32(E1000_CTRL_EXT);
1071         wr32(E1000_CTRL_EXT,
1072                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1073 }
1074
1075 /**
1076  * igb_configure - configure the hardware for RX and TX
1077  * @adapter: private board structure
1078  **/
1079 static void igb_configure(struct igb_adapter *adapter)
1080 {
1081         struct net_device *netdev = adapter->netdev;
1082         int i;
1083
1084         igb_get_hw_control(adapter);
1085         igb_set_rx_mode(netdev);
1086
1087         igb_restore_vlan(adapter);
1088
1089         igb_setup_tctl(adapter);
1090         igb_setup_mrqc(adapter);
1091         igb_setup_rctl(adapter);
1092
1093         igb_configure_tx(adapter);
1094         igb_configure_rx(adapter);
1095
1096         igb_rx_fifo_flush_82575(&adapter->hw);
1097
1098         /* call igb_desc_unused which always leaves
1099          * at least 1 descriptor unused to make sure
1100          * next_to_use != next_to_clean */
1101         for (i = 0; i < adapter->num_rx_queues; i++) {
1102                 struct igb_ring *ring = &adapter->rx_ring[i];
1103                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1104         }
1105
1106
1107         adapter->tx_queue_len = netdev->tx_queue_len;
1108 }
1109
1110
1111 /**
1112  * igb_up - Open the interface and prepare it to handle traffic
1113  * @adapter: board private structure
1114  **/
1115 int igb_up(struct igb_adapter *adapter)
1116 {
1117         struct e1000_hw *hw = &adapter->hw;
1118         int i;
1119
1120         /* hardware has been reset, we need to reload some things */
1121         igb_configure(adapter);
1122
1123         clear_bit(__IGB_DOWN, &adapter->state);
1124
1125         for (i = 0; i < adapter->num_q_vectors; i++) {
1126                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1127                 napi_enable(&q_vector->napi);
1128         }
1129         if (adapter->msix_entries)
1130                 igb_configure_msix(adapter);
1131         else
1132                 igb_assign_vector(adapter->q_vector[0], 0);
1133
1134         /* Clear any pending interrupts. */
1135         rd32(E1000_ICR);
1136         igb_irq_enable(adapter);
1137
1138         /* notify VFs that reset has been completed */
1139         if (adapter->vfs_allocated_count) {
1140                 u32 reg_data = rd32(E1000_CTRL_EXT);
1141                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1142                 wr32(E1000_CTRL_EXT, reg_data);
1143         }
1144
1145         netif_tx_start_all_queues(adapter->netdev);
1146
1147         /* start the watchdog. */
1148         hw->mac.get_link_status = 1;
1149         schedule_work(&adapter->watchdog_task);
1150
1151         return 0;
1152 }
1153
1154 void igb_down(struct igb_adapter *adapter)
1155 {
1156         struct net_device *netdev = adapter->netdev;
1157         struct e1000_hw *hw = &adapter->hw;
1158         u32 tctl, rctl;
1159         int i;
1160
1161         /* signal that we're down so the interrupt handler does not
1162          * reschedule our watchdog timer */
1163         set_bit(__IGB_DOWN, &adapter->state);
1164
1165         /* disable receives in the hardware */
1166         rctl = rd32(E1000_RCTL);
1167         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1168         /* flush and sleep below */
1169
1170         netif_tx_stop_all_queues(netdev);
1171
1172         /* disable transmits in the hardware */
1173         tctl = rd32(E1000_TCTL);
1174         tctl &= ~E1000_TCTL_EN;
1175         wr32(E1000_TCTL, tctl);
1176         /* flush both disables and wait for them to finish */
1177         wrfl();
1178         msleep(10);
1179
1180         for (i = 0; i < adapter->num_q_vectors; i++) {
1181                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1182                 napi_disable(&q_vector->napi);
1183         }
1184
1185         igb_irq_disable(adapter);
1186
1187         del_timer_sync(&adapter->watchdog_timer);
1188         del_timer_sync(&adapter->phy_info_timer);
1189
1190         netdev->tx_queue_len = adapter->tx_queue_len;
1191         netif_carrier_off(netdev);
1192
1193         /* record the stats before reset */
1194         igb_update_stats(adapter);
1195
1196         adapter->link_speed = 0;
1197         adapter->link_duplex = 0;
1198
1199         if (!pci_channel_offline(adapter->pdev))
1200                 igb_reset(adapter);
1201         igb_clean_all_tx_rings(adapter);
1202         igb_clean_all_rx_rings(adapter);
1203 #ifdef CONFIG_IGB_DCA
1204
1205         /* since we reset the hardware DCA settings were cleared */
1206         igb_setup_dca(adapter);
1207 #endif
1208 }
1209
1210 void igb_reinit_locked(struct igb_adapter *adapter)
1211 {
1212         WARN_ON(in_interrupt());
1213         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1214                 msleep(1);
1215         igb_down(adapter);
1216         igb_up(adapter);
1217         clear_bit(__IGB_RESETTING, &adapter->state);
1218 }
1219
1220 void igb_reset(struct igb_adapter *adapter)
1221 {
1222         struct pci_dev *pdev = adapter->pdev;
1223         struct e1000_hw *hw = &adapter->hw;
1224         struct e1000_mac_info *mac = &hw->mac;
1225         struct e1000_fc_info *fc = &hw->fc;
1226         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1227         u16 hwm;
1228
1229         /* Repartition PBA for MTUs greater than 9k.
1230          * CTRL.RST is required for the change to take effect.
1231          */
1232         switch (mac->type) {
1233         case e1000_82580:
1234                 pba = rd32(E1000_RXPBS);
1235                 pba = igb_rxpbs_adjust_82580(pba);
1236                 break;
1237         case e1000_82576:
1238                 pba = rd32(E1000_RXPBS);
1239                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1240                 break;
1241         case e1000_82575:
1242         default:
1243                 pba = E1000_PBA_34K;
1244                 break;
1245         }
1246
1247         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1248             (mac->type < e1000_82576)) {
1249                 /* adjust PBA for jumbo frames */
1250                 wr32(E1000_PBA, pba);
1251
1252                 /* To maintain wire speed transmits, the Tx FIFO should be
1253                  * large enough to accommodate two full transmit packets,
1254                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1255                  * the Rx FIFO should be large enough to accommodate at least
1256                  * one full receive packet and is similarly rounded up and
1257                  * expressed in KB. */
1258                 pba = rd32(E1000_PBA);
1259                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1260                 tx_space = pba >> 16;
1261                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1262                 pba &= 0xffff;
1263                 /* the Tx FIFO also stores 16 bytes of information about each transmit,
1264                  * but doesn't include the ethernet FCS because hardware appends it */
1265                 min_tx_space = (adapter->max_frame_size +
1266                                 sizeof(union e1000_adv_tx_desc) -
1267                                 ETH_FCS_LEN) * 2;
1268                 min_tx_space = ALIGN(min_tx_space, 1024);
1269                 min_tx_space >>= 10;
1270                 /* software strips receive CRC, so leave room for it */
1271                 min_rx_space = adapter->max_frame_size;
1272                 min_rx_space = ALIGN(min_rx_space, 1024);
1273                 min_rx_space >>= 10;
1274
1275                 /* If current Tx allocation is less than the min Tx FIFO size,
1276                  * and the min Tx FIFO size is less than the current Rx FIFO
1277                  * allocation, take space away from current Rx allocation */
1278                 if (tx_space < min_tx_space &&
1279                     ((min_tx_space - tx_space) < pba)) {
1280                         pba = pba - (min_tx_space - tx_space);
1281
1282                         /* if short on rx space, rx wins and must trump tx
1283                          * adjustment */
1284                         if (pba < min_rx_space)
1285                                 pba = min_rx_space;
1286                 }
1287                 wr32(E1000_PBA, pba);
1288         }
1289
1290         /* flow control settings */
1291         /* The high water mark must be low enough to fit one full frame
1292          * (or the size used for early receive) above it in the Rx FIFO.
1293          * Set it to the lower of:
1294          * - 90% of the Rx FIFO size, or
1295          * - the full Rx FIFO size minus one full frame */
1296         hwm = min(((pba << 10) * 9 / 10),
1297                         ((pba << 10) - 2 * adapter->max_frame_size));
1298
1299         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1300         fc->low_water = fc->high_water - 16;
1301         fc->pause_time = 0xFFFF;
1302         fc->send_xon = 1;
1303         fc->current_mode = fc->requested_mode;
1304
1305         /* disable receive for all VFs and wait one second */
1306         if (adapter->vfs_allocated_count) {
1307                 int i;
1308                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1309                         adapter->vf_data[i].flags = 0;
1310
1311                 /* ping all the active vfs to let them know we are going down */
1312                 igb_ping_all_vfs(adapter);
1313
1314                 /* disable transmits and receives */
1315                 wr32(E1000_VFRE, 0);
1316                 wr32(E1000_VFTE, 0);
1317         }
1318
1319         /* Allow time for pending master requests to run */
1320         hw->mac.ops.reset_hw(hw);
1321         wr32(E1000_WUC, 0);
1322
1323         if (hw->mac.ops.init_hw(hw))
1324                 dev_err(&pdev->dev, "Hardware Error\n");
1325
1326         if (hw->mac.type == e1000_82580) {
1327                 u32 reg = rd32(E1000_PCIEMISC);
1328                 wr32(E1000_PCIEMISC,
1329                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1330         }
1331         igb_update_mng_vlan(adapter);
1332
1333         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1334         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1335
1336         igb_reset_adaptive(hw);
1337         igb_get_phy_info(hw);
1338 }
1339
1340 static const struct net_device_ops igb_netdev_ops = {
1341         .ndo_open               = igb_open,
1342         .ndo_stop               = igb_close,
1343         .ndo_start_xmit         = igb_xmit_frame_adv,
1344         .ndo_get_stats          = igb_get_stats,
1345         .ndo_set_rx_mode        = igb_set_rx_mode,
1346         .ndo_set_multicast_list = igb_set_rx_mode,
1347         .ndo_set_mac_address    = igb_set_mac,
1348         .ndo_change_mtu         = igb_change_mtu,
1349         .ndo_do_ioctl           = igb_ioctl,
1350         .ndo_tx_timeout         = igb_tx_timeout,
1351         .ndo_validate_addr      = eth_validate_addr,
1352         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1353         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1354         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1355 #ifdef CONFIG_NET_POLL_CONTROLLER
1356         .ndo_poll_controller    = igb_netpoll,
1357 #endif
1358 };
1359
1360 /**
1361  * igb_probe - Device Initialization Routine
1362  * @pdev: PCI device information struct
1363  * @ent: entry in igb_pci_tbl
1364  *
1365  * Returns 0 on success, negative on failure
1366  *
1367  * igb_probe initializes an adapter identified by a pci_dev structure.
1368  * The OS initialization, configuring of the adapter private structure,
1369  * and a hardware reset occur.
1370  **/
1371 static int __devinit igb_probe(struct pci_dev *pdev,
1372                                const struct pci_device_id *ent)
1373 {
1374         struct net_device *netdev;
1375         struct igb_adapter *adapter;
1376         struct e1000_hw *hw;
1377         u16 eeprom_data = 0;
1378         static int global_quad_port_a; /* global quad port a indication */
1379         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1380         unsigned long mmio_start, mmio_len;
1381         int err, pci_using_dac;
1382         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1383         u32 part_num;
1384
1385         err = pci_enable_device_mem(pdev);
1386         if (err)
1387                 return err;
1388
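        /* prefer 64-bit DMA for both streaming and coherent mappings and
         * fall back to 32-bit masks if either is unsupported */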
1389         pci_using_dac = 0;
1390         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1391         if (!err) {
1392                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1393                 if (!err)
1394                         pci_using_dac = 1;
1395         } else {
1396                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1397                 if (err) {
1398                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1399                         if (err) {
1400                                 dev_err(&pdev->dev, "No usable DMA "
1401                                         "configuration, aborting\n");
1402                                 goto err_dma;
1403                         }
1404                 }
1405         }
1406
1407         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1408                                            IORESOURCE_MEM),
1409                                            igb_driver_name);
1410         if (err)
1411                 goto err_pci_reg;
1412
1413         pci_enable_pcie_error_reporting(pdev);
1414
1415         pci_set_master(pdev);
1416         pci_save_state(pdev);
1417
1418         err = -ENOMEM;
1419         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1420                                    IGB_ABS_MAX_TX_QUEUES);
1421         if (!netdev)
1422                 goto err_alloc_etherdev;
1423
1424         SET_NETDEV_DEV(netdev, &pdev->dev);
1425
1426         pci_set_drvdata(pdev, netdev);
1427         adapter = netdev_priv(netdev);
1428         adapter->netdev = netdev;
1429         adapter->pdev = pdev;
1430         hw = &adapter->hw;
1431         hw->back = adapter;
1432         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1433
1434         mmio_start = pci_resource_start(pdev, 0);
1435         mmio_len = pci_resource_len(pdev, 0);
1436
1437         err = -EIO;
1438         hw->hw_addr = ioremap(mmio_start, mmio_len);
1439         if (!hw->hw_addr)
1440                 goto err_ioremap;
1441
1442         netdev->netdev_ops = &igb_netdev_ops;
1443         igb_set_ethtool_ops(netdev);
1444         netdev->watchdog_timeo = 5 * HZ;
1445
1446         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1447
1448         netdev->mem_start = mmio_start;
1449         netdev->mem_end = mmio_start + mmio_len;
1450
1451         /* PCI config space info */
1452         hw->vendor_id = pdev->vendor;
1453         hw->device_id = pdev->device;
1454         hw->revision_id = pdev->revision;
1455         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1456         hw->subsystem_device_id = pdev->subsystem_device;
1457
1458         /* Copy the default MAC, PHY and NVM function pointers */
1459         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1460         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1461         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1462         /* Initialize skew-specific constants */
1463         err = ei->get_invariants(hw);
1464         if (err)
1465                 goto err_sw_init;
1466
1467         /* setup the private structure */
1468         err = igb_sw_init(adapter);
1469         if (err)
1470                 goto err_sw_init;
1471
1472         igb_get_bus_info_pcie(hw);
1473
1474         hw->phy.autoneg_wait_to_complete = false;
1475         hw->mac.adaptive_ifs = true;
1476
1477         /* Copper options */
1478         if (hw->phy.media_type == e1000_media_type_copper) {
1479                 hw->phy.mdix = AUTO_ALL_MODES;
1480                 hw->phy.disable_polarity_correction = false;
1481                 hw->phy.ms_type = e1000_ms_hw_default;
1482         }
1483
1484         if (igb_check_reset_block(hw))
1485                 dev_info(&pdev->dev,
1486                         "PHY reset is blocked due to SOL/IDER session.\n");
1487
1488         netdev->features = NETIF_F_SG |
1489                            NETIF_F_IP_CSUM |
1490                            NETIF_F_HW_VLAN_TX |
1491                            NETIF_F_HW_VLAN_RX |
1492                            NETIF_F_HW_VLAN_FILTER;
1493
1494         netdev->features |= NETIF_F_IPV6_CSUM;
1495         netdev->features |= NETIF_F_TSO;
1496         netdev->features |= NETIF_F_TSO6;
1497         netdev->features |= NETIF_F_GRO;
1498
1499         netdev->vlan_features |= NETIF_F_TSO;
1500         netdev->vlan_features |= NETIF_F_TSO6;
1501         netdev->vlan_features |= NETIF_F_IP_CSUM;
1502         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1503         netdev->vlan_features |= NETIF_F_SG;
1504
1505         if (pci_using_dac)
1506                 netdev->features |= NETIF_F_HIGHDMA;
1507
1508         if (hw->mac.type >= e1000_82576)
1509                 netdev->features |= NETIF_F_SCTP_CSUM;
1510
1511         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1512
1513         /* before reading the NVM, reset the controller to put the device in a
1514          * known good starting state */
1515         hw->mac.ops.reset_hw(hw);
1516
1517         /* make sure the NVM is good */
1518         if (igb_validate_nvm_checksum(hw) < 0) {
1519                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1520                 err = -EIO;
1521                 goto err_eeprom;
1522         }
1523
1524         /* copy the MAC address out of the NVM */
1525         if (hw->mac.ops.read_mac_addr(hw))
1526                 dev_err(&pdev->dev, "NVM Read Error\n");
1527
1528         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1529         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1530
1531         if (!is_valid_ether_addr(netdev->perm_addr)) {
1532                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1533                 err = -EIO;
1534                 goto err_eeprom;
1535         }
1536
1537         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1538                     (unsigned long) adapter);
1539         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1540                     (unsigned long) adapter);
1541
1542         INIT_WORK(&adapter->reset_task, igb_reset_task);
1543         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1544
1545         /* Initialize link properties that are user-changeable */
1546         adapter->fc_autoneg = true;
1547         hw->mac.autoneg = true;
1548         hw->phy.autoneg_advertised = 0x2f;
1549
1550         hw->fc.requested_mode = e1000_fc_default;
1551         hw->fc.current_mode = e1000_fc_default;
1552
1553         igb_validate_mdi_setting(hw);
1554
1555         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1556          * enable the ACPI Magic Packet filter
1557          */
1558
1559         if (hw->bus.func == 0)
1560                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1561         else if (hw->mac.type == e1000_82580)
1562                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1563                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1564                                  &eeprom_data);
1565         else if (hw->bus.func == 1)
1566                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1567
1568         if (eeprom_data & eeprom_apme_mask)
1569                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1570
1571         /* now that we have the eeprom settings, apply the special cases where
1572          * the eeprom may be wrong or the board simply won't support wake on
1573          * lan on a particular port */
1574         switch (pdev->device) {
1575         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1576                 adapter->eeprom_wol = 0;
1577                 break;
1578         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1579         case E1000_DEV_ID_82576_FIBER:
1580         case E1000_DEV_ID_82576_SERDES:
1581                 /* Wake events only supported on port A for dual fiber
1582                  * regardless of eeprom setting */
1583                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1584                         adapter->eeprom_wol = 0;
1585                 break;
1586         case E1000_DEV_ID_82576_QUAD_COPPER:
1587                 /* if quad port adapter, disable WoL on all but port A */
1588                 if (global_quad_port_a != 0)
1589                         adapter->eeprom_wol = 0;
1590                 else
1591                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1592                 /* Reset for multiple quad port adapters */
1593                 if (++global_quad_port_a == 4)
1594                         global_quad_port_a = 0;
1595                 break;
1596         }
1597
1598         /* initialize the wol settings based on the eeprom settings */
1599         adapter->wol = adapter->eeprom_wol;
1600         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1601
1602         /* reset the hardware with the new settings */
1603         igb_reset(adapter);
1604
1605         /* let the f/w know that the h/w is now under the control of the
1606          * driver. */
1607         igb_get_hw_control(adapter);
1608
1609         strcpy(netdev->name, "eth%d");
1610         err = register_netdev(netdev);
1611         if (err)
1612                 goto err_register;
1613
1614         /* carrier off reporting is important to ethtool even BEFORE open */
1615         netif_carrier_off(netdev);
1616
1617 #ifdef CONFIG_IGB_DCA
1618         if (dca_add_requester(&pdev->dev) == 0) {
1619                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1620                 dev_info(&pdev->dev, "DCA enabled\n");
1621                 igb_setup_dca(adapter);
1622         }
1623
1624 #endif
1625         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1626         /* print bus type/speed/width info */
1627         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1628                  netdev->name,
1629                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1630                                                             "unknown"),
1631                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1632                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1633                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1634                    "unknown"),
1635                  netdev->dev_addr);
1636
1637         igb_read_part_num(hw, &part_num);
1638         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1639                 (part_num >> 8), (part_num & 0xff));
1640
1641         dev_info(&pdev->dev,
1642                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1643                 adapter->msix_entries ? "MSI-X" :
1644                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1645                 adapter->num_rx_queues, adapter->num_tx_queues);
1646
1647         return 0;
1648
1649 err_register:
1650         igb_release_hw_control(adapter);
1651 err_eeprom:
1652         if (!igb_check_reset_block(hw))
1653                 igb_reset_phy(hw);
1654
1655         if (hw->flash_address)
1656                 iounmap(hw->flash_address);
1657 err_sw_init:
1658         igb_clear_interrupt_scheme(adapter);
1659         iounmap(hw->hw_addr);
1660 err_ioremap:
1661         free_netdev(netdev);
1662 err_alloc_etherdev:
1663         pci_release_selected_regions(pdev,
1664                                      pci_select_bars(pdev, IORESOURCE_MEM));
1665 err_pci_reg:
1666 err_dma:
1667         pci_disable_device(pdev);
1668         return err;
1669 }
1670
1671 /**
1672  * igb_remove - Device Removal Routine
1673  * @pdev: PCI device information struct
1674  *
1675  * igb_remove is called by the PCI subsystem to alert the driver
1676  * that it should release a PCI device.  This could be caused by a
1677  * Hot-Plug event, or because the driver is going to be removed from
1678  * memory.
1679  **/
1680 static void __devexit igb_remove(struct pci_dev *pdev)
1681 {
1682         struct net_device *netdev = pci_get_drvdata(pdev);
1683         struct igb_adapter *adapter = netdev_priv(netdev);
1684         struct e1000_hw *hw = &adapter->hw;
1685
1686         /* flush_scheduled_work() may reschedule our watchdog task, so
1687          * explicitly disable watchdog tasks from being rescheduled */
1688         set_bit(__IGB_DOWN, &adapter->state);
1689         del_timer_sync(&adapter->watchdog_timer);
1690         del_timer_sync(&adapter->phy_info_timer);
1691
1692         flush_scheduled_work();
1693
1694 #ifdef CONFIG_IGB_DCA
1695         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1696                 dev_info(&pdev->dev, "DCA disabled\n");
1697                 dca_remove_requester(&pdev->dev);
1698                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1699                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1700         }
1701 #endif
1702
1703         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1704          * would have already happened in close and is redundant. */
1705         igb_release_hw_control(adapter);
1706
1707         unregister_netdev(netdev);
1708
1709         if (!igb_check_reset_block(hw))
1710                 igb_reset_phy(hw);
1711
1712         igb_clear_interrupt_scheme(adapter);
1713
1714 #ifdef CONFIG_PCI_IOV
1715         /* reclaim resources allocated to VFs */
1716         if (adapter->vf_data) {
1717                 /* disable iov and allow time for transactions to clear */
1718                 pci_disable_sriov(pdev);
1719                 msleep(500);
1720
1721                 kfree(adapter->vf_data);
1722                 adapter->vf_data = NULL;
1723                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1724                 msleep(100);
1725                 dev_info(&pdev->dev, "IOV Disabled\n");
1726         }
1727 #endif
1728
1729         iounmap(hw->hw_addr);
1730         if (hw->flash_address)
1731                 iounmap(hw->flash_address);
1732         pci_release_selected_regions(pdev,
1733                                      pci_select_bars(pdev, IORESOURCE_MEM));
1734
1735         free_netdev(netdev);
1736
1737         pci_disable_pcie_error_reporting(pdev);
1738
1739         pci_disable_device(pdev);
1740 }
1741
1742 /**
1743  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1744  * @adapter: board private structure to initialize
1745  *
1746  * This function initializes the vf specific data storage and then attempts to
1747  * allocate the VFs.  The ordering is deliberate: it is much more expensive
1748  * time-wise to disable SR-IOV than it is to allocate and free the memory
1749  * for the VFs.
1750  **/
1751 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1752 {
1753 #ifdef CONFIG_PCI_IOV
1754         struct pci_dev *pdev = adapter->pdev;
1755
1756         if (adapter->vfs_allocated_count > 7)
1757                 adapter->vfs_allocated_count = 7;
1758
1759         if (adapter->vfs_allocated_count) {
1760                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1761                                            sizeof(struct vf_data_storage),
1762                                            GFP_KERNEL);
1763                 /* if allocation failed then we do not support SR-IOV */
1764                 if (!adapter->vf_data) {
1765                         adapter->vfs_allocated_count = 0;
1766                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1767                                 "Data Storage\n");
1768                 }
1769         }
1770
1771         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1772                 kfree(adapter->vf_data);
1773                 adapter->vf_data = NULL;
1774 #endif /* CONFIG_PCI_IOV */
1775                 adapter->vfs_allocated_count = 0;
1776 #ifdef CONFIG_PCI_IOV
1777         } else {
1778                 unsigned char mac_addr[ETH_ALEN];
1779                 int i;
1780                 dev_info(&pdev->dev, "%d vfs allocated\n",
1781                          adapter->vfs_allocated_count);
1782                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1783                         random_ether_addr(mac_addr);
1784                         igb_set_vf_mac(adapter, i, mac_addr);
1785                 }
1786         }
1787 #endif /* CONFIG_PCI_IOV */
1788 }
1789
1790
1791 /**
1792  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1793  * @adapter: board private structure to initialize
1794  *
1795  * igb_init_hw_timer initializes the function pointer and values for the hw
1796  * timer found in hardware.
1797  **/
1798 static void igb_init_hw_timer(struct igb_adapter *adapter)
1799 {
1800         struct e1000_hw *hw = &adapter->hw;
1801
1802         switch (hw->mac.type) {
1803         case e1000_82580:
1804                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1805                 adapter->cycles.read = igb_read_clock;
1806                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1807                 adapter->cycles.mult = 1;
1808                 /*
1809                  * The 82580 timesync advances the system timer by 8ns every 8ns
1810                  * and the value cannot be shifted.  Instead we need to shift
1811                  * the registers to generate a 64bit timer value.  As a result
1812                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1813                  * 24 in order to generate a larger value for synchronization.
1814                  */
1815                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
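                /*
                 * With cycles.mult = 1 and cycles.shift = IGB_82580_TSYNC_SHIFT
                 * (the same 24-bit shift described above), the timecounter
                 * converts with ns = cycles >> 24, so reading the registers
                 * shifted up by 24 yields whole nanoseconds again while leaving
                 * 24 fractional bits of headroom for fine adjustments of 'mult'.
                 */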
1816                 /* disable system timer temporarily by setting bit 31 */
1817                 wr32(E1000_TSAUXC, 0x80000000);
1818                 wrfl();
1819
1820                 /* Set registers so that rollover occurs soon to test this. */
1821                 wr32(E1000_SYSTIMR, 0x00000000);
1822                 wr32(E1000_SYSTIML, 0x80000000);
1823                 wr32(E1000_SYSTIMH, 0x000000FF);
1824                 wrfl();
1825
1826                 /* enable system timer by clearing bit 31 */
1827                 wr32(E1000_TSAUXC, 0x0);
1828                 wrfl();
1829
1830                 timecounter_init(&adapter->clock,
1831                                  &adapter->cycles,
1832                                  ktime_to_ns(ktime_get_real()));
1833                 /*
1834                  * Synchronize our NIC clock against the system wall clock. NIC
1835                  * time stamp reading requires ~3us per sample; each sample is
1836                  * quite stable even under load, so only 10 samples are needed
1837                  * for each offset comparison.
1838                  */
1839                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1840                 adapter->compare.source = &adapter->clock;
1841                 adapter->compare.target = ktime_get_real;
1842                 adapter->compare.num_samples = 10;
1843                 timecompare_update(&adapter->compare, 0);
1844                 break;
1845         case e1000_82576:
1846                 /*
1847                  * Initialize hardware timer: we keep it running just in case
1848                  * some program needs it later on.
1849                  */
1850                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1851                 adapter->cycles.read = igb_read_clock;
1852                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1853                 adapter->cycles.mult = 1;
1854                 /*
1855                  * Scale the NIC clock cycle by a large factor so that
1856                  * relatively small clock corrections can be added or
1857                  * subtracted at each clock tick. The drawbacks of a large
1858                  * factor are a) that the clock register overflows more quickly
1859                  * (not such a big deal) and b) that the increment per tick has
1860                  * to fit into 24 bits.  As a result we need to use a shift of
1861                  * 19 so we can fit a value of 16 into the TIMINCA register.
1862                  */
1863                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1864                 wr32(E1000_TIMINCA,
1865                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1866                                 (16 << IGB_82576_TSYNC_SHIFT));
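                /*
                 * Worked example (assuming the 16ns increment period implied
                 * by E1000_TIMINCA_16NS_SHIFT): every 16ns tick adds
                 * 16 << 19 = 0x800000 to SYSTIM, which just fits in the
                 * 24-bit increment field, and the timecounter converts back
                 * with ns = cycles >> 19, so 0x800000 >> 19 = 16ns per tick.
                 */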
1867
1868                 /* Set registers so that rollover occurs soon to test this. */
1869                 wr32(E1000_SYSTIML, 0x00000000);
1870                 wr32(E1000_SYSTIMH, 0xFF800000);
1871                 wrfl();
1872
1873                 timecounter_init(&adapter->clock,
1874                                  &adapter->cycles,
1875                                  ktime_to_ns(ktime_get_real()));
1876                 /*
1877                  * Synchronize our NIC clock against the system wall clock. NIC
1878                  * time stamp reading requires ~3us per sample; each sample is
1879                  * quite stable even under load, so only 10 samples are needed
1880                  * for each offset comparison.
1881                  */
1882                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1883                 adapter->compare.source = &adapter->clock;
1884                 adapter->compare.target = ktime_get_real;
1885                 adapter->compare.num_samples = 10;
1886                 timecompare_update(&adapter->compare, 0);
1887                 break;
1888         case e1000_82575:
1889                 /* 82575 does not support timesync */
1890         default:
1891                 break;
1892         }
1893
1894 }
1895
1896 /**
1897  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1898  * @adapter: board private structure to initialize
1899  *
1900  * igb_sw_init initializes the Adapter private data structure.
1901  * Fields are initialized based on PCI device information and
1902  * OS network device settings (MTU size).
1903  **/
1904 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1905 {
1906         struct e1000_hw *hw = &adapter->hw;
1907         struct net_device *netdev = adapter->netdev;
1908         struct pci_dev *pdev = adapter->pdev;
1909
1910         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1911
1912         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1913         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1914         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1915         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1916
1917         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1918         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
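        /*
         * For example, with the default 1500 byte MTU this gives
         * max_frame_size = 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) = 1518 and
         * min_frame_size = 60 (ETH_ZLEN) + 4 = 64 bytes.
         */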
1919
1920 #ifdef CONFIG_PCI_IOV
1921         if (hw->mac.type == e1000_82576)
1922                 adapter->vfs_allocated_count = max_vfs;
1923
1924 #endif /* CONFIG_PCI_IOV */
1925         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1926
1927         /*
1928          * If rss_queues > 4, or if VFs are going to be allocated while more
1929          * than one rss queue is in use, combine the queues into queue pairs
1930          * in order to conserve the limited supply of interrupts.
1931          */
1932         if ((adapter->rss_queues > 4) ||
1933             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1934                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
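        /*
         * In queue-pair mode a single q_vector (and therefore a single MSI-X
         * vector) services one Tx ring and one Rx ring, roughly halving the
         * number of vectors requested by igb_init_interrupt_scheme() below.
         */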
1935
1936         /* This call may decrease the number of queues */
1937         if (igb_init_interrupt_scheme(adapter)) {
1938                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1939                 return -ENOMEM;
1940         }
1941
1942         igb_init_hw_timer(adapter);
1943         igb_probe_vfs(adapter);
1944
1945         /* Explicitly disable IRQ since the NIC can be in any state. */
1946         igb_irq_disable(adapter);
1947
1948         set_bit(__IGB_DOWN, &adapter->state);
1949         return 0;
1950 }
1951
1952 /**
1953  * igb_open - Called when a network interface is made active
1954  * @netdev: network interface device structure
1955  *
1956  * Returns 0 on success, negative value on failure
1957  *
1958  * The open entry point is called when a network interface is made
1959  * active by the system (IFF_UP).  At this point all resources needed
1960  * for transmit and receive operations are allocated, the interrupt
1961  * handler is registered with the OS, the watchdog timer is started,
1962  * and the stack is notified that the interface is ready.
1963  **/
1964 static int igb_open(struct net_device *netdev)
1965 {
1966         struct igb_adapter *adapter = netdev_priv(netdev);
1967         struct e1000_hw *hw = &adapter->hw;
1968         int err;
1969         int i;
1970
1971         /* disallow open during test */
1972         if (test_bit(__IGB_TESTING, &adapter->state))
1973                 return -EBUSY;
1974
1975         netif_carrier_off(netdev);
1976
1977         /* allocate transmit descriptors */
1978         err = igb_setup_all_tx_resources(adapter);
1979         if (err)
1980                 goto err_setup_tx;
1981
1982         /* allocate receive descriptors */
1983         err = igb_setup_all_rx_resources(adapter);
1984         if (err)
1985                 goto err_setup_rx;
1986
1987         /* e1000_power_up_phy(adapter); */
1988
1989         /* before we allocate an interrupt, we must be ready to handle it.
1990          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1991          * as soon as we call pci_request_irq, so we have to setup our
1992          * clean_rx handler before we do so.  */
1993         igb_configure(adapter);
1994
1995         err = igb_request_irq(adapter);
1996         if (err)
1997                 goto err_req_irq;
1998
1999         /* From here on the code is the same as igb_up() */
2000         clear_bit(__IGB_DOWN, &adapter->state);
2001
2002         for (i = 0; i < adapter->num_q_vectors; i++) {
2003                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2004                 napi_enable(&q_vector->napi);
2005         }
2006
2007         /* Clear any pending interrupts. */
2008         rd32(E1000_ICR);
2009
2010         igb_irq_enable(adapter);
2011
2012         /* notify VFs that reset has been completed */
2013         if (adapter->vfs_allocated_count) {
2014                 u32 reg_data = rd32(E1000_CTRL_EXT);
2015                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2016                 wr32(E1000_CTRL_EXT, reg_data);
2017         }
2018
2019         netif_tx_start_all_queues(netdev);
2020
2021         /* start the watchdog. */
2022         hw->mac.get_link_status = 1;
2023         schedule_work(&adapter->watchdog_task);
2024
2025         return 0;
2026
2027 err_req_irq:
2028         igb_release_hw_control(adapter);
2029         /* e1000_power_down_phy(adapter); */
2030         igb_free_all_rx_resources(adapter);
2031 err_setup_rx:
2032         igb_free_all_tx_resources(adapter);
2033 err_setup_tx:
2034         igb_reset(adapter);
2035
2036         return err;
2037 }
2038
2039 /**
2040  * igb_close - Disables a network interface
2041  * @netdev: network interface device structure
2042  *
2043  * Returns 0, this is not allowed to fail
2044  *
2045  * The close entry point is called when an interface is de-activated
2046  * by the OS.  The hardware is still under the driver's control, but
2047  * needs to be disabled.  A global MAC reset is issued to stop the
2048  * hardware, and all transmit and receive resources are freed.
2049  **/
2050 static int igb_close(struct net_device *netdev)
2051 {
2052         struct igb_adapter *adapter = netdev_priv(netdev);
2053
2054         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2055         igb_down(adapter);
2056
2057         igb_free_irq(adapter);
2058
2059         igb_free_all_tx_resources(adapter);
2060         igb_free_all_rx_resources(adapter);
2061
2062         return 0;
2063 }
2064
2065 /**
2066  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2067  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2068  *
2069  * Return 0 on success, negative on failure
2070  **/
2071 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2072 {
2073         struct pci_dev *pdev = tx_ring->pdev;
2074         int size;
2075
2076         size = sizeof(struct igb_buffer) * tx_ring->count;
2077         tx_ring->buffer_info = vmalloc(size);
2078         if (!tx_ring->buffer_info)
2079                 goto err;
2080         memset(tx_ring->buffer_info, 0, size);
2081
2082         /* round up to nearest 4K */
2083         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2084         tx_ring->size = ALIGN(tx_ring->size, 4096);
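        /*
         * For example, 256 descriptors * 16 bytes per advanced Tx descriptor
         * is exactly 4096 bytes, while 320 descriptors (5120 bytes) would be
         * rounded up to 8192 by ALIGN().
         */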
2085
2086         tx_ring->desc = pci_alloc_consistent(pdev,
2087                                              tx_ring->size,
2088                                              &tx_ring->dma);
2089
2090         if (!tx_ring->desc)
2091                 goto err;
2092
2093         tx_ring->next_to_use = 0;
2094         tx_ring->next_to_clean = 0;
2095         return 0;
2096
2097 err:
2098         vfree(tx_ring->buffer_info);
2099         dev_err(&pdev->dev,
2100                 "Unable to allocate memory for the transmit descriptor ring\n");
2101         return -ENOMEM;
2102 }
2103
2104 /**
2105  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2106  *                                (Descriptors) for all queues
2107  * @adapter: board private structure
2108  *
2109  * Return 0 on success, negative on failure
2110  **/
2111 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2112 {
2113         struct pci_dev *pdev = adapter->pdev;
2114         int i, err = 0;
2115
2116         for (i = 0; i < adapter->num_tx_queues; i++) {
2117                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2118                 if (err) {
2119                         dev_err(&pdev->dev,
2120                                 "Allocation for Tx Queue %u failed\n", i);
2121                         for (i--; i >= 0; i--)
2122                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2123                         break;
2124                 }
2125         }
2126
2127         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2128                 int r_idx = i % adapter->num_tx_queues;
2129                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2130         }
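        /*
         * multi_tx_table spreads all IGB_ABS_MAX_TX_QUEUES possible queue
         * indices round-robin over the Tx rings actually allocated, e.g.
         * with 4 Tx rings, slot i maps to ring i % 4.
         */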
2131         return err;
2132 }
2133
2134 /**
2135  * igb_setup_tctl - configure the transmit control registers
2136  * @adapter: Board private structure
2137  **/
2138 void igb_setup_tctl(struct igb_adapter *adapter)
2139 {
2140         struct e1000_hw *hw = &adapter->hw;
2141         u32 tctl;
2142
2143         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2144         wr32(E1000_TXDCTL(0), 0);
2145
2146         /* Program the Transmit Control Register */
2147         tctl = rd32(E1000_TCTL);
2148         tctl &= ~E1000_TCTL_CT;
2149         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2150                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2151
2152         igb_config_collision_dist(hw);
2153
2154         /* Enable transmits */
2155         tctl |= E1000_TCTL_EN;
2156
2157         wr32(E1000_TCTL, tctl);
2158 }
2159
2160 /**
2161  * igb_configure_tx_ring - Configure transmit ring after Reset
2162  * @adapter: board private structure
2163  * @ring: tx ring to configure
2164  *
2165  * Configure a transmit ring after a reset.
2166  **/
2167 void igb_configure_tx_ring(struct igb_adapter *adapter,
2168                            struct igb_ring *ring)
2169 {
2170         struct e1000_hw *hw = &adapter->hw;
2171         u32 txdctl;
2172         u64 tdba = ring->dma;
2173         int reg_idx = ring->reg_idx;
2174
2175         /* disable the queue */
2176         txdctl = rd32(E1000_TXDCTL(reg_idx));
2177         wr32(E1000_TXDCTL(reg_idx),
2178                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2179         wrfl();
2180         mdelay(10);
2181
2182         wr32(E1000_TDLEN(reg_idx),
2183                         ring->count * sizeof(union e1000_adv_tx_desc));
2184         wr32(E1000_TDBAL(reg_idx),
2185                         tdba & 0x00000000ffffffffULL);
2186         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2187
2188         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2189         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2190         writel(0, ring->head);
2191         writel(0, ring->tail);
2192
2193         txdctl |= IGB_TX_PTHRESH;
2194         txdctl |= IGB_TX_HTHRESH << 8;
2195         txdctl |= IGB_TX_WTHRESH << 16;
2196
2197         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2198         wr32(E1000_TXDCTL(reg_idx), txdctl);
2199 }
2200
2201 /**
2202  * igb_configure_tx - Configure transmit Unit after Reset
2203  * @adapter: board private structure
2204  *
2205  * Configure the Tx unit of the MAC after a reset.
2206  **/
2207 static void igb_configure_tx(struct igb_adapter *adapter)
2208 {
2209         int i;
2210
2211         for (i = 0; i < adapter->num_tx_queues; i++)
2212                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2213 }
2214
2215 /**
2216  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2217  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2218  *
2219  * Returns 0 on success, negative on failure
2220  **/
2221 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2222 {
2223         struct pci_dev *pdev = rx_ring->pdev;
2224         int size, desc_len;
2225
2226         size = sizeof(struct igb_buffer) * rx_ring->count;
2227         rx_ring->buffer_info = vmalloc(size);
2228         if (!rx_ring->buffer_info)
2229                 goto err;
2230         memset(rx_ring->buffer_info, 0, size);
2231
2232         desc_len = sizeof(union e1000_adv_rx_desc);
2233
2234         /* Round up to nearest 4K */
2235         rx_ring->size = rx_ring->count * desc_len;
2236         rx_ring->size = ALIGN(rx_ring->size, 4096);
2237
2238         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2239                                              &rx_ring->dma);
2240
2241         if (!rx_ring->desc)
2242                 goto err;
2243
2244         rx_ring->next_to_clean = 0;
2245         rx_ring->next_to_use = 0;
2246
2247         return 0;
2248
2249 err:
2250         vfree(rx_ring->buffer_info);
2251         rx_ring->buffer_info = NULL;
2252         dev_err(&pdev->dev, "Unable to allocate memory for "
2253                 "the receive descriptor ring\n");
2254         return -ENOMEM;
2255 }
2256
2257 /**
2258  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2259  *                                (Descriptors) for all queues
2260  * @adapter: board private structure
2261  *
2262  * Return 0 on success, negative on failure
2263  **/
2264 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2265 {
2266         struct pci_dev *pdev = adapter->pdev;
2267         int i, err = 0;
2268
2269         for (i = 0; i < adapter->num_rx_queues; i++) {
2270                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2271                 if (err) {
2272                         dev_err(&pdev->dev,
2273                                 "Allocation for Rx Queue %u failed\n", i);
2274                         for (i--; i >= 0; i--)
2275                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2276                         break;
2277                 }
2278         }
2279
2280         return err;
2281 }
2282
2283 /**
2284  * igb_setup_mrqc - configure the multiple receive queue control registers
2285  * @adapter: Board private structure
2286  **/
2287 static void igb_setup_mrqc(struct igb_adapter *adapter)
2288 {
2289         struct e1000_hw *hw = &adapter->hw;
2290         u32 mrqc, rxcsum;
2291         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2292         union e1000_reta {
2293                 u32 dword;
2294                 u8  bytes[4];
2295         } reta;
2296         static const u8 rsshash[40] = {
2297                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2298                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2299                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2300                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2301
2302         /* Fill out hash function seeds */
2303         for (j = 0; j < 10; j++) {
2304                 u32 rsskey = rsshash[(j * 4)];
2305                 rsskey |= rsshash[(j * 4) + 1] << 8;
2306                 rsskey |= rsshash[(j * 4) + 2] << 16;
2307                 rsskey |= rsshash[(j * 4) + 3] << 24;
2308                 array_wr32(E1000_RSSRK(0), j, rsskey);
2309         }
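        /*
         * Each RSSRK word packs four key bytes little-endian; for j = 0 the
         * bytes 0x6d, 0x5a, 0x56, 0xda above become the register value
         * 0xda565a6d.
         */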
2310
2311         num_rx_queues = adapter->rss_queues;
2312
2313         if (adapter->vfs_allocated_count) {
2314                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2315                 switch (hw->mac.type) {
2316                 case e1000_82580:
2317                         num_rx_queues = 1;
2318                         shift = 0;
2319                         break;
2320                 case e1000_82576:
2321                         shift = 3;
2322                         num_rx_queues = 2;
2323                         break;
2324                 case e1000_82575:
2325                         shift = 2;
2326                         shift2 = 6;
2327                 default:
2328                         break;
2329                 }
2330         } else {
2331                 if (hw->mac.type == e1000_82575)
2332                         shift = 6;
2333         }
2334
2335         for (j = 0; j < (32 * 4); j++) {
2336                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2337                 if (shift2)
2338                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2339                 if ((j & 3) == 3)
2340                         wr32(E1000_RETA(j >> 2), reta.dword);
2341         }
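        /*
         * The loop above fills the 128-entry redirection table four bytes at
         * a time: e.g. with num_rx_queues = 4 and shift = 0 the entries cycle
         * through 0, 1, 2, 3, ... and every fourth byte completes a dword
         * that is written out to E1000_RETA(j >> 2).
         */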
2342
2343         /*
2344          * Disable raw packet checksumming so that RSS hash is placed in
2345          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2346          * offloads as they are enabled by default
2347          */
2348         rxcsum = rd32(E1000_RXCSUM);
2349         rxcsum |= E1000_RXCSUM_PCSD;
2350
2351         if (adapter->hw.mac.type >= e1000_82576)
2352                 /* Enable Receive Checksum Offload for SCTP */
2353                 rxcsum |= E1000_RXCSUM_CRCOFL;
2354
2355         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2356         wr32(E1000_RXCSUM, rxcsum);
2357
2358         /* If VMDq is enabled then we set the appropriate mode for that, else
2359          * we default to RSS so that an RSS hash is calculated per packet even
2360          * if we are only using one queue */
2361         if (adapter->vfs_allocated_count) {
2362                 if (hw->mac.type > e1000_82575) {
2363                         /* Set the default pool for the PF's first queue */
2364                         u32 vtctl = rd32(E1000_VT_CTL);
2365                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2366                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2367                         vtctl |= adapter->vfs_allocated_count <<
2368                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2369                         wr32(E1000_VT_CTL, vtctl);
2370                 }
2371                 if (adapter->rss_queues > 1)
2372                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2373                 else
2374                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2375         } else {
2376                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2377         }
2378         igb_vmm_control(adapter);
2379
2380         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2381                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2382         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2383                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2384         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2385                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2386         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2387                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2388
2389         wr32(E1000_MRQC, mrqc);
2390 }
2391
2392 /**
2393  * igb_setup_rctl - configure the receive control registers
2394  * @adapter: Board private structure
2395  **/
2396 void igb_setup_rctl(struct igb_adapter *adapter)
2397 {
2398         struct e1000_hw *hw = &adapter->hw;
2399         u32 rctl;
2400
2401         rctl = rd32(E1000_RCTL);
2402
2403         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2404         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2405
2406         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2407                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2408
2409         /*
2410          * enable stripping of CRC. It's unlikely this will break BMC
2411          * redirection as it did with e1000. Newer features require
2412          * that the HW strips the CRC.
2413          */
2414         rctl |= E1000_RCTL_SECRC;
2415
2416         /* disable store bad packets and clear size bits. */
2417         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2418
2419         /* enable LPE to prevent packets larger than max_frame_size */
2420         rctl |= E1000_RCTL_LPE;
2421
2422         /* disable queue 0 to prevent tail write w/o re-config */
2423         wr32(E1000_RXDCTL(0), 0);
2424
2425         /* Attention!!!  For SR-IOV PF driver operations you must enable
2426          * queue drop for all VF and PF queues to prevent head of line blocking
2427          * if an un-trusted VF does not provide descriptors to hardware.
2428          */
2429         if (adapter->vfs_allocated_count) {
2430                 /* set all queue drop enable bits */
2431                 wr32(E1000_QDE, ALL_QUEUES);
2432         }
2433
2434         wr32(E1000_RCTL, rctl);
2435 }
2436
2437 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2438                                    int vfn)
2439 {
2440         struct e1000_hw *hw = &adapter->hw;
2441         u32 vmolr;
2442
2443         /* if it isn't the PF, check whether VFs are enabled and
2444          * increase the size to allow for vlan tags */
2445         if (vfn < adapter->vfs_allocated_count &&
2446             adapter->vf_data[vfn].vlans_enabled)
2447                 size += VLAN_TAG_SIZE;
2448
2449         vmolr = rd32(E1000_VMOLR(vfn));
2450         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2451         vmolr |= size | E1000_VMOLR_LPE;
2452         wr32(E1000_VMOLR(vfn), vmolr);
2453
2454         return 0;
2455 }
2456
2457 /**
2458  * igb_rlpml_set - set maximum receive packet size
2459  * @adapter: board private structure
2460  *
2461  * Configure maximum receivable packet size.
2462  **/
2463 static void igb_rlpml_set(struct igb_adapter *adapter)
2464 {
2465         u32 max_frame_size = adapter->max_frame_size;
2466         struct e1000_hw *hw = &adapter->hw;
2467         u16 pf_id = adapter->vfs_allocated_count;
2468
2469         if (adapter->vlgrp)
2470                 max_frame_size += VLAN_TAG_SIZE;
2471
2472         /* if vfs are enabled we set RLPML to the largest possible request
2473          * size and set the VMOLR RLPML to the size we need */
2474         if (pf_id) {
2475                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2476                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2477         }
2478
2479         wr32(E1000_RLPML, max_frame_size);
2480 }
2481
2482 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2483 {
2484         struct e1000_hw *hw = &adapter->hw;
2485         u32 vmolr;
2486
2487         /*
2488          * This register exists only on 82576 and newer, so on older parts
2489          * simply exit and do nothing
2490          */
2491         if (hw->mac.type < e1000_82576)
2492                 return;
2493
2494         vmolr = rd32(E1000_VMOLR(vfn));
2495         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2496                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2497
2498         /* clear all bits that might not be set */
2499         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2500
2501         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2502                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2503         /*
2504          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2505          * multicast packets
2506          */
2507         if (vfn <= adapter->vfs_allocated_count)
2508                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2509
2510         wr32(E1000_VMOLR(vfn), vmolr);
2511 }
2512
2513 /**
2514  * igb_configure_rx_ring - Configure a receive ring after Reset
2515  * @adapter: board private structure
2516  * @ring: receive ring to be configured
2517  *
2518  * Configure the Rx unit of the MAC after a reset.
2519  **/
2520 void igb_configure_rx_ring(struct igb_adapter *adapter,
2521                            struct igb_ring *ring)
2522 {
2523         struct e1000_hw *hw = &adapter->hw;
2524         u64 rdba = ring->dma;
2525         int reg_idx = ring->reg_idx;
2526         u32 srrctl, rxdctl;
2527
2528         /* disable the queue */
2529         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2530         wr32(E1000_RXDCTL(reg_idx),
2531                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2532
2533         /* Set DMA base address registers */
2534         wr32(E1000_RDBAL(reg_idx),
2535              rdba & 0x00000000ffffffffULL);
2536         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2537         wr32(E1000_RDLEN(reg_idx),
2538                        ring->count * sizeof(union e1000_adv_rx_desc));
2539
2540         /* initialize head and tail */
2541         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2542         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2543         writel(0, ring->head);
2544         writel(0, ring->tail);
2545
2546         /* set descriptor configuration */
2547         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2548                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2549                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2550 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2551                 srrctl |= IGB_RXBUFFER_16384 >>
2552                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2553 #else
2554                 srrctl |= (PAGE_SIZE / 2) >>
2555                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2556 #endif
2557                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2558         } else {
2559                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2560                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2561                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2562         }
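        /*
         * In short: buffers smaller than 1K use the header-split descriptor
         * format with up to half a page of packet data, while larger buffers
         * use the simple one-buffer format with the size expressed in 1KB
         * units via E1000_SRRCTL_BSIZEPKT_SHIFT.
         */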
2563
2564         wr32(E1000_SRRCTL(reg_idx), srrctl);
2565
2566         /* set filtering for VMDQ pools */
2567         igb_set_vmolr(adapter, reg_idx & 0x7);
2568
2569         /* enable receive descriptor fetching */
2570         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2571         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2572         rxdctl &= 0xFFF00000;
2573         rxdctl |= IGB_RX_PTHRESH;
2574         rxdctl |= IGB_RX_HTHRESH << 8;
2575         rxdctl |= IGB_RX_WTHRESH << 16;
2576         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2577 }
2578
2579 /**
2580  * igb_configure_rx - Configure receive Unit after Reset
2581  * @adapter: board private structure
2582  *
2583  * Configure the Rx unit of the MAC after a reset.
2584  **/
2585 static void igb_configure_rx(struct igb_adapter *adapter)
2586 {
2587         int i;
2588
2589         /* set UTA to appropriate mode */
2590         igb_set_uta(adapter);
2591
2592         /* set the correct pool for the PF default MAC address in entry 0 */
2593         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2594                          adapter->vfs_allocated_count);
2595
2596         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2597          * the Base and Length of the Rx Descriptor Ring */
2598         for (i = 0; i < adapter->num_rx_queues; i++)
2599                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2600 }
2601
2602 /**
2603  * igb_free_tx_resources - Free Tx Resources per Queue
2604  * @tx_ring: Tx descriptor ring for a specific queue
2605  *
2606  * Free all transmit software resources
2607  **/
2608 void igb_free_tx_resources(struct igb_ring *tx_ring)
2609 {
2610         igb_clean_tx_ring(tx_ring);
2611
2612         vfree(tx_ring->buffer_info);
2613         tx_ring->buffer_info = NULL;
2614
2615         /* if not set, then don't free */
2616         if (!tx_ring->desc)
2617                 return;
2618
2619         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2620                             tx_ring->desc, tx_ring->dma);
2621
2622         tx_ring->desc = NULL;
2623 }
2624
2625 /**
2626  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2627  * @adapter: board private structure
2628  *
2629  * Free all transmit software resources
2630  **/
2631 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2632 {
2633         int i;
2634
2635         for (i = 0; i < adapter->num_tx_queues; i++)
2636                 igb_free_tx_resources(&adapter->tx_ring[i]);
2637 }
2638
2639 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2640                                     struct igb_buffer *buffer_info)
2641 {
2642         if (buffer_info->dma) {
2643                 if (buffer_info->mapped_as_page)
2644                         pci_unmap_page(tx_ring->pdev,
2645                                         buffer_info->dma,
2646                                         buffer_info->length,
2647                                         PCI_DMA_TODEVICE);
2648                 else
2649                         pci_unmap_single(tx_ring->pdev,
2650                                         buffer_info->dma,
2651                                         buffer_info->length,
2652                                         PCI_DMA_TODEVICE);
2653                 buffer_info->dma = 0;
2654         }
2655         if (buffer_info->skb) {
2656                 dev_kfree_skb_any(buffer_info->skb);
2657                 buffer_info->skb = NULL;
2658         }
2659         buffer_info->time_stamp = 0;
2660         buffer_info->length = 0;
2661         buffer_info->next_to_watch = 0;
2662         buffer_info->mapped_as_page = false;
2663 }
2664
2665 /**
2666  * igb_clean_tx_ring - Free Tx Buffers
2667  * @tx_ring: ring to be cleaned
2668  **/
2669 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2670 {
2671         struct igb_buffer *buffer_info;
2672         unsigned long size;
2673         unsigned int i;
2674
2675         if (!tx_ring->buffer_info)
2676                 return;
2677         /* Free all the Tx ring sk_buffs */
2678
2679         for (i = 0; i < tx_ring->count; i++) {
2680                 buffer_info = &tx_ring->buffer_info[i];
2681                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2682         }
2683
2684         size = sizeof(struct igb_buffer) * tx_ring->count;
2685         memset(tx_ring->buffer_info, 0, size);
2686
2687         /* Zero out the descriptor ring */
2688         memset(tx_ring->desc, 0, tx_ring->size);
2689
2690         tx_ring->next_to_use = 0;
2691         tx_ring->next_to_clean = 0;
2692 }
2693
2694 /**
2695  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2696  * @adapter: board private structure
2697  **/
2698 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2699 {
2700         int i;
2701
2702         for (i = 0; i < adapter->num_tx_queues; i++)
2703                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2704 }
2705
2706 /**
2707  * igb_free_rx_resources - Free Rx Resources
2708  * @rx_ring: ring to clean the resources from
2709  *
2710  * Free all receive software resources
2711  **/
2712 void igb_free_rx_resources(struct igb_ring *rx_ring)
2713 {
2714         igb_clean_rx_ring(rx_ring);
2715
2716         vfree(rx_ring->buffer_info);
2717         rx_ring->buffer_info = NULL;
2718
2719         /* if not set, then don't free */
2720         if (!rx_ring->desc)
2721                 return;
2722
2723         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2724                             rx_ring->desc, rx_ring->dma);
2725
2726         rx_ring->desc = NULL;
2727 }
2728
2729 /**
2730  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2731  * @adapter: board private structure
2732  *
2733  * Free all receive software resources
2734  **/
2735 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2736 {
2737         int i;
2738
2739         for (i = 0; i < adapter->num_rx_queues; i++)
2740                 igb_free_rx_resources(&adapter->rx_ring[i]);
2741 }
2742
2743 /**
2744  * igb_clean_rx_ring - Free Rx Buffers per Queue
2745  * @rx_ring: ring to free buffers from
2746  **/
2747 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2748 {
2749         struct igb_buffer *buffer_info;
2750         unsigned long size;
2751         unsigned int i;
2752
2753         if (!rx_ring->buffer_info)
2754                 return;
2755
2756         /* Free all the Rx ring sk_buffs */
2757         for (i = 0; i < rx_ring->count; i++) {
2758                 buffer_info = &rx_ring->buffer_info[i];
2759                 if (buffer_info->dma) {
2760                         pci_unmap_single(rx_ring->pdev,
2761                                          buffer_info->dma,
2762                                          rx_ring->rx_buffer_len,
2763                                          PCI_DMA_FROMDEVICE);
2764                         buffer_info->dma = 0;
2765                 }
2766
2767                 if (buffer_info->skb) {
2768                         dev_kfree_skb(buffer_info->skb);
2769                         buffer_info->skb = NULL;
2770                 }
2771                 if (buffer_info->page_dma) {
2772                         pci_unmap_page(rx_ring->pdev,
2773                                        buffer_info->page_dma,
2774                                        PAGE_SIZE / 2,
2775                                        PCI_DMA_FROMDEVICE);
2776                         buffer_info->page_dma = 0;
2777                 }
2778                 if (buffer_info->page) {
2779                         put_page(buffer_info->page);
2780                         buffer_info->page = NULL;
2781                         buffer_info->page_offset = 0;
2782                 }
2783         }
2784
2785         size = sizeof(struct igb_buffer) * rx_ring->count;
2786         memset(rx_ring->buffer_info, 0, size);
2787
2788         /* Zero out the descriptor ring */
2789         memset(rx_ring->desc, 0, rx_ring->size);
2790
2791         rx_ring->next_to_clean = 0;
2792         rx_ring->next_to_use = 0;
2793 }
2794
2795 /**
2796  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2797  * @adapter: board private structure
2798  **/
2799 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2800 {
2801         int i;
2802
2803         for (i = 0; i < adapter->num_rx_queues; i++)
2804                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2805 }
2806
2807 /**
2808  * igb_set_mac - Change the Ethernet Address of the NIC
2809  * @netdev: network interface device structure
2810  * @p: pointer to an address structure
2811  *
2812  * Returns 0 on success, negative on failure
2813  **/
2814 static int igb_set_mac(struct net_device *netdev, void *p)
2815 {
2816         struct igb_adapter *adapter = netdev_priv(netdev);
2817         struct e1000_hw *hw = &adapter->hw;
2818         struct sockaddr *addr = p;
2819
2820         if (!is_valid_ether_addr(addr->sa_data))
2821                 return -EADDRNOTAVAIL;
2822
2823         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2824         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2825
2826         /* set the correct pool for the new PF MAC address in entry 0 */
2827         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2828                          adapter->vfs_allocated_count);
2829
2830         return 0;
2831 }
2832
2833 /**
2834  * igb_write_mc_addr_list - write multicast addresses to MTA
2835  * @netdev: network interface device structure
2836  *
2837  * Writes multicast address list to the MTA hash table.
2838  * Returns: -ENOMEM on failure
2839  *                0 on no addresses written
2840  *                X on writing X addresses to MTA
2841  **/
2842 static int igb_write_mc_addr_list(struct net_device *netdev)
2843 {
2844         struct igb_adapter *adapter = netdev_priv(netdev);
2845         struct e1000_hw *hw = &adapter->hw;
2846         struct dev_mc_list *mc_ptr = netdev->mc_list;
2847         u8  *mta_list;
2848         u32 vmolr = 0;
2849         int i;
2850
2851         if (!netdev->mc_count) {
2852                 /* nothing to program, so clear mc list */
2853                 igb_update_mc_addr_list(hw, NULL, 0);
2854                 igb_restore_vf_multicasts(adapter);
2855                 return 0;
2856         }
2857
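        /* allocate ETH_ALEN (6) bytes per multicast address for the packed list */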
2858         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2859         if (!mta_list)
2860                 return -ENOMEM;
2861
2862         /* set vmolr receive overflow multicast bit */
2863         vmolr |= E1000_VMOLR_ROMPE;
2864
2865         /* The shared function expects a packed array of only addresses. */
2866         mc_ptr = netdev->mc_list;
2867
2868         for (i = 0; i < netdev->mc_count; i++) {
2869                 if (!mc_ptr)
2870                         break;
2871                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2872                 mc_ptr = mc_ptr->next;
2873         }
2874         igb_update_mc_addr_list(hw, mta_list, i);
2875         kfree(mta_list);
2876
2877         return netdev->mc_count;
2878 }
2879
2880 /**
2881  * igb_write_uc_addr_list - write unicast addresses to RAR table
2882  * @netdev: network interface device structure
2883  *
2884  * Writes unicast address list to the RAR table.
2885  * Returns: -ENOMEM on failure/insufficient address space
2886  *                0 on no addresses written
2887  *                X on writing X addresses to the RAR table
2888  **/
2889 static int igb_write_uc_addr_list(struct net_device *netdev)
2890 {
2891         struct igb_adapter *adapter = netdev_priv(netdev);
2892         struct e1000_hw *hw = &adapter->hw;
2893         unsigned int vfn = adapter->vfs_allocated_count;
2894         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
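        /*
         * RAR entry 0 holds the PF default MAC (see igb_set_mac) and one
         * entry appears to be reserved per allocated VF, which is why only
         * rar_entry_count - (vfn + 1) slots remain for extra unicast filters.
         */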
2895         int count = 0;
2896
2897         /* return ENOMEM indicating insufficient memory for addresses */
2898         if (netdev_uc_count(netdev) > rar_entries)
2899                 return -ENOMEM;
2900
2901         if (!netdev_uc_empty(netdev) && rar_entries) {
2902                 struct netdev_hw_addr *ha;
2903
2904                 netdev_for_each_uc_addr(ha, netdev) {
2905                         if (!rar_entries)
2906                                 break;
2907                         igb_rar_set_qsel(adapter, ha->addr,
2908                                          rar_entries--,
2909                                          vfn);
2910                         count++;
2911                 }
2912         }
2913         /* clear the remaining unused RAR entries (reverse order avoids write combining) */
2914         for (; rar_entries > 0 ; rar_entries--) {
2915                 wr32(E1000_RAH(rar_entries), 0);
2916                 wr32(E1000_RAL(rar_entries), 0);
2917         }
2918         wrfl();
2919
2920         return count;
2921 }
2922
2923 /**
2924  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2925  * @netdev: network interface device structure
2926  *
2927  * The set_rx_mode entry point is called whenever the unicast or multicast
2928  * address lists or the network interface flags are updated.  This routine is
2929  * responsible for configuring the hardware for proper unicast, multicast,
2930  * promiscuous mode, and all-multi behavior.
2931  **/
2932 static void igb_set_rx_mode(struct net_device *netdev)
2933 {
2934         struct igb_adapter *adapter = netdev_priv(netdev);
2935         struct e1000_hw *hw = &adapter->hw;
2936         unsigned int vfn = adapter->vfs_allocated_count;
2937         u32 rctl, vmolr = 0;
2938         int count;
2939
2940         /* Check for Promiscuous and All Multicast modes */
2941         rctl = rd32(E1000_RCTL);
2942
2943         /* clear the affected bits */
2944         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2945
2946         if (netdev->flags & IFF_PROMISC) {
2947                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2948                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2949         } else {
2950                 if (netdev->flags & IFF_ALLMULTI) {
2951                         rctl |= E1000_RCTL_MPE;
2952                         vmolr |= E1000_VMOLR_MPME;
2953                 } else {
2954                         /*
2955                          * Write addresses to the MTA; if the attempt fails,
2956                          * just turn on multicast promiscuous mode so that
2957                          * we can at least receive multicast traffic.
2958                          */
2959                         count = igb_write_mc_addr_list(netdev);
2960                         if (count < 0) {
2961                                 rctl |= E1000_RCTL_MPE;
2962                                 vmolr |= E1000_VMOLR_MPME;
2963                         } else if (count) {
2964                                 vmolr |= E1000_VMOLR_ROMPE;
2965                         }
2966                 }
2967                 /*
2968                  * Write addresses to the available RAR registers; if there is
2969                  * not enough space to store all of them, enable unicast
2970                  * promiscuous mode.
2971                  */
2972                 count = igb_write_uc_addr_list(netdev);
2973                 if (count < 0) {
2974                         rctl |= E1000_RCTL_UPE;
2975                         vmolr |= E1000_VMOLR_ROPE;
2976                 }
2977                 rctl |= E1000_RCTL_VFE;
2978         }
2979         wr32(E1000_RCTL, rctl);
2980
2981         /*
2982          * In order to support SR-IOV and eventually VMDq it is necessary to set
2983          * the VMOLR to enable the appropriate modes.  Without this workaround
2984          * we will have issues with VLAN tag stripping not being done for frames
2985          * that are only arriving because we are the default pool
2986          */
2987         if (hw->mac.type < e1000_82576)
2988                 return;
2989
2990         vmolr |= rd32(E1000_VMOLR(vfn)) &
2991                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2992         wr32(E1000_VMOLR(vfn), vmolr);
2993         igb_restore_vf_multicasts(adapter);
2994 }
2995
2996 /* Need to wait a few seconds after link up to get diagnostic information from
2997  * the phy */
2998 static void igb_update_phy_info(unsigned long data)
2999 {
3000         struct igb_adapter *adapter = (struct igb_adapter *) data;
3001         igb_get_phy_info(&adapter->hw);
3002 }
3003
3004 /**
3005  * igb_has_link - check shared code for link and determine up/down
3006  * @adapter: pointer to driver private info
3007  **/
3008 static bool igb_has_link(struct igb_adapter *adapter)
3009 {
3010         struct e1000_hw *hw = &adapter->hw;
3011         bool link_active = false;
3012         s32 ret_val = 0;
3013
3014         /* get_link_status is set on LSC (link status change) interrupt or
3015          * rx sequence error interrupt.  It remains set until
3016          * e1000_check_for_link establishes link, and applies to copper
3017          * adapters ONLY
3018          */
3019         switch (hw->phy.media_type) {
3020         case e1000_media_type_copper:
3021                 if (hw->mac.get_link_status) {
3022                         ret_val = hw->mac.ops.check_for_link(hw);
3023                         link_active = !hw->mac.get_link_status;
3024                 } else {
3025                         link_active = true;
3026                 }
3027                 break;
3028         case e1000_media_type_internal_serdes:
3029                 ret_val = hw->mac.ops.check_for_link(hw);
3030                 link_active = hw->mac.serdes_has_link;
3031                 break;
3032         default:
3033         case e1000_media_type_unknown:
3034                 break;
3035         }
3036
3037         return link_active;
3038 }
3039
3040 /**
3041  * igb_watchdog - Timer Call-back
3042  * @data: pointer to adapter cast into an unsigned long
3043  **/
3044 static void igb_watchdog(unsigned long data)
3045 {
3046         struct igb_adapter *adapter = (struct igb_adapter *)data;
3047         /* Do the rest outside of interrupt context */
3048         schedule_work(&adapter->watchdog_task);
3049 }
3050
3051 static void igb_watchdog_task(struct work_struct *work)
3052 {
3053         struct igb_adapter *adapter = container_of(work,
3054                                                    struct igb_adapter,
3055                                                    watchdog_task);
3056         struct e1000_hw *hw = &adapter->hw;
3057         struct net_device *netdev = adapter->netdev;
3058         u32 link;
3059         int i;
3060
3061         link = igb_has_link(adapter);
3062         if (link) {
3063                 if (!netif_carrier_ok(netdev)) {
3064                         u32 ctrl;
3065                         hw->mac.ops.get_speed_and_duplex(hw,
3066                                                          &adapter->link_speed,
3067                                                          &adapter->link_duplex);
3068
3069                         ctrl = rd32(E1000_CTRL);
3070                         /* Link status messages must follow this format */
3071                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3072                                  "Flow Control: %s\n",
3073                                netdev->name,
3074                                adapter->link_speed,
3075                                adapter->link_duplex == FULL_DUPLEX ?
3076                                  "Full Duplex" : "Half Duplex",
3077                                ((ctrl & E1000_CTRL_TFCE) &&
3078                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3079                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3080                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3081
3082                         /* tweak tx_queue_len according to speed/duplex and
3083                          * adjust the timeout factor */
3084                         netdev->tx_queue_len = adapter->tx_queue_len;
3085                         adapter->tx_timeout_factor = 1;
3086                         switch (adapter->link_speed) {
3087                         case SPEED_10:
3088                                 netdev->tx_queue_len = 10;
3089                                 adapter->tx_timeout_factor = 14;
3090                                 break;
3091                         case SPEED_100:
3092                                 netdev->tx_queue_len = 100;
3093                                 /* maybe add some timeout factor ? */
3094                                 break;
3095                         }
3096
3097                         netif_carrier_on(netdev);
3098
3099                         igb_ping_all_vfs(adapter);
3100
3101                         /* link state has changed, schedule phy info update */
3102                         if (!test_bit(__IGB_DOWN, &adapter->state))
3103                                 mod_timer(&adapter->phy_info_timer,
3104                                           round_jiffies(jiffies + 2 * HZ));
3105                 }
3106         } else {
3107                 if (netif_carrier_ok(netdev)) {
3108                         adapter->link_speed = 0;
3109                         adapter->link_duplex = 0;
3110                         /* Link status messages must follow this format */
3111                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3112                                netdev->name);
3113                         netif_carrier_off(netdev);
3114
3115                         igb_ping_all_vfs(adapter);
3116
3117                         /* link state has changed, schedule phy info update */
3118                         if (!test_bit(__IGB_DOWN, &adapter->state))
3119                                 mod_timer(&adapter->phy_info_timer,
3120                                           round_jiffies(jiffies + 2 * HZ));
3121                 }
3122         }
3123
3124         igb_update_stats(adapter);
3125         igb_update_adaptive(hw);
3126
3127         for (i = 0; i < adapter->num_tx_queues; i++) {
3128                 struct igb_ring *tx_ring = &adapter->tx_ring[i];
3129                 if (!netif_carrier_ok(netdev)) {
3130                         /* We've lost link, so the controller stops DMA,
3131                          * but we've got queued Tx work that's never going
3132                          * to get done, so reset controller to flush Tx.
3133                          * (Do the reset outside of interrupt context). */
3134                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3135                                 adapter->tx_timeout_count++;
3136                                 schedule_work(&adapter->reset_task);
3137                                 /* return immediately since reset is imminent */
3138                                 return;
3139                         }
3140                 }
3141
3142                 /* Force detection of hung controller every watchdog period */
3143                 tx_ring->detect_tx_hung = true;
3144         }
3145
3146         /* Cause software interrupt to ensure rx ring is cleaned */
3147         if (adapter->msix_entries) {
3148                 u32 eics = 0;
3149                 for (i = 0; i < adapter->num_q_vectors; i++) {
3150                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3151                         eics |= q_vector->eims_value;
3152                 }
3153                 wr32(E1000_EICS, eics);
3154         } else {
3155                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3156         }
3157
3158         /* Reset the timer */
3159         if (!test_bit(__IGB_DOWN, &adapter->state))
3160                 mod_timer(&adapter->watchdog_timer,
3161                           round_jiffies(jiffies + 2 * HZ));
3162 }
3163
3164 enum latency_range {
3165         lowest_latency = 0,
3166         low_latency = 1,
3167         bulk_latency = 2,
3168         latency_invalid = 255
3169 };
3170
3171 /**
3172  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3173  *
3174  *      Stores a new ITR value based strictly on packet size.  This
3175  *      algorithm is less sophisticated than that used in igb_update_itr,
3176  *      due to the difficulty of synchronizing statistics across multiple
3177  *      receive rings.  The divisors and thresholds used by this function
3178  *      were determined based on theoretical maximum wire speed and testing
3179  *      data, in order to minimize response time while increasing bulk
3180  *      throughput.
3181  *      This functionality is controlled by the InterruptThrottleRate module
3182  *      parameter (see igb_param.c)
3183  *      NOTE:  This function is called only when operating in a multiqueue
3184  *             receive environment.
3185  * @q_vector: pointer to q_vector
3186  **/
3187 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3188 {
3189         int new_val = q_vector->itr_val;
3190         int avg_wire_size = 0;
3191         struct igb_adapter *adapter = q_vector->adapter;
3192
3193         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3194          * ints/sec - an ITR value of 976 (~250 usec per interrupt).
3195          */
3196         if (adapter->link_speed != SPEED_1000) {
3197                 new_val = 976;
3198                 goto set_itr_val;
3199         }
3200
3201         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3202                 struct igb_ring *ring = q_vector->rx_ring;
3203                 avg_wire_size = ring->total_bytes / ring->total_packets;
3204         }
3205
3206         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3207                 struct igb_ring *ring = q_vector->tx_ring;
3208                 avg_wire_size = max_t(u32, avg_wire_size,
3209                                       (ring->total_bytes /
3210                                        ring->total_packets));
3211         }
3212
3213         /* if avg_wire_size isn't set no work was done */
3214         if (!avg_wire_size)
3215                 goto clear_counts;
3216
3217         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3218         avg_wire_size += 24;
3219
3220         /* Don't starve jumbo frames */
3221         avg_wire_size = min(avg_wire_size, 3000);
3222
3223         /* Give a little boost to mid-size frames */
3224         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3225                 new_val = avg_wire_size / 3;
3226         else
3227                 new_val = avg_wire_size / 2;
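        /*
         * Rough example, assuming the ~256 ns ITR units implied by the
         * ints/sec figures quoted in igb_set_itr(): 1500-byte frames give
         * avg_wire_size = 1524 -> new_val = 762 (~5100 ints/sec), while
         * 64-byte frames give 88 -> new_val = 44 (~89000 ints/sec).
         */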
3228
3229 set_itr_val:
3230         if (new_val != q_vector->itr_val) {
3231                 q_vector->itr_val = new_val;
3232                 q_vector->set_itr = 1;
3233         }
3234 clear_counts:
3235         if (q_vector->rx_ring) {
3236                 q_vector->rx_ring->total_bytes = 0;
3237                 q_vector->rx_ring->total_packets = 0;
3238         }
3239         if (q_vector->tx_ring) {
3240                 q_vector->tx_ring->total_bytes = 0;
3241                 q_vector->tx_ring->total_packets = 0;
3242         }
3243 }
3244
3245 /**
3246  * igb_update_itr - update the dynamic ITR value based on statistics
3247  *      Stores a new ITR value based on packets and byte
3248  *      counts during the last interrupt.  The advantage of per interrupt
3249  *      computation is faster updates and more accurate ITR for the current
3250  *      traffic pattern.  Constants in this function were computed
3251  *      based on theoretical maximum wire speed and thresholds were set based
3252  *      on testing data as well as attempting to minimize response time
3253  *      while increasing bulk throughput.
3254  *      this functionality is controlled by the InterruptThrottleRate module
3255  *      parameter (see igb_param.c)
3256  *      NOTE:  These calculations are only valid when operating in a single-
3257  *             queue environment.
3258  * @adapter: pointer to adapter
3259  * @itr_setting: current q_vector->itr_val
3260  * @packets: the number of packets during this measurement interval
3261  * @bytes: the number of bytes during this measurement interval
3262  **/
3263 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3264                                    int packets, int bytes)
3265 {
3266         unsigned int retval = itr_setting;
3267
3268         if (packets == 0)
3269                 goto update_itr_done;
3270
3271         switch (itr_setting) {
3272         case lowest_latency:
3273                 /* handle TSO and jumbo frames */
3274                 if (bytes/packets > 8000)
3275                         retval = bulk_latency;
3276                 else if ((packets < 5) && (bytes > 512))
3277                         retval = low_latency;
3278                 break;
3279         case low_latency:  /* 50 usec aka 20000 ints/s */
3280                 if (bytes > 10000) {
3281                         /* this if handles the TSO accounting */
3282                         if (bytes/packets > 8000) {
3283                                 retval = bulk_latency;
3284                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3285                                 retval = bulk_latency;
3286                         } else if ((packets > 35)) {
3287                                 retval = lowest_latency;
3288                         }
3289                 } else if (bytes/packets > 2000) {
3290                         retval = bulk_latency;
3291                 } else if (packets <= 2 && bytes < 512) {
3292                         retval = lowest_latency;
3293                 }
3294                 break;
3295         case bulk_latency: /* 250 usec aka 4000 ints/s */
3296                 if (bytes > 25000) {
3297                         if (packets > 35)
3298                                 retval = low_latency;
3299                 } else if (bytes < 1500) {
3300                         retval = low_latency;
3301                 }
3302                 break;
3303         }
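        /*
         * For example, at low_latency an interval of 40 packets totalling
         * 20000 bytes (bytes > 10000, ~500 bytes/packet, packets > 35) steps
         * the setting up to lowest_latency.
         */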
3304
3305 update_itr_done:
3306         return retval;
3307 }
3308
3309 static void igb_set_itr(struct igb_adapter *adapter)
3310 {
3311         struct igb_q_vector *q_vector = adapter->q_vector[0];
3312         u16 current_itr;
3313         u32 new_itr = q_vector->itr_val;
3314
3315         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3316         if (adapter->link_speed != SPEED_1000) {
3317                 current_itr = 0;
3318                 new_itr = 4000;
3319                 goto set_itr_now;
3320         }
3321
3322         adapter->rx_itr = igb_update_itr(adapter,
3323                                     adapter->rx_itr,
3324                                     adapter->rx_ring->total_packets,
3325                                     adapter->rx_ring->total_bytes);
3326
3327         adapter->tx_itr = igb_update_itr(adapter,
3328                                     adapter->tx_itr,
3329                                     adapter->tx_ring->total_packets,
3330                                     adapter->tx_ring->total_bytes);
3331         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3332
3333         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3334         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3335                 current_itr = low_latency;
3336
3337         switch (current_itr) {
3338         /* counts and packets in update_itr are dependent on these numbers */
3339         case lowest_latency:
3340                 new_itr = 56;  /* aka 70,000 ints/sec */
3341                 break;
3342         case low_latency:
3343                 new_itr = 196; /* aka 20,000 ints/sec */
3344                 break;
3345         case bulk_latency:
3346                 new_itr = 980; /* aka 4,000 ints/sec */
3347                 break;
3348         default:
3349                 break;
3350         }
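        /*
         * The values above appear to be in ~256 ns units: 56 * 256 ns ~= 14 us
         * (~70,000 ints/sec), 196 * 256 ns ~= 50 us (~20,000 ints/sec) and
         * 980 * 256 ns ~= 250 us (~4,000 ints/sec), matching the per-case
         * comments.
         */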
3351
3352 set_itr_now:
3353         adapter->rx_ring->total_bytes = 0;
3354         adapter->rx_ring->total_packets = 0;
3355         adapter->tx_ring->total_bytes = 0;
3356         adapter->tx_ring->total_packets = 0;
3357
3358         if (new_itr != q_vector->itr_val) {
3359                 /* this attempts to bias the interrupt rate towards Bulk
3360                  * by adding intermediate steps when interrupt rate is
3361                  * increasing */
3362                 new_itr = new_itr > q_vector->itr_val ?
3363                              max((new_itr * q_vector->itr_val) /
3364                                  (new_itr + (q_vector->itr_val >> 2)),
3365                                  new_itr) :
3366                              new_itr;
3367                 /* Don't write the value here; it resets the adapter's
3368                  * internal timer, and causes us to delay far longer than
3369                  * we should between interrupts.  Instead, we write the ITR
3370                  * value at the beginning of the next interrupt so the timing
3371                  * ends up being correct.
3372                  */
3373                 q_vector->itr_val = new_itr;
3374                 q_vector->set_itr = 1;
3375         }
3376
3377         return;
3378 }
3379
3380 #define IGB_TX_FLAGS_CSUM               0x00000001
3381 #define IGB_TX_FLAGS_VLAN               0x00000002
3382 #define IGB_TX_FLAGS_TSO                0x00000004
3383 #define IGB_TX_FLAGS_IPV4               0x00000008
3384 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3385 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3386 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3387
3388 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3389                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3390 {
3391         struct e1000_adv_tx_context_desc *context_desc;
3392         unsigned int i;
3393         int err;
3394         struct igb_buffer *buffer_info;
3395         u32 info = 0, tu_cmd = 0;
3396         u32 mss_l4len_idx, l4len;
3397         *hdr_len = 0;
3398
3399         if (skb_header_cloned(skb)) {
3400                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3401                 if (err)
3402                         return err;
3403         }
3404
3405         l4len = tcp_hdrlen(skb);
3406         *hdr_len += l4len;
3407
3408         if (skb->protocol == htons(ETH_P_IP)) {
3409                 struct iphdr *iph = ip_hdr(skb);
3410                 iph->tot_len = 0;
3411                 iph->check = 0;
3412                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3413                                                          iph->daddr, 0,
3414                                                          IPPROTO_TCP,
3415                                                          0);
3416         } else if (skb_is_gso_v6(skb)) {
3417                 ipv6_hdr(skb)->payload_len = 0;
3418                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3419                                                        &ipv6_hdr(skb)->daddr,
3420                                                        0, IPPROTO_TCP, 0);
3421         }
3422
3423         i = tx_ring->next_to_use;
3424
3425         buffer_info = &tx_ring->buffer_info[i];
3426         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3427         /* VLAN MACLEN IPLEN */
3428         if (tx_flags & IGB_TX_FLAGS_VLAN)
3429                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3430         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3431         *hdr_len += skb_network_offset(skb);
3432         info |= skb_network_header_len(skb);
3433         *hdr_len += skb_network_header_len(skb);
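        /*
         * At this point *hdr_len covers the L2 + L3 + L4 headers replicated
         * for every TSO segment, e.g. 14 + 20 + 20 = 54 bytes for an untagged
         * IPv4/TCP frame with no options.
         */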
3434         context_desc->vlan_macip_lens = cpu_to_le32(info);
3435
3436         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3437         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3438
3439         if (skb->protocol == htons(ETH_P_IP))
3440                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3441         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3442
3443         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3444
3445         /* MSS L4LEN IDX */
3446         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3447         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3448
3449         /* For 82575, context index must be unique per ring. */
3450         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3451                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3452
3453         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3454         context_desc->seqnum_seed = 0;
3455
3456         buffer_info->time_stamp = jiffies;
3457         buffer_info->next_to_watch = i;
3458         buffer_info->dma = 0;
3459         i++;
3460         if (i == tx_ring->count)
3461                 i = 0;
3462
3463         tx_ring->next_to_use = i;
3464
3465         return true;
3466 }
3467
3468 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3469                                    struct sk_buff *skb, u32 tx_flags)
3470 {
3471         struct e1000_adv_tx_context_desc *context_desc;
3472         struct pci_dev *pdev = tx_ring->pdev;
3473         struct igb_buffer *buffer_info;
3474         u32 info = 0, tu_cmd = 0;
3475         unsigned int i;
3476
3477         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3478             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3479                 i = tx_ring->next_to_use;
3480                 buffer_info = &tx_ring->buffer_info[i];
3481                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3482
3483                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3484                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3485
3486                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3487                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3488                         info |= skb_network_header_len(skb);
3489
3490                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3491
3492                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3493
3494                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3495                         __be16 protocol;
3496
3497                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3498                                 const struct vlan_ethhdr *vhdr =
3499                                           (const struct vlan_ethhdr*)skb->data;
3500
3501                                 protocol = vhdr->h_vlan_encapsulated_proto;
3502                         } else {
3503                                 protocol = skb->protocol;
3504                         }
3505
3506                         switch (protocol) {
3507                         case cpu_to_be16(ETH_P_IP):
3508                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3509                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3510                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3511                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3512                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3513                                 break;
3514                         case cpu_to_be16(ETH_P_IPV6):
3515                                 /* XXX what about other V6 headers?? */
3516                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3517                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3518                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3519                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3520                                 break;
3521                         default:
3522                                 if (unlikely(net_ratelimit()))
3523                                         dev_warn(&pdev->dev,
3524                                             "partial checksum but proto=%x!\n",
3525                                             skb->protocol);
3526                                 break;
3527                         }
3528                 }
3529
3530                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3531                 context_desc->seqnum_seed = 0;
3532                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3533                         context_desc->mss_l4len_idx =
3534                                 cpu_to_le32(tx_ring->reg_idx << 4);
3535
3536                 buffer_info->time_stamp = jiffies;
3537                 buffer_info->next_to_watch = i;
3538                 buffer_info->dma = 0;
3539
3540                 i++;
3541                 if (i == tx_ring->count)
3542                         i = 0;
3543                 tx_ring->next_to_use = i;
3544
3545                 return true;
3546         }
3547         return false;
3548 }
3549
3550 #define IGB_MAX_TXD_PWR 16
3551 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3552
3553 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3554                                  unsigned int first)
3555 {
3556         struct igb_buffer *buffer_info;
3557         struct pci_dev *pdev = tx_ring->pdev;
3558         unsigned int len = skb_headlen(skb);
3559         unsigned int count = 0, i;
3560         unsigned int f;
3561
3562         i = tx_ring->next_to_use;
3563
3564         buffer_info = &tx_ring->buffer_info[i];
3565         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3566         buffer_info->length = len;
3567         /* set time_stamp *before* dma to help avoid a possible race */
3568         buffer_info->time_stamp = jiffies;
3569         buffer_info->next_to_watch = i;
3570         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3571                                           PCI_DMA_TODEVICE);
3572         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3573                 goto dma_error;
3574
3575         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3576                 struct skb_frag_struct *frag;
3577
3578                 count++;
3579                 i++;
3580                 if (i == tx_ring->count)
3581                         i = 0;
3582
3583                 frag = &skb_shinfo(skb)->frags[f];
3584                 len = frag->size;
3585
3586                 buffer_info = &tx_ring->buffer_info[i];
3587                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3588                 buffer_info->length = len;
3589                 buffer_info->time_stamp = jiffies;
3590                 buffer_info->next_to_watch = i;
3591                 buffer_info->mapped_as_page = true;
3592                 buffer_info->dma = pci_map_page(pdev,
3593                                                 frag->page,
3594                                                 frag->page_offset,
3595                                                 len,
3596                                                 PCI_DMA_TODEVICE);
3597                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3598                         goto dma_error;
3599
3600         }
3601
3602         tx_ring->buffer_info[i].skb = skb;
3603         tx_ring->buffer_info[first].next_to_watch = i;
3604
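        /* the value returned counts the data descriptors used, including the
         * one for skb->data; the caller treats a return of 0 as a DMA mapping
         * failure */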
3605         return ++count;
3606
3607 dma_error:
3608         dev_err(&pdev->dev, "TX DMA map failed\n");
3609
3610         /* clear timestamp and dma mappings for failed buffer_info mapping */
3611         buffer_info->dma = 0;
3612         buffer_info->time_stamp = 0;
3613         buffer_info->length = 0;
3614         buffer_info->next_to_watch = 0;
3615         buffer_info->mapped_as_page = false;
3616 
3617         /* clear timestamp and dma mappings for the remaining portion of the
3618          * packet; count and i are unsigned, so count down without wrapping */
3619         while (count--) {
3620                 if (i == 0)
3621                         i = tx_ring->count;
3622                 i--;
3623                 buffer_info = &tx_ring->buffer_info[i];
3624                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3625         }
3627
3628         return 0;
3629 }
3630
3631 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3632                                     int tx_flags, int count, u32 paylen,
3633                                     u8 hdr_len)
3634 {
3635         union e1000_adv_tx_desc *tx_desc;
3636         struct igb_buffer *buffer_info;
3637         u32 olinfo_status = 0, cmd_type_len;
3638         unsigned int i = tx_ring->next_to_use;
3639
3640         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3641                         E1000_ADVTXD_DCMD_DEXT);
3642
3643         if (tx_flags & IGB_TX_FLAGS_VLAN)
3644                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3645
3646         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3647                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3648
3649         if (tx_flags & IGB_TX_FLAGS_TSO) {
3650                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3651
3652                 /* insert tcp checksum */
3653                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3654
3655                 /* insert ip checksum */
3656                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3657                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3658
3659         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3660                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3661         }
3662
3663         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3664             (tx_flags & (IGB_TX_FLAGS_CSUM |
3665                          IGB_TX_FLAGS_TSO |
3666                          IGB_TX_FLAGS_VLAN)))
3667                 olinfo_status |= tx_ring->reg_idx << 4;
3668
3669         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
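        /*
         * For TSO, paylen - hdr_len leaves just the TCP payload in the PAYLEN
         * field; for non-TSO frames the caller passes hdr_len = 0, so the
         * whole skb->len is reported.
         */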
3670
3671         do {
3672                 buffer_info = &tx_ring->buffer_info[i];
3673                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3674                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3675                 tx_desc->read.cmd_type_len =
3676                         cpu_to_le32(cmd_type_len | buffer_info->length);
3677                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3678                 count--;
3679                 i++;
3680                 if (i == tx_ring->count)
3681                         i = 0;
3682         } while (count > 0);
3683
3684         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3685         /* Force memory writes to complete before letting h/w
3686          * know there are new descriptors to fetch.  (Only
3687          * applicable for weak-ordered memory model archs,
3688          * such as IA-64). */
3689         wmb();
3690
3691         tx_ring->next_to_use = i;
3692         writel(i, tx_ring->tail);
3693         /* we need this if more than one processor can write to our tail
3694          * at a time; it synchronizes IO on IA64/Altix systems */
3695         mmiowb();
3696 }
3697
3698 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3699 {
3700         struct net_device *netdev = tx_ring->netdev;
3701
3702         netif_stop_subqueue(netdev, tx_ring->queue_index);
3703
3704         /* Herbert's original patch had:
3705          *  smp_mb__after_netif_stop_queue();
3706          * but since that doesn't exist yet, just open code it. */
3707         smp_mb();
3708
3709         /* We need to check again in case another CPU has just
3710          * made room available. */
3711         if (igb_desc_unused(tx_ring) < size)
3712                 return -EBUSY;
3713
3714         /* A reprieve! */
3715         netif_wake_subqueue(netdev, tx_ring->queue_index);
3716         tx_ring->tx_stats.restart_queue++;
3717         return 0;
3718 }
3719
3720 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3721 {
3722         if (igb_desc_unused(tx_ring) >= size)
3723                 return 0;
3724         return __igb_maybe_stop_tx(tx_ring, size);
3725 }
3726
3727 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3728                                     struct igb_ring *tx_ring)
3729 {
3730         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3731         unsigned int first;
3732         unsigned int tx_flags = 0;
3733         u8 hdr_len = 0;
3734         int tso = 0, count;
3735         union skb_shared_tx *shtx = skb_tx(skb);
3736
3737         /* need: 1 descriptor per page,
3738          *       + 2 desc gap to keep tail from touching head,
3739          *       + 1 desc for skb->data,
3740          *       + 1 desc for context descriptor,
3741          * otherwise try next time */
3742         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3743                 /* this is a hard error */
3744                 return NETDEV_TX_BUSY;
3745         }
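        /*
         * For example, a TSO skb with three page fragments asks for
         * 3 + 4 = 7 free descriptors: three for the fragments, one for
         * skb->data, one context descriptor, and a two-descriptor gap to keep
         * the tail from touching the head.
         */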
3746
3747         if (unlikely(shtx->hardware)) {
3748                 shtx->in_progress = 1;
3749                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3750         }
3751
3752         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3753                 tx_flags |= IGB_TX_FLAGS_VLAN;
3754                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3755         }
3756
3757         if (skb->protocol == htons(ETH_P_IP))
3758                 tx_flags |= IGB_TX_FLAGS_IPV4;
3759
3760         first = tx_ring->next_to_use;
3761         if (skb_is_gso(skb)) {
3762                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3763
3764                 if (tso < 0) {
3765                         dev_kfree_skb_any(skb);
3766                         return NETDEV_TX_OK;
3767                 }
3768         }
3769
3770         if (tso)
3771                 tx_flags |= IGB_TX_FLAGS_TSO;
3772         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3773                  (skb->ip_summed == CHECKSUM_PARTIAL))
3774                 tx_flags |= IGB_TX_FLAGS_CSUM;
3775
3776         /*
3777          * count reflects descriptors mapped; if 0 or less then a mapping
3778          * error has occurred and we need to rewind the descriptor queue
3779          */
3780         count = igb_tx_map_adv(tx_ring, skb, first);
3781         if (!count) {
3782                 dev_kfree_skb_any(skb);
3783                 tx_ring->buffer_info[first].time_stamp = 0;
3784                 tx_ring->next_to_use = first;
3785                 return NETDEV_TX_OK;
3786         }
3787
3788         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3789
3790         /* Make sure there is space in the ring for the next send. */
3791         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3792
3793         return NETDEV_TX_OK;
3794 }
3795
3796 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3797                                       struct net_device *netdev)
3798 {
3799         struct igb_adapter *adapter = netdev_priv(netdev);
3800         struct igb_ring *tx_ring;
3801         int r_idx = 0;
3802
3803         if (test_bit(__IGB_DOWN, &adapter->state)) {
3804                 dev_kfree_skb_any(skb);
3805                 return NETDEV_TX_OK;
3806         }
3807
3808         if (skb->len <= 0) {
3809                 dev_kfree_skb_any(skb);
3810                 return NETDEV_TX_OK;
3811         }
3812
3813         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3814         tx_ring = adapter->multi_tx_table[r_idx];
3815
3816         /* This goes back to the question of how to logically map a tx queue
3817          * to a flow.  Right now, performance is impacted slightly negatively
3818          * if using multiple tx queues.  If the stack breaks away from a
3819          * single qdisc implementation, we can look at this again. */
3820         return igb_xmit_frame_ring_adv(skb, tx_ring);
3821 }
3822
3823 /**
3824  * igb_tx_timeout - Respond to a Tx Hang
3825  * @netdev: network interface device structure
3826  **/
3827 static void igb_tx_timeout(struct net_device *netdev)
3828 {
3829         struct igb_adapter *adapter = netdev_priv(netdev);
3830         struct e1000_hw *hw = &adapter->hw;
3831
3832         /* Do the reset outside of interrupt context */
3833         adapter->tx_timeout_count++;
3834
3835         if (hw->mac.type == e1000_82580)
3836                 hw->dev_spec._82575.global_device_reset = true;
3837
3838         schedule_work(&adapter->reset_task);
3839         wr32(E1000_EICS,
3840              (adapter->eims_enable_mask & ~adapter->eims_other));
3841 }
3842
3843 static void igb_reset_task(struct work_struct *work)
3844 {
3845         struct igb_adapter *adapter;
3846         adapter = container_of(work, struct igb_adapter, reset_task);
3847
3848         igb_reinit_locked(adapter);
3849 }
3850
3851 /**
3852  * igb_get_stats - Get System Network Statistics
3853  * @netdev: network interface device structure
3854  *
3855  * Returns the address of the device statistics structure.
3856  * The statistics are actually updated from the timer callback.
3857  **/
3858 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3859 {
3860         /* only return the current stats */
3861         return &netdev->stats;
3862 }
3863
3864 /**
3865  * igb_change_mtu - Change the Maximum Transfer Unit
3866  * @netdev: network interface device structure
3867  * @new_mtu: new value for maximum frame size
3868  *
3869  * Returns 0 on success, negative on failure
3870  **/
3871 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3872 {
3873         struct igb_adapter *adapter = netdev_priv(netdev);
3874         struct pci_dev *pdev = adapter->pdev;
3875         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3876         u32 rx_buffer_len, i;
3877
3878         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3879                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3880                 return -EINVAL;
3881         }
3882
3883         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3884                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3885                 return -EINVAL;
3886         }
3887
3888         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3889                 msleep(1);
3890
3891         /* igb_down has a dependency on max_frame_size */
3892         adapter->max_frame_size = max_frame;
3893
3894         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3895          * means we reserve 2 more, this pushes us to allocate from the next
3896          * larger slab size.
3897          * i.e. RXBUFFER_2048 --> size-4096 slab
3898          */
3899
3900         if (max_frame <= IGB_RXBUFFER_1024)
3901                 rx_buffer_len = IGB_RXBUFFER_1024;
3902         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3903                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3904         else
3905                 rx_buffer_len = IGB_RXBUFFER_128;
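        /*
         * The 128-byte case appears to rely on the rx path also posting
         * half-page buffers for packet data (note the PAGE_SIZE / 2 unmaps in
         * igb_clean_rx_ring), so the small buffer only has to hold the header.
         */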
3906
3907         if (netif_running(netdev))
3908                 igb_down(adapter);
3909
3910         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3911                  netdev->mtu, new_mtu);
3912         netdev->mtu = new_mtu;
3913
3914         for (i = 0; i < adapter->num_rx_queues; i++)
3915                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3916
3917         if (netif_running(netdev))
3918                 igb_up(adapter);
3919         else
3920                 igb_reset(adapter);
3921
3922         clear_bit(__IGB_RESETTING, &adapter->state);
3923
3924         return 0;
3925 }
3926
3927 /**
3928  * igb_update_stats - Update the board statistics counters
3929  * @adapter: board private structure
3930  **/
3931
3932 void igb_update_stats(struct igb_adapter *adapter)
3933 {
3934         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3935         struct e1000_hw *hw = &adapter->hw;
3936         struct pci_dev *pdev = adapter->pdev;
3937         u32 rnbc;
3938         u16 phy_tmp;
3939         int i;
3940         u64 bytes, packets;
3941
3942 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3943
3944         /*
3945          * Prevent stats update while adapter is being reset, or if the pci
3946          * connection is down.
3947          */
3948         if (adapter->link_speed == 0)
3949                 return;
3950         if (pci_channel_offline(pdev))
3951                 return;
3952
3953         bytes = 0;
3954         packets = 0;
3955         for (i = 0; i < adapter->num_rx_queues; i++) {
3956                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3957                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3958                 net_stats->rx_fifo_errors += rqdpc_tmp;
3959                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3960                 packets += adapter->rx_ring[i].rx_stats.packets;
3961         }
3962
3963         net_stats->rx_bytes = bytes;
3964         net_stats->rx_packets = packets;
3965
3966         bytes = 0;
3967         packets = 0;
3968         for (i = 0; i < adapter->num_tx_queues; i++) {
3969                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3970                 packets += adapter->tx_ring[i].tx_stats.packets;
3971         }
3972         net_stats->tx_bytes = bytes;
3973         net_stats->tx_packets = packets;
3974
3975         /* read stats registers */
3976         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3977         adapter->stats.gprc += rd32(E1000_GPRC);
3978         adapter->stats.gorc += rd32(E1000_GORCL);
3979         rd32(E1000_GORCH); /* clear GORCL */
3980         adapter->stats.bprc += rd32(E1000_BPRC);
3981         adapter->stats.mprc += rd32(E1000_MPRC);
3982         adapter->stats.roc += rd32(E1000_ROC);
3983
3984         adapter->stats.prc64 += rd32(E1000_PRC64);
3985         adapter->stats.prc127 += rd32(E1000_PRC127);
3986         adapter->stats.prc255 += rd32(E1000_PRC255);
3987         adapter->stats.prc511 += rd32(E1000_PRC511);
3988         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3989         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3990         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3991         adapter->stats.sec += rd32(E1000_SEC);
3992
3993         adapter->stats.mpc += rd32(E1000_MPC);
3994         adapter->stats.scc += rd32(E1000_SCC);
3995         adapter->stats.ecol += rd32(E1000_ECOL);
3996         adapter->stats.mcc += rd32(E1000_MCC);
3997         adapter->stats.latecol += rd32(E1000_LATECOL);
3998         adapter->stats.dc += rd32(E1000_DC);
3999         adapter->stats.rlec += rd32(E1000_RLEC);
4000         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4001         adapter->stats.xontxc += rd32(E1000_XONTXC);
4002         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4003         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4004         adapter->stats.fcruc += rd32(E1000_FCRUC);
4005         adapter->stats.gptc += rd32(E1000_GPTC);
4006         adapter->stats.gotc += rd32(E1000_GOTCL);
4007         rd32(E1000_GOTCH); /* clear GOTCL */
4008         rnbc = rd32(E1000_RNBC);
4009         adapter->stats.rnbc += rnbc;
4010         net_stats->rx_fifo_errors += rnbc;
4011         adapter->stats.ruc += rd32(E1000_RUC);
4012         adapter->stats.rfc += rd32(E1000_RFC);
4013         adapter->stats.rjc += rd32(E1000_RJC);
4014         adapter->stats.tor += rd32(E1000_TORH);
4015         adapter->stats.tot += rd32(E1000_TOTH);
4016         adapter->stats.tpr += rd32(E1000_TPR);
4017
4018         adapter->stats.ptc64 += rd32(E1000_PTC64);
4019         adapter->stats.ptc127 += rd32(E1000_PTC127);
4020         adapter->stats.ptc255 += rd32(E1000_PTC255);
4021         adapter->stats.ptc511 += rd32(E1000_PTC511);
4022         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4023         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4024
4025         adapter->stats.mptc += rd32(E1000_MPTC);
4026         adapter->stats.bptc += rd32(E1000_BPTC);
4027
4028         /* used for adaptive IFS */
4029         hw->mac.tx_packet_delta = rd32(E1000_TPT);
4030         adapter->stats.tpt += hw->mac.tx_packet_delta;
4031         hw->mac.collision_delta = rd32(E1000_COLC);
4032         adapter->stats.colc += hw->mac.collision_delta;
4033
4034         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4035         adapter->stats.rxerrc += rd32(E1000_RXERRC);
4036         adapter->stats.tncrs += rd32(E1000_TNCRS);
4037         adapter->stats.tsctc += rd32(E1000_TSCTC);
4038         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4039
4040         adapter->stats.iac += rd32(E1000_IAC);
4041         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4042         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4043         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4044         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4045         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4046         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4047         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4048         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4049
4050         /* Fill out the OS statistics structure */
4051         net_stats->multicast = adapter->stats.mprc;
4052         net_stats->collisions = adapter->stats.colc;
4053
4054         /* Rx Errors */
4055
4056         /* RLEC on some newer hardware can be incorrect so build
4057          * our own version based on RUC and ROC */
4058         net_stats->rx_errors = adapter->stats.rxerrc +
4059                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4060                 adapter->stats.ruc + adapter->stats.roc +
4061                 adapter->stats.cexterr;
4062         net_stats->rx_length_errors = adapter->stats.ruc +
4063                                       adapter->stats.roc;
4064         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4065         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4066         net_stats->rx_missed_errors = adapter->stats.mpc;
4067
4068         /* Tx Errors */
4069         net_stats->tx_errors = adapter->stats.ecol +
4070                                adapter->stats.latecol;
4071         net_stats->tx_aborted_errors = adapter->stats.ecol;
4072         net_stats->tx_window_errors = adapter->stats.latecol;
4073         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4074
4075         /* Tx Dropped needs to be maintained elsewhere */
4076
4077         /* Phy Stats */
4078         if (hw->phy.media_type == e1000_media_type_copper) {
4079                 if ((adapter->link_speed == SPEED_1000) &&
4080                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4081                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4082                         adapter->phy_stats.idle_errors += phy_tmp;
4083                 }
4084         }
4085
4086         /* Management Stats */
4087         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4088         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4089         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4090 }
4091
4092 static irqreturn_t igb_msix_other(int irq, void *data)
4093 {
4094         struct igb_adapter *adapter = data;
4095         struct e1000_hw *hw = &adapter->hw;
4096         u32 icr = rd32(E1000_ICR);
4097         /* reading ICR causes bit 31 of EICR to be cleared */
4098
4099         if (icr & E1000_ICR_DRSTA)
4100                 schedule_work(&adapter->reset_task);
4101
4102         if (icr & E1000_ICR_DOUTSYNC) {
4103                 /* HW is reporting DMA is out of sync */
4104                 adapter->stats.doosync++;
4105         }
4106
4107         /* Check for a mailbox event */
4108         if (icr & E1000_ICR_VMMB)
4109                 igb_msg_task(adapter);
4110
4111         if (icr & E1000_ICR_LSC) {
4112                 hw->mac.get_link_status = 1;
4113                 /* guard against interrupt when we're going down */
4114                 if (!test_bit(__IGB_DOWN, &adapter->state))
4115                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4116         }
4117
4118         if (adapter->vfs_allocated_count)
4119                 wr32(E1000_IMS, E1000_IMS_LSC |
4120                                 E1000_IMS_VMMB |
4121                                 E1000_IMS_DOUTSYNC);
4122         else
4123                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4124         wr32(E1000_EIMS, adapter->eims_other);
4125
4126         return IRQ_HANDLED;
4127 }
4128
4129 static void igb_write_itr(struct igb_q_vector *q_vector)
4130 {
4131         u32 itr_val = q_vector->itr_val & 0x7FFC;
4132
4133         if (!q_vector->set_itr)
4134                 return;
4135
4136         if (!itr_val)
4137                 itr_val = 0x4;
4138
4139         if (q_vector->itr_shift)
4140                 itr_val |= itr_val << q_vector->itr_shift;
4141         else
4142                 itr_val |= 0x8000000;
4143
4144         writel(itr_val, q_vector->itr_register);
4145         q_vector->set_itr = 0;
4146 }
4147
4148 static irqreturn_t igb_msix_ring(int irq, void *data)
4149 {
4150         struct igb_q_vector *q_vector = data;
4151
4152         /* Write the ITR value calculated from the previous interrupt. */
4153         igb_write_itr(q_vector);
4154
4155         napi_schedule(&q_vector->napi);
4156
4157         return IRQ_HANDLED;
4158 }
4159
4160 #ifdef CONFIG_IGB_DCA
4161 static void igb_update_dca(struct igb_q_vector *q_vector)
4162 {
4163         struct igb_adapter *adapter = q_vector->adapter;
4164         struct e1000_hw *hw = &adapter->hw;
4165         int cpu = get_cpu();
4166
4167         if (q_vector->cpu == cpu)
4168                 goto out_no_update;
4169
4170         if (q_vector->tx_ring) {
4171                 int q = q_vector->tx_ring->reg_idx;
4172                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4173                 if (hw->mac.type == e1000_82575) {
4174                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4175                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4176                 } else {
4177                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4178                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4179                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4180                 }
4181                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4182                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4183         }
4184         if (q_vector->rx_ring) {
4185                 int q = q_vector->rx_ring->reg_idx;
4186                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4187                 if (hw->mac.type == e1000_82575) {
4188                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4189                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4190                 } else {
4191                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4192                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4193                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4194                 }
4195                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4196                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4197                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4198                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4199         }
4200         q_vector->cpu = cpu;
4201 out_no_update:
4202         put_cpu();
4203 }
4204
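/**
 * igb_setup_dca - enable DCA mode and program each queue vector's target CPU
 * @adapter: board private structure
 **/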
4205 static void igb_setup_dca(struct igb_adapter *adapter)
4206 {
4207         struct e1000_hw *hw = &adapter->hw;
4208         int i;
4209
4210         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4211                 return;
4212
4213         /* Always use CB2 mode, difference is masked in the CB driver. */
4214         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4215
4216         for (i = 0; i < adapter->num_q_vectors; i++) {
4217                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4218                 q_vector->cpu = -1;
4219                 igb_update_dca(q_vector);
4220         }
4221 }
4222
4223 static int __igb_notify_dca(struct device *dev, void *data)
4224 {
4225         struct net_device *netdev = dev_get_drvdata(dev);
4226         struct igb_adapter *adapter = netdev_priv(netdev);
4227         struct pci_dev *pdev = adapter->pdev;
4228         struct e1000_hw *hw = &adapter->hw;
4229         unsigned long event = *(unsigned long *)data;
4230
4231         switch (event) {
4232         case DCA_PROVIDER_ADD:
4233                 /* if already enabled, don't do it again */
4234                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4235                         break;
4236                 if (dca_add_requester(dev) == 0) {
4237                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4238                         dev_info(&pdev->dev, "DCA enabled\n");
4239                         igb_setup_dca(adapter);
4240                         break;
4241                 }
4242                 /* Fall Through since DCA is disabled. */
4243         case DCA_PROVIDER_REMOVE:
4244                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4245                         /* without this a class_device is left
4246                          * hanging around in the sysfs model */
4247                         dca_remove_requester(dev);
4248                         dev_info(&pdev->dev, "DCA disabled\n");
4249                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4250                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4251                 }
4252                 break;
4253         }
4254
4255         return 0;
4256 }
4257
4258 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4259                           void *p)
4260 {
4261         int ret_val;
4262
4263         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4264                                          __igb_notify_dca);
4265
4266         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4267 }
4268 #endif /* CONFIG_IGB_DCA */
4269
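/**
 * igb_ping_all_vfs - send a control message to every allocated VF mailbox
 * @adapter: board private structure
 *
 * The CTS bit is included for VFs that have completed the reset handshake.
 **/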
4270 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4271 {
4272         struct e1000_hw *hw = &adapter->hw;
4273         u32 ping;
4274         int i;
4275
4276         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4277                 ping = E1000_PF_CONTROL_MSG;
4278                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4279                         ping |= E1000_VT_MSGTYPE_CTS;
4280                 igb_write_mbx(hw, &ping, 1, i);
4281         }
4282 }
4283
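/**
 * igb_set_vf_promisc - handle a VF request to change its promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF number the request came from
 **/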
4284 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4285 {
4286         struct e1000_hw *hw = &adapter->hw;
4287         u32 vmolr = rd32(E1000_VMOLR(vf));
4288         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4289
4290         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4291                             IGB_VF_FLAG_MULTI_PROMISC);
4292         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4293
4294         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4295                 vmolr |= E1000_VMOLR_MPME;
4296                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4297         } else {
4298                 /*
4299                  * if we have hashes and we are clearing a multicast promisc
4300                  * flag we need to write the hashes to the MTA as this step
4301                  * was previously skipped
4302                  */
4303                 if (vf_data->num_vf_mc_hashes > 30) {
4304                         vmolr |= E1000_VMOLR_MPME;
4305                 } else if (vf_data->num_vf_mc_hashes) {
4306                         int j;
4307                         vmolr |= E1000_VMOLR_ROMPE;
4308                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4309                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4310                 }
4311         }
4312
4313         wr32(E1000_VMOLR(vf), vmolr);
4314
4315         /* there are flags left unprocessed, likely not supported */
4316         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4317                 return -EINVAL;
4318
4319         return 0;
4321 }
4322
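/**
 * igb_set_vf_multicasts - store a VF's multicast hash list and update the MTA
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash count and hash values
 * @vf: VF number the request came from
 **/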
4323 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4324                                   u32 *msgbuf, u32 vf)
4325 {
4326         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4327         u16 *hash_list = (u16 *)&msgbuf[1];
4328         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4329         int i;
4330
4331         /* salt away the number of multicast addresses assigned
4332          * to this VF for later use to restore when the PF multicast
4333          * list changes
4334          */
4335         vf_data->num_vf_mc_hashes = n;
4336
4337         /* only up to 30 hash values supported */
4338         if (n > 30)
4339                 n = 30;
4340
4341         /* store the hashes for later use */
4342         for (i = 0; i < n; i++)
4343                 vf_data->vf_mc_hashes[i] = hash_list[i];
4344
4345         /* Flush and reset the mta with the new values */
4346         igb_set_rx_mode(adapter->netdev);
4347
4348         return 0;
4349 }
4350
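/**
 * igb_restore_vf_multicasts - rewrite stored VF multicast hashes to the MTA
 * @adapter: board private structure
 *
 * Called after the PF multicast list changes so the VF entries are not lost.
 **/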
4351 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4352 {
4353         struct e1000_hw *hw = &adapter->hw;
4354         struct vf_data_storage *vf_data;
4355         int i, j;
4356
4357         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4358                 u32 vmolr = rd32(E1000_VMOLR(i));
4359                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4360
4361                 vf_data = &adapter->vf_data[i];
4362
4363                 if ((vf_data->num_vf_mc_hashes > 30) ||
4364                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4365                         vmolr |= E1000_VMOLR_MPME;
4366                 } else if (vf_data->num_vf_mc_hashes) {
4367                         vmolr |= E1000_VMOLR_ROMPE;
4368                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4369                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4370                 }
4371                 wr32(E1000_VMOLR(i), vmolr);
4372         }
4373 }
4374
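/**
 * igb_clear_vf_vfta - remove a VF from every VLVF pool it belongs to
 * @adapter: board private structure
 * @vf: VF number being cleared
 *
 * VLVF entries whose pool becomes empty also have their VLAN id removed
 * from the VFTA.
 **/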
4375 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4376 {
4377         struct e1000_hw *hw = &adapter->hw;
4378         u32 pool_mask, reg, vid;
4379         int i;
4380
4381         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4382
4383         /* Find the vlan filter for this id */
4384         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4385                 reg = rd32(E1000_VLVF(i));
4386
4387                 /* remove the vf from the pool */
4388                 reg &= ~pool_mask;
4389
4390                 /* if pool is empty then remove entry from vfta */
4391                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4392                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4393                         vid = reg & E1000_VLVF_VLANID_MASK;
4394                         igb_vfta_set(hw, vid, false);
4395                         reg = 0;
4396                 }
4397
4398                 wr32(E1000_VLVF(i), reg);
4399         }
4400
4401         adapter->vf_data[vf].vlans_enabled = 0;
4402 }
4403
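/**
 * igb_vlvf_set - add or remove a pool from a VLAN filter (VLVF) entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the pool to the filter, false to remove it
 * @vf: pool/VF number being configured
 *
 * For VF pools the receive packet length (RLPML) is grown by 4 bytes when
 * the first VLAN is added and shrunk by 4 when the last one is removed, to
 * account for the VLAN tag.
 **/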
4404 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4405 {
4406         struct e1000_hw *hw = &adapter->hw;
4407         u32 reg, i;
4408
4409         /* The vlvf table only exists on 82576 hardware and newer */
4410         if (hw->mac.type < e1000_82576)
4411                 return -1;
4412
4413         /* we only need to do this if VMDq is enabled */
4414         if (!adapter->vfs_allocated_count)
4415                 return -1;
4416
4417         /* Find the vlan filter for this id */
4418         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4419                 reg = rd32(E1000_VLVF(i));
4420                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4421                     vid == (reg & E1000_VLVF_VLANID_MASK))
4422                         break;
4423         }
4424
4425         if (add) {
4426                 if (i == E1000_VLVF_ARRAY_SIZE) {
4427                         /* Did not find a matching VLAN ID entry that was
4428                          * enabled.  Search for a free filter entry, i.e.
4429                          * one without the enable bit set
4430                          */
4431                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4432                                 reg = rd32(E1000_VLVF(i));
4433                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4434                                         break;
4435                         }
4436                 }
4437                 if (i < E1000_VLVF_ARRAY_SIZE) {
4438                         /* Found an enabled/available entry */
4439                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4440
4441                         /* if !enabled we need to set this up in vfta */
4442                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4443                                 /* add VID to filter table */
4444                                 igb_vfta_set(hw, vid, true);
4445                                 reg |= E1000_VLVF_VLANID_ENABLE;
4446                         }
4447                         reg &= ~E1000_VLVF_VLANID_MASK;
4448                         reg |= vid;
4449                         wr32(E1000_VLVF(i), reg);
4450
4451                         /* do not modify RLPML for PF devices */
4452                         if (vf >= adapter->vfs_allocated_count)
4453                                 return 0;
4454
4455                         if (!adapter->vf_data[vf].vlans_enabled) {
4456                                 u32 size;
4457                                 reg = rd32(E1000_VMOLR(vf));
4458                                 size = reg & E1000_VMOLR_RLPML_MASK;
4459                                 size += 4;
4460                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4461                                 reg |= size;
4462                                 wr32(E1000_VMOLR(vf), reg);
4463                         }
4464
4465                         adapter->vf_data[vf].vlans_enabled++;
4466                         return 0;
4467                 }
4468         } else {
4469                 if (i < E1000_VLVF_ARRAY_SIZE) {
4470                         /* remove vf from the pool */
4471                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4472                         /* if pool is empty then remove entry from vfta */
4473                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4474                                 reg = 0;
4475                                 igb_vfta_set(hw, vid, false);
4476                         }
4477                         wr32(E1000_VLVF(i), reg);
4478
4479                         /* do not modify RLPML for PF devices */
4480                         if (vf >= adapter->vfs_allocated_count)
4481                                 return 0;
4482
4483                         adapter->vf_data[vf].vlans_enabled--;
4484                         if (!adapter->vf_data[vf].vlans_enabled) {
4485                                 u32 size;
4486                                 reg = rd32(E1000_VMOLR(vf));
4487                                 size = reg & E1000_VMOLR_RLPML_MASK;
4488                                 size -= 4;
4489                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4490                                 reg |= size;
4491                                 wr32(E1000_VMOLR(vf), reg);
4492                         }
4493                         return 0;
4494                 }
4495         }
4496         return -1;
4497 }
4498
4499 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4500 {
4501         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4502         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4503
4504         return igb_vlvf_set(adapter, vid, add, vf);
4505 }
4506
4507 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4508 {
4509         /* clear all flags */
4510         adapter->vf_data[vf].flags = 0;
4511         adapter->vf_data[vf].last_nack = jiffies;
4512
4513         /* reset offloads to defaults */
4514         igb_set_vmolr(adapter, vf);
4515
4516         /* reset vlans for device */
4517         igb_clear_vf_vfta(adapter, vf);
4518
4519         /* reset multicast table array for vf */
4520         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4521
4522         /* Flush and reset the mta with the new values */
4523         igb_set_rx_mode(adapter->netdev);
4524 }
4525
4526 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4527 {
4528         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4529
4530         /* generate a new mac address as we were hotplug removed/added */
4531         random_ether_addr(vf_mac);
4532
4533         /* process remaining reset events */
4534         igb_vf_reset(adapter, vf);
4535 }
4536
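/**
 * igb_vf_reset_msg - respond to a VF reset request
 * @adapter: board private structure
 * @vf: VF number that requested the reset
 *
 * Resets the VF state, programs its MAC address into a RAR entry, enables
 * its Tx/Rx queues, and replies with an ACK carrying the MAC address.
 **/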
4537 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4538 {
4539         struct e1000_hw *hw = &adapter->hw;
4540         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4541         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4542         u32 reg, msgbuf[3];
4543         u8 *addr = (u8 *)(&msgbuf[1]);
4544
4545         /* process all the same items cleared in a function level reset */
4546         igb_vf_reset(adapter, vf);
4547
4548         /* set vf mac address */
4549         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4550
4551         /* enable transmit and receive for vf */
4552         reg = rd32(E1000_VFTE);
4553         wr32(E1000_VFTE, reg | (1 << vf));
4554         reg = rd32(E1000_VFRE);
4555         wr32(E1000_VFRE, reg | (1 << vf));
4556
4557         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4558
4559         /* reply to reset with ack and vf mac address */
4560         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4561         memcpy(addr, vf_mac, 6);
4562         igb_write_mbx(hw, msgbuf, 3, vf);
4563 }
4564
4565 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4566 {
4567         unsigned char *addr = (unsigned char *)&msg[1];
4568         int err = -1;
4569
4570         if (is_valid_ether_addr(addr))
4571                 err = igb_set_vf_mac(adapter, vf, addr);
4572
4573         return err;
4574 }
4575
4576 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4577 {
4578         struct e1000_hw *hw = &adapter->hw;
4579         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4580         u32 msg = E1000_VT_MSGTYPE_NACK;
4581
4582         /* if device isn't clear to send it shouldn't be reading either */
4583         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4584             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4585                 igb_write_mbx(hw, &msg, 1, vf);
4586                 vf_data->last_nack = jiffies;
4587         }
4588 }
4589
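/**
 * igb_rcv_msg_from_vf - read and dispatch a pending mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF number with a pending message
 *
 * The result of the requested operation is reported back to the VF as an
 * ACK or NACK.
 **/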
4590 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4591 {
4592         struct pci_dev *pdev = adapter->pdev;
4593         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4594         struct e1000_hw *hw = &adapter->hw;
4595         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4596         s32 retval;
4597
4598         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4599
4600         if (retval) {
4601                 /* if receive failed, revoke the VF CTS flag and restart init */
4602                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4603                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4604                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4605                         return;
4606                 goto out;
4607         }
4608
4609         /* this is a message we already processed, do nothing */
4610         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4611                 return;
4612
4613         /*
4614          * until the vf completes a reset it should not be
4615          * allowed to start any configuration.
4616          */
4617
4618         if (msgbuf[0] == E1000_VF_RESET) {
4619                 igb_vf_reset_msg(adapter, vf);
4620                 return;
4621         }
4622
4623         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4624                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4625                         return;
4626                 retval = -1;
4627                 goto out;
4628         }
4629
4630         switch ((msgbuf[0] & 0xFFFF)) {
4631         case E1000_VF_SET_MAC_ADDR:
4632                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4633                 break;
4634         case E1000_VF_SET_PROMISC:
4635                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4636                 break;
4637         case E1000_VF_SET_MULTICAST:
4638                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4639                 break;
4640         case E1000_VF_SET_LPE:
4641                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4642                 break;
4643         case E1000_VF_SET_VLAN:
4644                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4645                 break;
4646         default:
4647                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4648                 retval = -1;
4649                 break;
4650         }
4651
4652         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4653 out:
4654         /* notify the VF of the results of what it sent us */
4655         if (retval)
4656                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4657         else
4658                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4659
4660         igb_write_mbx(hw, msgbuf, 1, vf);
4661 }
4662
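/**
 * igb_msg_task - service pending reset requests, messages and acks from VFs
 * @adapter: board private structure
 **/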
4663 static void igb_msg_task(struct igb_adapter *adapter)
4664 {
4665         struct e1000_hw *hw = &adapter->hw;
4666         u32 vf;
4667
4668         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4669                 /* process any reset requests */
4670                 if (!igb_check_for_rst(hw, vf))
4671                         igb_vf_reset_event(adapter, vf);
4672
4673                 /* process any messages pending */
4674                 if (!igb_check_for_msg(hw, vf))
4675                         igb_rcv_msg_from_vf(adapter, vf);
4676
4677                 /* process any acks */
4678                 if (!igb_check_for_ack(hw, vf))
4679                         igb_rcv_ack_from_vf(adapter, vf);
4680         }
4681 }
4682
4683 /**
4684  *  igb_set_uta - Set unicast filter table address
4685  *  @adapter: board private structure
4686  *
4687  *  The unicast table address is a register array of 32-bit registers.
4688  *  The table is meant to be used in a way similar to how the MTA is used
4689  *  however due to certain limitations in the hardware it is necessary to
4690  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4691  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4692  **/
4693 static void igb_set_uta(struct igb_adapter *adapter)
4694 {
4695         struct e1000_hw *hw = &adapter->hw;
4696         int i;
4697
4698         /* The UTA table only exists on 82576 hardware and newer */
4699         if (hw->mac.type < e1000_82576)
4700                 return;
4701
4702         /* we only need to do this if VMDq is enabled */
4703         if (!adapter->vfs_allocated_count)
4704                 return;
4705
4706         for (i = 0; i < hw->mac.uta_reg_count; i++)
4707                 array_wr32(E1000_UTA, i, ~0);
4708 }
4709
4710 /**
4711  * igb_intr_msi - Interrupt Handler
4712  * @irq: interrupt number
4713  * @data: pointer to a network interface device structure
4714  **/
4715 static irqreturn_t igb_intr_msi(int irq, void *data)
4716 {
4717         struct igb_adapter *adapter = data;
4718         struct igb_q_vector *q_vector = adapter->q_vector[0];
4719         struct e1000_hw *hw = &adapter->hw;
4720         /* read ICR disables interrupts using IAM */
4721         u32 icr = rd32(E1000_ICR);
4722
4723         igb_write_itr(q_vector);
4724
4725         if (icr & E1000_ICR_DRSTA)
4726                 schedule_work(&adapter->reset_task);
4727
4728         if (icr & E1000_ICR_DOUTSYNC) {
4729                 /* HW is reporting DMA is out of sync */
4730                 adapter->stats.doosync++;
4731         }
4732
4733         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4734                 hw->mac.get_link_status = 1;
4735                 if (!test_bit(__IGB_DOWN, &adapter->state))
4736                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4737         }
4738
4739         napi_schedule(&q_vector->napi);
4740
4741         return IRQ_HANDLED;
4742 }
4743
4744 /**
4745  * igb_intr - Legacy Interrupt Handler
4746  * @irq: interrupt number
4747  * @data: pointer to a network interface device structure
4748  **/
4749 static irqreturn_t igb_intr(int irq, void *data)
4750 {
4751         struct igb_adapter *adapter = data;
4752         struct igb_q_vector *q_vector = adapter->q_vector[0];
4753         struct e1000_hw *hw = &adapter->hw;
4754         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4755          * need for the IMC write */
4756         u32 icr = rd32(E1000_ICR);
4757         if (!icr)
4758                 return IRQ_NONE;  /* Not our interrupt */
4759
4760         igb_write_itr(q_vector);
4761
4762         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4763          * not set, then the adapter didn't send an interrupt */
4764         if (!(icr & E1000_ICR_INT_ASSERTED))
4765                 return IRQ_NONE;
4766
4767         if (icr & E1000_ICR_DRSTA)
4768                 schedule_work(&adapter->reset_task);
4769
4770         if (icr & E1000_ICR_DOUTSYNC) {
4771                 /* HW is reporting DMA is out of sync */
4772                 adapter->stats.doosync++;
4773         }
4774
4775         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4776                 hw->mac.get_link_status = 1;
4777                 /* guard against interrupt when we're going down */
4778                 if (!test_bit(__IGB_DOWN, &adapter->state))
4779                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4780         }
4781
4782         napi_schedule(&q_vector->napi);
4783
4784         return IRQ_HANDLED;
4785 }
4786
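/**
 * igb_ring_irq_enable - update ITR if needed and re-arm this vector's interrupt
 * @q_vector: queue vector whose interrupt is being re-enabled
 **/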
4787 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4788 {
4789         struct igb_adapter *adapter = q_vector->adapter;
4790         struct e1000_hw *hw = &adapter->hw;
4791
4792         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4793             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4794                 if (!adapter->msix_entries)
4795                         igb_set_itr(adapter);
4796                 else
4797                         igb_update_ring_itr(q_vector);
4798         }
4799
4800         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4801                 if (adapter->msix_entries)
4802                         wr32(E1000_EIMS, q_vector->eims_value);
4803                 else
4804                         igb_irq_enable(adapter);
4805         }
4806 }
4807
4808 /**
4809  * igb_poll - NAPI Rx polling callback
4810  * @napi: napi polling structure
4811  * @budget: count of how many packets we should handle
4812  **/
4813 static int igb_poll(struct napi_struct *napi, int budget)
4814 {
4815         struct igb_q_vector *q_vector = container_of(napi,
4816                                                      struct igb_q_vector,
4817                                                      napi);
4818         int tx_clean_complete = 1, work_done = 0;
4819
4820 #ifdef CONFIG_IGB_DCA
4821         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4822                 igb_update_dca(q_vector);
4823 #endif
4824         if (q_vector->tx_ring)
4825                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4826
4827         if (q_vector->rx_ring)
4828                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4829
4830         if (!tx_clean_complete)
4831                 work_done = budget;
4832
4833         /* If not enough Rx work done, exit the polling mode */
4834         if (work_done < budget) {
4835                 napi_complete(napi);
4836                 igb_ring_irq_enable(q_vector);
4837         }
4838
4839         return work_done;
4840 }
4841
4842 /**
4843  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4844  * @adapter: board private structure
4845  * @shhwtstamps: timestamp structure to update
4846  * @regval: unsigned 64bit system time value.
4847  *
4848  * We need to convert the system time value stored in the RX/TXSTMP registers
4849  * into a hwtstamp which can be used by the upper level timestamping functions
4850  */
4851 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4852                                    struct skb_shared_hwtstamps *shhwtstamps,
4853                                    u64 regval)
4854 {
4855         u64 ns;
4856
4857         /*
4858          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4859          * 24 to match clock shift we setup earlier.
4860          */
4861         if (adapter->hw.mac.type == e1000_82580)
4862                 regval <<= IGB_82580_TSYNC_SHIFT;
4863
4864         ns = timecounter_cyc2time(&adapter->clock, regval);
4865         timecompare_update(&adapter->compare, ns);
4866         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4867         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4868         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4869 }
4870
4871 /**
4872  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4873  * @q_vector: pointer to q_vector containing needed info
4874  * @skb: packet that was just sent
4875  *
4876  * If we were asked to do hardware stamping and such a time stamp is
4877  * available, then it must have been for this skb here because we allow
4878  * only one such packet into the queue.
4879  */
4880 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4881 {
4882         struct igb_adapter *adapter = q_vector->adapter;
4883         union skb_shared_tx *shtx = skb_tx(skb);
4884         struct e1000_hw *hw = &adapter->hw;
4885         struct skb_shared_hwtstamps shhwtstamps;
4886         u64 regval;
4887
4888         /* if skb does not support hw timestamp or TX stamp not valid exit */
4889         /* if skb was not marked for hw timestamping or TX stamp is not valid, exit */
4890             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4891                 return;
4892
4893         regval = rd32(E1000_TXSTMPL);
4894         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4895
4896         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4897         skb_tstamp_tx(skb, &shhwtstamps);
4898 }
4899
4900 /**
4901  * igb_clean_tx_irq - Reclaim resources after transmit completes
4902  * @q_vector: pointer to q_vector containing needed info
4903  * returns true if ring is completely cleaned
4904  **/
4905 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4906 {
4907         struct igb_adapter *adapter = q_vector->adapter;
4908         struct igb_ring *tx_ring = q_vector->tx_ring;
4909         struct net_device *netdev = tx_ring->netdev;
4910         struct e1000_hw *hw = &adapter->hw;
4911         struct igb_buffer *buffer_info;
4912         struct sk_buff *skb;
4913         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4914         unsigned int total_bytes = 0, total_packets = 0;
4915         unsigned int i, eop, count = 0;
4916         bool cleaned = false;
4917
4918         i = tx_ring->next_to_clean;
4919         eop = tx_ring->buffer_info[i].next_to_watch;
4920         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4921
4922         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4923                (count < tx_ring->count)) {
4924                 for (cleaned = false; !cleaned; count++) {
4925                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4926                         buffer_info = &tx_ring->buffer_info[i];
4927                         cleaned = (i == eop);
4928                         skb = buffer_info->skb;
4929
4930                         if (skb) {
4931                                 unsigned int segs, bytecount;
4932                                 /* gso_segs is currently only valid for tcp */
4933                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4934                                 /* multiply data chunks by size of headers */
4935                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4936                                             skb->len;
4937                                 total_packets += segs;
4938                                 total_bytes += bytecount;
4939
4940                                 igb_tx_hwtstamp(q_vector, skb);
4941                         }
4942
4943                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4944                         tx_desc->wb.status = 0;
4945
4946                         i++;
4947                         if (i == tx_ring->count)
4948                                 i = 0;
4949                 }
4950                 eop = tx_ring->buffer_info[i].next_to_watch;
4951                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4952         }
4953
4954         tx_ring->next_to_clean = i;
4955
4956         if (unlikely(count &&
4957                      netif_carrier_ok(netdev) &&
4958                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4959                 /* Make sure that anybody stopping the queue after this
4960                  * sees the new next_to_clean.
4961                  */
4962                 smp_mb();
4963                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4964                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4965                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4966                         tx_ring->tx_stats.restart_queue++;
4967                 }
4968         }
4969
4970         if (tx_ring->detect_tx_hung) {
4971                 /* Detect a transmit hang in hardware; this serializes the
4972                  * check with the clearing of time_stamp and movement of i */
4973                 tx_ring->detect_tx_hung = false;
4974                 if (tx_ring->buffer_info[i].time_stamp &&
4975                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4976                                (adapter->tx_timeout_factor * HZ)) &&
4977                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
4978
4979                         /* detected Tx unit hang */
4980                         dev_err(&tx_ring->pdev->dev,
4981                                 "Detected Tx Unit Hang\n"
4982                                 "  Tx Queue             <%d>\n"
4983                                 "  TDH                  <%x>\n"
4984                                 "  TDT                  <%x>\n"
4985                                 "  next_to_use          <%x>\n"
4986                                 "  next_to_clean        <%x>\n"
4987                                 "buffer_info[next_to_clean]\n"
4988                                 "  time_stamp           <%lx>\n"
4989                                 "  next_to_watch        <%x>\n"
4990                                 "  jiffies              <%lx>\n"
4991                                 "  desc.status          <%x>\n",
4992                                 tx_ring->queue_index,
4993                                 readl(tx_ring->head),
4994                                 readl(tx_ring->tail),
4995                                 tx_ring->next_to_use,
4996                                 tx_ring->next_to_clean,
4997                                 tx_ring->buffer_info[eop].time_stamp,
4998                                 eop,
4999                                 jiffies,
5000                                 eop_desc->wb.status);
5001                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5002                 }
5003         }
5004         tx_ring->total_bytes += total_bytes;
5005         tx_ring->total_packets += total_packets;
5006         tx_ring->tx_stats.bytes += total_bytes;
5007         tx_ring->tx_stats.packets += total_packets;
5008         return (count < tx_ring->count);
5009 }
5010
5011 /**
5012  * igb_receive_skb - helper function to handle rx indications
5013  * @q_vector: structure containing interrupt and ring information
5014  * @skb: packet to send up
5015  * @vlan_tag: vlan tag for packet
5016  **/
5017 static void igb_receive_skb(struct igb_q_vector *q_vector,
5018                             struct sk_buff *skb,
5019                             u16 vlan_tag)
5020 {
5021         struct igb_adapter *adapter = q_vector->adapter;
5022
5023         if (vlan_tag)
5024                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5025                                  vlan_tag, skb);
5026         else
5027                 napi_gro_receive(&q_vector->napi, skb);
5028 }
5029
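/**
 * igb_rx_checksum_adv - set the skb checksum state from the rx descriptor
 * @ring: ring the packet was received on
 * @status_err: status/error field of the rx descriptor
 * @skb: packet being processed
 **/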
5030 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5031                                        u32 status_err, struct sk_buff *skb)
5032 {
5033         skb->ip_summed = CHECKSUM_NONE;
5034
5035         /* skip if the Ignore Checksum bit is set or Rx csum is disabled via ethtool */
5036         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5037              (status_err & E1000_RXD_STAT_IXSM))
5038                 return;
5039
5040         /* TCP/UDP checksum error bit is set */
5041         if (status_err &
5042             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5043                 /*
5044                  * work around an erratum where the TCPE (aka L4E) bit is
5045                  * set incorrectly on 64 byte (60 byte w/o CRC) SCTP
5046                  * packets; let the stack verify the crc32c instead
5047                  */
5048                 if ((skb->len == 60) &&
5049                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5050                         ring->rx_stats.csum_err++;
5051
5052                 /* let the stack verify checksum errors */
5053                 return;
5054         }
5055         /* It must be a TCP or UDP packet with a valid checksum */
5056         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5057                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5058
5059         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5060 }
5061
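/**
 * igb_rx_hwtstamp - retrieve a pending hardware rx timestamp, if any
 * @q_vector: queue vector the packet arrived on
 * @staterr: status/error field of the rx descriptor
 * @skb: packet to attach the timestamp to
 **/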
5062 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5063                                    struct sk_buff *skb)
5064 {
5065         struct igb_adapter *adapter = q_vector->adapter;
5066         struct e1000_hw *hw = &adapter->hw;
5067         u64 regval;
5068
5069         /*
5070          * If this bit is set, then the RX registers contain the time stamp. No
5071          * other packet will be time stamped until we read these registers, so
5072          * read the registers to make them available again. Because only one
5073          * packet can be time stamped at a time, we know that the register
5074          * values must belong to this one here and therefore we don't need to
5075          * compare any of the additional attributes stored for it.
5076          *
5077          * If nothing went wrong, then it should have a skb_shared_tx that we
5078          * can turn into a skb_shared_hwtstamps.
5079          */
5080         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5081                 return;
5082         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5083                 return;
5084
5085         regval = rd32(E1000_RXSTMPL);
5086         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5087
5088         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5089 }
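
/**
 * igb_get_hlen - extract the header length reported in an rx descriptor
 * @rx_ring: ring the descriptor belongs to
 * @rx_desc: rx descriptor to parse
 **/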
5090 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5091                                union e1000_adv_rx_desc *rx_desc)
5092 {
5093         /* HW will not DMA in data larger than the given buffer, even if it
5094          * parses the (NFS, of course) header to be larger.  In that case, it
5095          * fills the header buffer and spills the rest into the page.
5096          */
5097         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5098                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5099         if (hlen > rx_ring->rx_buffer_len)
5100                 hlen = rx_ring->rx_buffer_len;
5101         return hlen;
5102 }
5103
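/**
 * igb_clean_rx_irq_adv - reclaim completed rx descriptors, send packets up
 * @q_vector: queue vector owning the rx ring
 * @work_done: incremented once for each packet processed
 * @budget: NAPI budget, maximum number of packets to process in this call
 **/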
5104 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5105                                  int *work_done, int budget)
5106 {
5107         struct igb_ring *rx_ring = q_vector->rx_ring;
5108         struct net_device *netdev = rx_ring->netdev;
5109         struct pci_dev *pdev = rx_ring->pdev;
5110         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5111         struct igb_buffer *buffer_info, *next_buffer;
5112         struct sk_buff *skb;
5113         bool cleaned = false;
5114         int cleaned_count = 0;
5115         int current_node = numa_node_id();
5116         unsigned int total_bytes = 0, total_packets = 0;
5117         unsigned int i;
5118         u32 staterr;
5119         u16 length;
5120         u16 vlan_tag;
5121
5122         i = rx_ring->next_to_clean;
5123         buffer_info = &rx_ring->buffer_info[i];
5124         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5125         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5126
5127         while (staterr & E1000_RXD_STAT_DD) {
5128                 if (*work_done >= budget)
5129                         break;
5130                 (*work_done)++;
5131
5132                 skb = buffer_info->skb;
5133                 prefetch(skb->data - NET_IP_ALIGN);
5134                 buffer_info->skb = NULL;
5135
5136                 i++;
5137                 if (i == rx_ring->count)
5138                         i = 0;
5139
5140                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5141                 prefetch(next_rxd);
5142                 next_buffer = &rx_ring->buffer_info[i];
5143
5144                 length = le16_to_cpu(rx_desc->wb.upper.length);
5145                 cleaned = true;
5146                 cleaned_count++;
5147
5148                 if (buffer_info->dma) {
5149                         pci_unmap_single(pdev, buffer_info->dma,
5150                                          rx_ring->rx_buffer_len,
5151                                          PCI_DMA_FROMDEVICE);
5152                         buffer_info->dma = 0;
5153                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5154                                 skb_put(skb, length);
5155                                 goto send_up;
5156                         }
5157                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5158                 }
5159
5160                 if (length) {
5161                         pci_unmap_page(pdev, buffer_info->page_dma,
5162                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5163                         buffer_info->page_dma = 0;
5164
5165                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5166                                                 buffer_info->page,
5167                                                 buffer_info->page_offset,
5168                                                 length);
5169
5170                         if ((page_count(buffer_info->page) != 1) ||
5171                             (page_to_nid(buffer_info->page) != current_node))
5172                                 buffer_info->page = NULL;
5173                         else
5174                                 get_page(buffer_info->page);
5175
5176                         skb->len += length;
5177                         skb->data_len += length;
5178                         skb->truesize += length;
5179                 }
5180
5181                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5182                         buffer_info->skb = next_buffer->skb;
5183                         buffer_info->dma = next_buffer->dma;
5184                         next_buffer->skb = skb;
5185                         next_buffer->dma = 0;
5186                         goto next_desc;
5187                 }
5188 send_up:
5189                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5190                         dev_kfree_skb_irq(skb);
5191                         goto next_desc;
5192                 }
5193
5194                 igb_rx_hwtstamp(q_vector, staterr, skb);
5195                 total_bytes += skb->len;
5196                 total_packets++;
5197
5198                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5199
5200                 skb->protocol = eth_type_trans(skb, netdev);
5201                 skb_record_rx_queue(skb, rx_ring->queue_index);
5202
5203                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5204                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5205
5206                 igb_receive_skb(q_vector, skb, vlan_tag);
5207
5208 next_desc:
5209                 rx_desc->wb.upper.status_error = 0;
5210
5211                 /* return some buffers to hardware, one at a time is too slow */
5212                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5213                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5214                         cleaned_count = 0;
5215                 }
5216
5217                 /* use prefetched values */
5218                 rx_desc = next_rxd;
5219                 buffer_info = next_buffer;
5220                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5221         }
5222
5223         rx_ring->next_to_clean = i;
5224         cleaned_count = igb_desc_unused(rx_ring);
5225
5226         if (cleaned_count)
5227                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5228
5229         rx_ring->total_packets += total_packets;
5230         rx_ring->total_bytes += total_bytes;
5231         rx_ring->rx_stats.packets += total_packets;
5232         rx_ring->rx_stats.bytes += total_bytes;
5233         return cleaned;
5234 }
5235
5236 /**
5237  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5238  * @rx_ring: rx descriptor ring to refill with buffers
5239  **/
5240 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5241 {
5242         struct net_device *netdev = rx_ring->netdev;
5243         union e1000_adv_rx_desc *rx_desc;
5244         struct igb_buffer *buffer_info;
5245         struct sk_buff *skb;
5246         unsigned int i;
5247         int bufsz;
5248
5249         i = rx_ring->next_to_use;
5250         buffer_info = &rx_ring->buffer_info[i];
5251
5252         bufsz = rx_ring->rx_buffer_len;
5253
5254         while (cleaned_count--) {
5255                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5256
5257                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5258                         if (!buffer_info->page) {
5259                                 buffer_info->page = netdev_alloc_page(netdev);
5260                                 if (!buffer_info->page) {
5261                                         rx_ring->rx_stats.alloc_failed++;
5262                                         goto no_buffers;
5263                                 }
5264                                 buffer_info->page_offset = 0;
5265                         } else {
5266                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5267                         }
5268                         buffer_info->page_dma =
5269                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5270                                              buffer_info->page_offset,
5271                                              PAGE_SIZE / 2,
5272                                              PCI_DMA_FROMDEVICE);
5273                         if (pci_dma_mapping_error(rx_ring->pdev,
5274                                                   buffer_info->page_dma)) {
5275                                 buffer_info->page_dma = 0;
5276                                 rx_ring->rx_stats.alloc_failed++;
5277                                 goto no_buffers;
5278                         }
5279                 }
5280
5281                 skb = buffer_info->skb;
5282                 if (!skb) {
5283                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5284                         if (!skb) {
5285                                 rx_ring->rx_stats.alloc_failed++;
5286                                 goto no_buffers;
5287                         }
5288
5289                         buffer_info->skb = skb;
5290                 }
5291                 if (!buffer_info->dma) {
5292                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5293                                                           skb->data,
5294                                                           bufsz,
5295                                                           PCI_DMA_FROMDEVICE);
5296                         if (pci_dma_mapping_error(rx_ring->pdev,
5297                                                   buffer_info->dma)) {
5298                                 buffer_info->dma = 0;
5299                                 rx_ring->rx_stats.alloc_failed++;
5300                                 goto no_buffers;
5301                         }
5302                 }
5303                 /* Refresh the desc even if buffer_addrs didn't change because
5304                  * each write-back erases this info. */
5305                 if (bufsz < IGB_RXBUFFER_1024) {
5306                         rx_desc->read.pkt_addr =
5307                              cpu_to_le64(buffer_info->page_dma);
5308                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5309                 } else {
5310                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5311                         rx_desc->read.hdr_addr = 0;
5312                 }
5313
5314                 i++;
5315                 if (i == rx_ring->count)
5316                         i = 0;
5317                 buffer_info = &rx_ring->buffer_info[i];
5318         }
5319
5320 no_buffers:
5321         if (rx_ring->next_to_use != i) {
5322                 rx_ring->next_to_use = i;
5323                 if (i == 0)
5324                         i = (rx_ring->count - 1);
5325                 else
5326                         i--;
5327
5328                 /* Force memory writes to complete before letting h/w
5329                  * know there are new descriptors to fetch.  (Only
5330                  * applicable for weak-ordered memory model archs,
5331                  * such as IA-64). */
5332                 wmb();
5333                 writel(i, rx_ring->tail);
5334         }
5335 }
5336
5337 /**
5338  * igb_mii_ioctl - handle MII register ioctls
5339  * @netdev: network interface device structure
5340  * @ifr: pointer to the ioctl request data
5341  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5342  **/
5343 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5344 {
5345         struct igb_adapter *adapter = netdev_priv(netdev);
5346         struct mii_ioctl_data *data = if_mii(ifr);
5347
5348         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5349                 return -EOPNOTSUPP;
5350
5351         switch (cmd) {
5352         case SIOCGMIIPHY:
5353                 data->phy_id = adapter->hw.phy.addr;
5354                 break;
5355         case SIOCGMIIREG:
5356                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5357                                      &data->val_out))
5358                         return -EIO;
5359                 break;
5360         case SIOCSMIIREG:
5361         default:
5362                 return -EOPNOTSUPP;
5363         }
5364         return 0;
5365 }
5366
5367 /**
5368  * igb_hwtstamp_ioctl - control hardware time stamping
5369  * @netdev: network interface device structure
5370  * @ifr: pointer to the ioctl request data containing a hwtstamp_config
5371  * @cmd: ioctl command (SIOCSHWTSTAMP)
5372  *
5373  * Outgoing time stamping can be enabled and disabled. Play nice and
5374  * disable it when requested, although it shouldn't cause any overhead
5375  * when no packet needs it. At most one packet in the queue may be
5376  * marked for time stamping, otherwise it would be impossible to tell
5377  * for sure to which packet the hardware time stamp belongs.
5378  *
5379  * Incoming time stamping has to be configured via the hardware
5380  * filters. Not all combinations are supported, in particular event
5381  * type has to be specified. Matching the kind of event packet is
5382  * not supported, with the exception of "all V2 events regardless of
5383  * layer 2 or 4".
5384  *
5385  **/
5386 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5387                               struct ifreq *ifr, int cmd)
5388 {
5389         struct igb_adapter *adapter = netdev_priv(netdev);
5390         struct e1000_hw *hw = &adapter->hw;
5391         struct hwtstamp_config config;
5392         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5393         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5394         u32 tsync_rx_cfg = 0;
5395         bool is_l4 = false;
5396         bool is_l2 = false;
5397         u32 regval;
5398
5399         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5400                 return -EFAULT;
5401
5402         /* reserved for future extensions */
5403         if (config.flags)
5404                 return -EINVAL;
5405
5406         switch (config.tx_type) {
5407         case HWTSTAMP_TX_OFF:
5408                 tsync_tx_ctl = 0;        /* fall through */
5409         case HWTSTAMP_TX_ON:
5410                 break;
5411         default:
5412                 return -ERANGE;
5413         }
5414
5415         switch (config.rx_filter) {
5416         case HWTSTAMP_FILTER_NONE:
5417                 tsync_rx_ctl = 0;
5418                 break;
5419         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5420         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5421         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5422         case HWTSTAMP_FILTER_ALL:
5423                 /*
5424                  * register TSYNCRXCFG must be set, therefore it is not
5425                  * possible to time stamp both Sync and Delay_Req messages
5426                  * => fall back to time stamping all packets
5427                  */
5428                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5429                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5430                 break;
5431         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5432                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5433                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5434                 is_l4 = true;
5435                 break;
5436         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5437                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5438                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5439                 is_l4 = true;
5440                 break;
5441         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5442         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5443                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5444                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5445                 is_l2 = true;
5446                 is_l4 = true;
5447                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5448                 break;
5449         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5450         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5451                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5452                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5453                 is_l2 = true;
5454                 is_l4 = true;
5455                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5456                 break;
5457         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5458         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5459         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5460                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5461                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5462                 is_l2 = true;
5463                 break;
5464         default:
5465                 return -ERANGE;
5466         }
5467
5468         if (hw->mac.type == e1000_82575) {
5469                 if (tsync_rx_ctl | tsync_tx_ctl)
5470                         return -EINVAL;
5471                 return 0;
5472         }
5473
5474         /* enable/disable TX */
5475         regval = rd32(E1000_TSYNCTXCTL);
5476         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5477         regval |= tsync_tx_ctl;
5478         wr32(E1000_TSYNCTXCTL, regval);
5479
5480         /* enable/disable RX */
5481         regval = rd32(E1000_TSYNCRXCTL);
5482         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5483         regval |= tsync_rx_ctl;
5484         wr32(E1000_TSYNCRXCTL, regval);
5485
5486         /* define which PTP packets are time stamped */
5487         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5488
5489         /* define ethertype filter for timestamped packets */
5490         if (is_l2)
5491                 wr32(E1000_ETQF(3),
5492                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5493                                  E1000_ETQF_1588 | /* enable timestamping */
5494                                  ETH_P_1588));     /* 1588 eth protocol type */
5495         else
5496                 wr32(E1000_ETQF(3), 0);
5497
5498 #define PTP_PORT 319
5499         /* L4 Queue Filter[3]: filter by destination port and protocol */
5500         if (is_l4) {
5501                 u32 ftqf = (IPPROTO_UDP /* UDP */
5502                         | E1000_FTQF_VF_BP /* VF not compared */
5503                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5504                         | E1000_FTQF_MASK); /* mask all inputs */
5505                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5506
5507                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5508                 wr32(E1000_IMIREXT(3),
5509                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5510                 if (hw->mac.type == e1000_82576) {
5511                         /* enable source port check */
5512                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5513                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5514                 }
5515                 wr32(E1000_FTQF(3), ftqf);
5516         } else {
5517                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5518         }
5519         wrfl();
5520
5521         adapter->hwtstamp_config = config;
5522
5523         /* clear TX/RX time stamp registers, just to be sure */
5524         regval = rd32(E1000_TXSTMPH);
5525         regval = rd32(E1000_RXSTMPH);
5526
5527         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5528                 -EFAULT : 0;
5529 }
5530
5531 /**
5532  * igb_ioctl - dispatch device-specific ioctls
5533  * @netdev: network interface device structure
5534  * @ifr: pointer to the ioctl request data
5535  * @cmd: ioctl command
5536  **/
5537 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5538 {
5539         switch (cmd) {
5540         case SIOCGMIIPHY:
5541         case SIOCGMIIREG:
5542         case SIOCSMIIREG:
5543                 return igb_mii_ioctl(netdev, ifr, cmd);
5544         case SIOCSHWTSTAMP:
5545                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5546         default:
5547                 return -EOPNOTSUPP;
5548         }
5549 }
5550
5551 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5552 {
5553         struct igb_adapter *adapter = hw->back;
5554         u16 cap_offset;
5555
5556         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5557         if (!cap_offset)
5558                 return -E1000_ERR_CONFIG;
5559
5560         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5561
5562         return 0;
5563 }
5564
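/**
 * igb_write_pcie_cap_reg - write a register in the PCIe capability space
 * @hw: pointer to the HW structure
 * @reg: offset of the register within the PCIe capability
 * @value: pointer to the value to be written
 *
 * Returns -E1000_ERR_CONFIG if the PCIe capability cannot be found.
 **/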
5565 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5566 {
5567         struct igb_adapter *adapter = hw->back;
5568         u16 cap_offset;
5569
5570         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5571         if (!cap_offset)
5572                 return -E1000_ERR_CONFIG;
5573
5574         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5575
5576         return 0;
5577 }
5578
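/**
 * igb_vlan_rx_register - enable or disable VLAN tag insert/strip
 * @netdev: network interface device structure
 * @grp: VLAN group from the 8021q layer, or NULL to disable VLAN handling
 **/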
5579 static void igb_vlan_rx_register(struct net_device *netdev,
5580                                  struct vlan_group *grp)
5581 {
5582         struct igb_adapter *adapter = netdev_priv(netdev);
5583         struct e1000_hw *hw = &adapter->hw;
5584         u32 ctrl, rctl;
5585
5586         igb_irq_disable(adapter);
5587         adapter->vlgrp = grp;
5588
5589         if (grp) {
5590                 /* enable VLAN tag insert/strip */
5591                 ctrl = rd32(E1000_CTRL);
5592                 ctrl |= E1000_CTRL_VME;
5593                 wr32(E1000_CTRL, ctrl);
5594
5595                 /* Disable CFI check */
5596                 rctl = rd32(E1000_RCTL);
5597                 rctl &= ~E1000_RCTL_CFIEN;
5598                 wr32(E1000_RCTL, rctl);
5599         } else {
5600                 /* disable VLAN tag insert/strip */
5601                 ctrl = rd32(E1000_CTRL);
5602                 ctrl &= ~E1000_CTRL_VME;
5603                 wr32(E1000_CTRL, ctrl);
5604         }
5605
5606         igb_rlpml_set(adapter);
5607
5608         if (!test_bit(__IGB_DOWN, &adapter->state))
5609                 igb_irq_enable(adapter);
5610 }
5611
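/**
 * igb_vlan_rx_add_vid - add a VLAN ID to the hardware filter tables
 * @netdev: network interface device structure
 * @vid: VLAN ID to be added
 **/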
5612 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5613 {
5614         struct igb_adapter *adapter = netdev_priv(netdev);
5615         struct e1000_hw *hw = &adapter->hw;
5616         int pf_id = adapter->vfs_allocated_count;
5617
5618         /* attempt to add filter to vlvf array */
5619         igb_vlvf_set(adapter, vid, true, pf_id);
5620
5621         /* add the filter since PF can receive vlans w/o entry in vlvf */
5622         igb_vfta_set(hw, vid, true);
5623 }
5624
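/**
 * igb_vlan_rx_kill_vid - remove a VLAN ID from the hardware filter tables
 * @netdev: network interface device structure
 * @vid: VLAN ID to be removed
 **/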
5625 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5626 {
5627         struct igb_adapter *adapter = netdev_priv(netdev);
5628         struct e1000_hw *hw = &adapter->hw;
5629         int pf_id = adapter->vfs_allocated_count;
5630         s32 err;
5631
5632         igb_irq_disable(adapter);
5633         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5634
5635         if (!test_bit(__IGB_DOWN, &adapter->state))
5636                 igb_irq_enable(adapter);
5637
5638         /* remove vlan from VLVF table array */
5639         err = igb_vlvf_set(adapter, vid, false, pf_id);
5640
5641         /* if vid was not present in VLVF just remove it from table */
5642         if (err)
5643                 igb_vfta_set(hw, vid, false);
5644 }
5645
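/**
 * igb_restore_vlan - re-program the VLAN filters after a reset
 * @adapter: board private structure
 **/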
5646 static void igb_restore_vlan(struct igb_adapter *adapter)
5647 {
5648         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5649
5650         if (adapter->vlgrp) {
5651                 u16 vid;
5652                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5653                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5654                                 continue;
5655                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5656                 }
5657         }
5658 }
5659
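/**
 * igb_set_spd_dplx - force a specific speed/duplex setting
 * @adapter: board private structure
 * @spddplx: sum of the requested SPEED_* and DUPLEX_* values
 *
 * 1000 Mbps half duplex is not supported and returns -EINVAL.
 **/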
5660 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5661 {
5662         struct pci_dev *pdev = adapter->pdev;
5663         struct e1000_mac_info *mac = &adapter->hw.mac;
5664
5665         mac->autoneg = 0;
5666
5667         switch (spddplx) {
5668         case SPEED_10 + DUPLEX_HALF:
5669                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5670                 break;
5671         case SPEED_10 + DUPLEX_FULL:
5672                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5673                 break;
5674         case SPEED_100 + DUPLEX_HALF:
5675                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5676                 break;
5677         case SPEED_100 + DUPLEX_FULL:
5678                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5679                 break;
5680         case SPEED_1000 + DUPLEX_FULL:
5681                 mac->autoneg = 1;
5682                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5683                 break;
5684         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5685         default:
5686                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5687                 return -EINVAL;
5688         }
5689         return 0;
5690 }
5691
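/**
 * __igb_shutdown - prepare the device for power-down
 * @pdev: Pointer to PCI device
 * @enable_wake: set to true if wake-up (WoL or manageability) must stay armed
 *
 * Common code shared by igb_suspend and igb_shutdown: stops the interface,
 * programs the wake-up filters and releases control of the hardware to the
 * firmware.
 **/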
5692 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5693 {
5694         struct net_device *netdev = pci_get_drvdata(pdev);
5695         struct igb_adapter *adapter = netdev_priv(netdev);
5696         struct e1000_hw *hw = &adapter->hw;
5697         u32 ctrl, rctl, status;
5698         u32 wufc = adapter->wol;
5699 #ifdef CONFIG_PM
5700         int retval = 0;
5701 #endif
5702
5703         netif_device_detach(netdev);
5704
5705         if (netif_running(netdev))
5706                 igb_close(netdev);
5707
5708         igb_clear_interrupt_scheme(adapter);
5709
5710 #ifdef CONFIG_PM
5711         retval = pci_save_state(pdev);
5712         if (retval)
5713                 return retval;
5714 #endif
5715
5716         status = rd32(E1000_STATUS);
5717         if (status & E1000_STATUS_LU)
5718                 wufc &= ~E1000_WUFC_LNKC;
5719
5720         if (wufc) {
5721                 igb_setup_rctl(adapter);
5722                 igb_set_rx_mode(netdev);
5723
5724                 /* turn on all-multi mode if wake on multicast is enabled */
5725                 if (wufc & E1000_WUFC_MC) {
5726                         rctl = rd32(E1000_RCTL);
5727                         rctl |= E1000_RCTL_MPE;
5728                         wr32(E1000_RCTL, rctl);
5729                 }
5730
5731                 ctrl = rd32(E1000_CTRL);
5732                 /* advertise wake from D3Cold */
5733                 #define E1000_CTRL_ADVD3WUC 0x00100000
5734                 /* phy power management enable */
5735                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5736                 ctrl |= E1000_CTRL_ADVD3WUC;
5737                 wr32(E1000_CTRL, ctrl);
5738
5739                 /* Allow time for pending master requests to run */
5740                 igb_disable_pcie_master(hw);
5741
5742                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5743                 wr32(E1000_WUFC, wufc);
5744         } else {
5745                 wr32(E1000_WUC, 0);
5746                 wr32(E1000_WUFC, 0);
5747         }
5748
5749         *enable_wake = wufc || adapter->en_mng_pt;
5750         if (!*enable_wake)
5751                 igb_shutdown_serdes_link_82575(hw);
5752
5753         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5754          * would have already happened in close and is redundant. */
5755         igb_release_hw_control(adapter);
5756
5757         pci_disable_device(pdev);
5758
5759         return 0;
5760 }
5761
5762 #ifdef CONFIG_PM
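/**
 * igb_suspend - system suspend callback
 * @pdev: Pointer to PCI device
 * @state: target power state requested by the PM core
 **/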
5763 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5764 {
5765         int retval;
5766         bool wake;
5767
5768         retval = __igb_shutdown(pdev, &wake);
5769         if (retval)
5770                 return retval;
5771
5772         if (wake) {
5773                 pci_prepare_to_sleep(pdev);
5774         } else {
5775                 pci_wake_from_d3(pdev, false);
5776                 pci_set_power_state(pdev, PCI_D3hot);
5777         }
5778
5779         return 0;
5780 }
5781
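/**
 * igb_resume - system resume callback
 * @pdev: Pointer to PCI device
 *
 * Restores PCI state, re-allocates the interrupt scheme, resets the
 * hardware and brings the interface back up if it was running.
 **/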
5782 static int igb_resume(struct pci_dev *pdev)
5783 {
5784         struct net_device *netdev = pci_get_drvdata(pdev);
5785         struct igb_adapter *adapter = netdev_priv(netdev);
5786         struct e1000_hw *hw = &adapter->hw;
5787         u32 err;
5788
5789         pci_set_power_state(pdev, PCI_D0);
5790         pci_restore_state(pdev);
5791
5792         err = pci_enable_device_mem(pdev);
5793         if (err) {
5794                 dev_err(&pdev->dev,
5795                         "igb: Cannot enable PCI device from suspend\n");
5796                 return err;
5797         }
5798         pci_set_master(pdev);
5799
5800         pci_enable_wake(pdev, PCI_D3hot, 0);
5801         pci_enable_wake(pdev, PCI_D3cold, 0);
5802
5803         if (igb_init_interrupt_scheme(adapter)) {
5804                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5805                 return -ENOMEM;
5806         }
5807
5808         /* e1000_power_up_phy(adapter); */
5809
5810         igb_reset(adapter);
5811
5812         /* let the f/w know that the h/w is now under the control of the
5813          * driver. */
5814         igb_get_hw_control(adapter);
5815
5816         wr32(E1000_WUS, ~0);
5817
5818         if (netif_running(netdev)) {
5819                 err = igb_open(netdev);
5820                 if (err)
5821                         return err;
5822         }
5823
5824         netif_device_attach(netdev);
5825
5826         return 0;
5827 }
5828 #endif
5829
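/**
 * igb_shutdown - system shutdown/power-off callback
 * @pdev: Pointer to PCI device
 **/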
5830 static void igb_shutdown(struct pci_dev *pdev)
5831 {
5832         bool wake;
5833
5834         __igb_shutdown(pdev, &wake);
5835
5836         if (system_state == SYSTEM_POWER_OFF) {
5837                 pci_wake_from_d3(pdev, wake);
5838                 pci_set_power_state(pdev, PCI_D3hot);
5839         }
5840 }
5841
5842 #ifdef CONFIG_NET_POLL_CONTROLLER
5843 /*
5844  * Polling 'interrupt' - used by things like netconsole to send skbs
5845  * without having to re-enable interrupts. It's not called while
5846  * the interrupt routine is executing.
5847  */
5848 static void igb_netpoll(struct net_device *netdev)
5849 {
5850         struct igb_adapter *adapter = netdev_priv(netdev);
5851         struct e1000_hw *hw = &adapter->hw;
5852         int i;
5853
5854         if (!adapter->msix_entries) {
5855                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5856                 igb_irq_disable(adapter);
5857                 napi_schedule(&q_vector->napi);
5858                 return;
5859         }
5860
5861         for (i = 0; i < adapter->num_q_vectors; i++) {
5862                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5863                 wr32(E1000_EIMC, q_vector->eims_value);
5864                 napi_schedule(&q_vector->napi);
5865         }
5866 }
5867 #endif /* CONFIG_NET_POLL_CONTROLLER */
5868
5869 /**
5870  * igb_io_error_detected - called when PCI error is detected
5871  * @pdev: Pointer to PCI device
5872  * @state: The current pci connection state
5873  *
5874  * This function is called after a PCI bus error affecting
5875  * this device has been detected.
5876  */
5877 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5878                                               pci_channel_state_t state)
5879 {
5880         struct net_device *netdev = pci_get_drvdata(pdev);
5881         struct igb_adapter *adapter = netdev_priv(netdev);
5882
5883         netif_device_detach(netdev);
5884
5885         if (state == pci_channel_io_perm_failure)
5886                 return PCI_ERS_RESULT_DISCONNECT;
5887
5888         if (netif_running(netdev))
5889                 igb_down(adapter);
5890         pci_disable_device(pdev);
5891
5892         /* Request a slot reset. */
5893         return PCI_ERS_RESULT_NEED_RESET;
5894 }
5895
5896 /**
5897  * igb_io_slot_reset - called after the pci bus has been reset.
5898  * @pdev: Pointer to PCI device
5899  *
5900  * Restart the card from scratch, as if from a cold boot. Implementation
5901  * resembles the first half of the igb_resume routine.
5902  */
5903 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5904 {
5905         struct net_device *netdev = pci_get_drvdata(pdev);
5906         struct igb_adapter *adapter = netdev_priv(netdev);
5907         struct e1000_hw *hw = &adapter->hw;
5908         pci_ers_result_t result;
5909         int err;
5910
5911         if (pci_enable_device_mem(pdev)) {
5912                 dev_err(&pdev->dev,
5913                         "Cannot re-enable PCI device after reset.\n");
5914                 result = PCI_ERS_RESULT_DISCONNECT;
5915         } else {
5916                 pci_set_master(pdev);
5917                 pci_restore_state(pdev);
5918
5919                 pci_enable_wake(pdev, PCI_D3hot, 0);
5920                 pci_enable_wake(pdev, PCI_D3cold, 0);
5921
5922                 igb_reset(adapter);
5923                 wr32(E1000_WUS, ~0);
5924                 result = PCI_ERS_RESULT_RECOVERED;
5925         }
5926
5927         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5928         if (err) {
5929                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5930                         "failed 0x%0x\n", err);
5931                 /* non-fatal, continue */
5932         }
5933
5934         return result;
5935 }
5936
5937 /**
5938  * igb_io_resume - called when traffic can start flowing again.
5939  * @pdev: Pointer to PCI device
5940  *
5941  * This callback is called when the error recovery driver tells us that
5942  * it's OK to resume normal operation. Implementation resembles the
5943  * second half of the igb_resume routine.
5944  */
5945 static void igb_io_resume(struct pci_dev *pdev)
5946 {
5947         struct net_device *netdev = pci_get_drvdata(pdev);
5948         struct igb_adapter *adapter = netdev_priv(netdev);
5949
5950         if (netif_running(netdev)) {
5951                 if (igb_up(adapter)) {
5952                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5953                         return;
5954                 }
5955         }
5956
5957         netif_device_attach(netdev);
5958
5959         /* let the f/w know that the h/w is now under the control of the
5960          * driver. */
5961         igb_get_hw_control(adapter);
5962 }
5963
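/**
 * igb_rar_set_qsel - write a MAC address into a receive address register
 * @adapter: board private structure
 * @addr: MAC address in network byte order
 * @index: receive address register (RAL/RAH pair) to program
 * @qsel: pool/queue the address is associated with
 **/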
5964 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5965                              u8 qsel)
5966 {
5967         u32 rar_low, rar_high;
5968         struct e1000_hw *hw = &adapter->hw;
5969
5970         /* HW expects these in little endian so we reverse the byte order
5971          * from network order (big endian) to little endian
5972          */
5973         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5974                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5975         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5976
5977         /* Indicate to hardware the Address is Valid. */
5978         rar_high |= E1000_RAH_AV;
5979
5980         if (hw->mac.type == e1000_82575)
5981                 rar_high |= E1000_RAH_POOL_1 * qsel;
5982         else
5983                 rar_high |= E1000_RAH_POOL_1 << qsel;
5984
5985         wr32(E1000_RAL(index), rar_low);
5986         wrfl();
5987         wr32(E1000_RAH(index), rar_high);
5988         wrfl();
5989 }
5990
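/**
 * igb_set_vf_mac - assign a MAC address to a virtual function
 * @adapter: board private structure
 * @vf: VF number
 * @mac_addr: MAC address to assign
 **/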
5991 static int igb_set_vf_mac(struct igb_adapter *adapter,
5992                           int vf, unsigned char *mac_addr)
5993 {
5994         struct e1000_hw *hw = &adapter->hw;
5995         /* VF MAC addresses start at the end of the receive addresses and
5996          * move towards the first, so a collision should not be possible */
5997         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5998
5999         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6000
6001         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6002
6003         return 0;
6004 }
6005
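/**
 * igb_vmm_control - configure VMDq loopback and replication
 * @adapter: board private structure
 *
 * Not supported on 82575; enables loopback and replication only when
 * virtual functions have been allocated.
 **/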
6006 static void igb_vmm_control(struct igb_adapter *adapter)
6007 {
6008         struct e1000_hw *hw = &adapter->hw;
6009         u32 reg;
6010
6011         /* replication is not supported for 82575 */
6012         if (hw->mac.type == e1000_82575)
6013                 return;
6014
6015         /* enable replication vlan tag stripping */
6016         reg = rd32(E1000_RPLOLR);
6017         reg |= E1000_RPLOLR_STRVLAN;
6018         wr32(E1000_RPLOLR, reg);
6019
6020         /* notify HW that the MAC is adding vlan tags */
6021         reg = rd32(E1000_DTXCTL);
6022         reg |= E1000_DTXCTL_VLAN_ADDED;
6023         wr32(E1000_DTXCTL, reg);
6024
6025         if (adapter->vfs_allocated_count) {
6026                 igb_vmdq_set_loopback_pf(hw, true);
6027                 igb_vmdq_set_replication_pf(hw, true);
6028         } else {
6029                 igb_vmdq_set_loopback_pf(hw, false);
6030                 igb_vmdq_set_replication_pf(hw, false);
6031         }
6032 }
6033
6034 /* igb_main.c */