igb: add support for separate tx-usecs setting in ethtool
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
129 static void igb_vmm_control(struct igb_adapter *);
130 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
131 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
132
133 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
134 {
135         u32 reg_data;
136
137         reg_data = rd32(E1000_VMOLR(vfn));
138         reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
139                     E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
140                     E1000_VMOLR_AUPE |   /* Accept untagged packets */
141                     E1000_VMOLR_STRVLAN; /* Strip vlan tags */
142         wr32(E1000_VMOLR(vfn), reg_data);
143 }
144
145 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
146                                  int vfn)
147 {
148         struct e1000_hw *hw = &adapter->hw;
149         u32 vmolr;
150
151         /* if this is a VF, check whether VLANs are enabled for it and
152          * increase the size to allow for a vlan tag */
153         if (vfn < adapter->vfs_allocated_count &&
154             adapter->vf_data[vfn].vlans_enabled)
155                 size += VLAN_TAG_SIZE;
156
157         vmolr = rd32(E1000_VMOLR(vfn));
158         vmolr &= ~E1000_VMOLR_RLPML_MASK;
159         vmolr |= size | E1000_VMOLR_LPE;
160         wr32(E1000_VMOLR(vfn), vmolr);
161
162         return 0;
163 }
164
165 #ifdef CONFIG_PM
166 static int igb_suspend(struct pci_dev *, pm_message_t);
167 static int igb_resume(struct pci_dev *);
168 #endif
169 static void igb_shutdown(struct pci_dev *);
170 #ifdef CONFIG_IGB_DCA
171 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
172 static struct notifier_block dca_notifier = {
173         .notifier_call  = igb_notify_dca,
174         .next           = NULL,
175         .priority       = 0
176 };
177 #endif
178 #ifdef CONFIG_NET_POLL_CONTROLLER
179 /* for netdump / net console */
180 static void igb_netpoll(struct net_device *);
181 #endif
182 #ifdef CONFIG_PCI_IOV
183 static unsigned int max_vfs = 0;
184 module_param(max_vfs, uint, 0);
185 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
186                  "per physical function");
187 #endif /* CONFIG_PCI_IOV */
188
189 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
190                      pci_channel_state_t);
191 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
192 static void igb_io_resume(struct pci_dev *);
193
194 static struct pci_error_handlers igb_err_handler = {
195         .error_detected = igb_io_error_detected,
196         .slot_reset = igb_io_slot_reset,
197         .resume = igb_io_resume,
198 };
199
200
201 static struct pci_driver igb_driver = {
202         .name     = igb_driver_name,
203         .id_table = igb_pci_tbl,
204         .probe    = igb_probe,
205         .remove   = __devexit_p(igb_remove),
206 #ifdef CONFIG_PM
207         /* Power Management Hooks */
208         .suspend  = igb_suspend,
209         .resume   = igb_resume,
210 #endif
211         .shutdown = igb_shutdown,
212         .err_handler = &igb_err_handler
213 };
214
215 static int global_quad_port_a; /* global quad port a indication */
216
217 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
218 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
219 MODULE_LICENSE("GPL");
220 MODULE_VERSION(DRV_VERSION);
221
222 /**
223  * Scale the NIC clock cycle by a large factor so that
224  * relatively small clock corrections can be added or
225  * subtracted at each clock tick. The drawbacks of a
226  * large factor are a) that the clock register overflows
227  * more quickly (not such a big deal) and b) that the
228  * increment per tick has to fit into 24 bits.
229  *
230  * Note that
231  *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
232  *             IGB_TSYNC_SCALE
233  *   TIMINCA += TIMINCA * adjustment [ppm] / 1e9
234  *
235  * The base scale factor is intentionally a power of two
236  * so that the division in %struct timecounter can be done with
237  * a shift.
238  */
239 #define IGB_TSYNC_SHIFT (19)
240 #define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
241
242 /**
243  * The duration of one clock cycle of the NIC.
244  *
245  * @todo This hard-coded value is part of the specification and might change
246  * in future hardware revisions. Add revision check.
247  */
248 #define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
249
250 #if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
251 # error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
252 #endif
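
/* For example, with the values above: 16 ns/cycle * 2^19 = 8388608
 * (0x800000), which stays below the 24-bit TIMINCA limit of
 * 2^24 = 16777216 enforced by the compile-time check above.
 */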
253
254 /**
255  * igb_read_clock - read raw cycle counter (to be used by time counter)
256  */
257 static cycle_t igb_read_clock(const struct cyclecounter *tc)
258 {
259         struct igb_adapter *adapter =
260                 container_of(tc, struct igb_adapter, cycles);
261         struct e1000_hw *hw = &adapter->hw;
262         u64 stamp;
263
264         stamp =  rd32(E1000_SYSTIML);
265         stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
266
267         return stamp;
268 }
269
270 #ifdef DEBUG
271 /**
272  * igb_get_hw_dev_name - return device name string
273  * used by hardware layer to print debugging information
274  **/
275 char *igb_get_hw_dev_name(struct e1000_hw *hw)
276 {
277         struct igb_adapter *adapter = hw->back;
278         return adapter->netdev->name;
279 }
280
281 /**
282  * igb_get_time_str - format current NIC and system time as string
283  */
284 static char *igb_get_time_str(struct igb_adapter *adapter,
285                               char buffer[160])
286 {
287         cycle_t hw = adapter->cycles.read(&adapter->cycles);
288         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
289         struct timespec sys;
290         struct timespec delta;
291         getnstimeofday(&sys);
292
293         delta = timespec_sub(nic, sys);
294
295         sprintf(buffer,
296                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
297                 hw,
298                 (long)nic.tv_sec, nic.tv_nsec,
299                 (long)sys.tv_sec, sys.tv_nsec,
300                 (long)delta.tv_sec, delta.tv_nsec);
301
302         return buffer;
303 }
304 #endif
305
306 /**
307  * igb_init_module - Driver Registration Routine
308  *
309  * igb_init_module is the first routine called when the driver is
310  * loaded. All it does is register with the PCI subsystem.
311  **/
312 static int __init igb_init_module(void)
313 {
314         int ret;
315         printk(KERN_INFO "%s - version %s\n",
316                igb_driver_string, igb_driver_version);
317
318         printk(KERN_INFO "%s\n", igb_copyright);
319
320         global_quad_port_a = 0;
321
322 #ifdef CONFIG_IGB_DCA
323         dca_register_notify(&dca_notifier);
324 #endif
325
326         ret = pci_register_driver(&igb_driver);
327         return ret;
328 }
329
330 module_init(igb_init_module);
331
332 /**
333  * igb_exit_module - Driver Exit Cleanup Routine
334  *
335  * igb_exit_module is called just before the driver is removed
336  * from memory.
337  **/
338 static void __exit igb_exit_module(void)
339 {
340 #ifdef CONFIG_IGB_DCA
341         dca_unregister_notify(&dca_notifier);
342 #endif
343         pci_unregister_driver(&igb_driver);
344 }
345
346 module_exit(igb_exit_module);
347
348 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
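/* Q_IDX_82576(i) interleaves rings across the two halves of the queue space:
 * i = 0, 1, 2, 3, 4, 5 maps to register index 0, 8, 1, 9, 2, 10, matching the
 * "VF 0 gets queues 0 and 8" layout described in igb_cache_ring_register().
 */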
349 /**
350  * igb_cache_ring_register - Descriptor ring to register mapping
351  * @adapter: board private structure to initialize
352  *
353  * Once we know the feature-set enabled for the device, we'll cache
354  * the register offset the descriptor ring is assigned to.
355  **/
356 static void igb_cache_ring_register(struct igb_adapter *adapter)
357 {
358         int i;
359         u32 rbase_offset = adapter->vfs_allocated_count;
360
361         switch (adapter->hw.mac.type) {
362         case e1000_82576:
363                 /* The queues are allocated for virtualization such that VF 0
364                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
365                  * In order to avoid collision we start at the first free queue
366                  * and continue consuming queues in the same sequence
367                  */
368                 for (i = 0; i < adapter->num_rx_queues; i++)
369                         adapter->rx_ring[i].reg_idx = rbase_offset +
370                                                       Q_IDX_82576(i);
371                 for (i = 0; i < adapter->num_tx_queues; i++)
372                         adapter->tx_ring[i].reg_idx = rbase_offset +
373                                                       Q_IDX_82576(i);
374                 break;
375         case e1000_82575:
376         default:
377                 for (i = 0; i < adapter->num_rx_queues; i++)
378                         adapter->rx_ring[i].reg_idx = i;
379                 for (i = 0; i < adapter->num_tx_queues; i++)
380                         adapter->tx_ring[i].reg_idx = i;
381                 break;
382         }
383 }
384
385 static void igb_free_queues(struct igb_adapter *adapter)
386 {
387         kfree(adapter->tx_ring);
388         kfree(adapter->rx_ring);
389
390         adapter->tx_ring = NULL;
391         adapter->rx_ring = NULL;
392
393         adapter->num_rx_queues = 0;
394         adapter->num_tx_queues = 0;
395 }
396
397 /**
398  * igb_alloc_queues - Allocate memory for all rings
399  * @adapter: board private structure to initialize
400  *
401  * We allocate one ring per queue at run-time since we don't know the
402  * number of queues at compile-time.
403  **/
404 static int igb_alloc_queues(struct igb_adapter *adapter)
405 {
406         int i;
407
408         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
409                                    sizeof(struct igb_ring), GFP_KERNEL);
410         if (!adapter->tx_ring)
411                 goto err;
412
413         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
414                                    sizeof(struct igb_ring), GFP_KERNEL);
415         if (!adapter->rx_ring)
416                 goto err;
417
418         for (i = 0; i < adapter->num_tx_queues; i++) {
419                 struct igb_ring *ring = &(adapter->tx_ring[i]);
420                 ring->count = adapter->tx_ring_count;
421                 ring->queue_index = i;
422                 ring->pdev = adapter->pdev;
423                 ring->netdev = adapter->netdev;
424                 /* For 82575, context index must be unique per ring. */
425                 if (adapter->hw.mac.type == e1000_82575)
426                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
427         }
428
429         for (i = 0; i < adapter->num_rx_queues; i++) {
430                 struct igb_ring *ring = &(adapter->rx_ring[i]);
431                 ring->count = adapter->rx_ring_count;
432                 ring->queue_index = i;
433                 ring->pdev = adapter->pdev;
434                 ring->netdev = adapter->netdev;
435                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
436                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
437                 /* set flag indicating ring supports SCTP checksum offload */
438                 if (adapter->hw.mac.type >= e1000_82576)
439                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
440         }
441
442         igb_cache_ring_register(adapter);
443
444         return 0;
445
446 err:
447         igb_free_queues(adapter);
448
449         return -ENOMEM;
450 }
451
452 #define IGB_N0_QUEUE -1
453 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
454 {
455         u32 msixbm = 0;
456         struct igb_adapter *adapter = q_vector->adapter;
457         struct e1000_hw *hw = &adapter->hw;
458         u32 ivar, index;
459         int rx_queue = IGB_N0_QUEUE;
460         int tx_queue = IGB_N0_QUEUE;
461
462         if (q_vector->rx_ring)
463                 rx_queue = q_vector->rx_ring->reg_idx;
464         if (q_vector->tx_ring)
465                 tx_queue = q_vector->tx_ring->reg_idx;
466
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 /* The 82575 assigns vectors using a bitmask, which matches the
470                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
471                    or more queues to a vector, we write the appropriate bits
472                    into the MSIXBM register for that vector. */
473                 if (rx_queue > IGB_N0_QUEUE)
474                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
475                 if (tx_queue > IGB_N0_QUEUE)
476                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
477                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
478                 q_vector->eims_value = msixbm;
479                 break;
480         case e1000_82576:
481                 /* 82576 uses a table-based method for assigning vectors.
482                    Each queue has a single entry in the table to which we write
483                    a vector number along with a "valid" bit.  Sadly, the layout
484                    of the table is somewhat counterintuitive. */
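                /* Layout, as implemented below: within IVAR0[queue & 0x7],
                 * byte 0 holds the vector for Rx queues 0-7, byte 1 for Tx
                 * queues 0-7, byte 2 for Rx queues 8-15 and byte 3 for Tx
                 * queues 8-15, each tagged with E1000_IVAR_VALID.
                 */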
485                 if (rx_queue > IGB_N0_QUEUE) {
486                         index = (rx_queue & 0x7);
487                         ivar = array_rd32(E1000_IVAR0, index);
488                         if (rx_queue < 8) {
489                                 /* vector goes into low byte of register */
490                                 ivar = ivar & 0xFFFFFF00;
491                                 ivar |= msix_vector | E1000_IVAR_VALID;
492                         } else {
493                                 /* vector goes into third byte of register */
494                                 ivar = ivar & 0xFF00FFFF;
495                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
496                         }
497                         array_wr32(E1000_IVAR0, index, ivar);
498                 }
499                 if (tx_queue > IGB_N0_QUEUE) {
500                         index = (tx_queue & 0x7);
501                         ivar = array_rd32(E1000_IVAR0, index);
502                         if (tx_queue < 8) {
503                                 /* vector goes into second byte of register */
504                                 ivar = ivar & 0xFFFF00FF;
505                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
506                         } else {
507                                 /* vector goes into high byte of register */
508                                 ivar = ivar & 0x00FFFFFF;
509                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
510                         }
511                         array_wr32(E1000_IVAR0, index, ivar);
512                 }
513                 q_vector->eims_value = 1 << msix_vector;
514                 break;
515         default:
516                 BUG();
517                 break;
518         }
519 }
520
521 /**
522  * igb_configure_msix - Configure MSI-X hardware
523  *
524  * igb_configure_msix sets up the hardware to properly
525  * generate MSI-X interrupts.
526  **/
527 static void igb_configure_msix(struct igb_adapter *adapter)
528 {
529         u32 tmp;
530         int i, vector = 0;
531         struct e1000_hw *hw = &adapter->hw;
532
533         adapter->eims_enable_mask = 0;
534
535         /* set vector for other causes, i.e. link changes */
536         switch (hw->mac.type) {
537         case e1000_82575:
538                 tmp = rd32(E1000_CTRL_EXT);
539                 /* enable MSI-X PBA support*/
540                 tmp |= E1000_CTRL_EXT_PBA_CLR;
541
542                 /* Auto-Mask interrupts upon ICR read. */
543                 tmp |= E1000_CTRL_EXT_EIAME;
544                 tmp |= E1000_CTRL_EXT_IRCA;
545
546                 wr32(E1000_CTRL_EXT, tmp);
547
548                 /* enable msix_other interrupt */
549                 array_wr32(E1000_MSIXBM(0), vector++,
550                                       E1000_EIMS_OTHER);
551                 adapter->eims_other = E1000_EIMS_OTHER;
552
553                 break;
554
555         case e1000_82576:
556                 /* Turn on MSI-X capability first, or our settings
557                  * won't stick.  And it will take days to debug. */
558                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
559                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
560                                 E1000_GPIE_NSICR);
561
562                 /* enable msix_other interrupt */
563                 adapter->eims_other = 1 << vector;
564                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
565
566                 wr32(E1000_IVAR_MISC, tmp);
567                 break;
568         default:
569                 /* do nothing, since nothing else supports MSI-X */
570                 break;
571         } /* switch (hw->mac.type) */
572
573         adapter->eims_enable_mask |= adapter->eims_other;
574
575         for (i = 0; i < adapter->num_q_vectors; i++) {
576                 struct igb_q_vector *q_vector = adapter->q_vector[i];
577                 igb_assign_vector(q_vector, vector++);
578                 adapter->eims_enable_mask |= q_vector->eims_value;
579         }
580
581         wrfl();
582 }
583
584 /**
585  * igb_request_msix - Initialize MSI-X interrupts
586  *
587  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
588  * kernel.
589  **/
590 static int igb_request_msix(struct igb_adapter *adapter)
591 {
592         struct net_device *netdev = adapter->netdev;
593         struct e1000_hw *hw = &adapter->hw;
594         int i, err = 0, vector = 0;
595
596         err = request_irq(adapter->msix_entries[vector].vector,
597                           &igb_msix_other, 0, netdev->name, adapter);
598         if (err)
599                 goto out;
600         vector++;
601
602         for (i = 0; i < adapter->num_q_vectors; i++) {
603                 struct igb_q_vector *q_vector = adapter->q_vector[i];
604
605                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
606
607                 if (q_vector->rx_ring && q_vector->tx_ring)
608                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
609                                 q_vector->rx_ring->queue_index);
610                 else if (q_vector->tx_ring)
611                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
612                                 q_vector->tx_ring->queue_index);
613                 else if (q_vector->rx_ring)
614                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
615                                 q_vector->rx_ring->queue_index);
616                 else
617                         sprintf(q_vector->name, "%s-unused", netdev->name);
618
619                 err = request_irq(adapter->msix_entries[vector].vector,
620                                   &igb_msix_ring, 0, q_vector->name,
621                                   q_vector);
622                 if (err)
623                         goto out;
624                 vector++;
625         }
626
627         igb_configure_msix(adapter);
628         return 0;
629 out:
630         return err;
631 }
632
633 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
634 {
635         if (adapter->msix_entries) {
636                 pci_disable_msix(adapter->pdev);
637                 kfree(adapter->msix_entries);
638                 adapter->msix_entries = NULL;
639         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
640                 pci_disable_msi(adapter->pdev);
641         }
642 }
643
644 /**
645  * igb_free_q_vectors - Free memory allocated for interrupt vectors
646  * @adapter: board private structure to initialize
647  *
648  * This function frees the memory allocated to the q_vectors.  In addition if
649  * NAPI is enabled it will delete any references to the NAPI struct prior
650  * to freeing the q_vector.
651  **/
652 static void igb_free_q_vectors(struct igb_adapter *adapter)
653 {
654         int v_idx;
655
656         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
657                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
658                 adapter->q_vector[v_idx] = NULL;
659                 netif_napi_del(&q_vector->napi);
660                 kfree(q_vector);
661         }
662         adapter->num_q_vectors = 0;
663 }
664
665 /**
666  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
667  *
668  * This function resets the device so that it has 0 rx queues, tx queues, and
669  * MSI-X interrupts allocated.
670  */
671 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
672 {
673         igb_free_queues(adapter);
674         igb_free_q_vectors(adapter);
675         igb_reset_interrupt_capability(adapter);
676 }
677
678 /**
679  * igb_set_interrupt_capability - set MSI or MSI-X if supported
680  *
681  * Attempt to configure interrupts using the best available
682  * capabilities of the hardware and kernel.
683  **/
684 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
685 {
686         int err;
687         int numvecs, i;
688
689         /* Number of supported queues. */
690         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
691         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
692
693         /* start with one vector for every rx queue */
694         numvecs = adapter->num_rx_queues;
695
696         /* if the tx handler is separate, add 1 for every tx queue */
697         numvecs += adapter->num_tx_queues;
698
699         /* store the number of vectors reserved for queues */
700         adapter->num_q_vectors = numvecs;
701
702         /* add 1 vector for link status interrupts */
703         numvecs++;
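        /* e.g. on a system using 4 Rx and 4 Tx queues this asks for
         * 9 MSI-X vectors: one per ring plus one for link/other causes */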
704         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
705                                         GFP_KERNEL);
706         if (!adapter->msix_entries)
707                 goto msi_only;
708
709         for (i = 0; i < numvecs; i++)
710                 adapter->msix_entries[i].entry = i;
711
712         err = pci_enable_msix(adapter->pdev,
713                               adapter->msix_entries,
714                               numvecs);
715         if (err == 0)
716                 goto out;
717
718         igb_reset_interrupt_capability(adapter);
719
720         /* If we can't do MSI-X, try MSI */
721 msi_only:
722 #ifdef CONFIG_PCI_IOV
723         /* disable SR-IOV for non MSI-X configurations */
724         if (adapter->vf_data) {
725                 struct e1000_hw *hw = &adapter->hw;
726                 /* disable iov and allow time for transactions to clear */
727                 pci_disable_sriov(adapter->pdev);
728                 msleep(500);
729
730                 kfree(adapter->vf_data);
731                 adapter->vf_data = NULL;
732                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
733                 msleep(100);
734                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
735         }
736 #endif
737         adapter->vfs_allocated_count = 0;
738         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
739         adapter->num_rx_queues = 1;
740         adapter->num_tx_queues = 1;
741         adapter->num_q_vectors = 1;
742         if (!pci_enable_msi(adapter->pdev))
743                 adapter->flags |= IGB_FLAG_HAS_MSI;
744 out:
745         /* Notify the stack of the (possibly) reduced Tx Queue count. */
746         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
747         return;
748 }
749
750 /**
751  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
752  * @adapter: board private structure to initialize
753  *
754  * We allocate one q_vector per queue interrupt.  If allocation fails we
755  * return -ENOMEM.
756  **/
757 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
758 {
759         struct igb_q_vector *q_vector;
760         struct e1000_hw *hw = &adapter->hw;
761         int v_idx;
762
763         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
764                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
765                 if (!q_vector)
766                         goto err_out;
767                 q_vector->adapter = adapter;
768                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
769                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
770                 q_vector->itr_val = IGB_START_ITR;
771                 q_vector->set_itr = 1;
772                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
773                 adapter->q_vector[v_idx] = q_vector;
774         }
775         return 0;
776
777 err_out:
778         while (v_idx) {
779                 v_idx--;
780                 q_vector = adapter->q_vector[v_idx];
781                 netif_napi_del(&q_vector->napi);
782                 kfree(q_vector);
783                 adapter->q_vector[v_idx] = NULL;
784         }
785         return -ENOMEM;
786 }
787
788 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
789                                       int ring_idx, int v_idx)
790 {
791         struct igb_q_vector *q_vector;
792
793         q_vector = adapter->q_vector[v_idx];
794         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
795         q_vector->rx_ring->q_vector = q_vector;
796         q_vector->itr_val = adapter->rx_itr_setting;
797         if (q_vector->itr_val && q_vector->itr_val <= 3)
798                 q_vector->itr_val = IGB_START_ITR;
799 }
800
801 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
802                                       int ring_idx, int v_idx)
803 {
804         struct igb_q_vector *q_vector;
805
806         q_vector = adapter->q_vector[v_idx];
807         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
808         q_vector->tx_ring->q_vector = q_vector;
809         q_vector->itr_val = adapter->tx_itr_setting;
810         if (q_vector->itr_val && q_vector->itr_val <= 3)
811                 q_vector->itr_val = IGB_START_ITR;
812 }
813
814 /**
815  * igb_map_ring_to_vector - maps allocated queues to vectors
816  *
817  * This function maps the recently allocated queues to vectors.
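 *
 * If there are enough q_vectors, every Rx and Tx ring gets its own vector;
 * otherwise Rx and Tx rings that share an index share a vector (for example,
 * 4 Rx and 4 Tx rings on 4 q_vectors pair rx[i] with tx[i]).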
818  **/
819 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
820 {
821         int i;
822         int v_idx = 0;
823
824         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
825             (adapter->num_q_vectors < adapter->num_tx_queues))
826                 return -ENOMEM;
827
828         if (adapter->num_q_vectors >=
829             (adapter->num_rx_queues + adapter->num_tx_queues)) {
830                 for (i = 0; i < adapter->num_rx_queues; i++)
831                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
832                 for (i = 0; i < adapter->num_tx_queues; i++)
833                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
834         } else {
835                 for (i = 0; i < adapter->num_rx_queues; i++) {
836                         if (i < adapter->num_tx_queues)
837                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
838                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
839                 }
840                 for (; i < adapter->num_tx_queues; i++)
841                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
842         }
843         return 0;
844 }
845
846 /**
847  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
848  *
849  * This function initializes the interrupts and allocates all of the queues.
850  **/
851 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
852 {
853         struct pci_dev *pdev = adapter->pdev;
854         int err;
855
856         igb_set_interrupt_capability(adapter);
857
858         err = igb_alloc_q_vectors(adapter);
859         if (err) {
860                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
861                 goto err_alloc_q_vectors;
862         }
863
864         err = igb_alloc_queues(adapter);
865         if (err) {
866                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
867                 goto err_alloc_queues;
868         }
869
870         err = igb_map_ring_to_vector(adapter);
871         if (err) {
872                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
873                 goto err_map_queues;
874         }
875
876
877         return 0;
878 err_map_queues:
879         igb_free_queues(adapter);
880 err_alloc_queues:
881         igb_free_q_vectors(adapter);
882 err_alloc_q_vectors:
883         igb_reset_interrupt_capability(adapter);
884         return err;
885 }
886
887 /**
888  * igb_request_irq - initialize interrupts
889  *
890  * Attempts to configure interrupts using the best available
891  * capabilities of the hardware and kernel.
892  **/
893 static int igb_request_irq(struct igb_adapter *adapter)
894 {
895         struct net_device *netdev = adapter->netdev;
896         struct pci_dev *pdev = adapter->pdev;
897         struct e1000_hw *hw = &adapter->hw;
898         int err = 0;
899
900         if (adapter->msix_entries) {
901                 err = igb_request_msix(adapter);
902                 if (!err)
903                         goto request_done;
904                 /* fall back to MSI */
905                 igb_clear_interrupt_scheme(adapter);
906                 if (!pci_enable_msi(adapter->pdev))
907                         adapter->flags |= IGB_FLAG_HAS_MSI;
908                 igb_free_all_tx_resources(adapter);
909                 igb_free_all_rx_resources(adapter);
910                 adapter->num_tx_queues = 1;
911                 adapter->num_rx_queues = 1;
912                 adapter->num_q_vectors = 1;
913                 err = igb_alloc_q_vectors(adapter);
914                 if (err) {
915                         dev_err(&pdev->dev,
916                                 "Unable to allocate memory for vectors\n");
917                         goto request_done;
918                 }
919                 err = igb_alloc_queues(adapter);
920                 if (err) {
921                         dev_err(&pdev->dev,
922                                 "Unable to allocate memory for queues\n");
923                         igb_free_q_vectors(adapter);
924                         goto request_done;
925                 }
926                 igb_setup_all_tx_resources(adapter);
927                 igb_setup_all_rx_resources(adapter);
928         } else {
929                 switch (hw->mac.type) {
930                 case e1000_82575:
931                         wr32(E1000_MSIXBM(0),
932                              (E1000_EICR_RX_QUEUE0 |
933                               E1000_EICR_TX_QUEUE0 |
934                               E1000_EIMS_OTHER));
935                         break;
936                 case e1000_82576:
937                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
938                         break;
939                 default:
940                         break;
941                 }
942         }
943
944         if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
946                                   netdev->name, adapter);
947                 if (!err)
948                         goto request_done;
949
950                 /* fall back to legacy interrupts */
951                 igb_reset_interrupt_capability(adapter);
952                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
953         }
954
955         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
956                           netdev->name, adapter);
957
958         if (err)
959                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
960                         err);
961
962 request_done:
963         return err;
964 }
965
966 static void igb_free_irq(struct igb_adapter *adapter)
967 {
968         if (adapter->msix_entries) {
969                 int vector = 0, i;
970
971                 free_irq(adapter->msix_entries[vector++].vector, adapter);
972
973                 for (i = 0; i < adapter->num_q_vectors; i++) {
974                         struct igb_q_vector *q_vector = adapter->q_vector[i];
975                         free_irq(adapter->msix_entries[vector++].vector,
976                                  q_vector);
977                 }
978         } else {
979                 free_irq(adapter->pdev->irq, adapter);
980         }
981 }
982
983 /**
984  * igb_irq_disable - Mask off interrupt generation on the NIC
985  * @adapter: board private structure
986  **/
987 static void igb_irq_disable(struct igb_adapter *adapter)
988 {
989         struct e1000_hw *hw = &adapter->hw;
990
991         if (adapter->msix_entries) {
992                 u32 regval = rd32(E1000_EIAM);
993                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
994                 wr32(E1000_EIMC, adapter->eims_enable_mask);
995                 regval = rd32(E1000_EIAC);
996                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
997         }
998
999         wr32(E1000_IAM, 0);
1000         wr32(E1000_IMC, ~0);
1001         wrfl();
1002         synchronize_irq(adapter->pdev->irq);
1003 }
1004
1005 /**
1006  * igb_irq_enable - Enable default interrupt generation settings
1007  * @adapter: board private structure
1008  **/
1009 static void igb_irq_enable(struct igb_adapter *adapter)
1010 {
1011         struct e1000_hw *hw = &adapter->hw;
1012
1013         if (adapter->msix_entries) {
1014                 u32 regval = rd32(E1000_EIAC);
1015                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1016                 regval = rd32(E1000_EIAM);
1017                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1018                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1019                 if (adapter->vfs_allocated_count)
1020                         wr32(E1000_MBVFIMR, 0xFF);
1021                 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
1022                                  E1000_IMS_DOUTSYNC));
1023         } else {
1024                 wr32(E1000_IMS, IMS_ENABLE_MASK);
1025                 wr32(E1000_IAM, IMS_ENABLE_MASK);
1026         }
1027 }
1028
1029 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1030 {
1031         struct net_device *netdev = adapter->netdev;
1032         u16 vid = adapter->hw.mng_cookie.vlan_id;
1033         u16 old_vid = adapter->mng_vlan_id;
1034         if (adapter->vlgrp) {
1035                 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1036                         if (adapter->hw.mng_cookie.status &
1037                                 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1038                                 igb_vlan_rx_add_vid(netdev, vid);
1039                                 adapter->mng_vlan_id = vid;
1040                         } else
1041                                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1042
1043                         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1044                                         (vid != old_vid) &&
1045                             !vlan_group_get_device(adapter->vlgrp, old_vid))
1046                                 igb_vlan_rx_kill_vid(netdev, old_vid);
1047                 } else
1048                         adapter->mng_vlan_id = vid;
1049         }
1050 }
1051
1052 /**
1053  * igb_release_hw_control - release control of the h/w to f/w
1054  * @adapter: address of board private structure
1055  *
1056  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1057  * For ASF and Pass Through versions of f/w this means that the
1058  * driver is no longer loaded.
1059  *
1060  **/
1061 static void igb_release_hw_control(struct igb_adapter *adapter)
1062 {
1063         struct e1000_hw *hw = &adapter->hw;
1064         u32 ctrl_ext;
1065
1066         /* Let firmware take over control of h/w */
1067         ctrl_ext = rd32(E1000_CTRL_EXT);
1068         wr32(E1000_CTRL_EXT,
1069                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1070 }
1071
1072
1073 /**
1074  * igb_get_hw_control - get control of the h/w from f/w
1075  * @adapter: address of board private structure
1076  *
1077  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1078  * For ASF and Pass Through versions of f/w this means that
1079  * the driver is loaded.
1080  *
1081  **/
1082 static void igb_get_hw_control(struct igb_adapter *adapter)
1083 {
1084         struct e1000_hw *hw = &adapter->hw;
1085         u32 ctrl_ext;
1086
1087         /* Let firmware know the driver has taken over */
1088         ctrl_ext = rd32(E1000_CTRL_EXT);
1089         wr32(E1000_CTRL_EXT,
1090                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1091 }
1092
1093 /**
1094  * igb_configure - configure the hardware for RX and TX
1095  * @adapter: private board structure
1096  **/
1097 static void igb_configure(struct igb_adapter *adapter)
1098 {
1099         struct net_device *netdev = adapter->netdev;
1100         int i;
1101
1102         igb_get_hw_control(adapter);
1103         igb_set_rx_mode(netdev);
1104
1105         igb_restore_vlan(adapter);
1106
1107         igb_setup_tctl(adapter);
1108         igb_setup_mrqc(adapter);
1109         igb_setup_rctl(adapter);
1110
1111         igb_configure_tx(adapter);
1112         igb_configure_rx(adapter);
1113
1114         igb_rx_fifo_flush_82575(&adapter->hw);
1115
1116         /* call igb_desc_unused which always leaves
1117          * at least 1 descriptor unused to make sure
1118          * next_to_use != next_to_clean */
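        /* (if every descriptor were used, next_to_use would equal
         * next_to_clean, which is indistinguishable from an empty ring) */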
1119         for (i = 0; i < adapter->num_rx_queues; i++) {
1120                 struct igb_ring *ring = &adapter->rx_ring[i];
1121                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1122         }
1123
1124
1125         adapter->tx_queue_len = netdev->tx_queue_len;
1126 }
1127
1128
1129 /**
1130  * igb_up - Open the interface and prepare it to handle traffic
1131  * @adapter: board private structure
1132  **/
1133
1134 int igb_up(struct igb_adapter *adapter)
1135 {
1136         struct e1000_hw *hw = &adapter->hw;
1137         int i;
1138
1139         /* hardware has been reset, we need to reload some things */
1140         igb_configure(adapter);
1141
1142         clear_bit(__IGB_DOWN, &adapter->state);
1143
1144         for (i = 0; i < adapter->num_q_vectors; i++) {
1145                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1146                 napi_enable(&q_vector->napi);
1147         }
1148         if (adapter->msix_entries)
1149                 igb_configure_msix(adapter);
1150
1151         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1152
1153         /* Clear any pending interrupts. */
1154         rd32(E1000_ICR);
1155         igb_irq_enable(adapter);
1156
1157         /* notify VFs that reset has been completed */
1158         if (adapter->vfs_allocated_count) {
1159                 u32 reg_data = rd32(E1000_CTRL_EXT);
1160                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1161                 wr32(E1000_CTRL_EXT, reg_data);
1162         }
1163
1164         netif_tx_start_all_queues(adapter->netdev);
1165
1166         /* Fire a link change interrupt to start the watchdog. */
1167         wr32(E1000_ICS, E1000_ICS_LSC);
1168         return 0;
1169 }
1170
1171 void igb_down(struct igb_adapter *adapter)
1172 {
1173         struct e1000_hw *hw = &adapter->hw;
1174         struct net_device *netdev = adapter->netdev;
1175         u32 tctl, rctl;
1176         int i;
1177
1178         /* signal that we're down so the interrupt handler does not
1179          * reschedule our watchdog timer */
1180         set_bit(__IGB_DOWN, &adapter->state);
1181
1182         /* disable receives in the hardware */
1183         rctl = rd32(E1000_RCTL);
1184         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1185         /* flush and sleep below */
1186
1187         netif_tx_stop_all_queues(netdev);
1188
1189         /* disable transmits in the hardware */
1190         tctl = rd32(E1000_TCTL);
1191         tctl &= ~E1000_TCTL_EN;
1192         wr32(E1000_TCTL, tctl);
1193         /* flush both disables and wait for them to finish */
1194         wrfl();
1195         msleep(10);
1196
1197         for (i = 0; i < adapter->num_q_vectors; i++) {
1198                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1199                 napi_disable(&q_vector->napi);
1200         }
1201
1202         igb_irq_disable(adapter);
1203
1204         del_timer_sync(&adapter->watchdog_timer);
1205         del_timer_sync(&adapter->phy_info_timer);
1206
1207         netdev->tx_queue_len = adapter->tx_queue_len;
1208         netif_carrier_off(netdev);
1209
1210         /* record the stats before reset*/
1211         igb_update_stats(adapter);
1212
1213         adapter->link_speed = 0;
1214         adapter->link_duplex = 0;
1215
1216         if (!pci_channel_offline(adapter->pdev))
1217                 igb_reset(adapter);
1218         igb_clean_all_tx_rings(adapter);
1219         igb_clean_all_rx_rings(adapter);
1220 #ifdef CONFIG_IGB_DCA
1221
1222         /* since we reset the hardware, DCA settings were cleared */
1223         igb_setup_dca(adapter);
1224 #endif
1225 }
1226
1227 void igb_reinit_locked(struct igb_adapter *adapter)
1228 {
1229         WARN_ON(in_interrupt());
1230         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1231                 msleep(1);
1232         igb_down(adapter);
1233         igb_up(adapter);
1234         clear_bit(__IGB_RESETTING, &adapter->state);
1235 }
1236
1237 void igb_reset(struct igb_adapter *adapter)
1238 {
1239         struct e1000_hw *hw = &adapter->hw;
1240         struct e1000_mac_info *mac = &hw->mac;
1241         struct e1000_fc_info *fc = &hw->fc;
1242         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1243         u16 hwm;
1244
1245         /* Repartition the PBA for MTUs greater than 9k.
1246          * CTRL.RST is required for the change to take effect.
1247          */
1248         switch (mac->type) {
1249         case e1000_82576:
1250                 pba = E1000_PBA_64K;
1251                 break;
1252         case e1000_82575:
1253         default:
1254                 pba = E1000_PBA_34K;
1255                 break;
1256         }
1257
1258         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1259             (mac->type < e1000_82576)) {
1260                 /* adjust PBA for jumbo frames */
1261                 wr32(E1000_PBA, pba);
1262
1263                 /* To maintain wire speed transmits, the Tx FIFO should be
1264                  * large enough to accommodate two full transmit packets,
1265                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1266                  * the Rx FIFO should be large enough to accommodate at least
1267                  * one full receive packet and is similarly rounded up and
1268                  * expressed in KB. */
1269                 pba = rd32(E1000_PBA);
1270                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1271                 tx_space = pba >> 16;
1272                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1273                 pba &= 0xffff;
1274                 /* the tx fifo also stores 16 bytes of information about the tx
1275                  * packet, but don't include the ethernet FCS because hardware appends it */
1276                 min_tx_space = (adapter->max_frame_size +
1277                                 sizeof(union e1000_adv_tx_desc) -
1278                                 ETH_FCS_LEN) * 2;
1279                 min_tx_space = ALIGN(min_tx_space, 1024);
1280                 min_tx_space >>= 10;
1281                 /* software strips receive CRC, so leave room for it */
1282                 min_rx_space = adapter->max_frame_size;
1283                 min_rx_space = ALIGN(min_rx_space, 1024);
1284                 min_rx_space >>= 10;
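                /* rough example, assuming a 9018-byte jumbo frame and 16-byte
                 * advanced Tx descriptors: min_tx_space = (9018 + 16 - 4) * 2
                 * = 18060 -> aligned to 18432 -> 18 KB, and min_rx_space =
                 * 9018 -> aligned to 9216 -> 9 KB */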
1285
1286                 /* If current Tx allocation is less than the min Tx FIFO size,
1287                  * and the min Tx FIFO size is less than the current Rx FIFO
1288                  * allocation, take space away from current Rx allocation */
1289                 if (tx_space < min_tx_space &&
1290                     ((min_tx_space - tx_space) < pba)) {
1291                         pba = pba - (min_tx_space - tx_space);
1292
1293                         /* if short on rx space, rx wins and must trump tx
1294                          * adjustment */
1295                         if (pba < min_rx_space)
1296                                 pba = min_rx_space;
1297                 }
1298                 wr32(E1000_PBA, pba);
1299         }
1300
1301         /* flow control settings */
1302         /* The high water mark must be low enough to fit one full frame
1303          * (or the size used for early receive) above it in the Rx FIFO.
1304          * Set it to the lower of:
1305          * - 90% of the Rx FIFO size, or
1306          * - the full Rx FIFO size minus one full frame */
1307         hwm = min(((pba << 10) * 9 / 10),
1308                         ((pba << 10) - 2 * adapter->max_frame_size));
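        /* e.g. assuming a 64 KB Rx PBA and 1522-byte frames this picks
         * min(58982, 62492) = 58982 bytes before the granularity masking below */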
1309
1310         if (mac->type < e1000_82576) {
1311                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1312                 fc->low_water = fc->high_water - 8;
1313         } else {
1314                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1315                 fc->low_water = fc->high_water - 16;
1316         }
1317         fc->pause_time = 0xFFFF;
1318         fc->send_xon = 1;
1319         fc->current_mode = fc->requested_mode;
1320
1321         /* disable receive for all VFs and wait one second */
1322         if (adapter->vfs_allocated_count) {
1323                 int i;
1324                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1325                         adapter->vf_data[i].clear_to_send = false;
1326
1327                 /* ping all the active vfs to let them know we are going down */
1328                 igb_ping_all_vfs(adapter);
1329
1330                 /* disable transmits and receives */
1331                 wr32(E1000_VFRE, 0);
1332                 wr32(E1000_VFTE, 0);
1333         }
1334
1335         /* Allow time for pending master requests to run */
1336         adapter->hw.mac.ops.reset_hw(&adapter->hw);
1337         wr32(E1000_WUC, 0);
1338
1339         if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1340                 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1341
1342         igb_update_mng_vlan(adapter);
1343
1344         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1345         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1346
1347         igb_reset_adaptive(&adapter->hw);
1348         igb_get_phy_info(&adapter->hw);
1349 }
1350
1351 static const struct net_device_ops igb_netdev_ops = {
1352         .ndo_open               = igb_open,
1353         .ndo_stop               = igb_close,
1354         .ndo_start_xmit         = igb_xmit_frame_adv,
1355         .ndo_get_stats          = igb_get_stats,
1356         .ndo_set_rx_mode        = igb_set_rx_mode,
1357         .ndo_set_multicast_list = igb_set_rx_mode,
1358         .ndo_set_mac_address    = igb_set_mac,
1359         .ndo_change_mtu         = igb_change_mtu,
1360         .ndo_do_ioctl           = igb_ioctl,
1361         .ndo_tx_timeout         = igb_tx_timeout,
1362         .ndo_validate_addr      = eth_validate_addr,
1363         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1364         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1365         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1366 #ifdef CONFIG_NET_POLL_CONTROLLER
1367         .ndo_poll_controller    = igb_netpoll,
1368 #endif
1369 };
1370
1371 /**
1372  * igb_probe - Device Initialization Routine
1373  * @pdev: PCI device information struct
1374  * @ent: entry in igb_pci_tbl
1375  *
1376  * Returns 0 on success, negative on failure
1377  *
1378  * igb_probe initializes an adapter identified by a pci_dev structure.
1379  * The OS initialization, configuring of the adapter private structure,
1380  * and a hardware reset occur.
1381  **/
1382 static int __devinit igb_probe(struct pci_dev *pdev,
1383                                const struct pci_device_id *ent)
1384 {
1385         struct net_device *netdev;
1386         struct igb_adapter *adapter;
1387         struct e1000_hw *hw;
1388         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1389         unsigned long mmio_start, mmio_len;
1390         int err, pci_using_dac;
1391         u16 eeprom_data = 0;
1392         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1393         u32 part_num;
1394
1395         err = pci_enable_device_mem(pdev);
1396         if (err)
1397                 return err;
1398
1399         pci_using_dac = 0;
1400         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1401         if (!err) {
1402                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1403                 if (!err)
1404                         pci_using_dac = 1;
1405         } else {
1406                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1407                 if (err) {
1408                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1409                         if (err) {
1410                                 dev_err(&pdev->dev, "No usable DMA "
1411                                         "configuration, aborting\n");
1412                                 goto err_dma;
1413                         }
1414                 }
1415         }
1416
1417         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1418                                            IORESOURCE_MEM),
1419                                            igb_driver_name);
1420         if (err)
1421                 goto err_pci_reg;
1422
1423         pci_enable_pcie_error_reporting(pdev);
1424
1425         pci_set_master(pdev);
1426         pci_save_state(pdev);
1427
1428         err = -ENOMEM;
1429         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1430                                    IGB_ABS_MAX_TX_QUEUES);
1431         if (!netdev)
1432                 goto err_alloc_etherdev;
1433
1434         SET_NETDEV_DEV(netdev, &pdev->dev);
1435
1436         pci_set_drvdata(pdev, netdev);
1437         adapter = netdev_priv(netdev);
1438         adapter->netdev = netdev;
1439         adapter->pdev = pdev;
1440         hw = &adapter->hw;
1441         hw->back = adapter;
1442         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1443
1444         mmio_start = pci_resource_start(pdev, 0);
1445         mmio_len = pci_resource_len(pdev, 0);
1446
1447         err = -EIO;
1448         hw->hw_addr = ioremap(mmio_start, mmio_len);
1449         if (!hw->hw_addr)
1450                 goto err_ioremap;
1451
1452         netdev->netdev_ops = &igb_netdev_ops;
1453         igb_set_ethtool_ops(netdev);
1454         netdev->watchdog_timeo = 5 * HZ;
1455
1456         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1457
1458         netdev->mem_start = mmio_start;
1459         netdev->mem_end = mmio_start + mmio_len;
1460
1461         /* PCI config space info */
1462         hw->vendor_id = pdev->vendor;
1463         hw->device_id = pdev->device;
1464         hw->revision_id = pdev->revision;
1465         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1466         hw->subsystem_device_id = pdev->subsystem_device;
1467
1468         /* setup the private structure */
1469         hw->back = adapter;
1470         /* Copy the default MAC, PHY and NVM function pointers */
1471         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1472         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1473         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1474         /* Initialize skew-specific constants */
1475         err = ei->get_invariants(hw);
1476         if (err)
1477                 goto err_sw_init;
1478
1479 #ifdef CONFIG_PCI_IOV
1480         /* since SR-IOV functionality isn't critical to base device function we
1481          * can accept failure.  If it fails we simply don't enable SR-IOV */
1482         if (hw->mac.type == e1000_82576) {
1483                 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1484                 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1485                 int i;
1486                 unsigned char mac_addr[ETH_ALEN];
1487
1488                 if (num_vfs) {
1489                         adapter->vf_data = kcalloc(num_vfs,
1490                                                 sizeof(struct vf_data_storage),
1491                                                 GFP_KERNEL);
1492                         if (!adapter->vf_data) {
1493                                 dev_err(&pdev->dev,
1494                                         "Could not allocate VF private data - "
1495                                         "IOV enable failed\n");
1496                         } else {
1497                                 err = pci_enable_sriov(pdev, num_vfs);
1498                                 if (!err) {
1499                                         adapter->vfs_allocated_count = num_vfs;
1500                                         dev_info(&pdev->dev,
1501                                                  "%d vfs allocated\n",
1502                                                  num_vfs);
1503                                         for (i = 0;
1504                                              i < adapter->vfs_allocated_count;
1505                                              i++) {
1506                                                 random_ether_addr(mac_addr);
1507                                                 igb_set_vf_mac(adapter, i,
1508                                                                mac_addr);
1509                                         }
1510                                 } else {
1511                                         kfree(adapter->vf_data);
1512                                         adapter->vf_data = NULL;
1513                                 }
1514                         }
1515                 }
1516         }
1517
1518 #endif
1519         /* setup the private structure */
1520         err = igb_sw_init(adapter);
1521         if (err)
1522                 goto err_sw_init;
1523
1524         igb_get_bus_info_pcie(hw);
1525
1526         hw->phy.autoneg_wait_to_complete = false;
1527         hw->mac.adaptive_ifs = true;
1528
1529         /* Copper options */
1530         if (hw->phy.media_type == e1000_media_type_copper) {
1531                 hw->phy.mdix = AUTO_ALL_MODES;
1532                 hw->phy.disable_polarity_correction = false;
1533                 hw->phy.ms_type = e1000_ms_hw_default;
1534         }
1535
1536         if (igb_check_reset_block(hw))
1537                 dev_info(&pdev->dev,
1538                         "PHY reset is blocked due to SOL/IDER session.\n");
1539
1540         netdev->features = NETIF_F_SG |
1541                            NETIF_F_IP_CSUM |
1542                            NETIF_F_HW_VLAN_TX |
1543                            NETIF_F_HW_VLAN_RX |
1544                            NETIF_F_HW_VLAN_FILTER;
1545
1546         netdev->features |= NETIF_F_IPV6_CSUM;
1547         netdev->features |= NETIF_F_TSO;
1548         netdev->features |= NETIF_F_TSO6;
1549
1550         netdev->features |= NETIF_F_GRO;
1551
1552         netdev->vlan_features |= NETIF_F_TSO;
1553         netdev->vlan_features |= NETIF_F_TSO6;
1554         netdev->vlan_features |= NETIF_F_IP_CSUM;
1555         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1556         netdev->vlan_features |= NETIF_F_SG;
1557
1558         if (pci_using_dac)
1559                 netdev->features |= NETIF_F_HIGHDMA;
1560
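        /* the 82576 can additionally offload SCTP (CRC32c) checksums */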
1561         if (adapter->hw.mac.type == e1000_82576)
1562                 netdev->features |= NETIF_F_SCTP_CSUM;
1563
1564         adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1565
1566         /* before reading the NVM, reset the controller to put the device in a
1567          * known good starting state */
1568         hw->mac.ops.reset_hw(hw);
1569
1570         /* make sure the NVM is good */
1571         if (igb_validate_nvm_checksum(hw) < 0) {
1572                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1573                 err = -EIO;
1574                 goto err_eeprom;
1575         }
1576
1577         /* copy the MAC address out of the NVM */
1578         if (hw->mac.ops.read_mac_addr(hw))
1579                 dev_err(&pdev->dev, "NVM Read Error\n");
1580
1581         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1582         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1583
1584         if (!is_valid_ether_addr(netdev->perm_addr)) {
1585                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1586                 err = -EIO;
1587                 goto err_eeprom;
1588         }
1589
1590         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1591                     (unsigned long) adapter);
1592         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1593                     (unsigned long) adapter);
1594
1595         INIT_WORK(&adapter->reset_task, igb_reset_task);
1596         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1597
1598         /* Initialize link properties that are user-changeable */
1599         adapter->fc_autoneg = true;
1600         hw->mac.autoneg = true;
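        /* 0x2f = advertise 10/100 half and full duplex plus 1000 full duplex */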
1601         hw->phy.autoneg_advertised = 0x2f;
1602
1603         hw->fc.requested_mode = e1000_fc_default;
1604         hw->fc.current_mode = e1000_fc_default;
1605
1606         igb_validate_mdi_setting(hw);
1607
1608         /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1609          * enable the ACPI Magic Packet filter
1610          */
1611
1612         if (hw->bus.func == 0)
1613                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1614         else if (hw->bus.func == 1)
1615                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1616
1617         if (eeprom_data & eeprom_apme_mask)
1618                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1619
1620         /* now that we have the EEPROM settings, apply the special cases where
1621          * the EEPROM may be wrong or the board simply won't support Wake on
1622          * LAN on a particular port */
1623         switch (pdev->device) {
1624         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1625                 adapter->eeprom_wol = 0;
1626                 break;
1627         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1628         case E1000_DEV_ID_82576_FIBER:
1629         case E1000_DEV_ID_82576_SERDES:
1630                 /* Wake events only supported on port A for dual fiber
1631                  * regardless of eeprom setting */
1632                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1633                         adapter->eeprom_wol = 0;
1634                 break;
1635         case E1000_DEV_ID_82576_QUAD_COPPER:
1636                 /* if quad port adapter, disable WoL on all but port A */
1637                 if (global_quad_port_a != 0)
1638                         adapter->eeprom_wol = 0;
1639                 else
1640                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1641                 /* Reset for multiple quad port adapters */
1642                 if (++global_quad_port_a == 4)
1643                         global_quad_port_a = 0;
1644                 break;
1645         }
1646
1647         /* initialize the wol settings based on the eeprom settings */
1648         adapter->wol = adapter->eeprom_wol;
1649         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1650
1651         /* reset the hardware with the new settings */
1652         igb_reset(adapter);
1653
1654         /* let the f/w know that the h/w is now under the control of the
1655          * driver. */
1656         igb_get_hw_control(adapter);
1657
1658         strcpy(netdev->name, "eth%d");
1659         err = register_netdev(netdev);
1660         if (err)
1661                 goto err_register;
1662
1663         /* carrier off reporting is important to ethtool even BEFORE open */
1664         netif_carrier_off(netdev);
1665
1666 #ifdef CONFIG_IGB_DCA
1667         if (dca_add_requester(&pdev->dev) == 0) {
1668                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1669                 dev_info(&pdev->dev, "DCA enabled\n");
1670                 igb_setup_dca(adapter);
1671         }
1672 #endif
1673
1674         /*
1675          * Initialize hardware timer: we keep it running just in case
1676          * that some program needs it later on.
1677          */
1678         memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1679         adapter->cycles.read = igb_read_clock;
1680         adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1681         adapter->cycles.mult = 1;
1682         adapter->cycles.shift = IGB_TSYNC_SHIFT;
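        /* TIMINCA: bits 31:24 hold the increment period (set to 1 here) and
         * bits 23:0 the value added to SYSTIM each period */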
1683         wr32(E1000_TIMINCA,
1684              (1<<24) |
1685              IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
1686 #if 0
1687         /*
1688          * Avoid rollover while we initialize by resetting the time counter.
1689          */
1690         wr32(E1000_SYSTIML, 0x00000000);
1691         wr32(E1000_SYSTIMH, 0x00000000);
1692 #else
1693         /*
1694          * Set registers so that rollover occurs soon to test this.
1695          */
1696         wr32(E1000_SYSTIML, 0x00000000);
1697         wr32(E1000_SYSTIMH, 0xFF800000);
1698 #endif
1699         wrfl();
1700         timecounter_init(&adapter->clock,
1701                          &adapter->cycles,
1702                          ktime_to_ns(ktime_get_real()));
1703
1704         /*
1705          * Synchronize our NIC clock against system wall clock. NIC
1706          * time stamp reading requires ~3us per sample, each sample
1707          * was pretty stable even under load => only require 10
1708          * samples for each offset comparison.
1709          */
1710         memset(&adapter->compare, 0, sizeof(adapter->compare));
1711         adapter->compare.source = &adapter->clock;
1712         adapter->compare.target = ktime_get_real;
1713         adapter->compare.num_samples = 10;
1714         timecompare_update(&adapter->compare, 0);
1715
1716 #ifdef DEBUG
1717         {
1718                 char buffer[160];
1719                 printk(KERN_DEBUG
1720                         "igb: %s: hw %p initialized timer\n",
1721                         igb_get_time_str(adapter, buffer),
1722                         &adapter->hw);
1723         }
1724 #endif
1725
1726         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1727         /* print bus type/speed/width info */
1728         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1729                  netdev->name,
1730                  ((hw->bus.speed == e1000_bus_speed_2500)
1731                   ? "2.5Gb/s" : "unknown"),
1732                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1733                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1734                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1735                    "unknown"),
1736                  netdev->dev_addr);
1737
1738         igb_read_part_num(hw, &part_num);
1739         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1740                 (part_num >> 8), (part_num & 0xff));
1741
1742         dev_info(&pdev->dev,
1743                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1744                 adapter->msix_entries ? "MSI-X" :
1745                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1746                 adapter->num_rx_queues, adapter->num_tx_queues);
1747
1748         return 0;
1749
1750 err_register:
1751         igb_release_hw_control(adapter);
1752 err_eeprom:
1753         if (!igb_check_reset_block(hw))
1754                 igb_reset_phy(hw);
1755
1756         if (hw->flash_address)
1757                 iounmap(hw->flash_address);
1758 err_sw_init:
1759         igb_clear_interrupt_scheme(adapter);
1760         iounmap(hw->hw_addr);
1761 err_ioremap:
1762         free_netdev(netdev);
1763 err_alloc_etherdev:
1764         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1765                                      IORESOURCE_MEM));
1766 err_pci_reg:
1767 err_dma:
1768         pci_disable_device(pdev);
1769         return err;
1770 }
1771
1772 /**
1773  * igb_remove - Device Removal Routine
1774  * @pdev: PCI device information struct
1775  *
1776  * igb_remove is called by the PCI subsystem to alert the driver
1777  * that it should release a PCI device.  This could be caused by a
1778  * Hot-Plug event, or because the driver is going to be removed from
1779  * memory.
1780  **/
1781 static void __devexit igb_remove(struct pci_dev *pdev)
1782 {
1783         struct net_device *netdev = pci_get_drvdata(pdev);
1784         struct igb_adapter *adapter = netdev_priv(netdev);
1785         struct e1000_hw *hw = &adapter->hw;
1786
1787         /* flush_scheduled_work() may reschedule our watchdog task, so
1788          * explicitly disable the watchdog task from being rescheduled */
1789         set_bit(__IGB_DOWN, &adapter->state);
1790         del_timer_sync(&adapter->watchdog_timer);
1791         del_timer_sync(&adapter->phy_info_timer);
1792
1793         flush_scheduled_work();
1794
1795 #ifdef CONFIG_IGB_DCA
1796         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1797                 dev_info(&pdev->dev, "DCA disabled\n");
1798                 dca_remove_requester(&pdev->dev);
1799                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1800                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1801         }
1802 #endif
1803
1804         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1805          * would have already happened in close and is redundant. */
1806         igb_release_hw_control(adapter);
1807
1808         unregister_netdev(netdev);
1809
1810         if (!igb_check_reset_block(&adapter->hw))
1811                 igb_reset_phy(&adapter->hw);
1812
1813         igb_clear_interrupt_scheme(adapter);
1814
1815 #ifdef CONFIG_PCI_IOV
1816         /* reclaim resources allocated to VFs */
1817         if (adapter->vf_data) {
1818                 /* disable iov and allow time for transactions to clear */
1819                 pci_disable_sriov(pdev);
1820                 msleep(500);
1821
1822                 kfree(adapter->vf_data);
1823                 adapter->vf_data = NULL;
1824                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1825                 msleep(100);
1826                 dev_info(&pdev->dev, "IOV Disabled\n");
1827         }
1828 #endif
1829         iounmap(hw->hw_addr);
1830         if (hw->flash_address)
1831                 iounmap(hw->flash_address);
1832         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1833                                      IORESOURCE_MEM));
1834
1835         free_netdev(netdev);
1836
1837         pci_disable_pcie_error_reporting(pdev);
1838
1839         pci_disable_device(pdev);
1840 }
1841
1842 /**
1843  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1844  * @adapter: board private structure to initialize
1845  *
1846  * igb_sw_init initializes the Adapter private data structure.
1847  * Fields are initialized based on PCI device information and
1848  * OS network device settings (MTU size).
1849  **/
1850 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1851 {
1852         struct e1000_hw *hw = &adapter->hw;
1853         struct net_device *netdev = adapter->netdev;
1854         struct pci_dev *pdev = adapter->pdev;
1855
1856         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1857
1858         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1859         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1860         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1861         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1862
1863         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1864         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1865
1866         /* This call may decrease the number of queues depending on
1867          * interrupt mode. */
1868         if (igb_init_interrupt_scheme(adapter)) {
1869                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1870                 return -ENOMEM;
1871         }
1872
1873         /* Explicitly disable IRQ since the NIC can be in any state. */
1874         igb_irq_disable(adapter);
1875
1876         set_bit(__IGB_DOWN, &adapter->state);
1877         return 0;
1878 }
1879
1880 /**
1881  * igb_open - Called when a network interface is made active
1882  * @netdev: network interface device structure
1883  *
1884  * Returns 0 on success, negative value on failure
1885  *
1886  * The open entry point is called when a network interface is made
1887  * active by the system (IFF_UP).  At this point all resources needed
1888  * for transmit and receive operations are allocated, the interrupt
1889  * handler is registered with the OS, the watchdog timer is started,
1890  * and the stack is notified that the interface is ready.
1891  **/
1892 static int igb_open(struct net_device *netdev)
1893 {
1894         struct igb_adapter *adapter = netdev_priv(netdev);
1895         struct e1000_hw *hw = &adapter->hw;
1896         int err;
1897         int i;
1898
1899         /* disallow open during test */
1900         if (test_bit(__IGB_TESTING, &adapter->state))
1901                 return -EBUSY;
1902
1903         netif_carrier_off(netdev);
1904
1905         /* allocate transmit descriptors */
1906         err = igb_setup_all_tx_resources(adapter);
1907         if (err)
1908                 goto err_setup_tx;
1909
1910         /* allocate receive descriptors */
1911         err = igb_setup_all_rx_resources(adapter);
1912         if (err)
1913                 goto err_setup_rx;
1914
1915         /* e1000_power_up_phy(adapter); */
1916
1917         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1918         if ((adapter->hw.mng_cookie.status &
1919              E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1920                 igb_update_mng_vlan(adapter);
1921
1922         /* before we allocate an interrupt, we must be ready to handle it.
1923          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1924          * as soon as we call igb_request_irq, so we have to set up our
1925          * clean_rx handler before we do so.  */
1926         igb_configure(adapter);
1927
1928         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1929
1930         err = igb_request_irq(adapter);
1931         if (err)
1932                 goto err_req_irq;
1933
1934         /* From here on the code is the same as igb_up() */
1935         clear_bit(__IGB_DOWN, &adapter->state);
1936
1937         for (i = 0; i < adapter->num_q_vectors; i++) {
1938                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1939                 napi_enable(&q_vector->napi);
1940         }
1941
1942         /* Clear any pending interrupts. */
1943         rd32(E1000_ICR);
1944
1945         igb_irq_enable(adapter);
1946
1947         /* notify VFs that reset has been completed */
1948         if (adapter->vfs_allocated_count) {
1949                 u32 reg_data = rd32(E1000_CTRL_EXT);
1950                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1951                 wr32(E1000_CTRL_EXT, reg_data);
1952         }
1953
1954         netif_tx_start_all_queues(netdev);
1955
1956         /* Fire a link status change interrupt to start the watchdog. */
1957         wr32(E1000_ICS, E1000_ICS_LSC);
1958
1959         return 0;
1960
1961 err_req_irq:
1962         igb_release_hw_control(adapter);
1963         /* e1000_power_down_phy(adapter); */
1964         igb_free_all_rx_resources(adapter);
1965 err_setup_rx:
1966         igb_free_all_tx_resources(adapter);
1967 err_setup_tx:
1968         igb_reset(adapter);
1969
1970         return err;
1971 }
1972
1973 /**
1974  * igb_close - Disables a network interface
1975  * @netdev: network interface device structure
1976  *
1977  * Returns 0, this is not allowed to fail
1978  *
1979  * The close entry point is called when an interface is de-activated
1980  * by the OS.  The hardware is still under the driver's control, but
1981  * needs to be disabled.  A global MAC reset is issued to stop the
1982  * hardware, and all transmit and receive resources are freed.
1983  **/
1984 static int igb_close(struct net_device *netdev)
1985 {
1986         struct igb_adapter *adapter = netdev_priv(netdev);
1987
1988         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1989         igb_down(adapter);
1990
1991         igb_free_irq(adapter);
1992
1993         igb_free_all_tx_resources(adapter);
1994         igb_free_all_rx_resources(adapter);
1995
1996         /* kill manageability vlan ID if supported, but not if a vlan with
1997          * the same ID is registered on the host OS (let 8021q kill it) */
1998         if ((adapter->hw.mng_cookie.status &
1999                           E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
2000              !(adapter->vlgrp &&
2001                vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
2002                 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
2003
2004         return 0;
2005 }
2006
2007 /**
2008  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2009  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2010  *
2011  * Return 0 on success, negative on failure
2012  **/
2013 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2014 {
2015         struct pci_dev *pdev = tx_ring->pdev;
2016         int size;
2017
2018         size = sizeof(struct igb_buffer) * tx_ring->count;
2019         tx_ring->buffer_info = vmalloc(size);
2020         if (!tx_ring->buffer_info)
2021                 goto err;
2022         memset(tx_ring->buffer_info, 0, size);
2023
2024         /* round up to nearest 4K */
2025         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2026         tx_ring->size = ALIGN(tx_ring->size, 4096);
2027
2028         tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
2029                                              &tx_ring->dma);
2030
2031         if (!tx_ring->desc)
2032                 goto err;
2033
2034         tx_ring->next_to_use = 0;
2035         tx_ring->next_to_clean = 0;
2036         return 0;
2037
2038 err:
2039         vfree(tx_ring->buffer_info);
2040         dev_err(&pdev->dev,
2041                 "Unable to allocate memory for the transmit descriptor ring\n");
2042         return -ENOMEM;
2043 }
2044
2045 /**
2046  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2047  *                                (Descriptors) for all queues
2048  * @adapter: board private structure
2049  *
2050  * Return 0 on success, negative on failure
2051  **/
2052 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2053 {
2054         int i, err = 0;
2055         int r_idx;
2056
2057         for (i = 0; i < adapter->num_tx_queues; i++) {
2058                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2059                 if (err) {
2060                         dev_err(&adapter->pdev->dev,
2061                                 "Allocation for Tx Queue %u failed\n", i);
2062                         for (i--; i >= 0; i--)
2063                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2064                         break;
2065                 }
2066         }
2067
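        /* map every possible stack Tx queue index onto one of the rings we
         * actually allocated, round-robin */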
2068         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2069                 r_idx = i % adapter->num_tx_queues;
2070                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2071         }
2072         return err;
2073 }
2074
2075 /**
2076  * igb_setup_tctl - configure the transmit control registers
2077  * @adapter: Board private structure
2078  **/
2079 void igb_setup_tctl(struct igb_adapter *adapter)
2080 {
2081         struct e1000_hw *hw = &adapter->hw;
2082         u32 tctl;
2083
2084         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2085         wr32(E1000_TXDCTL(0), 0);
2086
2087         /* Program the Transmit Control Register */
2088         tctl = rd32(E1000_TCTL);
2089         tctl &= ~E1000_TCTL_CT;
2090         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2091                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2092
2093         igb_config_collision_dist(hw);
2094
2095         /* Enable transmits */
2096         tctl |= E1000_TCTL_EN;
2097
2098         wr32(E1000_TCTL, tctl);
2099 }
2100
2101 /**
2102  * igb_configure_tx_ring - Configure transmit ring after Reset
2103  * @adapter: board private structure
2104  * @ring: tx ring to configure
2105  *
2106  * Configure a transmit ring after a reset.
2107  **/
2108 void igb_configure_tx_ring(struct igb_adapter *adapter,
2109                            struct igb_ring *ring)
2110 {
2111         struct e1000_hw *hw = &adapter->hw;
2112         u32 txdctl;
2113         u64 tdba = ring->dma;
2114         int reg_idx = ring->reg_idx;
2115
2116         /* disable the queue */
2117         txdctl = rd32(E1000_TXDCTL(reg_idx));
2118         wr32(E1000_TXDCTL(reg_idx),
2119                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2120         wrfl();
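        /* give the hardware time to actually stop the queue before
         * reprogramming the ring registers */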
2121         mdelay(10);
2122
2123         wr32(E1000_TDLEN(reg_idx),
2124                         ring->count * sizeof(union e1000_adv_tx_desc));
2125         wr32(E1000_TDBAL(reg_idx),
2126                         tdba & 0x00000000ffffffffULL);
2127         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2128
2129         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2130         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2131         writel(0, ring->head);
2132         writel(0, ring->tail);
2133
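        /* program the prefetch (PTHRESH), host (HTHRESH) and write-back
         * (WTHRESH) thresholds for this Tx queue */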
2134         txdctl |= IGB_TX_PTHRESH;
2135         txdctl |= IGB_TX_HTHRESH << 8;
2136         txdctl |= IGB_TX_WTHRESH << 16;
2137
2138         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2139         wr32(E1000_TXDCTL(reg_idx), txdctl);
2140 }
2141
2142 /**
2143  * igb_configure_tx - Configure transmit Unit after Reset
2144  * @adapter: board private structure
2145  *
2146  * Configure the Tx unit of the MAC after a reset.
2147  **/
2148 static void igb_configure_tx(struct igb_adapter *adapter)
2149 {
2150         int i;
2151
2152         for (i = 0; i < adapter->num_tx_queues; i++)
2153                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2154 }
2155
2156 /**
2157  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2158  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2159  *
2160  * Returns 0 on success, negative on failure
2161  **/
2162 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2163 {
2164         struct pci_dev *pdev = rx_ring->pdev;
2165         int size, desc_len;
2166
2167         size = sizeof(struct igb_buffer) * rx_ring->count;
2168         rx_ring->buffer_info = vmalloc(size);
2169         if (!rx_ring->buffer_info)
2170                 goto err;
2171         memset(rx_ring->buffer_info, 0, size);
2172
2173         desc_len = sizeof(union e1000_adv_rx_desc);
2174
2175         /* Round up to nearest 4K */
2176         rx_ring->size = rx_ring->count * desc_len;
2177         rx_ring->size = ALIGN(rx_ring->size, 4096);
2178
2179         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2180                                              &rx_ring->dma);
2181
2182         if (!rx_ring->desc)
2183                 goto err;
2184
2185         rx_ring->next_to_clean = 0;
2186         rx_ring->next_to_use = 0;
2187
2188         return 0;
2189
2190 err:
2191         vfree(rx_ring->buffer_info);
2192         dev_err(&pdev->dev, "Unable to allocate memory for "
2193                 "the receive descriptor ring\n");
2194         return -ENOMEM;
2195 }
2196
2197 /**
2198  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2199  *                                (Descriptors) for all queues
2200  * @adapter: board private structure
2201  *
2202  * Return 0 on success, negative on failure
2203  **/
2204 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2205 {
2206         int i, err = 0;
2207
2208         for (i = 0; i < adapter->num_rx_queues; i++) {
2209                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2210                 if (err) {
2211                         dev_err(&adapter->pdev->dev,
2212                                 "Allocation for Rx Queue %u failed\n", i);
2213                         for (i--; i >= 0; i--)
2214                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2215                         break;
2216                 }
2217         }
2218
2219         return err;
2220 }
2221
2222 /**
2223  * igb_setup_mrqc - configure the multiple receive queue control registers
2224  * @adapter: Board private structure
2225  **/
2226 static void igb_setup_mrqc(struct igb_adapter *adapter)
2227 {
2228         struct e1000_hw *hw = &adapter->hw;
2229         u32 mrqc, rxcsum;
2230         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2231         union e1000_reta {
2232                 u32 dword;
2233                 u8  bytes[4];
2234         } reta;
2235         static const u8 rsshash[40] = {
2236                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2237                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2238                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2239                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2240
2241         /* Fill out hash function seeds */
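        /* the 40-byte key is packed little-endian, four bytes per RSSRK register */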
2242         for (j = 0; j < 10; j++) {
2243                 u32 rsskey = rsshash[(j * 4)];
2244                 rsskey |= rsshash[(j * 4) + 1] << 8;
2245                 rsskey |= rsshash[(j * 4) + 2] << 16;
2246                 rsskey |= rsshash[(j * 4) + 3] << 24;
2247                 array_wr32(E1000_RSSRK(0), j, rsskey);
2248         }
2249
2250         num_rx_queues = adapter->num_rx_queues;
2251
2252         if (adapter->vfs_allocated_count) {
2253                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2254                 switch (hw->mac.type) {
2255                 case e1000_82576:
2256                         shift = 3;
2257                         num_rx_queues = 2;
2258                         break;
2259                 case e1000_82575:
2260                         shift = 2;
2261                         shift2 = 6;
2262                 default:
2263                         break;
2264                 }
2265         } else {
2266                 if (hw->mac.type == e1000_82575)
2267                         shift = 6;
2268         }
2269
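        /* populate the 128-entry redirection table one dword (4 entries) at a
         * time; each entry steers one hash bucket to an Rx queue, with the
         * queue index positioned by the MAC-specific shift */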
2270         for (j = 0; j < (32 * 4); j++) {
2271                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2272                 if (shift2)
2273                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2274                 if ((j & 3) == 3)
2275                         wr32(E1000_RETA(j >> 2), reta.dword);
2276         }
2277
2278         /*
2279          * Disable raw packet checksumming so that RSS hash is placed in
2280          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2281          * offloads as they are enabled by default
2282          */
2283         rxcsum = rd32(E1000_RXCSUM);
2284         rxcsum |= E1000_RXCSUM_PCSD;
2285
2286         if (adapter->hw.mac.type >= e1000_82576)
2287                 /* Enable Receive Checksum Offload for SCTP */
2288                 rxcsum |= E1000_RXCSUM_CRCOFL;
2289
2290         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2291         wr32(E1000_RXCSUM, rxcsum);
2292
2293         /* If VMDq is enabled then we set the appropriate mode for that, else
2294          * we default to RSS so that an RSS hash is calculated per packet even
2295          * if we are only using one queue */
2296         if (adapter->vfs_allocated_count) {
2297                 if (hw->mac.type > e1000_82575) {
2298                         /* Set the default pool for the PF's first queue */
2299                         u32 vtctl = rd32(E1000_VT_CTL);
2300                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2301                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2302                         vtctl |= adapter->vfs_allocated_count <<
2303                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2304                         wr32(E1000_VT_CTL, vtctl);
2305                 }
2306                 if (adapter->num_rx_queues > 1)
2307                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2308                 else
2309                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2310         } else {
2311                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2312         }
2313         igb_vmm_control(adapter);
2314
2315         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2316                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2317         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2318                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2319         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2320                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2321         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2322                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2323
2324         wr32(E1000_MRQC, mrqc);
2325 }
2326
2327 /**
2328  * igb_setup_rctl - configure the receive control registers
2329  * @adapter: Board private structure
2330  **/
2331 void igb_setup_rctl(struct igb_adapter *adapter)
2332 {
2333         struct e1000_hw *hw = &adapter->hw;
2334         u32 rctl;
2335
2336         rctl = rd32(E1000_RCTL);
2337
2338         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2339         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2340
2341         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2342                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2343
2344         /*
2345          * enable stripping of CRC. It's unlikely this will break BMC
2346          * redirection as it did with e1000. Newer features require
2347          * that the HW strips the CRC.
2348          */
2349         rctl |= E1000_RCTL_SECRC;
2350
2351         /*
2352          * disable store bad packets and clear size bits.
2353          */
2354         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2355
2356         /* enable LPE; the maximum receive size is then enforced via RLPML (max_frame_size) */
2357         rctl |= E1000_RCTL_LPE;
2358
2359         /* disable queue 0 to prevent tail write w/o re-config */
2360         wr32(E1000_RXDCTL(0), 0);
2361
2362         /* Attention!!!  For SR-IOV PF driver operations you must enable
2363          * queue drop for all VF and PF queues to prevent head-of-line blocking
2364          * if an untrusted VF does not provide descriptors to hardware.
2365          */
2366         if (adapter->vfs_allocated_count) {
2367                 u32 vmolr;
2368
2369                 /* set all queue drop enable bits */
2370                 wr32(E1000_QDE, ALL_QUEUES);
2371
2372                 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2373                 if (rctl & E1000_RCTL_LPE)
2374                         vmolr |= E1000_VMOLR_LPE;
2375                 if (adapter->num_rx_queues > 1)
2376                         vmolr |= E1000_VMOLR_RSSE;
2377                 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2378         }
2379
2380         wr32(E1000_RCTL, rctl);
2381 }
2382
2383 /**
2384  * igb_rlpml_set - set maximum receive packet size
2385  * @adapter: board private structure
2386  *
2387  * Configure maximum receivable packet size.
2388  **/
2389 static void igb_rlpml_set(struct igb_adapter *adapter)
2390 {
2391         u32 max_frame_size = adapter->max_frame_size;
2392         struct e1000_hw *hw = &adapter->hw;
2393         u16 pf_id = adapter->vfs_allocated_count;
2394
2395         if (adapter->vlgrp)
2396                 max_frame_size += VLAN_TAG_SIZE;
2397
2398         /* if VFs are enabled we set RLPML to the largest possible request
2399          * size and set the VMOLR RLPML to the size we need */
2400         if (pf_id) {
2401                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2402                 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2403         }
2404
2405         wr32(E1000_RLPML, max_frame_size);
2406 }
2407
2408 /**
2409  * igb_configure_rx_ring - Configure a receive ring after Reset
2410  * @adapter: board private structure
2411  * @ring: receive ring to be configured
2412  *
2413  * Configure the Rx unit of the MAC after a reset.
2414  **/
2415 void igb_configure_rx_ring(struct igb_adapter *adapter,
2416                            struct igb_ring *ring)
2417 {
2418         struct e1000_hw *hw = &adapter->hw;
2419         u64 rdba = ring->dma;
2420         int reg_idx = ring->reg_idx;
2421         u32 srrctl, rxdctl;
2422
2423         /* disable the queue */
2424         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2425         wr32(E1000_RXDCTL(reg_idx),
2426                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2427
2428         /* Set DMA base address registers */
2429         wr32(E1000_RDBAL(reg_idx),
2430              rdba & 0x00000000ffffffffULL);
2431         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2432         wr32(E1000_RDLEN(reg_idx),
2433                        ring->count * sizeof(union e1000_adv_rx_desc));
2434
2435         /* initialize head and tail */
2436         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2437         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2438         writel(0, ring->head);
2439         writel(0, ring->tail);
2440
2441         /* set descriptor configuration */
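        /* small buffers use header split: headers land in the rx_buffer_len
         * sized buffer, payload in a half-page buffer capped at 16KB */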
2442         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2443                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2444                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2445 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2446                 srrctl |= IGB_RXBUFFER_16384 >>
2447                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2448 #else
2449                 srrctl |= (PAGE_SIZE / 2) >>
2450                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2451 #endif
2452                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2453         } else {
2454                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2455                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2456                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2457         }
2458
2459         wr32(E1000_SRRCTL(reg_idx), srrctl);
2460
2461         /* enable receive descriptor fetching */
2462         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2463         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2464         rxdctl &= 0xFFF00000;
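        /* program the prefetch, host and write-back thresholds in the low
         * bits that were just cleared */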
2465         rxdctl |= IGB_RX_PTHRESH;
2466         rxdctl |= IGB_RX_HTHRESH << 8;
2467         rxdctl |= IGB_RX_WTHRESH << 16;
2468         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2469 }
2470
2471 /**
2472  * igb_configure_rx - Configure receive Unit after Reset
2473  * @adapter: board private structure
2474  *
2475  * Configure the Rx unit of the MAC after a reset.
2476  **/
2477 static void igb_configure_rx(struct igb_adapter *adapter)
2478 {
2479         int i;
2480
2481         /* set UTA to appropriate mode */
2482         igb_set_uta(adapter);
2483
2484         /* set the correct pool for the PF default MAC address in entry 0 */
2485         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2486                          adapter->vfs_allocated_count);
2487
2488         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2489          * the Base and Length of the Rx Descriptor Ring */
2490         for (i = 0; i < adapter->num_rx_queues; i++)
2491                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2492 }
2493
2494 /**
2495  * igb_free_tx_resources - Free Tx Resources per Queue
2496  * @tx_ring: Tx descriptor ring for a specific queue
2497  *
2498  * Free all transmit software resources
2499  **/
2500 void igb_free_tx_resources(struct igb_ring *tx_ring)
2501 {
2502         igb_clean_tx_ring(tx_ring);
2503
2504         vfree(tx_ring->buffer_info);
2505         tx_ring->buffer_info = NULL;
2506
2507         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2508                             tx_ring->desc, tx_ring->dma);
2509
2510         tx_ring->desc = NULL;
2511 }
2512
2513 /**
2514  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2515  * @adapter: board private structure
2516  *
2517  * Free all transmit software resources
2518  **/
2519 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2520 {
2521         int i;
2522
2523         for (i = 0; i < adapter->num_tx_queues; i++)
2524                 igb_free_tx_resources(&adapter->tx_ring[i]);
2525 }
2526
2527 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2528                                     struct igb_buffer *buffer_info)
2529 {
2530         buffer_info->dma = 0;
2531         if (buffer_info->skb) {
2532                 skb_dma_unmap(&tx_ring->pdev->dev,
2533                               buffer_info->skb,
2534                               DMA_TO_DEVICE);
2535                 dev_kfree_skb_any(buffer_info->skb);
2536                 buffer_info->skb = NULL;
2537         }
2538         buffer_info->time_stamp = 0;
2539         /* buffer_info must be completely set up in the transmit path */
2540 }
2541
2542 /**
2543  * igb_clean_tx_ring - Free Tx Buffers
2544  * @tx_ring: ring to be cleaned
2545  **/
2546 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2547 {
2548         struct igb_buffer *buffer_info;
2549         unsigned long size;
2550         unsigned int i;
2551
2552         if (!tx_ring->buffer_info)
2553                 return;
2554         /* Free all the Tx ring sk_buffs */
2555
2556         for (i = 0; i < tx_ring->count; i++) {
2557                 buffer_info = &tx_ring->buffer_info[i];
2558                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2559         }
2560
2561         size = sizeof(struct igb_buffer) * tx_ring->count;
2562         memset(tx_ring->buffer_info, 0, size);
2563
2564         /* Zero out the descriptor ring */
2565
2566         memset(tx_ring->desc, 0, tx_ring->size);
2567
2568         tx_ring->next_to_use = 0;
2569         tx_ring->next_to_clean = 0;
2570
2571         writel(0, tx_ring->head);
2572         writel(0, tx_ring->tail);
2573 }
2574
2575 /**
2576  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2577  * @adapter: board private structure
2578  **/
2579 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2580 {
2581         int i;
2582
2583         for (i = 0; i < adapter->num_tx_queues; i++)
2584                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2585 }
2586
2587 /**
2588  * igb_free_rx_resources - Free Rx Resources
2589  * @rx_ring: ring to clean the resources from
2590  *
2591  * Free all receive software resources
2592  **/
2593 void igb_free_rx_resources(struct igb_ring *rx_ring)
2594 {
2595         igb_clean_rx_ring(rx_ring);
2596
2597         vfree(rx_ring->buffer_info);
2598         rx_ring->buffer_info = NULL;
2599
2600         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2601                             rx_ring->desc, rx_ring->dma);
2602
2603         rx_ring->desc = NULL;
2604 }
2605
2606 /**
2607  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2608  * @adapter: board private structure
2609  *
2610  * Free all receive software resources
2611  **/
2612 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2613 {
2614         int i;
2615
2616         for (i = 0; i < adapter->num_rx_queues; i++)
2617                 igb_free_rx_resources(&adapter->rx_ring[i]);
2618 }
2619
2620 /**
2621  * igb_clean_rx_ring - Free Rx Buffers per Queue
2622  * @rx_ring: ring to free buffers from
2623  **/
2624 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2625 {
2626         struct igb_buffer *buffer_info;
2627         unsigned long size;
2628         unsigned int i;
2629
2630         if (!rx_ring->buffer_info)
2631                 return;
2632         /* Free all the Rx ring sk_buffs */
2633         for (i = 0; i < rx_ring->count; i++) {
2634                 buffer_info = &rx_ring->buffer_info[i];
2635                 if (buffer_info->dma) {
2636                         pci_unmap_single(rx_ring->pdev,
2637                                          buffer_info->dma,
2638                                          rx_ring->rx_buffer_len,
2639                                          PCI_DMA_FROMDEVICE);
2640                         buffer_info->dma = 0;
2641                 }
2642
2643                 if (buffer_info->skb) {
2644                         dev_kfree_skb(buffer_info->skb);
2645                         buffer_info->skb = NULL;
2646                 }
2647                 if (buffer_info->page_dma) {
2648                         pci_unmap_page(rx_ring->pdev,
2649                                        buffer_info->page_dma,
2650                                        PAGE_SIZE / 2,
2651                                        PCI_DMA_FROMDEVICE);
2652                         buffer_info->page_dma = 0;
2653                 }
2654                 if (buffer_info->page) {
2655                         put_page(buffer_info->page);
2656                         buffer_info->page = NULL;
2657                         buffer_info->page_offset = 0;
2658                 }
2659         }
2660
2661         size = sizeof(struct igb_buffer) * rx_ring->count;
2662         memset(rx_ring->buffer_info, 0, size);
2663
2664         /* Zero out the descriptor ring */
2665         memset(rx_ring->desc, 0, rx_ring->size);
2666
2667         rx_ring->next_to_clean = 0;
2668         rx_ring->next_to_use = 0;
2669
2670         writel(0, rx_ring->head);
2671         writel(0, rx_ring->tail);
2672 }
2673
2674 /**
2675  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2676  * @adapter: board private structure
2677  **/
2678 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2679 {
2680         int i;
2681
2682         for (i = 0; i < adapter->num_rx_queues; i++)
2683                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2684 }
2685
2686 /**
2687  * igb_set_mac - Change the Ethernet Address of the NIC
2688  * @netdev: network interface device structure
2689  * @p: pointer to an address structure
2690  *
2691  * Returns 0 on success, negative on failure
2692  **/
2693 static int igb_set_mac(struct net_device *netdev, void *p)
2694 {
2695         struct igb_adapter *adapter = netdev_priv(netdev);
2696         struct e1000_hw *hw = &adapter->hw;
2697         struct sockaddr *addr = p;
2698
2699         if (!is_valid_ether_addr(addr->sa_data))
2700                 return -EADDRNOTAVAIL;
2701
2702         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2703         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2704
2705         /* set the correct pool for the new PF MAC address in entry 0 */
2706         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2707                          adapter->vfs_allocated_count);
2708
2709         return 0;
2710 }
2711
2712 /**
2713  * igb_write_mc_addr_list - write multicast addresses to MTA
2714  * @netdev: network interface device structure
2715  *
2716  * Writes multicast address list to the MTA hash table.
2717  * Returns: -ENOMEM on failure
2718  *          0 on no addresses written
2719  *          X on writing X addresses to MTA
2720  **/
2721 static int igb_write_mc_addr_list(struct net_device *netdev)
2722 {
2723         struct igb_adapter *adapter = netdev_priv(netdev);
2724         struct e1000_hw *hw = &adapter->hw;
2725         struct dev_mc_list *mc_ptr = netdev->mc_list;
2726         u8  *mta_list;
2727         u32 vmolr = 0;
2728         int i;
2729
2730         if (!netdev->mc_count) {
2731                 /* nothing to program, so clear mc list */
2732                 igb_update_mc_addr_list(hw, NULL, 0);
2733                 igb_restore_vf_multicasts(adapter);
2734                 return 0;
2735         }
2736
2737         mta_list = kzalloc(netdev->mc_count * ETH_ALEN, GFP_ATOMIC);
2738         if (!mta_list)
2739                 return -ENOMEM;
2740
2741         /* set vmolr receive overflow multicast bit */
2742         vmolr |= E1000_VMOLR_ROMPE;
2743
2744         /* The shared function expects a packed array of only addresses. */
2745         mc_ptr = netdev->mc_list;
2746
2747         for (i = 0; i < netdev->mc_count; i++) {
2748                 if (!mc_ptr)
2749                         break;
2750                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2751                 mc_ptr = mc_ptr->next;
2752         }
2753         igb_update_mc_addr_list(hw, mta_list, i);
2754         kfree(mta_list);
2755
2756         return netdev->mc_count;
2757 }
2758
2759 /**
2760  * igb_write_uc_addr_list - write unicast addresses to RAR table
2761  * @netdev: network interface device structure
2762  *
2763  * Writes unicast address list to the RAR table.
2764  * Returns: -ENOMEM on failure/insufficient address space
2765  *          0 on no addresses written
2766  *          X on writing X addresses to the RAR table
2767  **/
2768 static int igb_write_uc_addr_list(struct net_device *netdev)
2769 {
2770         struct igb_adapter *adapter = netdev_priv(netdev);
2771         struct e1000_hw *hw = &adapter->hw;
2772         unsigned int vfn = adapter->vfs_allocated_count;
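        /* RAR 0 holds the PF default MAC and the top entries hold one MAC per
         * VF, so only the remaining RARs are free for extra unicast addresses */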
2773         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2774         int count = 0;
2775
2776         /* return ENOMEM indicating insufficient memory for addresses */
2777         if (netdev->uc.count > rar_entries)
2778                 return -ENOMEM;
2779
2780         if (netdev->uc.count && rar_entries) {
2781                 struct netdev_hw_addr *ha;
2782                 list_for_each_entry(ha, &netdev->uc.list, list) {
2783                         if (!rar_entries)
2784                                 break;
2785                         igb_rar_set_qsel(adapter, ha->addr,
2786                                          rar_entries--,
2787                                          vfn);
2788                         count++;
2789                 }
2790         }
2791         /* write the addresses in reverse order to avoid write combining */
2792         for (; rar_entries > 0 ; rar_entries--) {
2793                 wr32(E1000_RAH(rar_entries), 0);
2794                 wr32(E1000_RAL(rar_entries), 0);
2795         }
2796         wrfl();
2797
2798         return count;
2799 }
2800
2801 /**
2802  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2803  * @netdev: network interface device structure
2804  *
2805  * The set_rx_mode entry point is called whenever the unicast or multicast
2806  * address lists or the network interface flags are updated.  This routine is
2807  * responsible for configuring the hardware for proper unicast, multicast,
2808  * promiscuous mode, and all-multi behavior.
2809  **/
2810 static void igb_set_rx_mode(struct net_device *netdev)
2811 {
2812         struct igb_adapter *adapter = netdev_priv(netdev);
2813         struct e1000_hw *hw = &adapter->hw;
2814         unsigned int vfn = adapter->vfs_allocated_count;
2815         u32 rctl, vmolr = 0;
2816         int count;
2817
2818         /* Check for Promiscuous and All Multicast modes */
2819         rctl = rd32(E1000_RCTL);
2820
2821         /* clear the affected bits */
2822         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2823
2824         if (netdev->flags & IFF_PROMISC) {
2825                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2826                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2827         } else {
2828                 if (netdev->flags & IFF_ALLMULTI) {
2829                         rctl |= E1000_RCTL_MPE;
2830                         vmolr |= E1000_VMOLR_MPME;
2831                 } else {
2832                         /*
2833                          * Write addresses to the MTA; if the attempt fails
2834                          * then we should just turn on promiscuous mode so
2835                          * that we can at least receive multicast traffic
2836                          */
2837                         count = igb_write_mc_addr_list(netdev);
2838                         if (count < 0) {
2839                                 rctl |= E1000_RCTL_MPE;
2840                                 vmolr |= E1000_VMOLR_MPME;
2841                         } else if (count) {
2842                                 vmolr |= E1000_VMOLR_ROMPE;
2843                         }
2844                 }
2845                 /*
2846                  * Write addresses to available RAR registers; if there is not
2847                  * sufficient space to store all the addresses then enable
2848                  * unicast promiscuous mode
2849                  */
2850                 count = igb_write_uc_addr_list(netdev);
2851                 if (count < 0) {
2852                         rctl |= E1000_RCTL_UPE;
2853                         vmolr |= E1000_VMOLR_ROPE;
2854                 }
2855                 rctl |= E1000_RCTL_VFE;
2856         }
2857         wr32(E1000_RCTL, rctl);
2858
2859         /*
2860          * In order to support SR-IOV and eventually VMDq it is necessary to set
2861          * the VMOLR to enable the appropriate modes.  Without this workaround
2862          * we will have issues with VLAN tag stripping not being done for frames
2863          * that are only arriving because we are the default pool
2864          */
2865         if (hw->mac.type < e1000_82576)
2866                 return;
2867
2868         vmolr |= rd32(E1000_VMOLR(vfn)) &
2869                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2870         wr32(E1000_VMOLR(vfn), vmolr);
2871         igb_restore_vf_multicasts(adapter);
2872 }
2873
2874 /* Need to wait a few seconds after link up to get diagnostic information from
2875  * the phy */
2876 static void igb_update_phy_info(unsigned long data)
2877 {
2878         struct igb_adapter *adapter = (struct igb_adapter *) data;
2879         igb_get_phy_info(&adapter->hw);
2880 }
2881
2882 /**
2883  * igb_has_link - check shared code for link and determine up/down
2884  * @adapter: pointer to driver private info
2885  **/
2886 static bool igb_has_link(struct igb_adapter *adapter)
2887 {
2888         struct e1000_hw *hw = &adapter->hw;
2889         bool link_active = false;
2890         s32 ret_val = 0;
2891
2892         /* get_link_status is set on LSC (link status) interrupt or
2893          * rx sequence error interrupt.  get_link_status will stay
2894          * true until check_for_link establishes link; this applies to
2895          * copper adapters ONLY
2896          */
2897         switch (hw->phy.media_type) {
2898         case e1000_media_type_copper:
2899                 if (hw->mac.get_link_status) {
2900                         ret_val = hw->mac.ops.check_for_link(hw);
2901                         link_active = !hw->mac.get_link_status;
2902                 } else {
2903                         link_active = true;
2904                 }
2905                 break;
2906         case e1000_media_type_internal_serdes:
2907                 ret_val = hw->mac.ops.check_for_link(hw);
2908                 link_active = hw->mac.serdes_has_link;
2909                 break;
2910         default:
2911         case e1000_media_type_unknown:
2912                 break;
2913         }
2914
2915         return link_active;
2916 }
2917
2918 /**
2919  * igb_watchdog - Timer Call-back
2920  * @data: pointer to adapter cast into an unsigned long
2921  **/
2922 static void igb_watchdog(unsigned long data)
2923 {
2924         struct igb_adapter *adapter = (struct igb_adapter *)data;
2925         /* Do the rest outside of interrupt context */
2926         schedule_work(&adapter->watchdog_task);
2927 }
2928
2929 static void igb_watchdog_task(struct work_struct *work)
2930 {
2931         struct igb_adapter *adapter = container_of(work,
2932                                         struct igb_adapter, watchdog_task);
2933         struct e1000_hw *hw = &adapter->hw;
2934         struct net_device *netdev = adapter->netdev;
2935         struct igb_ring *tx_ring = adapter->tx_ring;
2936         u32 link;
2937         int i;
2938
2939         link = igb_has_link(adapter);
2940         if ((netif_carrier_ok(netdev)) && link)
2941                 goto link_up;
2942
2943         if (link) {
2944                 if (!netif_carrier_ok(netdev)) {
2945                         u32 ctrl;
2946                         hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2947                                                    &adapter->link_speed,
2948                                                    &adapter->link_duplex);
2949
2950                         ctrl = rd32(E1000_CTRL);
2951                         /* Link status message must follow this format */
2952                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2953                                  "Flow Control: %s\n",
2954                                  netdev->name,
2955                                  adapter->link_speed,
2956                                  adapter->link_duplex == FULL_DUPLEX ?
2957                                  "Full Duplex" : "Half Duplex",
2958                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2959                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2960                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2961                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2962
2963                         /* tweak tx_queue_len according to speed/duplex and
2964                          * adjust the timeout factor */
2965                         netdev->tx_queue_len = adapter->tx_queue_len;
2966                         adapter->tx_timeout_factor = 1;
2967                         switch (adapter->link_speed) {
2968                         case SPEED_10:
2969                                 netdev->tx_queue_len = 10;
2970                                 adapter->tx_timeout_factor = 14;
2971                                 break;
2972                         case SPEED_100:
2973                                 netdev->tx_queue_len = 100;
2974                                 /* maybe add some timeout factor ? */
2975                                 break;
2976                         }
2977
2978                         netif_carrier_on(netdev);
2979
2980                         igb_ping_all_vfs(adapter);
2981
2982                         /* link state has changed, schedule phy info update */
2983                         if (!test_bit(__IGB_DOWN, &adapter->state))
2984                                 mod_timer(&adapter->phy_info_timer,
2985                                           round_jiffies(jiffies + 2 * HZ));
2986                 }
2987         } else {
2988                 if (netif_carrier_ok(netdev)) {
2989                         adapter->link_speed = 0;
2990                         adapter->link_duplex = 0;
2991                         /* Link status message must follow this format */
2992                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2993                                netdev->name);
2994                         netif_carrier_off(netdev);
2995
2996                         igb_ping_all_vfs(adapter);
2997
2998                         /* link state has changed, schedule phy info update */
2999                         if (!test_bit(__IGB_DOWN, &adapter->state))
3000                                 mod_timer(&adapter->phy_info_timer,
3001                                           round_jiffies(jiffies + 2 * HZ));
3002                 }
3003         }
3004
3005 link_up:
3006         igb_update_stats(adapter);
3007
3008         hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
3009         adapter->tpt_old = adapter->stats.tpt;
3010         hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
3011         adapter->colc_old = adapter->stats.colc;
3012
3013         adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
3014         adapter->gorc_old = adapter->stats.gorc;
3015         adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
3016         adapter->gotc_old = adapter->stats.gotc;
3017
3018         igb_update_adaptive(&adapter->hw);
3019
3020         if (!netif_carrier_ok(netdev)) {
3021                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3022                         /* We've lost link, so the controller stops DMA,
3023                          * but we've got queued Tx work that's never going
3024                          * to get done, so reset controller to flush Tx.
3025                          * (Do the reset outside of interrupt context). */
3026                         adapter->tx_timeout_count++;
3027                         schedule_work(&adapter->reset_task);
3028                         /* return immediately since reset is imminent */
3029                         return;
3030                 }
3031         }
3032
3033         /* Cause software interrupt to ensure rx ring is cleaned */
3034         if (adapter->msix_entries) {
3035                 u32 eics = 0;
3036                 for (i = 0; i < adapter->num_q_vectors; i++) {
3037                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3038                         eics |= q_vector->eims_value;
3039                 }
3040                 wr32(E1000_EICS, eics);
3041         } else {
3042                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3043         }
3044
3045         /* Force detection of hung controller every watchdog period */
3046         tx_ring->detect_tx_hung = true;
3047
3048         /* Reset the timer */
3049         if (!test_bit(__IGB_DOWN, &adapter->state))
3050                 mod_timer(&adapter->watchdog_timer,
3051                           round_jiffies(jiffies + 2 * HZ));
3052 }
3053
3054 enum latency_range {
3055         lowest_latency = 0,
3056         low_latency = 1,
3057         bulk_latency = 2,
3058         latency_invalid = 255
3059 };
3060
3061 /**
3062  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3063  *
3064  *      Stores a new ITR value based strictly on packet size.  This
3065  *      algorithm is less sophisticated than that used in igb_update_itr,
3066  *      due to the difficulty of synchronizing statistics across multiple
3067  *      receive rings.  The divisors and thresholds used by this function
3068  *      were determined based on theoretical maximum wire speed and testing
3069  *      data, in order to minimize response time while increasing bulk
3070  *      throughput.
3071  *      This functionality is controlled by the InterruptThrottleRate module
3072  *      parameter (see igb_param.c)
3073  *      NOTE:  This function is called only when operating in a multiqueue
3074  *             receive environment.
3075  * @q_vector: pointer to q_vector
3076  **/
3077 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3078 {
3079         int new_val = q_vector->itr_val;
3080         int avg_wire_size = 0;
3081         struct igb_adapter *adapter = q_vector->adapter;
3082
3083         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3084          * ints/sec (an ITR value of 976).
3085          */
3086         if (adapter->link_speed != SPEED_1000) {
3087                 new_val = 976;
3088                 goto set_itr_val;
3089         }
3090
3091         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3092                 struct igb_ring *ring = q_vector->rx_ring;
3093                 avg_wire_size = ring->total_bytes / ring->total_packets;
3094         }
3095
3096         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3097                 struct igb_ring *ring = q_vector->tx_ring;
3098                 avg_wire_size = max_t(u32, avg_wire_size,
3099                                       (ring->total_bytes /
3100                                        ring->total_packets));
3101         }
3102
3103         /* if avg_wire_size isn't set no work was done */
3104         if (!avg_wire_size)
3105                 goto clear_counts;
3106
3107         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3108         avg_wire_size += 24;
3109
3110         /* Don't starve jumbo frames */
3111         avg_wire_size = min(avg_wire_size, 3000);
3112
3113         /* Give a little boost to mid-size frames */
3114         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3115                 new_val = avg_wire_size / 3;
3116         else
3117                 new_val = avg_wire_size / 2;
3118
3119 set_itr_val:
3120         if (new_val != q_vector->itr_val) {
3121                 q_vector->itr_val = new_val;
3122                 q_vector->set_itr = 1;
3123         }
3124 clear_counts:
3125         if (q_vector->rx_ring) {
3126                 q_vector->rx_ring->total_bytes = 0;
3127                 q_vector->rx_ring->total_packets = 0;
3128         }
3129         if (q_vector->tx_ring) {
3130                 q_vector->tx_ring->total_bytes = 0;
3131                 q_vector->tx_ring->total_packets = 0;
3132         }
3133 }
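
/*
 * Worked examples of the heuristic above (values follow directly from the
 * code): an average 1500-byte frame gives avg_wire_size = 1500 + 24 = 1524,
 * which falls outside the 300..1200 "boost" window, so new_val = 1524 / 2
 * = 762.  An average 600-byte frame gives 624, inside the window, so
 * new_val = 624 / 3 = 208, i.e. a shorter interrupt delay for mid-size
 * frames.
 */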
3134
3135 /**
3136  * igb_update_itr - update the dynamic ITR value based on statistics
3137  *      Stores a new ITR value based on packets and byte
3138  *      counts during the last interrupt.  The advantage of per interrupt
3139  *      computation is faster updates and more accurate ITR for the current
3140  *      traffic pattern.  Constants in this function were computed
3141  *      based on theoretical maximum wire speed and thresholds were set based
3142  *      on testing data as well as attempting to minimize response time
3143  *      while increasing bulk throughput.
3144  *      This functionality is controlled by the InterruptThrottleRate module
3145  *      parameter (see igb_param.c)
3146  *      NOTE:  These calculations are only valid when operating in a single-
3147  *             queue environment.
3148  * @adapter: pointer to adapter
3149  * @itr_setting: current q_vector->itr_val
3150  * @packets: the number of packets during this measurement interval
3151  * @bytes: the number of bytes during this measurement interval
3152  **/
3153 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3154                                    int packets, int bytes)
3155 {
3156         unsigned int retval = itr_setting;
3157
3158         if (packets == 0)
3159                 goto update_itr_done;
3160
3161         switch (itr_setting) {
3162         case lowest_latency:
3163                 /* handle TSO and jumbo frames */
3164                 if (bytes/packets > 8000)
3165                         retval = bulk_latency;
3166                 else if ((packets < 5) && (bytes > 512))
3167                         retval = low_latency;
3168                 break;
3169         case low_latency:  /* 50 usec aka 20000 ints/s */
3170                 if (bytes > 10000) {
3171                         /* this if handles the TSO accounting */
3172                         if (bytes/packets > 8000) {
3173                                 retval = bulk_latency;
3174                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3175                                 retval = bulk_latency;
3176                         } else if ((packets > 35)) {
3177                                 retval = lowest_latency;
3178                         }
3179                 } else if (bytes/packets > 2000) {
3180                         retval = bulk_latency;
3181                 } else if (packets <= 2 && bytes < 512) {
3182                         retval = lowest_latency;
3183                 }
3184                 break;
3185         case bulk_latency: /* 250 usec aka 4000 ints/s */
3186                 if (bytes > 25000) {
3187                         if (packets > 35)
3188                                 retval = low_latency;
3189                 } else if (bytes < 1500) {
3190                         retval = low_latency;
3191                 }
3192                 break;
3193         }
3194
3195 update_itr_done:
3196         return retval;
3197 }
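
/*
 * Example of the range transitions above: a vector sitting at low_latency
 * that moved 40 packets / 30,000 bytes in the last interval takes the
 * bytes > 10000 branch; bytes/packets = 750 and packets > 35, so it steps
 * down to lowest_latency.  The same vector seeing a handful of large TSO
 * sends with bytes/packets > 8000 would instead move up to bulk_latency.
 */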
3198
3199 static void igb_set_itr(struct igb_adapter *adapter)
3200 {
3201         struct igb_q_vector *q_vector = adapter->q_vector[0];
3202         u16 current_itr;
3203         u32 new_itr = q_vector->itr_val;
3204
3205         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3206         if (adapter->link_speed != SPEED_1000) {
3207                 current_itr = 0;
3208                 new_itr = 4000;
3209                 goto set_itr_now;
3210         }
3211
3212         adapter->rx_itr = igb_update_itr(adapter,
3213                                     adapter->rx_itr,
3214                                     adapter->rx_ring->total_packets,
3215                                     adapter->rx_ring->total_bytes);
3216
3217         adapter->tx_itr = igb_update_itr(adapter,
3218                                     adapter->tx_itr,
3219                                     adapter->tx_ring->total_packets,
3220                                     adapter->tx_ring->total_bytes);
3221         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3222
3223         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3224         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3225                 current_itr = low_latency;
3226
3227         switch (current_itr) {
3228         /* counts and packets in update_itr are dependent on these numbers */
3229         case lowest_latency:
3230                 new_itr = 56;  /* aka 70,000 ints/sec */
3231                 break;
3232         case low_latency:
3233                 new_itr = 196; /* aka 20,000 ints/sec */
3234                 break;
3235         case bulk_latency:
3236                 new_itr = 980; /* aka 4,000 ints/sec */
3237                 break;
3238         default:
3239                 break;
3240         }
3241
3242 set_itr_now:
3243         adapter->rx_ring->total_bytes = 0;
3244         adapter->rx_ring->total_packets = 0;
3245         adapter->tx_ring->total_bytes = 0;
3246         adapter->tx_ring->total_packets = 0;
3247
3248         if (new_itr != q_vector->itr_val) {
3249                 /* this attempts to bias the interrupt rate towards Bulk
3250                  * by adding intermediate steps when interrupt rate is
3251                  * increasing */
3252                 new_itr = new_itr > q_vector->itr_val ?
3253                              max((new_itr * q_vector->itr_val) /
3254                                  (new_itr + (q_vector->itr_val >> 2)),
3255                                  new_itr) :
3256                              new_itr;
3257                 /* Don't write the value here; it resets the adapter's
3258                  * internal timer, and causes us to delay far longer than
3259                  * we should between interrupts.  Instead, we write the ITR
3260                  * value at the beginning of the next interrupt so the timing
3261                  * ends up being correct.
3262                  */
3263                 q_vector->itr_val = new_itr;
3264                 q_vector->set_itr = 1;
3265         }
3266
3267         return;
3268 }
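
/*
 * The latency-to-ITR mapping above implies a timer granularity of roughly
 * 0.25 usec per count: 56 counts ~= 14 usec (70,000 ints/sec), 196 ~= 50
 * usec (20,000 ints/sec) and 980 ~= 250 usec (4,000 ints/sec).  This is
 * inferred from the constants and comments in the switch statement rather
 * than stated elsewhere in this file.
 */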
3269
3270 #define IGB_TX_FLAGS_CSUM               0x00000001
3271 #define IGB_TX_FLAGS_VLAN               0x00000002
3272 #define IGB_TX_FLAGS_TSO                0x00000004
3273 #define IGB_TX_FLAGS_IPV4               0x00000008
3274 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3275 #define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
3276 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3277
3278 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3279                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3280 {
3281         struct e1000_adv_tx_context_desc *context_desc;
3282         unsigned int i;
3283         int err;
3284         struct igb_buffer *buffer_info;
3285         u32 info = 0, tu_cmd = 0;
3286         u32 mss_l4len_idx, l4len;
3287         *hdr_len = 0;
3288
3289         if (skb_header_cloned(skb)) {
3290                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3291                 if (err)
3292                         return err;
3293         }
3294
3295         l4len = tcp_hdrlen(skb);
3296         *hdr_len += l4len;
3297
3298         if (skb->protocol == htons(ETH_P_IP)) {
3299                 struct iphdr *iph = ip_hdr(skb);
3300                 iph->tot_len = 0;
3301                 iph->check = 0;
3302                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3303                                                          iph->daddr, 0,
3304                                                          IPPROTO_TCP,
3305                                                          0);
3306         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3307                 ipv6_hdr(skb)->payload_len = 0;
3308                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3309                                                        &ipv6_hdr(skb)->daddr,
3310                                                        0, IPPROTO_TCP, 0);
3311         }
3312
3313         i = tx_ring->next_to_use;
3314
3315         buffer_info = &tx_ring->buffer_info[i];
3316         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3317         /* VLAN MACLEN IPLEN */
3318         if (tx_flags & IGB_TX_FLAGS_VLAN)
3319                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3320         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3321         *hdr_len += skb_network_offset(skb);
3322         info |= skb_network_header_len(skb);
3323         *hdr_len += skb_network_header_len(skb);
3324         context_desc->vlan_macip_lens = cpu_to_le32(info);
3325
3326         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3327         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3328
3329         if (skb->protocol == htons(ETH_P_IP))
3330                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3331         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3332
3333         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3334
3335         /* MSS L4LEN IDX */
3336         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3337         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3338
3339         /* For 82575, context index must be unique per ring. */
3340         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3341                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3342
3343         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3344         context_desc->seqnum_seed = 0;
3345
3346         buffer_info->time_stamp = jiffies;
3347         buffer_info->next_to_watch = i;
3348         buffer_info->dma = 0;
3349         i++;
3350         if (i == tx_ring->count)
3351                 i = 0;
3352
3353         tx_ring->next_to_use = i;
3354
3355         return true;
3356 }
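
/*
 * Note on the hdr_len value produced above: it is the sum of the L2 header
 * (skb_network_offset), the IP header (skb_network_header_len) and the TCP
 * header (tcp_hdrlen), i.e. the bytes replicated in front of every TSO
 * segment; igb_tx_queue_adv later reports skb->len minus this value as the
 * payload length in olinfo_status.
 */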
3357
3358 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3359                                    struct sk_buff *skb, u32 tx_flags)
3360 {
3361         struct e1000_adv_tx_context_desc *context_desc;
3362         struct pci_dev *pdev = tx_ring->pdev;
3363         struct igb_buffer *buffer_info;
3364         u32 info = 0, tu_cmd = 0;
3365         unsigned int i;
3366
3367         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3368             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3369                 i = tx_ring->next_to_use;
3370                 buffer_info = &tx_ring->buffer_info[i];
3371                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3372
3373                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3374                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3375                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3376                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3377                         info |= skb_network_header_len(skb);
3378
3379                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3380
3381                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3382
3383                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3384                         __be16 protocol;
3385
3386                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3387                                 const struct vlan_ethhdr *vhdr =
3388                                           (const struct vlan_ethhdr*)skb->data;
3389
3390                                 protocol = vhdr->h_vlan_encapsulated_proto;
3391                         } else {
3392                                 protocol = skb->protocol;
3393                         }
3394
3395                         switch (protocol) {
3396                         case cpu_to_be16(ETH_P_IP):
3397                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3398                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3399                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3400                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3401                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3402                                 break;
3403                         case cpu_to_be16(ETH_P_IPV6):
3404                                 /* XXX what about other V6 headers?? */
3405                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3406                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3407                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3408                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3409                                 break;
3410                         default:
3411                                 if (unlikely(net_ratelimit()))
3412                                         dev_warn(&pdev->dev,
3413                                             "partial checksum but proto=%x!\n",
3414                                             skb->protocol);
3415                                 break;
3416                         }
3417                 }
3418
3419                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3420                 context_desc->seqnum_seed = 0;
3421                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3422                         context_desc->mss_l4len_idx =
3423                                 cpu_to_le32(tx_ring->reg_idx << 4);
3424
3425                 buffer_info->time_stamp = jiffies;
3426                 buffer_info->next_to_watch = i;
3427                 buffer_info->dma = 0;
3428
3429                 i++;
3430                 if (i == tx_ring->count)
3431                         i = 0;
3432                 tx_ring->next_to_use = i;
3433
3434                 return true;
3435         }
3436         return false;
3437 }
3438
3439 #define IGB_MAX_TXD_PWR 16
3440 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3441
3442 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3443                                  unsigned int first)
3444 {
3445         struct igb_buffer *buffer_info;
3446         struct pci_dev *pdev = tx_ring->pdev;
3447         unsigned int len = skb_headlen(skb);
3448         unsigned int count = 0, i;
3449         unsigned int f;
3450         dma_addr_t *map;
3451
3452         i = tx_ring->next_to_use;
3453
3454         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3455                 dev_err(&pdev->dev, "TX DMA map failed\n");
3456                 return 0;
3457         }
3458
3459         map = skb_shinfo(skb)->dma_maps;
3460
3461         buffer_info = &tx_ring->buffer_info[i];
3462         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3463         buffer_info->length = len;
3464         /* set time_stamp *before* dma to help avoid a possible race */
3465         buffer_info->time_stamp = jiffies;
3466         buffer_info->next_to_watch = i;
3467         buffer_info->dma = skb_shinfo(skb)->dma_head;
3468
3469         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3470                 struct skb_frag_struct *frag;
3471
3472                 i++;
3473                 if (i == tx_ring->count)
3474                         i = 0;
3475
3476                 frag = &skb_shinfo(skb)->frags[f];
3477                 len = frag->size;
3478
3479                 buffer_info = &tx_ring->buffer_info[i];
3480                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3481                 buffer_info->length = len;
3482                 buffer_info->time_stamp = jiffies;
3483                 buffer_info->next_to_watch = i;
3484                 buffer_info->dma = map[count];
3485                 count++;
3486         }
3487
3488         tx_ring->buffer_info[i].skb = skb;
3489         tx_ring->buffer_info[first].next_to_watch = i;
3490
3491         return count + 1;
3492 }
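
/*
 * igb_tx_map_adv returns the number of descriptors consumed: one for the
 * linear skb data plus one per page fragment (hence count + 1 above).  A
 * return of 0 means skb_dma_map failed; the caller unwinds by resetting
 * next_to_use back to 'first' and freeing the skb.
 */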
3493
3494 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3495                                     int tx_flags, int count, u32 paylen,
3496                                     u8 hdr_len)
3497 {
3498         union e1000_adv_tx_desc *tx_desc = NULL;
3499         struct igb_buffer *buffer_info;
3500         u32 olinfo_status = 0, cmd_type_len;
3501         unsigned int i;
3502
3503         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3504                         E1000_ADVTXD_DCMD_DEXT);
3505
3506         if (tx_flags & IGB_TX_FLAGS_VLAN)
3507                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3508
3509         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3510                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3511
3512         if (tx_flags & IGB_TX_FLAGS_TSO) {
3513                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3514
3515                 /* insert tcp checksum */
3516                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3517
3518                 /* insert ip checksum */
3519                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3520                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3521
3522         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3523                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3524         }
3525
3526         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3527             (tx_flags & (IGB_TX_FLAGS_CSUM |
3528                          IGB_TX_FLAGS_TSO |
3529                          IGB_TX_FLAGS_VLAN)))
3530                 olinfo_status |= tx_ring->reg_idx << 4;
3531
3532         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3533
3534         i = tx_ring->next_to_use;
3535         while (count--) {
3536                 buffer_info = &tx_ring->buffer_info[i];
3537                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3538                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3539                 tx_desc->read.cmd_type_len =
3540                         cpu_to_le32(cmd_type_len | buffer_info->length);
3541                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3542                 i++;
3543                 if (i == tx_ring->count)
3544                         i = 0;
3545         }
3546
3547         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3548         /* Force memory writes to complete before letting h/w
3549          * know there are new descriptors to fetch.  (Only
3550          * applicable for weak-ordered memory model archs,
3551          * such as IA-64). */
3552         wmb();
3553
3554         tx_ring->next_to_use = i;
3555         writel(i, tx_ring->tail);
3556         /* we need this if more than one processor can write to our tail
3557          * at a time; it synchronizes IO on IA64/Altix systems */
3558         mmiowb();
3559 }
3560
3561 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3562 {
3563         struct net_device *netdev = tx_ring->netdev;
3564
3565         netif_stop_subqueue(netdev, tx_ring->queue_index);
3566
3567         /* Herbert's original patch had:
3568          *  smp_mb__after_netif_stop_queue();
3569          * but since that doesn't exist yet, just open code it. */
3570         smp_mb();
3571
3572         /* We need to check again in case another CPU has just
3573          * made room available. */
3574         if (igb_desc_unused(tx_ring) < size)
3575                 return -EBUSY;
3576
3577         /* A reprieve! */
3578         netif_wake_subqueue(netdev, tx_ring->queue_index);
3579         tx_ring->tx_stats.restart_queue++;
3580         return 0;
3581 }
3582
3583 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3584 {
3585         if (igb_desc_unused(tx_ring) >= size)
3586                 return 0;
3587         return __igb_maybe_stop_tx(tx_ring, size);
3588 }
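
/*
 * The stop/re-check sequence in __igb_maybe_stop_tx is the usual lockless
 * pattern: stop the subqueue, issue a full memory barrier, then re-read the
 * free descriptor count.  If the clean-up path freed descriptors in the
 * meantime the queue is woken again immediately and restart_queue is
 * bumped, instead of leaving transmit stalled.
 */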
3589
3590 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3591                                     struct igb_ring *tx_ring)
3592 {
3593         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3594         unsigned int first;
3595         unsigned int tx_flags = 0;
3596         u8 hdr_len = 0;
3597         int count = 0;
3598         int tso = 0;
3599         union skb_shared_tx *shtx;
3600
3601         /* need: 1 descriptor per page,
3602          *       + 2 desc gap to keep tail from touching head,
3603          *       + 1 desc for skb->data,
3604          *       + 1 desc for context descriptor,
3605          * otherwise try next time */
3606         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3607                 /* this is a hard error */
3608                 return NETDEV_TX_BUSY;
3609         }
3610
3611         /*
3612          * TODO: check that there currently is no other packet with
3613          * time stamping in the queue
3614          *
3615          * When doing time stamping, keep the connection to the socket
3616          * a while longer: it is still needed by skb_hwtstamp_tx(),
3617          * called either in igb_tx_hwtstamp() or by our caller when
3618          * doing software time stamping.
3619          */
3620         shtx = skb_tx(skb);
3621         if (unlikely(shtx->hardware)) {
3622                 shtx->in_progress = 1;
3623                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3624         }
3625
3626         if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3627                 tx_flags |= IGB_TX_FLAGS_VLAN;
3628                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3629         }
3630
3631         if (skb->protocol == htons(ETH_P_IP))
3632                 tx_flags |= IGB_TX_FLAGS_IPV4;
3633
3634         first = tx_ring->next_to_use;
3635         if (skb_is_gso(skb)) {
3636                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3637                 if (tso < 0) {
3638                         dev_kfree_skb_any(skb);
3639                         return NETDEV_TX_OK;
3640                 }
3641         }
3642
3643         if (tso)
3644                 tx_flags |= IGB_TX_FLAGS_TSO;
3645         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3646                  (skb->ip_summed == CHECKSUM_PARTIAL))
3647                 tx_flags |= IGB_TX_FLAGS_CSUM;
3648
3649         /*
3650          * count reflects descriptors mapped; if 0 then a mapping error
3651          * has occurred and we need to rewind the descriptor queue
3652          */
3653         count = igb_tx_map_adv(tx_ring, skb, first);
3654
3655         if (!count) {
3656                 dev_kfree_skb_any(skb);
3657                 tx_ring->buffer_info[first].time_stamp = 0;
3658                 tx_ring->next_to_use = first;
3659                 return NETDEV_TX_OK;
3660         }
3661
3662         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3663
3664         /* Make sure there is space in the ring for the next send. */
3665         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3666
3667         return NETDEV_TX_OK;
3668 }
3669
3670 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3671                                       struct net_device *netdev)
3672 {
3673         struct igb_adapter *adapter = netdev_priv(netdev);
3674         struct igb_ring *tx_ring;
3675         int r_idx = 0;
3676
3677         if (test_bit(__IGB_DOWN, &adapter->state)) {
3678                 dev_kfree_skb_any(skb);
3679                 return NETDEV_TX_OK;
3680         }
3681
3682         if (skb->len <= 0) {
3683                 dev_kfree_skb_any(skb);
3684                 return NETDEV_TX_OK;
3685         }
3686
3687         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3688         tx_ring = adapter->multi_tx_table[r_idx];
3689
3690         /* This goes back to the question of how to logically map a tx queue
3691          * to a flow.  Right now, performance is impacted slightly negatively
3692          * if using multiple tx queues.  If the stack breaks away from a
3693          * single qdisc implementation, we can look at this again. */
3694         return igb_xmit_frame_ring_adv(skb, tx_ring);
3695 }
3696
3697 /**
3698  * igb_tx_timeout - Respond to a Tx Hang
3699  * @netdev: network interface device structure
3700  **/
3701 static void igb_tx_timeout(struct net_device *netdev)
3702 {
3703         struct igb_adapter *adapter = netdev_priv(netdev);
3704         struct e1000_hw *hw = &adapter->hw;
3705
3706         /* Do the reset outside of interrupt context */
3707         adapter->tx_timeout_count++;
3708         schedule_work(&adapter->reset_task);
3709         wr32(E1000_EICS,
3710              (adapter->eims_enable_mask & ~adapter->eims_other));
3711 }
3712
3713 static void igb_reset_task(struct work_struct *work)
3714 {
3715         struct igb_adapter *adapter;
3716         adapter = container_of(work, struct igb_adapter, reset_task);
3717
3718         igb_reinit_locked(adapter);
3719 }
3720
3721 /**
3722  * igb_get_stats - Get System Network Statistics
3723  * @netdev: network interface device structure
3724  *
3725  * Returns the address of the device statistics structure.
3726  * The statistics are actually updated from the timer callback.
3727  **/
3728 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3729 {
3730         /* only return the current stats */
3731         return &netdev->stats;
3732 }
3733
3734 /**
3735  * igb_change_mtu - Change the Maximum Transfer Unit
3736  * @netdev: network interface device structure
3737  * @new_mtu: new value for maximum frame size
3738  *
3739  * Returns 0 on success, negative on failure
3740  **/
3741 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3742 {
3743         struct igb_adapter *adapter = netdev_priv(netdev);
3744         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3745         u32 rx_buffer_len, i;
3746
3747         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3748             (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3749                 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3750                 return -EINVAL;
3751         }
3752
3753         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3754                 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3755                 return -EINVAL;
3756         }
3757
3758         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3759                 msleep(1);
3760
3761         /* igb_down has a dependency on max_frame_size */
3762         adapter->max_frame_size = max_frame;
3763         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3764          * means we reserve 2 more; this pushes us to allocate from the next
3765          * larger slab size.
3766          * i.e. RXBUFFER_2048 --> size-4096 slab
3767          */
3768
3769         if (max_frame <= IGB_RXBUFFER_1024)
3770                 rx_buffer_len = IGB_RXBUFFER_1024;
3771         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3772                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3773         else
3774                 rx_buffer_len = IGB_RXBUFFER_128;
3775
3776         if (netif_running(netdev))
3777                 igb_down(adapter);
3778
3779         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3780                  netdev->mtu, new_mtu);
3781         netdev->mtu = new_mtu;
3782
3783         for (i = 0; i < adapter->num_rx_queues; i++)
3784                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3785
3786         if (netif_running(netdev))
3787                 igb_up(adapter);
3788         else
3789                 igb_reset(adapter);
3790
3791         clear_bit(__IGB_RESETTING, &adapter->state);
3792
3793         return 0;
3794 }
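
/*
 * Buffer-size selection example, assuming the conventional constant values
 * (ETH_HLEN = 14, ETH_FCS_LEN = 4, MAXIMUM_ETHERNET_VLAN_SIZE = 1522): the
 * default MTU of 1500 gives max_frame = 1518, which is larger than
 * IGB_RXBUFFER_1024 but still fits a VLAN-tagged standard frame, so each
 * rx ring gets rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE.  Larger (jumbo)
 * frames fall through to IGB_RXBUFFER_128, presumably relying on the
 * driver's page-based packet split for the rest of the frame.
 */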
3795
3796 /**
3797  * igb_update_stats - Update the board statistics counters
3798  * @adapter: board private structure
3799  **/
3800
3801 void igb_update_stats(struct igb_adapter *adapter)
3802 {
3803         struct net_device *netdev = adapter->netdev;
3804         struct e1000_hw *hw = &adapter->hw;
3805         struct pci_dev *pdev = adapter->pdev;
3806         u16 phy_tmp;
3807
3808 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3809
3810         /*
3811          * Prevent stats update while adapter is being reset, or if the pci
3812          * connection is down.
3813          */
3814         if (adapter->link_speed == 0)
3815                 return;
3816         if (pci_channel_offline(pdev))
3817                 return;
3818
3819         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3820         adapter->stats.gprc += rd32(E1000_GPRC);
3821         adapter->stats.gorc += rd32(E1000_GORCL);
3822         rd32(E1000_GORCH); /* clear GORCL */
3823         adapter->stats.bprc += rd32(E1000_BPRC);
3824         adapter->stats.mprc += rd32(E1000_MPRC);
3825         adapter->stats.roc += rd32(E1000_ROC);
3826
3827         adapter->stats.prc64 += rd32(E1000_PRC64);
3828         adapter->stats.prc127 += rd32(E1000_PRC127);
3829         adapter->stats.prc255 += rd32(E1000_PRC255);
3830         adapter->stats.prc511 += rd32(E1000_PRC511);
3831         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3832         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3833         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3834         adapter->stats.sec += rd32(E1000_SEC);
3835
3836         adapter->stats.mpc += rd32(E1000_MPC);
3837         adapter->stats.scc += rd32(E1000_SCC);
3838         adapter->stats.ecol += rd32(E1000_ECOL);
3839         adapter->stats.mcc += rd32(E1000_MCC);
3840         adapter->stats.latecol += rd32(E1000_LATECOL);
3841         adapter->stats.dc += rd32(E1000_DC);
3842         adapter->stats.rlec += rd32(E1000_RLEC);
3843         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3844         adapter->stats.xontxc += rd32(E1000_XONTXC);
3845         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3846         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3847         adapter->stats.fcruc += rd32(E1000_FCRUC);
3848         adapter->stats.gptc += rd32(E1000_GPTC);
3849         adapter->stats.gotc += rd32(E1000_GOTCL);
3850         rd32(E1000_GOTCH); /* clear GOTCL */
3851         adapter->stats.rnbc += rd32(E1000_RNBC);
3852         adapter->stats.ruc += rd32(E1000_RUC);
3853         adapter->stats.rfc += rd32(E1000_RFC);
3854         adapter->stats.rjc += rd32(E1000_RJC);
3855         adapter->stats.tor += rd32(E1000_TORH);
3856         adapter->stats.tot += rd32(E1000_TOTH);
3857         adapter->stats.tpr += rd32(E1000_TPR);
3858
3859         adapter->stats.ptc64 += rd32(E1000_PTC64);
3860         adapter->stats.ptc127 += rd32(E1000_PTC127);
3861         adapter->stats.ptc255 += rd32(E1000_PTC255);
3862         adapter->stats.ptc511 += rd32(E1000_PTC511);
3863         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3864         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3865
3866         adapter->stats.mptc += rd32(E1000_MPTC);
3867         adapter->stats.bptc += rd32(E1000_BPTC);
3868
3869         /* used for adaptive IFS */
3870
3871         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3872         adapter->stats.tpt += hw->mac.tx_packet_delta;
3873         hw->mac.collision_delta = rd32(E1000_COLC);
3874         adapter->stats.colc += hw->mac.collision_delta;
3875
3876         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3877         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3878         adapter->stats.tncrs += rd32(E1000_TNCRS);
3879         adapter->stats.tsctc += rd32(E1000_TSCTC);
3880         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3881
3882         adapter->stats.iac += rd32(E1000_IAC);
3883         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3884         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3885         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3886         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3887         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3888         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3889         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3890         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3891
3892         /* Fill out the OS statistics structure */
3893         netdev->stats.multicast = adapter->stats.mprc;
3894         netdev->stats.collisions = adapter->stats.colc;
3895
3896         /* Rx Errors */
3897
3898         if (hw->mac.type != e1000_82575) {
3899                 u32 rqdpc_tmp;
3900                 u64 rqdpc_total = 0;
3901                 int i;
3902                 /* Read out drop stats per RX queue.  Note that RQDPC (Receive
3903                  * Queue Drop Packet Count) only gets incremented if the
3904                  * DROP_EN bit is set (in the SRRCTL register for that
3905                  * queue).  If the DROP_EN bit is NOT set, then a somewhat
3906                  * equivalent count is stored in RNBC (not on a per-queue basis).
3907                  * Also note the drop count is due to lack of available
3908                  * descriptors.
3909                  */
3910                 for (i = 0; i < adapter->num_rx_queues; i++) {
3911                         rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3912                         adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3913                         rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3914                 }
3915                 netdev->stats.rx_fifo_errors = rqdpc_total;
3916         }
3917
3918         /* Note RNBC (Receive No Buffers Count) is not an exact
3919          * drop count, as the hardware FIFO might save the day.  That's
3920          * one of the reasons for accounting it in rx_fifo_errors, as it is
3921          * potentially not a true drop.
3922          */
3923         netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3924
3925         /* RLEC on some newer hardware can be incorrect so build
3926          * our own version based on RUC and ROC */
3927         netdev->stats.rx_errors = adapter->stats.rxerrc +
3928                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3929                 adapter->stats.ruc + adapter->stats.roc +
3930                 adapter->stats.cexterr;
3931         netdev->stats.rx_length_errors = adapter->stats.ruc +
3932                                               adapter->stats.roc;
3933         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3934         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3935         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3936
3937         /* Tx Errors */
3938         netdev->stats.tx_errors = adapter->stats.ecol +
3939                                        adapter->stats.latecol;
3940         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3941         netdev->stats.tx_window_errors = adapter->stats.latecol;
3942         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3943
3944         /* Tx Dropped needs to be maintained elsewhere */
3945
3946         /* Phy Stats */
3947         if (hw->phy.media_type == e1000_media_type_copper) {
3948                 if ((adapter->link_speed == SPEED_1000) &&
3949                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3950                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3951                         adapter->phy_stats.idle_errors += phy_tmp;
3952                 }
3953         }
3954
3955         /* Management Stats */
3956         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3957         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3958         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3959 }
3960
3961 static irqreturn_t igb_msix_other(int irq, void *data)
3962 {
3963         struct igb_adapter *adapter = data;
3964         struct e1000_hw *hw = &adapter->hw;
3965         u32 icr = rd32(E1000_ICR);
3966         /* reading ICR causes bit 31 of EICR to be cleared */
3967
3968         if (icr & E1000_ICR_DOUTSYNC) {
3969                 /* HW is reporting DMA is out of sync */
3970                 adapter->stats.doosync++;
3971         }
3972
3973         /* Check for a mailbox event */
3974         if (icr & E1000_ICR_VMMB)
3975                 igb_msg_task(adapter);
3976
3977         if (icr & E1000_ICR_LSC) {
3978                 hw->mac.get_link_status = 1;
3979                 /* guard against interrupt when we're going down */
3980                 if (!test_bit(__IGB_DOWN, &adapter->state))
3981                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3982         }
3983
3984         wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3985         wr32(E1000_EIMS, adapter->eims_other);
3986
3987         return IRQ_HANDLED;
3988 }
3989
3990 static void igb_write_itr(struct igb_q_vector *q_vector)
3991 {
3992         u32 itr_val = q_vector->itr_val & 0x7FFC;
3993
3994         if (!q_vector->set_itr)
3995                 return;
3996
3997         if (!itr_val)
3998                 itr_val = 0x4;
3999
4000         if (q_vector->itr_shift)
4001                 itr_val |= itr_val << q_vector->itr_shift;
4002         else
4003                 itr_val |= 0x8000000;
4004
4005         writel(itr_val, q_vector->itr_register);
4006         q_vector->set_itr = 0;
4007 }
4008
4009 static irqreturn_t igb_msix_ring(int irq, void *data)
4010 {
4011         struct igb_q_vector *q_vector = data;
4012
4013         /* Write the ITR value calculated from the previous interrupt. */
4014         igb_write_itr(q_vector);
4015
4016         napi_schedule(&q_vector->napi);
4017
4018         return IRQ_HANDLED;
4019 }
4020
4021 #ifdef CONFIG_IGB_DCA
4022 static void igb_update_dca(struct igb_q_vector *q_vector)
4023 {
4024         struct igb_adapter *adapter = q_vector->adapter;
4025         struct e1000_hw *hw = &adapter->hw;
4026         int cpu = get_cpu();
4027
4028         if (q_vector->cpu == cpu)
4029                 goto out_no_update;
4030
4031         if (q_vector->tx_ring) {
4032                 int q = q_vector->tx_ring->reg_idx;
4033                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4034                 if (hw->mac.type == e1000_82575) {
4035                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4036                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4037                 } else {
4038                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4039                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4040                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4041                 }
4042                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4043                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4044         }
4045         if (q_vector->rx_ring) {
4046                 int q = q_vector->rx_ring->reg_idx;
4047                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4048                 if (hw->mac.type == e1000_82575) {
4049                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4050                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4051                 } else {
4052                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4053                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4054                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4055                 }
4056                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4057                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4058                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4059                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4060         }
4061         q_vector->cpu = cpu;
4062 out_no_update:
4063         put_cpu();
4064 }
4065
4066 static void igb_setup_dca(struct igb_adapter *adapter)
4067 {
4068         struct e1000_hw *hw = &adapter->hw;
4069         int i;
4070
4071         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4072                 return;
4073
4074         /* Always use CB2 mode, difference is masked in the CB driver. */
4075         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4076
4077         for (i = 0; i < adapter->num_q_vectors; i++) {
4078                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4079                 q_vector->cpu = -1;
4080                 igb_update_dca(q_vector);
4081         }
4082 }
4083
4084 static int __igb_notify_dca(struct device *dev, void *data)
4085 {
4086         struct net_device *netdev = dev_get_drvdata(dev);
4087         struct igb_adapter *adapter = netdev_priv(netdev);
4088         struct e1000_hw *hw = &adapter->hw;
4089         unsigned long event = *(unsigned long *)data;
4090
4091         switch (event) {
4092         case DCA_PROVIDER_ADD:
4093                 /* if already enabled, don't do it again */
4094                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4095                         break;
4096                 /* Always use CB2 mode, difference is masked
4097                  * in the CB driver. */
4098                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4099                 if (dca_add_requester(dev) == 0) {
4100                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4101                         dev_info(&adapter->pdev->dev, "DCA enabled\n");
4102                         igb_setup_dca(adapter);
4103                         break;
4104                 }
4105                 /* Fall Through since DCA is disabled. */
4106         case DCA_PROVIDER_REMOVE:
4107                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4108                         /* without this a class_device is left
4109                          * hanging around in the sysfs model */
4110                         dca_remove_requester(dev);
4111                         dev_info(&adapter->pdev->dev, "DCA disabled\n");
4112                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4113                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4114                 }
4115                 break;
4116         }
4117
4118         return 0;
4119 }
4120
4121 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4122                           void *p)
4123 {
4124         int ret_val;
4125
4126         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4127                                          __igb_notify_dca);
4128
4129         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4130 }
4131 #endif /* CONFIG_IGB_DCA */
4132
4133 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4134 {
4135         struct e1000_hw *hw = &adapter->hw;
4136         u32 ping;
4137         int i;
4138
4139         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4140                 ping = E1000_PF_CONTROL_MSG;
4141                 if (adapter->vf_data[i].clear_to_send)
4142                         ping |= E1000_VT_MSGTYPE_CTS;
4143                 igb_write_mbx(hw, &ping, 1, i);
4144         }
4145 }
4146
4147 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4148                                   u32 *msgbuf, u32 vf)
4149 {
4150         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4151         u16 *hash_list = (u16 *)&msgbuf[1];
4152         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4153         int i;
4154
4155         /* only up to 30 hash values supported */
4156         if (n > 30)
4157                 n = 30;
4158
4159         /* salt away the number of multicast addresses assigned
4160          * to this VF for later use to restore when the PF multicast
4161          * list changes
4162          */
4163         vf_data->num_vf_mc_hashes = n;
4164
4165         /* VFs are limited to using the MTA hash table for their multicast
4166          * addresses */
4167         for (i = 0; i < n; i++)
4168                 vf_data->vf_mc_hashes[i] = hash_list[i];
4169
4170         /* Flush and reset the mta with the new values */
4171         igb_set_rx_mode(adapter->netdev);
4172
4173         return 0;
4174 }
4175
4176 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4177 {
4178         struct e1000_hw *hw = &adapter->hw;
4179         struct vf_data_storage *vf_data;
4180         int i, j;
4181
4182         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4183                 vf_data = &adapter->vf_data[i];
4184                 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4185                         igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4186         }
4187 }
4188
4189 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4190 {
4191         struct e1000_hw *hw = &adapter->hw;
4192         u32 pool_mask, reg, vid;
4193         int i;
4194
4195         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4196
4197         /* Find the vlan filter for this id */
4198         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4199                 reg = rd32(E1000_VLVF(i));
4200
4201                 /* remove the vf from the pool */
4202                 reg &= ~pool_mask;
4203
4204                 /* if pool is empty then remove entry from vfta */
4205                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4206                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4207                         vid = reg & E1000_VLVF_VLANID_MASK;
4208                         reg = 0;
4209                         igb_vfta_set(hw, vid, false);
4210                 }
4211
4212                 wr32(E1000_VLVF(i), reg);
4213         }
4214
4215         adapter->vf_data[vf].vlans_enabled = 0;
4216 }
4217
4218 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4219 {
4220         struct e1000_hw *hw = &adapter->hw;
4221         u32 reg, i;
4222
4223         /* It is an error to call this function when VFs are not enabled */
4224         if (!adapter->vfs_allocated_count)
4225                 return -1;
4226
4227         /* Find the vlan filter for this id */
4228         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4229                 reg = rd32(E1000_VLVF(i));
4230                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4231                     vid == (reg & E1000_VLVF_VLANID_MASK))
4232                         break;
4233         }
4234
4235         if (add) {
4236                 if (i == E1000_VLVF_ARRAY_SIZE) {
4237                         /* Did not find a matching VLAN ID entry that was
4238                          * enabled.  Search for a free filter entry, i.e.
4239                          * one without the enable bit set
4240                          */
4241                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4242                                 reg = rd32(E1000_VLVF(i));
4243                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4244                                         break;
4245                         }
4246                 }
4247                 if (i < E1000_VLVF_ARRAY_SIZE) {
4248                         /* Found an enabled/available entry */
4249                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4250
4251                         /* if !enabled we need to set this up in vfta */
4252                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4253                                 /* add VID to filter table, if bit already set
4254                                  * PF must have added it outside of table */
4255                                 if (igb_vfta_set(hw, vid, true))
4256                                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4257                                                 adapter->vfs_allocated_count);
4258                                 reg |= E1000_VLVF_VLANID_ENABLE;
4259                         }
4260                         reg &= ~E1000_VLVF_VLANID_MASK;
4261                         reg |= vid;
4262
4263                         wr32(E1000_VLVF(i), reg);
4264
4265                         /* do not modify RLPML for PF devices */
4266                         if (vf >= adapter->vfs_allocated_count)
4267                                 return 0;
4268
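                             /* first VLAN for this VF: grow its max packet
                              * size (RLPML) by 4 bytes to leave room for the
                              * VLAN tag */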
4269                         if (!adapter->vf_data[vf].vlans_enabled) {
4270                                 u32 size;
4271                                 reg = rd32(E1000_VMOLR(vf));
4272                                 size = reg & E1000_VMOLR_RLPML_MASK;
4273                                 size += 4;
4274                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4275                                 reg |= size;
4276                                 wr32(E1000_VMOLR(vf), reg);
4277                         }
4278                         adapter->vf_data[vf].vlans_enabled++;
4279
4280                         return 0;
4281                 }
4282         } else {
4283                 if (i < E1000_VLVF_ARRAY_SIZE) {
4284                         /* remove vf from the pool */
4285                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4286                         /* if pool is empty then remove entry from vfta */
4287                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4288                                 reg = 0;
4289                                 igb_vfta_set(hw, vid, false);
4290                         }
4291                         wr32(E1000_VLVF(i), reg);
4292
4293                         /* do not modify RLPML for PF devices */
4294                         if (vf >= adapter->vfs_allocated_count)
4295                                 return 0;
4296
4297                         adapter->vf_data[vf].vlans_enabled--;
4298                         if (!adapter->vf_data[vf].vlans_enabled) {
4299                                 u32 size;
4300                                 reg = rd32(E1000_VMOLR(vf));
4301                                 size = reg & E1000_VMOLR_RLPML_MASK;
4302                                 size -= 4;
4303                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4304                                 reg |= size;
4305                                 wr32(E1000_VMOLR(vf), reg);
4306                         }
4307                         return 0;
4308                 }
4309         }
4310         return -1;
4311 }
4312
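/**
 * igb_set_vf_vlan - handle a VLAN filter request from a VF mailbox message
 * @adapter: board private structure
 * @msgbuf: mailbox message; word 0 carries the add/remove flag, word 1 the VLAN id
 * @vf: VF that sent the request
 **/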
4313 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4314 {
4315         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4316         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4317
4318         return igb_vlvf_set(adapter, vid, add, vf);
4319 }
4320
4321 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4322 {
4323         struct e1000_hw *hw = &adapter->hw;
4324
4325         /* disable mailbox functionality for vf */
4326         adapter->vf_data[vf].clear_to_send = false;
4327
4328         /* reset offloads to defaults */
4329         igb_set_vmolr(hw, vf);
4330
4331         /* reset vlans for device */
4332         igb_clear_vf_vfta(adapter, vf);
4333
4334         /* reset multicast table array for vf */
4335         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4336
4337         /* Flush and reset the mta with the new values */
4338         igb_set_rx_mode(adapter->netdev);
4339 }
4340
4341 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4342 {
4343         struct e1000_hw *hw = &adapter->hw;
4344         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4345         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4346         u32 reg, msgbuf[3];
4347         u8 *addr = (u8 *)(&msgbuf[1]);
4348
4349         /* process all the same items cleared in a function level reset */
4350         igb_vf_reset_event(adapter, vf);
4351
4352         /* set vf mac address */
4353         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4354
4355         /* enable transmit and receive for vf */
4356         reg = rd32(E1000_VFTE);
4357         wr32(E1000_VFTE, reg | (1 << vf));
4358         reg = rd32(E1000_VFRE);
4359         wr32(E1000_VFRE, reg | (1 << vf));
4360
4361         /* enable mailbox functionality for vf */
4362         adapter->vf_data[vf].clear_to_send = true;
4363
4364         /* reply to reset with ack and vf mac address */
4365         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4366         memcpy(addr, vf_mac, 6);
4367         igb_write_mbx(hw, msgbuf, 3, vf);
4368 }
4369
4370 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4371 {
4372         unsigned char *addr = (unsigned char *)&msg[1];
4373         int err = -1;
4374
4375         if (is_valid_ether_addr(addr))
4376                 err = igb_set_vf_mac(adapter, vf, addr);
4377
4378         return err;
4379 }
4381
4382 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4383 {
4384         struct e1000_hw *hw = &adapter->hw;
4385         u32 msg = E1000_VT_MSGTYPE_NACK;
4386
4387         /* if device isn't clear to send it shouldn't be reading either */
4388         if (!adapter->vf_data[vf].clear_to_send)
4389                 igb_write_mbx(hw, &msg, 1, vf);
4390 }
4391
4392
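/**
 * igb_msg_task - service the mailbox for every allocated VF
 * @adapter: board private structure
 *
 * Checks each VF mailbox for reset requests, pending messages, and acks,
 * and dispatches them to the appropriate handlers.
 **/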
4393 static void igb_msg_task(struct igb_adapter *adapter)
4394 {
4395         struct e1000_hw *hw = &adapter->hw;
4396         u32 vf;
4397
4398         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4399                 /* process any reset requests */
4400                 if (!igb_check_for_rst(hw, vf)) {
4401                         adapter->vf_data[vf].clear_to_send = false;
4402                         igb_vf_reset_event(adapter, vf);
4403                 }
4404
4405                 /* process any messages pending */
4406                 if (!igb_check_for_msg(hw, vf))
4407                         igb_rcv_msg_from_vf(adapter, vf);
4408
4409                 /* process any acks */
4410                 if (!igb_check_for_ack(hw, vf))
4411                         igb_rcv_ack_from_vf(adapter, vf);
4412
4413         }
4414 }
4415
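/**
 * igb_rcv_msg_from_vf - read and act on one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF that raised the message interrupt
 *
 * Reads the message, handles resets and configuration requests (MAC address,
 * multicast list, maximum frame size, VLAN), and replies with an ACK or NACK
 * plus the clear-to-send bit.
 **/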
4416 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4417 {
4418         u32 mbx_size = E1000_VFMAILBOX_SIZE;
4419         u32 msgbuf[mbx_size];
4420         struct e1000_hw *hw = &adapter->hw;
4421         s32 retval;
4422
4423         retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4424
4425         if (retval)
4426                 dev_err(&adapter->pdev->dev,
4427                         "Error receiving message from VF\n");
4428
4429         /* this is a message we already processed, do nothing */
4430         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4431                 return retval;
4432
4433         /*
4434          * until the vf completes a reset it should not be
4435          * allowed to start any configuration.
4436          */
4437
4438         if (msgbuf[0] == E1000_VF_RESET) {
4439                 igb_vf_reset_msg(adapter, vf);
4440
4441                 return retval;
4442         }
4443
4444         if (!adapter->vf_data[vf].clear_to_send) {
4445                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4446                 igb_write_mbx(hw, msgbuf, 1, vf);
4447                 return retval;
4448         }
4449
4450         switch ((msgbuf[0] & 0xFFFF)) {
4451         case E1000_VF_SET_MAC_ADDR:
4452                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4453                 break;
4454         case E1000_VF_SET_MULTICAST:
4455                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4456                 break;
4457         case E1000_VF_SET_LPE:
4458                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4459                 break;
4460         case E1000_VF_SET_VLAN:
4461                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4462                 break;
4463         default:
4464                 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4465                 retval = -1;
4466                 break;
4467         }
4468
4469         /* notify the VF of the results of what it sent us */
4470         if (retval)
4471                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4472         else
4473                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4474
4475         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4476
4477         igb_write_mbx(hw, msgbuf, 1, vf);
4478
4479         return retval;
4480 }
4481
4482 /**
4483  *  igb_set_uta - Set unicast filter table address
4484  *  @adapter: board private structure
4485  *
4486  *  The unicast table address is a register array of 32-bit registers.
4487  *  The table is meant to be used in a way similar to how the MTA is used,
4488  *  however due to certain limitations in the hardware it is necessary to
4489  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4490  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
4491  **/
4492 static void igb_set_uta(struct igb_adapter *adapter)
4493 {
4494         struct e1000_hw *hw = &adapter->hw;
4495         int i;
4496
4497         /* The UTA table only exists on 82576 hardware and newer */
4498         if (hw->mac.type < e1000_82576)
4499                 return;
4500
4501         /* we only need to do this if VMDq is enabled */
4502         if (!adapter->vfs_allocated_count)
4503                 return;
4504
4505         for (i = 0; i < hw->mac.uta_reg_count; i++)
4506                 array_wr32(E1000_UTA, i, ~0);
4507 }
4508
4509 /**
4510  * igb_intr_msi - Interrupt Handler
4511  * @irq: interrupt number
4512  * @data: pointer to a network interface device structure
4513  **/
4514 static irqreturn_t igb_intr_msi(int irq, void *data)
4515 {
4516         struct igb_adapter *adapter = data;
4517         struct igb_q_vector *q_vector = adapter->q_vector[0];
4518         struct e1000_hw *hw = &adapter->hw;
4519         /* read ICR disables interrupts using IAM */
4520         u32 icr = rd32(E1000_ICR);
4521
4522         igb_write_itr(q_vector);
4523
4524         if (icr & E1000_ICR_DOUTSYNC) {
4525                 /* HW is reporting DMA is out of sync */
4526                 adapter->stats.doosync++;
4527         }
4528
4529         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4530                 hw->mac.get_link_status = 1;
4531                 if (!test_bit(__IGB_DOWN, &adapter->state))
4532                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4533         }
4534
4535         napi_schedule(&q_vector->napi);
4536
4537         return IRQ_HANDLED;
4538 }
4539
4540 /**
4541  * igb_intr - Legacy Interrupt Handler
4542  * @irq: interrupt number
4543  * @data: pointer to a network interface device structure
4544  **/
4545 static irqreturn_t igb_intr(int irq, void *data)
4546 {
4547         struct igb_adapter *adapter = data;
4548         struct igb_q_vector *q_vector = adapter->q_vector[0];
4549         struct e1000_hw *hw = &adapter->hw;
4550         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4551          * need for the IMC write */
4552         u32 icr = rd32(E1000_ICR);
4553         if (!icr)
4554                 return IRQ_NONE;  /* Not our interrupt */
4555
4556         igb_write_itr(q_vector);
4557
4558         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4559          * not set, then the adapter didn't send an interrupt */
4560         if (!(icr & E1000_ICR_INT_ASSERTED))
4561                 return IRQ_NONE;
4562
4563         if (icr & E1000_ICR_DOUTSYNC) {
4564                 /* HW is reporting DMA is out of sync */
4565                 adapter->stats.doosync++;
4566         }
4567
4568         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4569                 hw->mac.get_link_status = 1;
4570                 /* guard against interrupt when we're going down */
4571                 if (!test_bit(__IGB_DOWN, &adapter->state))
4572                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4573         }
4574
4575         napi_schedule(&q_vector->napi);
4576
4577         return IRQ_HANDLED;
4578 }
4579
4580 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4581 {
4582         struct igb_adapter *adapter = q_vector->adapter;
4583         struct e1000_hw *hw = &adapter->hw;
4584
4585         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4586             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4587                 if (!adapter->msix_entries)
4588                         igb_set_itr(adapter);
4589                 else
4590                         igb_update_ring_itr(q_vector);
4591         }
4592
4593         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4594                 if (adapter->msix_entries)
4595                         wr32(E1000_EIMS, q_vector->eims_value);
4596                 else
4597                         igb_irq_enable(adapter);
4598         }
4599 }
4600
4601 /**
4602  * igb_poll - NAPI Rx polling callback
4603  * @napi: napi polling structure
4604  * @budget: count of how many packets we should handle
4605  **/
4606 static int igb_poll(struct napi_struct *napi, int budget)
4607 {
4608         struct igb_q_vector *q_vector = container_of(napi,
4609                                                      struct igb_q_vector,
4610                                                      napi);
4611         int tx_clean_complete = 1, work_done = 0;
4612
4613 #ifdef CONFIG_IGB_DCA
4614         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4615                 igb_update_dca(q_vector);
4616 #endif
4617         if (q_vector->tx_ring)
4618                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4619
4620         if (q_vector->rx_ring)
4621                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4622
4623         if (!tx_clean_complete)
4624                 work_done = budget;
4625
4626         /* If not enough Rx work done, exit the polling mode */
4627         if (work_done < budget) {
4628                 napi_complete(napi);
4629                 igb_ring_irq_enable(q_vector);
4630         }
4631
4632         return work_done;
4633 }
4634
4635 /**
4636  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4637  * @adapter: board private structure
4638  * @skb: packet that was just sent
4639  *
4640  * If we were asked to do hardware stamping and such a time stamp is
4641  * available, then it must have been for this skb here because we only
4642  * allow one such packet into the queue.
4643  */
4644 static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
4645 {
4646         union skb_shared_tx *shtx = skb_tx(skb);
4647         struct e1000_hw *hw = &adapter->hw;
4648
4649         if (unlikely(shtx->hardware)) {
4650                 u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
4651                 if (valid) {
4652                         u64 regval = rd32(E1000_TXSTMPL);
4653                         u64 ns;
4654                         struct skb_shared_hwtstamps shhwtstamps;
4655
4656                         memset(&shhwtstamps, 0, sizeof(shhwtstamps));
4657                         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4658                         ns = timecounter_cyc2time(&adapter->clock,
4659                                                   regval);
4660                         timecompare_update(&adapter->compare, ns);
4661                         shhwtstamps.hwtstamp = ns_to_ktime(ns);
4662                         shhwtstamps.syststamp =
4663                                 timecompare_transform(&adapter->compare, ns);
4664                         skb_tstamp_tx(skb, &shhwtstamps);
4665                 }
4666         }
4667 }
4668
4669 /**
4670  * igb_clean_tx_irq - Reclaim resources after transmit completes
4671  * @q_vector: pointer to q_vector containing needed info
4672  * returns true if ring is completely cleaned
4673  **/
4674 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4675 {
4676         struct igb_adapter *adapter = q_vector->adapter;
4677         struct igb_ring *tx_ring = q_vector->tx_ring;
4678         struct net_device *netdev = tx_ring->netdev;
4679         struct e1000_hw *hw = &adapter->hw;
4680         struct igb_buffer *buffer_info;
4681         struct sk_buff *skb;
4682         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4683         unsigned int total_bytes = 0, total_packets = 0;
4684         unsigned int i, eop, count = 0;
4685         bool cleaned = false;
4686
4687         i = tx_ring->next_to_clean;
4688         eop = tx_ring->buffer_info[i].next_to_watch;
4689         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4690
4691         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4692                (count < tx_ring->count)) {
4693                 for (cleaned = false; !cleaned; count++) {
4694                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4695                         buffer_info = &tx_ring->buffer_info[i];
4696                         cleaned = (i == eop);
4697                         skb = buffer_info->skb;
4698
4699                         if (skb) {
4700                                 unsigned int segs, bytecount;
4701                                 /* gso_segs is currently only valid for tcp */
4702                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4703                                 /* multiply data chunks by size of headers */
4704                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4705                                             skb->len;
4706                                 total_packets += segs;
4707                                 total_bytes += bytecount;
4708
4709                                 igb_tx_hwtstamp(adapter, skb);
4710                         }
4711
4712                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4713                         tx_desc->wb.status = 0;
4714
4715                         i++;
4716                         if (i == tx_ring->count)
4717                                 i = 0;
4718                 }
4719                 eop = tx_ring->buffer_info[i].next_to_watch;
4720                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4721         }
4722
4723         tx_ring->next_to_clean = i;
4724
4725         if (unlikely(count &&
4726                      netif_carrier_ok(netdev) &&
4727                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4728                 /* Make sure that anybody stopping the queue after this
4729                  * sees the new next_to_clean.
4730                  */
4731                 smp_mb();
4732                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4733                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4734                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4735                         tx_ring->tx_stats.restart_queue++;
4736                 }
4737         }
4738
4739         if (tx_ring->detect_tx_hung) {
4740                 /* Detect a transmit hang in hardware, this serializes the
4741                  * check with the clearing of time_stamp and movement of i */
4742                 tx_ring->detect_tx_hung = false;
4743                 if (tx_ring->buffer_info[i].time_stamp &&
4744                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4745                                (adapter->tx_timeout_factor * HZ))
4746                     && !(rd32(E1000_STATUS) &
4747                          E1000_STATUS_TXOFF)) {
4748
4749                         /* detected Tx unit hang */
4750                         dev_err(&tx_ring->pdev->dev,
4751                                 "Detected Tx Unit Hang\n"
4752                                 "  Tx Queue             <%d>\n"
4753                                 "  TDH                  <%x>\n"
4754                                 "  TDT                  <%x>\n"
4755                                 "  next_to_use          <%x>\n"
4756                                 "  next_to_clean        <%x>\n"
4757                                 "buffer_info[next_to_clean]\n"
4758                                 "  time_stamp           <%lx>\n"
4759                                 "  next_to_watch        <%x>\n"
4760                                 "  jiffies              <%lx>\n"
4761                                 "  desc.status          <%x>\n",
4762                                 tx_ring->queue_index,
4763                                 readl(tx_ring->head),
4764                                 readl(tx_ring->tail),
4765                                 tx_ring->next_to_use,
4766                                 tx_ring->next_to_clean,
4767                                 tx_ring->buffer_info[i].time_stamp,
4768                                 eop,
4769                                 jiffies,
4770                                 eop_desc->wb.status);
4771                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4772                 }
4773         }
4774         tx_ring->total_bytes += total_bytes;
4775         tx_ring->total_packets += total_packets;
4776         tx_ring->tx_stats.bytes += total_bytes;
4777         tx_ring->tx_stats.packets += total_packets;
4778         netdev->stats.tx_bytes += total_bytes;
4779         netdev->stats.tx_packets += total_packets;
4780         return (count < tx_ring->count);
4781 }
4782
4783 /**
4784  * igb_receive_skb - helper function to handle rx indications
4785  * @q_vector: structure containing interrupt and ring information
4786  * @skb: packet to send up
4787  * @vlan_tag: vlan tag for packet
4788  **/
4789 static void igb_receive_skb(struct igb_q_vector *q_vector,
4790                             struct sk_buff *skb,
4791                             u16 vlan_tag)
4792 {
4793         struct igb_adapter *adapter = q_vector->adapter;
4794
4795         if (vlan_tag)
4796                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4797                                  vlan_tag, skb);
4798         else
4799                 napi_gro_receive(&q_vector->napi, skb);
4800 }
4801
4802 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4803                                        u32 status_err, struct sk_buff *skb)
4804 {
4805         skb->ip_summed = CHECKSUM_NONE;
4806
4807         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4808         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4809              (status_err & E1000_RXD_STAT_IXSM))
4810                 return;
4811
4812         /* TCP/UDP checksum error bit is set */
4813         if (status_err &
4814             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4815                 /*
4816                  * work around errata with SCTP packets where the TCPE aka
4817                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4818                  * packets; let the stack check the crc32c instead
4819                  */
4820                 if ((skb->len == 60) &&
4821                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4822                         ring->rx_stats.csum_err++;
4823
4824                 /* let the stack verify checksum errors */
4825                 return;
4826         }
4827         /* It must be a TCP or UDP packet with a valid checksum */
4828         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4829                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4830
4831         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4832 }
4833
4834 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4835                                union e1000_adv_rx_desc *rx_desc)
4836 {
4837         /* HW will not DMA in data larger than the given buffer, even if it
4838          * parses the (NFS, of course) header to be larger.  In that case, it
4839          * fills the header buffer and spills the rest into the page.
4840          */
4841         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4842                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4843         if (hlen > rx_ring->rx_buffer_len)
4844                 hlen = rx_ring->rx_buffer_len;
4845         return hlen;
4846 }
4847
4848 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4849                                  int *work_done, int budget)
4850 {
4851         struct igb_adapter *adapter = q_vector->adapter;
4852         struct igb_ring *rx_ring = q_vector->rx_ring;
4853         struct net_device *netdev = rx_ring->netdev;
4854         struct e1000_hw *hw = &adapter->hw;
4855         struct pci_dev *pdev = rx_ring->pdev;
4856         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4857         struct igb_buffer *buffer_info, *next_buffer;
4858         struct sk_buff *skb;
4859         bool cleaned = false;
4860         int cleaned_count = 0;
4861         unsigned int total_bytes = 0, total_packets = 0;
4862         unsigned int i;
4863         u32 staterr;
4864         u16 length;
4865         u16 vlan_tag;
4866
4867         i = rx_ring->next_to_clean;
4868         buffer_info = &rx_ring->buffer_info[i];
4869         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4870         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4871
4872         while (staterr & E1000_RXD_STAT_DD) {
4873                 if (*work_done >= budget)
4874                         break;
4875                 (*work_done)++;
4876
4877                 skb = buffer_info->skb;
4878                 prefetch(skb->data - NET_IP_ALIGN);
4879                 buffer_info->skb = NULL;
4880
4881                 i++;
4882                 if (i == rx_ring->count)
4883                         i = 0;
4884                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4885                 prefetch(next_rxd);
4886                 next_buffer = &rx_ring->buffer_info[i];
4887
4888                 length = le16_to_cpu(rx_desc->wb.upper.length);
4889                 cleaned = true;
4890                 cleaned_count++;
4891
4892                 if (buffer_info->dma) {
4893                         pci_unmap_single(pdev, buffer_info->dma,
4894                                          rx_ring->rx_buffer_len,
4895                                          PCI_DMA_FROMDEVICE);
4896                         buffer_info->dma = 0;
4897                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4898                                 skb_put(skb, length);
4899                                 goto send_up;
4900                         }
4901                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4902                 }
4903
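                     /* any remaining payload was DMA'd into the half-page
                      * buffer; attach it to the skb as a page fragment */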
4904                 if (length) {
4905                         pci_unmap_page(pdev, buffer_info->page_dma,
4906                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4907                         buffer_info->page_dma = 0;
4908
4909                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4910                                                 buffer_info->page,
4911                                                 buffer_info->page_offset,
4912                                                 length);
4913
4914                         if (page_count(buffer_info->page) != 1)
4915                                 buffer_info->page = NULL;
4916                         else
4917                                 get_page(buffer_info->page);
4918
4919                         skb->len += length;
4920                         skb->data_len += length;
4921
4922                         skb->truesize += length;
4923                 }
4924
4925                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4926                         buffer_info->skb = next_buffer->skb;
4927                         buffer_info->dma = next_buffer->dma;
4928                         next_buffer->skb = skb;
4929                         next_buffer->dma = 0;
4930                         goto next_desc;
4931                 }
4932 send_up:
4933                 /*
4934                  * If this bit is set, then the RX registers contain
4935                  * the time stamp. No other packet will be time
4936                  * stamped until we read these registers, so read the
4937                  * registers to make them available again. Because
4938                  * only one packet can be time stamped at a time, we
4939                  * know that the register values must belong to this
4940                  * one here and therefore we don't need to compare
4941                  * any of the additional attributes stored for it.
4942                  *
4943                  * If nothing went wrong, then it should have a
4944                  * skb_shared_tx that we can turn into a
4945                  * skb_shared_hwtstamps.
4946                  *
4947                  * TODO: can time stamping be triggered (thus locking
4948                  * the registers) without the packet reaching this point
4949                  * here? In that case RX time stamping would get stuck.
4950                  *
4951                  * TODO: in "time stamp all packets" mode this bit is
4952                  * not set. Need a global flag for this mode and then
4953                  * always read the registers. Cannot be done without
4954                  * a race condition.
4955                  */
4956                 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4957                         u64 regval;
4958                         u64 ns;
4959                         struct skb_shared_hwtstamps *shhwtstamps =
4960                                 skb_hwtstamps(skb);
4961
4962                         WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4963                              "igb: no RX time stamp available for time stamped packet");
4964                         regval = rd32(E1000_RXSTMPL);
4965                         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4966                         ns = timecounter_cyc2time(&adapter->clock, regval);
4967                         timecompare_update(&adapter->compare, ns);
4968                         memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4969                         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4970                         shhwtstamps->syststamp =
4971                                 timecompare_transform(&adapter->compare, ns);
4972                 }
4973
4974                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4975                         dev_kfree_skb_irq(skb);
4976                         goto next_desc;
4977                 }
4978
4979                 total_bytes += skb->len;
4980                 total_packets++;
4981
4982                 igb_rx_checksum_adv(rx_ring, staterr, skb);
4983
4984                 skb->protocol = eth_type_trans(skb, netdev);
4985                 skb_record_rx_queue(skb, rx_ring->queue_index);
4986
4987                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4988                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4989
4990                 igb_receive_skb(q_vector, skb, vlan_tag);
4991
4992 next_desc:
4993                 rx_desc->wb.upper.status_error = 0;
4994
4995                 /* return some buffers to hardware, one at a time is too slow */
4996                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4997                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4998                         cleaned_count = 0;
4999                 }
5000
5001                 /* use prefetched values */
5002                 rx_desc = next_rxd;
5003                 buffer_info = next_buffer;
5004                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5005         }
5006
5007         rx_ring->next_to_clean = i;
5008         cleaned_count = igb_desc_unused(rx_ring);
5009
5010         if (cleaned_count)
5011                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5012
5013         rx_ring->total_packets += total_packets;
5014         rx_ring->total_bytes += total_bytes;
5015         rx_ring->rx_stats.packets += total_packets;
5016         rx_ring->rx_stats.bytes += total_bytes;
5017         netdev->stats.rx_bytes += total_bytes;
5018         netdev->stats.rx_packets += total_packets;
5019         return cleaned;
5020 }
5021
5022 /**
5023  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5024  * @rx_ring: pointer to the receive ring to refill
5025  * @cleaned_count: number of buffers to replace
5025  **/
5026 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5027 {
5028         struct net_device *netdev = rx_ring->netdev;
5029         union e1000_adv_rx_desc *rx_desc;
5030         struct igb_buffer *buffer_info;
5031         struct sk_buff *skb;
5032         unsigned int i;
5033         int bufsz;
5034
5035         i = rx_ring->next_to_use;
5036         buffer_info = &rx_ring->buffer_info[i];
5037
5038         bufsz = rx_ring->rx_buffer_len;
5039
5040         while (cleaned_count--) {
5041                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5042
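                     /* buffers smaller than 1K indicate packet-split mode:
                      * the header lands in the skb and the payload in half a
                      * page, so make sure a mapped half page is available */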
5043                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5044                         if (!buffer_info->page) {
5045                                 buffer_info->page = alloc_page(GFP_ATOMIC);
5046                                 if (!buffer_info->page) {
5047                                         rx_ring->rx_stats.alloc_failed++;
5048                                         goto no_buffers;
5049                                 }
5050                                 buffer_info->page_offset = 0;
5051                         } else {
5052                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5053                         }
5054                         buffer_info->page_dma =
5055                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5056                                              buffer_info->page_offset,
5057                                              PAGE_SIZE / 2,
5058                                              PCI_DMA_FROMDEVICE);
5059                 }
5060
5061                 if (!buffer_info->skb) {
5062                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5063                         if (!skb) {
5064                                 rx_ring->rx_stats.alloc_failed++;
5065                                 goto no_buffers;
5066                         }
5067
5068                         buffer_info->skb = skb;
5069                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5070                                                           skb->data,
5071                                                           bufsz,
5072                                                           PCI_DMA_FROMDEVICE);
5073                 }
5074                 /* Refresh the desc even if buffer_addrs didn't change because
5075                  * each write-back erases this info. */
5076                 if (bufsz < IGB_RXBUFFER_1024) {
5077                         rx_desc->read.pkt_addr =
5078                              cpu_to_le64(buffer_info->page_dma);
5079                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5080                 } else {
5081                         rx_desc->read.pkt_addr =
5082                              cpu_to_le64(buffer_info->dma);
5083                         rx_desc->read.hdr_addr = 0;
5084                 }
5085
5086                 i++;
5087                 if (i == rx_ring->count)
5088                         i = 0;
5089                 buffer_info = &rx_ring->buffer_info[i];
5090         }
5091
5092 no_buffers:
5093         if (rx_ring->next_to_use != i) {
5094                 rx_ring->next_to_use = i;
5095                 if (i == 0)
5096                         i = (rx_ring->count - 1);
5097                 else
5098                         i--;
5099
5100                 /* Force memory writes to complete before letting h/w
5101                  * know there are new descriptors to fetch.  (Only
5102                  * applicable for weak-ordered memory model archs,
5103                  * such as IA-64). */
5104                 wmb();
5105                 writel(i, rx_ring->tail);
5106         }
5107 }
5108
5109 /**
5110  * igb_mii_ioctl - handle MII ioctls for copper PHY access
5111  * @netdev: network interface device structure
5112  * @ifr: interface request structure containing the MII data
5113  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
5114  **/
5115 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5116 {
5117         struct igb_adapter *adapter = netdev_priv(netdev);
5118         struct mii_ioctl_data *data = if_mii(ifr);
5119
5120         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5121                 return -EOPNOTSUPP;
5122
5123         switch (cmd) {
5124         case SIOCGMIIPHY:
5125                 data->phy_id = adapter->hw.phy.addr;
5126                 break;
5127         case SIOCGMIIREG:
5128                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5129                                      &data->val_out))
5130                         return -EIO;
5131                 break;
5132         case SIOCSMIIREG:
5133         default:
5134                 return -EOPNOTSUPP;
5135         }
5136         return 0;
5137 }
5138
5139 /**
5140  * igb_hwtstamp_ioctl - control hardware time stamping
5141  * @netdev: network interface device structure
5142  * @ifr: interface request structure containing the hwtstamp_config
5143  * @cmd: ioctl command (SIOCSHWTSTAMP)
5144  *
5145  * Outgoing time stamping can be enabled and disabled. Play nice and
5146  * disable it when requested, although it shouldn't cause any overhead
5147  * when no packet needs it. At most one packet in the queue may be
5148  * marked for time stamping, otherwise it would be impossible to tell
5149  * for sure to which packet the hardware time stamp belongs.
5150  *
5151  * Incoming time stamping has to be configured via the hardware
5152  * filters. Not all combinations are supported, in particular event
5153  * type has to be specified. Matching the kind of event packet is
5154  * not supported, with the exception of "all V2 events regardless of
5155  * layer 2 or 4".
5156  *
5157  **/
5158 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5159                               struct ifreq *ifr, int cmd)
5160 {
5161         struct igb_adapter *adapter = netdev_priv(netdev);
5162         struct e1000_hw *hw = &adapter->hw;
5163         struct hwtstamp_config config;
5164         u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5165         u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
5166         u32 tsync_rx_ctl_type = 0;
5167         u32 tsync_rx_cfg = 0;
5168         int is_l4 = 0;
5169         int is_l2 = 0;
5170         short port = 319; /* PTP */
5171         u32 regval;
5172
5173         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5174                 return -EFAULT;
5175
5176         /* reserved for future extensions */
5177         if (config.flags)
5178                 return -EINVAL;
5179
5180         switch (config.tx_type) {
5181         case HWTSTAMP_TX_OFF:
5182                 tsync_tx_ctl_bit = 0;
5183                 break;
5184         case HWTSTAMP_TX_ON:
5185                 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5186                 break;
5187         default:
5188                 return -ERANGE;
5189         }
5190
5191         switch (config.rx_filter) {
5192         case HWTSTAMP_FILTER_NONE:
5193                 tsync_rx_ctl_bit = 0;
5194                 break;
5195         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5196         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5197         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5198         case HWTSTAMP_FILTER_ALL:
5199                 /*
5200                  * register TSYNCRXCFG must be set, therefore it is not
5201                  * possible to time stamp both Sync and Delay_Req messages
5202                  * => fall back to time stamping all packets
5203                  */
5204                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
5205                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5206                 break;
5207         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5208                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5209                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5210                 is_l4 = 1;
5211                 break;
5212         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5213                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5214                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5215                 is_l4 = 1;
5216                 break;
5217         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5218         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5219                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5220                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5221                 is_l2 = 1;
5222                 is_l4 = 1;
5223                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5224                 break;
5225         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5226         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5227                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5228                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5229                 is_l2 = 1;
5230                 is_l4 = 1;
5231                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5232                 break;
5233         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5234         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5235         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5236                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5237                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5238                 is_l2 = 1;
5239                 break;
5240         default:
5241                 return -ERANGE;
5242         }
5243
5244         /* enable/disable TX */
5245         regval = rd32(E1000_TSYNCTXCTL);
5246         regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5247         wr32(E1000_TSYNCTXCTL, regval);
5248
5249         /* enable/disable RX, define which PTP packets are time stamped */
5250         regval = rd32(E1000_TSYNCRXCTL);
5251         regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5252         regval = (regval & ~0xE) | tsync_rx_ctl_type;
5253         wr32(E1000_TSYNCRXCTL, regval);
5254         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5255
5256         /*
5257          * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5258          *                                          (Ethertype to filter on)
5259          * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5260          * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5261          */
5262         wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5263
5264         /* L4 Queue Filter[0]: only filter by source and destination port */
5265         wr32(E1000_SPQF0, htons(port));
5266         wr32(E1000_IMIREXT(0), is_l4 ?
5267              ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5268         wr32(E1000_IMIR(0), is_l4 ?
5269              (htons(port)
5270               | (0<<16) /* immediate interrupt disabled */
5271               | 0 /* (1<<17) bit cleared: do not bypass
5272                      destination port check */)
5273                 : 0);
5274         wr32(E1000_FTQF0, is_l4 ?
5275              (0x11 /* UDP */
5276               | (1<<15) /* VF not compared */
5277               | (1<<27) /* Enable Timestamping */
5278               | (7<<28) /* only source port filter enabled,
5279                            source/target address and protocol
5280                            masked */)
5281              : ((1<<15) | (15<<28) /* all mask bits set = filter not
5282                                       enabled */));
5283
5284         wrfl();
5285
5286         adapter->hwtstamp_config = config;
5287
5288         /* clear TX/RX time stamp registers, just to be sure */
5289         regval = rd32(E1000_TXSTMPH);
5290         regval = rd32(E1000_RXSTMPH);
5291
5292         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5293                 -EFAULT : 0;
5294 }
5295
5296 /**
5297  * igb_ioctl - dispatch device-specific ioctls
5298  * @netdev: network interface device structure
5299  * @ifr: interface request structure
5300  * @cmd: ioctl command
5301  **/
5302 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5303 {
5304         switch (cmd) {
5305         case SIOCGMIIPHY:
5306         case SIOCGMIIREG:
5307         case SIOCSMIIREG:
5308                 return igb_mii_ioctl(netdev, ifr, cmd);
5309         case SIOCSHWTSTAMP:
5310                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5311         default:
5312                 return -EOPNOTSUPP;
5313         }
5314 }
5315
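/* Helpers to read and write 16-bit registers in the device's PCI Express
 * capability structure through PCI config space. */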
5316 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5317 {
5318         struct igb_adapter *adapter = hw->back;
5319         u16 cap_offset;
5320
5321         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5322         if (!cap_offset)
5323                 return -E1000_ERR_CONFIG;
5324
5325         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5326
5327         return 0;
5328 }
5329
5330 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5331 {
5332         struct igb_adapter *adapter = hw->back;
5333         u16 cap_offset;
5334
5335         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5336         if (!cap_offset)
5337                 return -E1000_ERR_CONFIG;
5338
5339         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5340
5341         return 0;
5342 }
5343
5344 static void igb_vlan_rx_register(struct net_device *netdev,
5345                                  struct vlan_group *grp)
5346 {
5347         struct igb_adapter *adapter = netdev_priv(netdev);
5348         struct e1000_hw *hw = &adapter->hw;
5349         u32 ctrl, rctl;
5350
5351         igb_irq_disable(adapter);
5352         adapter->vlgrp = grp;
5353
5354         if (grp) {
5355                 /* enable VLAN tag insert/strip */
5356                 ctrl = rd32(E1000_CTRL);
5357                 ctrl |= E1000_CTRL_VME;
5358                 wr32(E1000_CTRL, ctrl);
5359
5360                 /* enable VLAN receive filtering */
5361                 rctl = rd32(E1000_RCTL);
5362                 rctl &= ~E1000_RCTL_CFIEN;
5363                 wr32(E1000_RCTL, rctl);
5364                 igb_update_mng_vlan(adapter);
5365         } else {
5366                 /* disable VLAN tag insert/strip */
5367                 ctrl = rd32(E1000_CTRL);
5368                 ctrl &= ~E1000_CTRL_VME;
5369                 wr32(E1000_CTRL, ctrl);
5370
5371                 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5372                         igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5373                         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5374                 }
5375         }
5376
5377         igb_rlpml_set(adapter);
5378
5379         if (!test_bit(__IGB_DOWN, &adapter->state))
5380                 igb_irq_enable(adapter);
5381 }
5382
5383 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5384 {
5385         struct igb_adapter *adapter = netdev_priv(netdev);
5386         struct e1000_hw *hw = &adapter->hw;
5387         int pf_id = adapter->vfs_allocated_count;
5388
5389         if ((hw->mng_cookie.status &
5390              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5391             (vid == adapter->mng_vlan_id))
5392                 return;
5393
5394         /* add vid to vlvf if sr-iov is enabled,
5395          * if that fails add directly to filter table */
5396         if (igb_vlvf_set(adapter, vid, true, pf_id))
5397                 igb_vfta_set(hw, vid, true);
5398
5399 }
5400
5401 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5402 {
5403         struct igb_adapter *adapter = netdev_priv(netdev);
5404         struct e1000_hw *hw = &adapter->hw;
5405         int pf_id = adapter->vfs_allocated_count;
5406
5407         igb_irq_disable(adapter);
5408         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5409
5410         if (!test_bit(__IGB_DOWN, &adapter->state))
5411                 igb_irq_enable(adapter);
5412
5413         if ((adapter->hw.mng_cookie.status &
5414              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5415             (vid == adapter->mng_vlan_id)) {
5416                 /* release control to f/w */
5417                 igb_release_hw_control(adapter);
5418                 return;
5419         }
5420
5421         /* remove vid from vlvf if sr-iov is enabled,
5422          * if not in vlvf remove from vfta */
5423         if (igb_vlvf_set(adapter, vid, false, pf_id))
5424                 igb_vfta_set(hw, vid, false);
5425 }
5426
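/* Re-register the vlan group with the hardware and re-add every configured
 * VLAN id, typically after a reset has cleared the filter tables. */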
5427 static void igb_restore_vlan(struct igb_adapter *adapter)
5428 {
5429         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5430
5431         if (adapter->vlgrp) {
5432                 u16 vid;
5433                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5434                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5435                                 continue;
5436                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5437                 }
5438         }
5439 }
5440
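/* igb_set_spd_dplx - force a specific speed/duplex setting.
 * 1000 Mbps is only supported at full duplex and is reached via
 * autonegotiation advertising 1000BASE-T full only. */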
5441 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5442 {
5443         struct e1000_mac_info *mac = &adapter->hw.mac;
5444
5445         mac->autoneg = 0;
5446
5447         switch (spddplx) {
5448         case SPEED_10 + DUPLEX_HALF:
5449                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5450                 break;
5451         case SPEED_10 + DUPLEX_FULL:
5452                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5453                 break;
5454         case SPEED_100 + DUPLEX_HALF:
5455                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5456                 break;
5457         case SPEED_100 + DUPLEX_FULL:
5458                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5459                 break;
5460         case SPEED_1000 + DUPLEX_FULL:
5461                 mac->autoneg = 1;
5462                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5463                 break;
5464         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5465         default:
5466                 dev_err(&adapter->pdev->dev,
5467                         "Unsupported Speed/Duplex configuration\n");
5468                 return -EINVAL;
5469         }
5470         return 0;
5471 }
5472
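/* __igb_shutdown - common suspend/shutdown path.
 * Stops the interface, programs the wake-up filters (WUC/WUFC) when Wake on
 * LAN is configured, and reports through @enable_wake whether the caller
 * should arm PME before powering the device down. */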
5473 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5474 {
5475         struct net_device *netdev = pci_get_drvdata(pdev);
5476         struct igb_adapter *adapter = netdev_priv(netdev);
5477         struct e1000_hw *hw = &adapter->hw;
5478         u32 ctrl, rctl, status;
5479         u32 wufc = adapter->wol;
5480 #ifdef CONFIG_PM
5481         int retval = 0;
5482 #endif
5483
5484         netif_device_detach(netdev);
5485
5486         if (netif_running(netdev))
5487                 igb_close(netdev);
5488
5489         igb_clear_interrupt_scheme(adapter);
5490
5491 #ifdef CONFIG_PM
5492         retval = pci_save_state(pdev);
5493         if (retval)
5494                 return retval;
5495 #endif
5496
5497         status = rd32(E1000_STATUS);
5498         if (status & E1000_STATUS_LU)
5499                 wufc &= ~E1000_WUFC_LNKC;
5500
5501         if (wufc) {
5502                 igb_setup_rctl(adapter);
5503                 igb_set_rx_mode(netdev);
5504
5505                 /* turn on all-multi mode if wake on multicast is enabled */
5506                 if (wufc & E1000_WUFC_MC) {
5507                         rctl = rd32(E1000_RCTL);
5508                         rctl |= E1000_RCTL_MPE;
5509                         wr32(E1000_RCTL, rctl);
5510                 }
5511
5512                 ctrl = rd32(E1000_CTRL);
5513                 /* advertise wake from D3Cold */
5514                 #define E1000_CTRL_ADVD3WUC 0x00100000
5515                 /* phy power management enable */
5516                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5517                 ctrl |= E1000_CTRL_ADVD3WUC;
5518                 wr32(E1000_CTRL, ctrl);
5519
5520                 /* Allow time for pending master requests to run */
5521                 igb_disable_pcie_master(&adapter->hw);
5522
5523                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5524                 wr32(E1000_WUFC, wufc);
5525         } else {
5526                 wr32(E1000_WUC, 0);
5527                 wr32(E1000_WUFC, 0);
5528         }
5529
5530         *enable_wake = wufc || adapter->en_mng_pt;
5531         if (!*enable_wake)
5532                 igb_shutdown_serdes_link_82575(hw);
5533
5534         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5535          * would have already happened in close and is redundant. */
5536         igb_release_hw_control(adapter);
5537
5538         pci_disable_device(pdev);
5539
5540         return 0;
5541 }
5542
5543 #ifdef CONFIG_PM
5544 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5545 {
5546         int retval;
5547         bool wake;
5548
5549         retval = __igb_shutdown(pdev, &wake);
5550         if (retval)
5551                 return retval;
5552
5553         if (wake) {
5554                 pci_prepare_to_sleep(pdev);
5555         } else {
5556                 pci_wake_from_d3(pdev, false);
5557                 pci_set_power_state(pdev, PCI_D3hot);
5558         }
5559
5560         return 0;
5561 }
5562
5563 static int igb_resume(struct pci_dev *pdev)
5564 {
5565         struct net_device *netdev = pci_get_drvdata(pdev);
5566         struct igb_adapter *adapter = netdev_priv(netdev);
5567         struct e1000_hw *hw = &adapter->hw;
5568         u32 err;
5569
5570         pci_set_power_state(pdev, PCI_D0);
5571         pci_restore_state(pdev);
5572
5573         err = pci_enable_device_mem(pdev);
5574         if (err) {
5575                 dev_err(&pdev->dev,
5576                         "igb: Cannot enable PCI device from suspend\n");
5577                 return err;
5578         }
5579         pci_set_master(pdev);
5580
5581         pci_enable_wake(pdev, PCI_D3hot, 0);
5582         pci_enable_wake(pdev, PCI_D3cold, 0);
5583
5584         if (igb_init_interrupt_scheme(adapter)) {
5585                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5586                 return -ENOMEM;
5587         }
5588
5589         /* e1000_power_up_phy(adapter); */
5590
5591         igb_reset(adapter);
5592
5593         /* let the f/w know that the h/w is now under the control of the
5594          * driver. */
5595         igb_get_hw_control(adapter);
5596
5597         wr32(E1000_WUS, ~0);
5598
5599         if (netif_running(netdev)) {
5600                 err = igb_open(netdev);
5601                 if (err)
5602                         return err;
5603         }
5604
5605         netif_device_attach(netdev);
5606
5607         return 0;
5608 }
5609 #endif
5610
5611 static void igb_shutdown(struct pci_dev *pdev)
5612 {
5613         bool wake;
5614
5615         __igb_shutdown(pdev, &wake);
5616
5617         if (system_state == SYSTEM_POWER_OFF) {
5618                 pci_wake_from_d3(pdev, wake);
5619                 pci_set_power_state(pdev, PCI_D3hot);
5620         }
5621 }
5622
5623 #ifdef CONFIG_NET_POLL_CONTROLLER
5624 /*
5625  * Polling 'interrupt' - used by things like netconsole to send skbs
5626  * without having to re-enable interrupts. It's not called while
5627  * the interrupt routine is executing.
5628  */
5629 static void igb_netpoll(struct net_device *netdev)
5630 {
5631         struct igb_adapter *adapter = netdev_priv(netdev);
5632         struct e1000_hw *hw = &adapter->hw;
5633         int i;
5634
5635         if (!adapter->msix_entries) {
5636                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5637                 igb_irq_disable(adapter);
5638                 napi_schedule(&q_vector->napi);
5639                 return;
5640         }
5641
5642         for (i = 0; i < adapter->num_q_vectors; i++) {
5643                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5644                 wr32(E1000_EIMC, q_vector->eims_value);
5645                 napi_schedule(&q_vector->napi);
5646         }
5647 }
5648 #endif /* CONFIG_NET_POLL_CONTROLLER */
5649
5650 /**
5651  * igb_io_error_detected - called when PCI error is detected
5652  * @pdev: Pointer to PCI device
5653  * @state: The current pci connection state
5654  *
5655  * This function is called after a PCI bus error affecting
5656  * this device has been detected.
5657  */
5658 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5659                                               pci_channel_state_t state)
5660 {
5661         struct net_device *netdev = pci_get_drvdata(pdev);
5662         struct igb_adapter *adapter = netdev_priv(netdev);
5663
5664         netif_device_detach(netdev);
5665
5666         if (state == pci_channel_io_perm_failure)
5667                 return PCI_ERS_RESULT_DISCONNECT;
5668
5669         if (netif_running(netdev))
5670                 igb_down(adapter);
5671         pci_disable_device(pdev);
5672
5673                 /* Request a slot reset. */
5674         return PCI_ERS_RESULT_NEED_RESET;
5675 }
5676
5677 /**
5678  * igb_io_slot_reset - called after the pci bus has been reset.
5679  * @pdev: Pointer to PCI device
5680  *
5681  * Restart the card from scratch, as if from a cold boot. Implementation
5682  * resembles the first half of the igb_resume routine.
5683  */
5684 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5685 {
5686         struct net_device *netdev = pci_get_drvdata(pdev);
5687         struct igb_adapter *adapter = netdev_priv(netdev);
5688         struct e1000_hw *hw = &adapter->hw;
5689         pci_ers_result_t result;
5690         int err;
5691
5692         if (pci_enable_device_mem(pdev)) {
5693                 dev_err(&pdev->dev,
5694                         "Cannot re-enable PCI device after reset.\n");
5695                 result = PCI_ERS_RESULT_DISCONNECT;
5696         } else {
5697                 pci_set_master(pdev);
5698                 pci_restore_state(pdev);
5699
5700                 pci_enable_wake(pdev, PCI_D3hot, 0);
5701                 pci_enable_wake(pdev, PCI_D3cold, 0);
5702
5703                 igb_reset(adapter);
5704                 wr32(E1000_WUS, ~0);
5705                 result = PCI_ERS_RESULT_RECOVERED;
5706         }
5707
5708         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5709         if (err) {
5710                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5711                         "failed 0x%x\n", err);
5712                 /* non-fatal, continue */
5713         }
5714
5715         return result;
5716 }
5717
5718 /**
5719  * igb_io_resume - called when traffic can start flowing again.
5720  * @pdev: Pointer to PCI device
5721  *
5722  * This callback is called when the error recovery driver tells us that
5723  * it's OK to resume normal operation. Implementation resembles the
5724  * second half of the igb_resume routine.
5725  */
5726 static void igb_io_resume(struct pci_dev *pdev)
5727 {
5728         struct net_device *netdev = pci_get_drvdata(pdev);
5729         struct igb_adapter *adapter = netdev_priv(netdev);
5730
5731         if (netif_running(netdev)) {
5732                 if (igb_up(adapter)) {
5733                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5734                         return;
5735                 }
5736         }
5737
5738         netif_device_attach(netdev);
5739
5740         /* let the f/w know that the h/w is now under the control of the
5741          * driver. */
5742         igb_get_hw_control(adapter);
5743 }
5744
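/**
 * igb_rar_set_qsel - write a MAC address and pool select into a RAR entry
 * @adapter: board private structure
 * @addr: MAC address to program (network byte order)
 * @index: receive address register (RAR) entry to write
 * @qsel: pool/queue to associate with the address
 *
 * Programs RAL/RAH for the given entry, marks it valid and encodes the
 * pool selection so frames matching the address are steered to the
 * requested pool (used here for per-VF MAC addresses).
 */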
5745 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5746                              u8 qsel)
5747 {
5748         u32 rar_low, rar_high;
5749         struct e1000_hw *hw = &adapter->hw;
5750
5751         /* HW expects these in little endian so we reverse the byte order
5752          * from network order (big endian) to little endian
5753          */
5754         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5755                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5756         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5757
5758         /* Indicate to hardware the Address is Valid. */
5759         rar_high |= E1000_RAH_AV;
5760
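        /* 82575 takes the pool select as a numeric value in the RAH pool
         * field, whereas later parts treat it as a one-bit-per-pool mask,
         * so the queue select is encoded differently by MAC type. */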
5761         if (hw->mac.type == e1000_82575)
5762                 rar_high |= E1000_RAH_POOL_1 * qsel;
5763         else
5764                 rar_high |= E1000_RAH_POOL_1 << qsel;
5765
5766         wr32(E1000_RAL(index), rar_low);
5767         wrfl();
5768         wr32(E1000_RAH(index), rar_high);
5769         wrfl();
5770 }
5771
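/**
 * igb_set_vf_mac - program the MAC address assigned to a virtual function
 * @adapter: board private structure
 * @vf: VF index the address belongs to
 * @mac_addr: MAC address to assign
 *
 * Caches the address in the VF data and writes it to the receive address
 * register reserved for this VF at the end of the RAR table.
 */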
5772 static int igb_set_vf_mac(struct igb_adapter *adapter,
5773                           int vf, unsigned char *mac_addr)
5774 {
5775         struct e1000_hw *hw = &adapter->hw;
5776         /* VF MAC addresses start at the end of the receive addresses and
5777          * move towards the first, so a collision should not be possible */
5778         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5779
5780         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5781
5782         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5783
5784         return 0;
5785 }
5786
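/**
 * igb_vmm_control - configure VM-to-VM packet handling
 * @adapter: board private structure
 *
 * Enables VLAN tag stripping on replicated packets, tells the hardware
 * that the MAC is inserting VLAN tags, and switches VMDq loopback and
 * replication on or off depending on whether any VFs are allocated.
 * The 82575 does not support replication, so it is skipped entirely.
 */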
5787 static void igb_vmm_control(struct igb_adapter *adapter)
5788 {
5789         struct e1000_hw *hw = &adapter->hw;
5790         u32 reg;
5791
5792         /* replication is not supported for 82575 */
5793         if (hw->mac.type == e1000_82575)
5794                 return;
5795
5796         /* enable replication vlan tag stripping */
5797         reg = rd32(E1000_RPLOLR);
5798         reg |= E1000_RPLOLR_STRVLAN;
5799         wr32(E1000_RPLOLR, reg);
5800
5801         /* notify HW that the MAC is adding vlan tags */
5802         reg = rd32(E1000_DTXCTL);
5803         reg |= E1000_DTXCTL_VLAN_ADDED;
5804         wr32(E1000_DTXCTL, reg);
5805
5806         if (adapter->vfs_allocated_count) {
5807                 igb_vmdq_set_loopback_pf(hw, true);
5808                 igb_vmdq_set_replication_pf(hw, true);
5809         } else {
5810                 igb_vmdq_set_loopback_pf(hw, false);
5811                 igb_vmdq_set_replication_pf(hw, false);
5812         }
5813 }
5814
5815 /* igb_main.c */