igb: make tx ring map and free functionality non-static
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
129 static void igb_vmm_control(struct igb_adapter *);
130 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
131 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
132
133 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
134 {
135         u32 reg_data;
136
137         reg_data = rd32(E1000_VMOLR(vfn));
138         reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
139                     E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
140                     E1000_VMOLR_AUPE |   /* Accept untagged packets */
141                     E1000_VMOLR_STRVLAN; /* Strip vlan tags */
142         wr32(E1000_VMOLR(vfn), reg_data);
143 }
144
145 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
146                                  int vfn)
147 {
148         struct e1000_hw *hw = &adapter->hw;
149         u32 vmolr;
150
151         /* if it isn't the PF, check whether the VF has VLANs enabled
152          * and increase the size to allow for a VLAN tag */
153         if (vfn < adapter->vfs_allocated_count &&
154             adapter->vf_data[vfn].vlans_enabled)
155                 size += VLAN_TAG_SIZE;
156
157         vmolr = rd32(E1000_VMOLR(vfn));
158         vmolr &= ~E1000_VMOLR_RLPML_MASK;
159         vmolr |= size | E1000_VMOLR_LPE;
160         wr32(E1000_VMOLR(vfn), vmolr);
161
162         return 0;
163 }
164
165 #ifdef CONFIG_PM
166 static int igb_suspend(struct pci_dev *, pm_message_t);
167 static int igb_resume(struct pci_dev *);
168 #endif
169 static void igb_shutdown(struct pci_dev *);
170 #ifdef CONFIG_IGB_DCA
171 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
172 static struct notifier_block dca_notifier = {
173         .notifier_call  = igb_notify_dca,
174         .next           = NULL,
175         .priority       = 0
176 };
177 #endif
178 #ifdef CONFIG_NET_POLL_CONTROLLER
179 /* for netdump / net console */
180 static void igb_netpoll(struct net_device *);
181 #endif
182 #ifdef CONFIG_PCI_IOV
183 static unsigned int max_vfs = 0;
184 module_param(max_vfs, uint, 0);
185 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
186                  "per physical function");
187 #endif /* CONFIG_PCI_IOV */
188
189 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
190                      pci_channel_state_t);
191 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
192 static void igb_io_resume(struct pci_dev *);
193
194 static struct pci_error_handlers igb_err_handler = {
195         .error_detected = igb_io_error_detected,
196         .slot_reset = igb_io_slot_reset,
197         .resume = igb_io_resume,
198 };
199
200
201 static struct pci_driver igb_driver = {
202         .name     = igb_driver_name,
203         .id_table = igb_pci_tbl,
204         .probe    = igb_probe,
205         .remove   = __devexit_p(igb_remove),
206 #ifdef CONFIG_PM
207         /* Power Management Hooks */
208         .suspend  = igb_suspend,
209         .resume   = igb_resume,
210 #endif
211         .shutdown = igb_shutdown,
212         .err_handler = &igb_err_handler
213 };
214
215 static int global_quad_port_a; /* global quad port a indication */
216
217 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
218 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
219 MODULE_LICENSE("GPL");
220 MODULE_VERSION(DRV_VERSION);
221
222 /**
223  * Scale the NIC clock cycle by a large factor so that
224  * relatively small clock corrections can be added or
225  * subtracted at each clock tick. The drawbacks of a
226  * large factor are a) that the clock register overflows
227  * more quickly (not such a big deal) and b) that the
228  * increment per tick has to fit into 24 bits.
229  *
230  * Note that
231  *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
232  *             IGB_TSYNC_SCALE
233  *   TIMINCA += TIMINCA * adjustment [ppm] / 1e9
234  *
235  * The base scale factor is intentionally a power of two
236  * so that the division in &struct timecounter can be done with
237  * a shift.
238  */
239 #define IGB_TSYNC_SHIFT (19)
240 #define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
241
242 /**
243  * The duration of one clock cycle of the NIC.
244  *
245  * @todo This hard-coded value is part of the specification and might change
246  * in future hardware revisions. Add revision check.
247  */
248 #define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
249
250 #if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
251 # error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
252 #endif
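/*
 * Worked example (illustrative annotation, not from the original source):
 * with the values above,
 *
 *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE
 *           = 16 * (1 << 19) = 8388608 = 0x800000
 *
 * which is below the 24-bit limit (1 << 24 = 16777216) enforced by the
 * compile-time check above; a 1 ppm correction then adds roughly
 * 8388608 / 1000000 ~= 8 counts per increment.
 */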
253
254 /**
255  * igb_read_clock - read raw cycle counter (to be used by time counter)
256  */
257 static cycle_t igb_read_clock(const struct cyclecounter *tc)
258 {
259         struct igb_adapter *adapter =
260                 container_of(tc, struct igb_adapter, cycles);
261         struct e1000_hw *hw = &adapter->hw;
262         u64 stamp;
263
264         stamp =  rd32(E1000_SYSTIML);
265         stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
266
267         return stamp;
268 }
269
270 #ifdef DEBUG
271 /**
272  * igb_get_hw_dev_name - return device name string
273  * used by hardware layer to print debugging information
274  **/
275 char *igb_get_hw_dev_name(struct e1000_hw *hw)
276 {
277         struct igb_adapter *adapter = hw->back;
278         return adapter->netdev->name;
279 }
280
281 /**
282  * igb_get_time_str - format current NIC and system time as string
283  */
284 static char *igb_get_time_str(struct igb_adapter *adapter,
285                               char buffer[160])
286 {
287         cycle_t hw = adapter->cycles.read(&adapter->cycles);
288         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
289         struct timespec sys;
290         struct timespec delta;
291         getnstimeofday(&sys);
292
293         delta = timespec_sub(nic, sys);
294
295         sprintf(buffer,
296                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
297                 hw,
298                 (long)nic.tv_sec, nic.tv_nsec,
299                 (long)sys.tv_sec, sys.tv_nsec,
300                 (long)delta.tv_sec, delta.tv_nsec);
301
302         return buffer;
303 }
304 #endif
305
306 /**
307  * igb_init_module - Driver Registration Routine
308  *
309  * igb_init_module is the first routine called when the driver is
310  * loaded. All it does is register with the PCI subsystem.
311  **/
312 static int __init igb_init_module(void)
313 {
314         int ret;
315         printk(KERN_INFO "%s - version %s\n",
316                igb_driver_string, igb_driver_version);
317
318         printk(KERN_INFO "%s\n", igb_copyright);
319
320         global_quad_port_a = 0;
321
322 #ifdef CONFIG_IGB_DCA
323         dca_register_notify(&dca_notifier);
324 #endif
325
326         ret = pci_register_driver(&igb_driver);
327         return ret;
328 }
329
330 module_init(igb_init_module);
331
332 /**
333  * igb_exit_module - Driver Exit Cleanup Routine
334  *
335  * igb_exit_module is called just before the driver is removed
336  * from memory.
337  **/
338 static void __exit igb_exit_module(void)
339 {
340 #ifdef CONFIG_IGB_DCA
341         dca_unregister_notify(&dca_notifier);
342 #endif
343         pci_unregister_driver(&igb_driver);
344 }
345
346 module_exit(igb_exit_module);
347
348 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
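/*
 * Illustrative expansion of Q_IDX_82576 (annotation, not part of the
 * original source): software queue i maps to register offsets
 * 0, 8, 1, 9, 2, 10, ... so PF queues interleave with the queue pairs
 * reserved for each VF:
 *
 *   Q_IDX_82576(0) = ((0 & 0x1) << 3) + (0 >> 1) = 0
 *   Q_IDX_82576(1) = ((1 & 0x1) << 3) + (1 >> 1) = 8
 *   Q_IDX_82576(2) = ((2 & 0x1) << 3) + (2 >> 1) = 1
 *   Q_IDX_82576(3) = ((3 & 0x1) << 3) + (3 >> 1) = 9
 */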
349 /**
350  * igb_cache_ring_register - Descriptor ring to register mapping
351  * @adapter: board private structure to initialize
352  *
353  * Once we know the feature-set enabled for the device, we'll cache
354  * the register offset the descriptor ring is assigned to.
355  **/
356 static void igb_cache_ring_register(struct igb_adapter *adapter)
357 {
358         int i;
359         u32 rbase_offset = adapter->vfs_allocated_count;
360
361         switch (adapter->hw.mac.type) {
362         case e1000_82576:
363                 /* The queues are allocated for virtualization such that VF 0
364                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
365                  * In order to avoid collision we start at the first free queue
366                  * and continue consuming queues in the same sequence
367                  */
368                 for (i = 0; i < adapter->num_rx_queues; i++)
369                         adapter->rx_ring[i].reg_idx = rbase_offset +
370                                                       Q_IDX_82576(i);
371                 for (i = 0; i < adapter->num_tx_queues; i++)
372                         adapter->tx_ring[i].reg_idx = rbase_offset +
373                                                       Q_IDX_82576(i);
374                 break;
375         case e1000_82575:
376         default:
377                 for (i = 0; i < adapter->num_rx_queues; i++)
378                         adapter->rx_ring[i].reg_idx = i;
379                 for (i = 0; i < adapter->num_tx_queues; i++)
380                         adapter->tx_ring[i].reg_idx = i;
381                 break;
382         }
383 }
384
385 static void igb_free_queues(struct igb_adapter *adapter)
386 {
387         kfree(adapter->tx_ring);
388         kfree(adapter->rx_ring);
389
390         adapter->tx_ring = NULL;
391         adapter->rx_ring = NULL;
392
393         adapter->num_rx_queues = 0;
394         adapter->num_tx_queues = 0;
395 }
396
397 /**
398  * igb_alloc_queues - Allocate memory for all rings
399  * @adapter: board private structure to initialize
400  *
401  * We allocate one ring per queue at run-time since we don't know the
402  * number of queues at compile-time.
403  **/
404 static int igb_alloc_queues(struct igb_adapter *adapter)
405 {
406         int i;
407
408         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
409                                    sizeof(struct igb_ring), GFP_KERNEL);
410         if (!adapter->tx_ring)
411                 goto err;
412
413         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
414                                    sizeof(struct igb_ring), GFP_KERNEL);
415         if (!adapter->rx_ring)
416                 goto err;
417
418         for (i = 0; i < adapter->num_tx_queues; i++) {
419                 struct igb_ring *ring = &(adapter->tx_ring[i]);
420                 ring->count = adapter->tx_ring_count;
421                 ring->queue_index = i;
422                 ring->pdev = adapter->pdev;
423                 ring->netdev = adapter->netdev;
424                 /* For 82575, context index must be unique per ring. */
425                 if (adapter->hw.mac.type == e1000_82575)
426                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
427         }
428
429         for (i = 0; i < adapter->num_rx_queues; i++) {
430                 struct igb_ring *ring = &(adapter->rx_ring[i]);
431                 ring->count = adapter->rx_ring_count;
432                 ring->queue_index = i;
433                 ring->pdev = adapter->pdev;
434                 ring->netdev = adapter->netdev;
435                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
436                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
437                 /* set flag indicating ring supports SCTP checksum offload */
438                 if (adapter->hw.mac.type >= e1000_82576)
439                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
440         }
441
442         igb_cache_ring_register(adapter);
443
444         return 0;
445
446 err:
447         igb_free_queues(adapter);
448
449         return -ENOMEM;
450 }
451
452 #define IGB_N0_QUEUE -1
453 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
454 {
455         u32 msixbm = 0;
456         struct igb_adapter *adapter = q_vector->adapter;
457         struct e1000_hw *hw = &adapter->hw;
458         u32 ivar, index;
459         int rx_queue = IGB_N0_QUEUE;
460         int tx_queue = IGB_N0_QUEUE;
461
462         if (q_vector->rx_ring)
463                 rx_queue = q_vector->rx_ring->reg_idx;
464         if (q_vector->tx_ring)
465                 tx_queue = q_vector->tx_ring->reg_idx;
466
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 /* The 82575 assigns vectors using a bitmask, which matches the
470                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
471                    or more queues to a vector, we write the appropriate bits
472                    into the MSIXBM register for that vector. */
473                 if (rx_queue > IGB_N0_QUEUE)
474                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
475                 if (tx_queue > IGB_N0_QUEUE)
476                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
477                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
478                 q_vector->eims_value = msixbm;
479                 break;
480         case e1000_82576:
481                 /* 82576 uses a table-based method for assigning vectors.
482                    Each queue has a single entry in the table to which we write
483                    a vector number along with a "valid" bit.  Sadly, the layout
484                    of the table is somewhat counterintuitive. */
485                 if (rx_queue > IGB_N0_QUEUE) {
486                         index = (rx_queue & 0x7);
487                         ivar = array_rd32(E1000_IVAR0, index);
488                         if (rx_queue < 8) {
489                                 /* vector goes into low byte of register */
490                                 ivar = ivar & 0xFFFFFF00;
491                                 ivar |= msix_vector | E1000_IVAR_VALID;
492                         } else {
493                                 /* vector goes into third byte of register */
494                                 ivar = ivar & 0xFF00FFFF;
495                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
496                         }
497                         array_wr32(E1000_IVAR0, index, ivar);
498                 }
499                 if (tx_queue > IGB_N0_QUEUE) {
500                         index = (tx_queue & 0x7);
501                         ivar = array_rd32(E1000_IVAR0, index);
502                         if (tx_queue < 8) {
503                                 /* vector goes into second byte of register */
504                                 ivar = ivar & 0xFFFF00FF;
505                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
506                         } else {
507                                 /* vector goes into high byte of register */
508                                 ivar = ivar & 0x00FFFFFF;
509                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
510                         }
511                         array_wr32(E1000_IVAR0, index, ivar);
512                 }
513                 q_vector->eims_value = 1 << msix_vector;
514                 break;
515         default:
516                 BUG();
517                 break;
518         }
519 }
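/*
 * Sketch of the 82576 IVAR layout implied by igb_assign_vector() above
 * (annotation only, derived from the masks and shifts in the code).
 * Each 32-bit IVAR0 entry n packs four vector fields, each written as
 * (msix_vector | E1000_IVAR_VALID):
 *
 *   bits  7:0   rx queue n
 *   bits 15:8   tx queue n
 *   bits 23:16  rx queue n + 8
 *   bits 31:24  tx queue n + 8
 */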
520
521 /**
522  * igb_configure_msix - Configure MSI-X hardware
523  *
524  * igb_configure_msix sets up the hardware to properly
525  * generate MSI-X interrupts.
526  **/
527 static void igb_configure_msix(struct igb_adapter *adapter)
528 {
529         u32 tmp;
530         int i, vector = 0;
531         struct e1000_hw *hw = &adapter->hw;
532
533         adapter->eims_enable_mask = 0;
534
535         /* set vector for other causes, i.e. link changes */
536         switch (hw->mac.type) {
537         case e1000_82575:
538                 tmp = rd32(E1000_CTRL_EXT);
539                 /* enable MSI-X PBA support */
540                 tmp |= E1000_CTRL_EXT_PBA_CLR;
541
542                 /* Auto-Mask interrupts upon ICR read. */
543                 tmp |= E1000_CTRL_EXT_EIAME;
544                 tmp |= E1000_CTRL_EXT_IRCA;
545
546                 wr32(E1000_CTRL_EXT, tmp);
547
548                 /* enable msix_other interrupt */
549                 array_wr32(E1000_MSIXBM(0), vector++,
550                                       E1000_EIMS_OTHER);
551                 adapter->eims_other = E1000_EIMS_OTHER;
552
553                 break;
554
555         case e1000_82576:
556                 /* Turn on MSI-X capability first, or our settings
557                  * won't stick.  And it will take days to debug. */
558                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
559                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
560                                 E1000_GPIE_NSICR);
561
562                 /* enable msix_other interrupt */
563                 adapter->eims_other = 1 << vector;
564                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
565
566                 wr32(E1000_IVAR_MISC, tmp);
567                 break;
568         default:
569                 /* do nothing, since nothing else supports MSI-X */
570                 break;
571         } /* switch (hw->mac.type) */
572
573         adapter->eims_enable_mask |= adapter->eims_other;
574
575         for (i = 0; i < adapter->num_q_vectors; i++) {
576                 struct igb_q_vector *q_vector = adapter->q_vector[i];
577                 igb_assign_vector(q_vector, vector++);
578                 adapter->eims_enable_mask |= q_vector->eims_value;
579         }
580
581         wrfl();
582 }
583
584 /**
585  * igb_request_msix - Initialize MSI-X interrupts
586  *
587  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
588  * kernel.
589  **/
590 static int igb_request_msix(struct igb_adapter *adapter)
591 {
592         struct net_device *netdev = adapter->netdev;
593         struct e1000_hw *hw = &adapter->hw;
594         int i, err = 0, vector = 0;
595
596         err = request_irq(adapter->msix_entries[vector].vector,
597                           &igb_msix_other, 0, netdev->name, adapter);
598         if (err)
599                 goto out;
600         vector++;
601
602         for (i = 0; i < adapter->num_q_vectors; i++) {
603                 struct igb_q_vector *q_vector = adapter->q_vector[i];
604
605                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
606
607                 if (q_vector->rx_ring && q_vector->tx_ring)
608                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
609                                 q_vector->rx_ring->queue_index);
610                 else if (q_vector->tx_ring)
611                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
612                                 q_vector->tx_ring->queue_index);
613                 else if (q_vector->rx_ring)
614                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
615                                 q_vector->rx_ring->queue_index);
616                 else
617                         sprintf(q_vector->name, "%s-unused", netdev->name);
618
619                 err = request_irq(adapter->msix_entries[vector].vector,
620                                   &igb_msix_ring, 0, q_vector->name,
621                                   q_vector);
622                 if (err)
623                         goto out;
624                 vector++;
625         }
626
627         igb_configure_msix(adapter);
628         return 0;
629 out:
630         return err;
631 }
632
633 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
634 {
635         if (adapter->msix_entries) {
636                 pci_disable_msix(adapter->pdev);
637                 kfree(adapter->msix_entries);
638                 adapter->msix_entries = NULL;
639         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
640                 pci_disable_msi(adapter->pdev);
641         }
642 }
643
644 /**
645  * igb_free_q_vectors - Free memory allocated for interrupt vectors
646  * @adapter: board private structure to initialize
647  *
648  * This function frees the memory allocated to the q_vectors.  In addition, if
649  * NAPI is enabled, it will delete any references to the NAPI struct prior
650  * to freeing the q_vector.
651  **/
652 static void igb_free_q_vectors(struct igb_adapter *adapter)
653 {
654         int v_idx;
655
656         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
657                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
658                 adapter->q_vector[v_idx] = NULL;
659                 netif_napi_del(&q_vector->napi);
660                 kfree(q_vector);
661         }
662         adapter->num_q_vectors = 0;
663 }
664
665 /**
666  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
667  *
668  * This function resets the device so that it has no rx queues, tx queues, or
669  * MSI-X interrupts allocated.
670  */
671 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
672 {
673         igb_free_queues(adapter);
674         igb_free_q_vectors(adapter);
675         igb_reset_interrupt_capability(adapter);
676 }
677
678 /**
679  * igb_set_interrupt_capability - set MSI or MSI-X if supported
680  *
681  * Attempt to configure interrupts using the best available
682  * capabilities of the hardware and kernel.
683  **/
684 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
685 {
686         int err;
687         int numvecs, i;
688
689         /* Number of supported queues. */
690         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
691         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
692
693         /* start with one vector for every rx queue */
694         numvecs = adapter->num_rx_queues;
695
696         /* if tx handler is separate, add 1 for every tx queue */
697         numvecs += adapter->num_tx_queues;
698
699         /* store the number of vectors reserved for queues */
700         adapter->num_q_vectors = numvecs;
701
702         /* add 1 vector for link status interrupts */
703         numvecs++;
704         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
705                                         GFP_KERNEL);
706         if (!adapter->msix_entries)
707                 goto msi_only;
708
709         for (i = 0; i < numvecs; i++)
710                 adapter->msix_entries[i].entry = i;
711
712         err = pci_enable_msix(adapter->pdev,
713                               adapter->msix_entries,
714                               numvecs);
715         if (err == 0)
716                 goto out;
717
718         igb_reset_interrupt_capability(adapter);
719
720         /* If we can't do MSI-X, try MSI */
721 msi_only:
722 #ifdef CONFIG_PCI_IOV
723         /* disable SR-IOV for non MSI-X configurations */
724         if (adapter->vf_data) {
725                 struct e1000_hw *hw = &adapter->hw;
726                 /* disable iov and allow time for transactions to clear */
727                 pci_disable_sriov(adapter->pdev);
728                 msleep(500);
729
730                 kfree(adapter->vf_data);
731                 adapter->vf_data = NULL;
732                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
733                 msleep(100);
734                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
735         }
736 #endif
737         adapter->num_rx_queues = 1;
738         adapter->num_tx_queues = 1;
739         adapter->num_q_vectors = 1;
740         if (!pci_enable_msi(adapter->pdev))
741                 adapter->flags |= IGB_FLAG_HAS_MSI;
742 out:
743         /* Notify the stack of the (possibly) reduced Tx Queue count. */
744         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
745         return;
746 }
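/*
 * Worked example (illustrative, assuming a 4-CPU system whose hardware
 * supports at least four rx and four tx queues, with MSI-X available):
 * num_rx_queues = num_tx_queues = 4, so numvecs = 4 + 4 + 1 = 9 -- one
 * vector per queue plus one for link-status and other causes.  If
 * pci_enable_msix() cannot grant all 9 entries, the code above falls
 * back to a single rx/tx queue pair with MSI.
 */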
747
748 /**
749  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
750  * @adapter: board private structure to initialize
751  *
752  * We allocate one q_vector per queue interrupt.  If allocation fails we
753  * return -ENOMEM.
754  **/
755 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
756 {
757         struct igb_q_vector *q_vector;
758         struct e1000_hw *hw = &adapter->hw;
759         int v_idx;
760
761         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
762                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
763                 if (!q_vector)
764                         goto err_out;
765                 q_vector->adapter = adapter;
766                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
767                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
768                 q_vector->itr_val = IGB_START_ITR;
769                 q_vector->set_itr = 1;
770                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
771                 adapter->q_vector[v_idx] = q_vector;
772         }
773         return 0;
774
775 err_out:
776         while (v_idx) {
777                 v_idx--;
778                 q_vector = adapter->q_vector[v_idx];
779                 netif_napi_del(&q_vector->napi);
780                 kfree(q_vector);
781                 adapter->q_vector[v_idx] = NULL;
782         }
783         return -ENOMEM;
784 }
785
786 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
787                                       int ring_idx, int v_idx)
788 {
789         struct igb_q_vector *q_vector;
790
791         q_vector = adapter->q_vector[v_idx];
792         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
793         q_vector->rx_ring->q_vector = q_vector;
794         q_vector->itr_val = adapter->itr;
795 }
796
797 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
798                                       int ring_idx, int v_idx)
799 {
800         struct igb_q_vector *q_vector;
801
802         q_vector = adapter->q_vector[v_idx];
803         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
804         q_vector->tx_ring->q_vector = q_vector;
805         q_vector->itr_val = adapter->itr;
806 }
807
808 /**
809  * igb_map_ring_to_vector - maps allocated queues to vectors
810  *
811  * This function maps the recently allocated queues to vectors.
812  **/
813 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
814 {
815         int i;
816         int v_idx = 0;
817
818         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
819             (adapter->num_q_vectors < adapter->num_tx_queues))
820                 return -ENOMEM;
821
822         if (adapter->num_q_vectors >=
823             (adapter->num_rx_queues + adapter->num_tx_queues)) {
824                 for (i = 0; i < adapter->num_rx_queues; i++)
825                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
826                 for (i = 0; i < adapter->num_tx_queues; i++)
827                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
828         } else {
829                 for (i = 0; i < adapter->num_rx_queues; i++) {
830                         if (i < adapter->num_tx_queues)
831                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
832                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
833                 }
834                 for (; i < adapter->num_tx_queues; i++)
835                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
836         }
837         return 0;
838 }
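/*
 * Worked example of the mapping above (illustrative): with 4 rx and 4 tx
 * queues and 8 q_vectors, every ring gets a private vector (rx0->v0 ...
 * rx3->v3, tx0->v4 ... tx3->v7).  With only 4 q_vectors the else branch
 * pairs the rings instead: v0 = {rx0, tx0}, v1 = {rx1, tx1},
 * v2 = {rx2, tx2}, v3 = {rx3, tx3}.
 */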
839
840 /**
841  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
842  *
843  * This function initializes the interrupts and allocates all of the queues.
844  **/
845 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
846 {
847         struct pci_dev *pdev = adapter->pdev;
848         int err;
849
850         igb_set_interrupt_capability(adapter);
851
852         err = igb_alloc_q_vectors(adapter);
853         if (err) {
854                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
855                 goto err_alloc_q_vectors;
856         }
857
858         err = igb_alloc_queues(adapter);
859         if (err) {
860                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
861                 goto err_alloc_queues;
862         }
863
864         err = igb_map_ring_to_vector(adapter);
865         if (err) {
866                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
867                 goto err_map_queues;
868         }
869
870
871         return 0;
872 err_map_queues:
873         igb_free_queues(adapter);
874 err_alloc_queues:
875         igb_free_q_vectors(adapter);
876 err_alloc_q_vectors:
877         igb_reset_interrupt_capability(adapter);
878         return err;
879 }
880
881 /**
882  * igb_request_irq - initialize interrupts
883  *
884  * Attempts to configure interrupts using the best available
885  * capabilities of the hardware and kernel.
886  **/
887 static int igb_request_irq(struct igb_adapter *adapter)
888 {
889         struct net_device *netdev = adapter->netdev;
890         struct pci_dev *pdev = adapter->pdev;
891         struct e1000_hw *hw = &adapter->hw;
892         int err = 0;
893
894         if (adapter->msix_entries) {
895                 err = igb_request_msix(adapter);
896                 if (!err)
897                         goto request_done;
898                 /* fall back to MSI */
899                 igb_clear_interrupt_scheme(adapter);
900                 if (!pci_enable_msi(adapter->pdev))
901                         adapter->flags |= IGB_FLAG_HAS_MSI;
902                 igb_free_all_tx_resources(adapter);
903                 igb_free_all_rx_resources(adapter);
904                 adapter->num_tx_queues = 1;
905                 adapter->num_rx_queues = 1;
906                 adapter->num_q_vectors = 1;
907                 err = igb_alloc_q_vectors(adapter);
908                 if (err) {
909                         dev_err(&pdev->dev,
910                                 "Unable to allocate memory for vectors\n");
911                         goto request_done;
912                 }
913                 err = igb_alloc_queues(adapter);
914                 if (err) {
915                         dev_err(&pdev->dev,
916                                 "Unable to allocate memory for queues\n");
917                         igb_free_q_vectors(adapter);
918                         goto request_done;
919                 }
920                 igb_setup_all_tx_resources(adapter);
921                 igb_setup_all_rx_resources(adapter);
922         } else {
923                 switch (hw->mac.type) {
924                 case e1000_82575:
925                         wr32(E1000_MSIXBM(0),
926                              (E1000_EICR_RX_QUEUE0 |
927                               E1000_EICR_TX_QUEUE0 |
928                               E1000_EIMS_OTHER));
929                         break;
930                 case e1000_82576:
931                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
932                         break;
933                 default:
934                         break;
935                 }
936         }
937
938         if (adapter->flags & IGB_FLAG_HAS_MSI) {
939                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
940                                   netdev->name, adapter);
941                 if (!err)
942                         goto request_done;
943
944                 /* fall back to legacy interrupts */
945                 igb_reset_interrupt_capability(adapter);
946                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
947         }
948
949         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
950                           netdev->name, adapter);
951
952         if (err)
953                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
954                         err);
955
956 request_done:
957         return err;
958 }
959
960 static void igb_free_irq(struct igb_adapter *adapter)
961 {
962         if (adapter->msix_entries) {
963                 int vector = 0, i;
964
965                 free_irq(adapter->msix_entries[vector++].vector, adapter);
966
967                 for (i = 0; i < adapter->num_q_vectors; i++) {
968                         struct igb_q_vector *q_vector = adapter->q_vector[i];
969                         free_irq(adapter->msix_entries[vector++].vector,
970                                  q_vector);
971                 }
972         } else {
973                 free_irq(adapter->pdev->irq, adapter);
974         }
975 }
976
977 /**
978  * igb_irq_disable - Mask off interrupt generation on the NIC
979  * @adapter: board private structure
980  **/
981 static void igb_irq_disable(struct igb_adapter *adapter)
982 {
983         struct e1000_hw *hw = &adapter->hw;
984
985         if (adapter->msix_entries) {
986                 u32 regval = rd32(E1000_EIAM);
987                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
988                 wr32(E1000_EIMC, adapter->eims_enable_mask);
989                 regval = rd32(E1000_EIAC);
990                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
991         }
992
993         wr32(E1000_IAM, 0);
994         wr32(E1000_IMC, ~0);
995         wrfl();
996         synchronize_irq(adapter->pdev->irq);
997 }
998
999 /**
1000  * igb_irq_enable - Enable default interrupt generation settings
1001  * @adapter: board private structure
1002  **/
1003 static void igb_irq_enable(struct igb_adapter *adapter)
1004 {
1005         struct e1000_hw *hw = &adapter->hw;
1006
1007         if (adapter->msix_entries) {
1008                 u32 regval = rd32(E1000_EIAC);
1009                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1010                 regval = rd32(E1000_EIAM);
1011                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1012                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1013                 if (adapter->vfs_allocated_count)
1014                         wr32(E1000_MBVFIMR, 0xFF);
1015                 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
1016                                  E1000_IMS_DOUTSYNC));
1017         } else {
1018                 wr32(E1000_IMS, IMS_ENABLE_MASK);
1019                 wr32(E1000_IAM, IMS_ENABLE_MASK);
1020         }
1021 }
1022
1023 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1024 {
1025         struct net_device *netdev = adapter->netdev;
1026         u16 vid = adapter->hw.mng_cookie.vlan_id;
1027         u16 old_vid = adapter->mng_vlan_id;
1028         if (adapter->vlgrp) {
1029                 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1030                         if (adapter->hw.mng_cookie.status &
1031                                 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1032                                 igb_vlan_rx_add_vid(netdev, vid);
1033                                 adapter->mng_vlan_id = vid;
1034                         } else
1035                                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1036
1037                         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1038                                         (vid != old_vid) &&
1039                             !vlan_group_get_device(adapter->vlgrp, old_vid))
1040                                 igb_vlan_rx_kill_vid(netdev, old_vid);
1041                 } else
1042                         adapter->mng_vlan_id = vid;
1043         }
1044 }
1045
1046 /**
1047  * igb_release_hw_control - release control of the h/w to f/w
1048  * @adapter: address of board private structure
1049  *
1050  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1051  * For ASF and Pass Through versions of f/w this means that the
1052  * driver is no longer loaded.
1053  *
1054  **/
1055 static void igb_release_hw_control(struct igb_adapter *adapter)
1056 {
1057         struct e1000_hw *hw = &adapter->hw;
1058         u32 ctrl_ext;
1059
1060         /* Let firmware take over control of h/w */
1061         ctrl_ext = rd32(E1000_CTRL_EXT);
1062         wr32(E1000_CTRL_EXT,
1063                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1064 }
1065
1066
1067 /**
1068  * igb_get_hw_control - get control of the h/w from f/w
1069  * @adapter: address of board private structure
1070  *
1071  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1072  * For ASF and Pass Through versions of f/w this means that
1073  * the driver is loaded.
1074  *
1075  **/
1076 static void igb_get_hw_control(struct igb_adapter *adapter)
1077 {
1078         struct e1000_hw *hw = &adapter->hw;
1079         u32 ctrl_ext;
1080
1081         /* Let firmware know the driver has taken over */
1082         ctrl_ext = rd32(E1000_CTRL_EXT);
1083         wr32(E1000_CTRL_EXT,
1084                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1085 }
1086
1087 /**
1088  * igb_configure - configure the hardware for RX and TX
1089  * @adapter: private board structure
1090  **/
1091 static void igb_configure(struct igb_adapter *adapter)
1092 {
1093         struct net_device *netdev = adapter->netdev;
1094         int i;
1095
1096         igb_get_hw_control(adapter);
1097         igb_set_rx_mode(netdev);
1098
1099         igb_restore_vlan(adapter);
1100
1101         igb_setup_tctl(adapter);
1102         igb_setup_mrqc(adapter);
1103         igb_setup_rctl(adapter);
1104
1105         igb_configure_tx(adapter);
1106         igb_configure_rx(adapter);
1107
1108         igb_rx_fifo_flush_82575(&adapter->hw);
1109
1110         /* call igb_desc_unused which always leaves
1111          * at least 1 descriptor unused to make sure
1112          * next_to_use != next_to_clean */
1113         for (i = 0; i < adapter->num_rx_queues; i++) {
1114                 struct igb_ring *ring = &adapter->rx_ring[i];
1115                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1116         }
1117
1118
1119         adapter->tx_queue_len = netdev->tx_queue_len;
1120 }
1121
1122
1123 /**
1124  * igb_up - Open the interface and prepare it to handle traffic
1125  * @adapter: board private structure
1126  **/
1127
1128 int igb_up(struct igb_adapter *adapter)
1129 {
1130         struct e1000_hw *hw = &adapter->hw;
1131         int i;
1132
1133         /* hardware has been reset, we need to reload some things */
1134         igb_configure(adapter);
1135
1136         clear_bit(__IGB_DOWN, &adapter->state);
1137
1138         for (i = 0; i < adapter->num_q_vectors; i++) {
1139                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1140                 napi_enable(&q_vector->napi);
1141         }
1142         if (adapter->msix_entries)
1143                 igb_configure_msix(adapter);
1144
1145         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1146
1147         /* Clear any pending interrupts. */
1148         rd32(E1000_ICR);
1149         igb_irq_enable(adapter);
1150
1151         /* notify VFs that reset has been completed */
1152         if (adapter->vfs_allocated_count) {
1153                 u32 reg_data = rd32(E1000_CTRL_EXT);
1154                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1155                 wr32(E1000_CTRL_EXT, reg_data);
1156         }
1157
1158         netif_tx_start_all_queues(adapter->netdev);
1159
1160         /* Fire a link change interrupt to start the watchdog. */
1161         wr32(E1000_ICS, E1000_ICS_LSC);
1162         return 0;
1163 }
1164
1165 void igb_down(struct igb_adapter *adapter)
1166 {
1167         struct e1000_hw *hw = &adapter->hw;
1168         struct net_device *netdev = adapter->netdev;
1169         u32 tctl, rctl;
1170         int i;
1171
1172         /* signal that we're down so the interrupt handler does not
1173          * reschedule our watchdog timer */
1174         set_bit(__IGB_DOWN, &adapter->state);
1175
1176         /* disable receives in the hardware */
1177         rctl = rd32(E1000_RCTL);
1178         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1179         /* flush and sleep below */
1180
1181         netif_tx_stop_all_queues(netdev);
1182
1183         /* disable transmits in the hardware */
1184         tctl = rd32(E1000_TCTL);
1185         tctl &= ~E1000_TCTL_EN;
1186         wr32(E1000_TCTL, tctl);
1187         /* flush both disables and wait for them to finish */
1188         wrfl();
1189         msleep(10);
1190
1191         for (i = 0; i < adapter->num_q_vectors; i++) {
1192                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1193                 napi_disable(&q_vector->napi);
1194         }
1195
1196         igb_irq_disable(adapter);
1197
1198         del_timer_sync(&adapter->watchdog_timer);
1199         del_timer_sync(&adapter->phy_info_timer);
1200
1201         netdev->tx_queue_len = adapter->tx_queue_len;
1202         netif_carrier_off(netdev);
1203
1204         /* record the stats before reset */
1205         igb_update_stats(adapter);
1206
1207         adapter->link_speed = 0;
1208         adapter->link_duplex = 0;
1209
1210         if (!pci_channel_offline(adapter->pdev))
1211                 igb_reset(adapter);
1212         igb_clean_all_tx_rings(adapter);
1213         igb_clean_all_rx_rings(adapter);
1214 #ifdef CONFIG_IGB_DCA
1215
1216         /* since we reset the hardware DCA settings were cleared */
1217         igb_setup_dca(adapter);
1218 #endif
1219 }
1220
1221 void igb_reinit_locked(struct igb_adapter *adapter)
1222 {
1223         WARN_ON(in_interrupt());
1224         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1225                 msleep(1);
1226         igb_down(adapter);
1227         igb_up(adapter);
1228         clear_bit(__IGB_RESETTING, &adapter->state);
1229 }
1230
1231 void igb_reset(struct igb_adapter *adapter)
1232 {
1233         struct e1000_hw *hw = &adapter->hw;
1234         struct e1000_mac_info *mac = &hw->mac;
1235         struct e1000_fc_info *fc = &hw->fc;
1236         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1237         u16 hwm;
1238
1239         /* Repartition PBA for MTUs greater than 9k.
1240          * CTRL.RST is required for this to take effect.
1241          */
1242         switch (mac->type) {
1243         case e1000_82576:
1244                 pba = E1000_PBA_64K;
1245                 break;
1246         case e1000_82575:
1247         default:
1248                 pba = E1000_PBA_34K;
1249                 break;
1250         }
1251
1252         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1253             (mac->type < e1000_82576)) {
1254                 /* adjust PBA for jumbo frames */
1255                 wr32(E1000_PBA, pba);
1256
1257                 /* To maintain wire speed transmits, the Tx FIFO should be
1258                  * large enough to accommodate two full transmit packets,
1259                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1260                  * the Rx FIFO should be large enough to accommodate at least
1261                  * one full receive packet and is similarly rounded up and
1262                  * expressed in KB. */
1263                 pba = rd32(E1000_PBA);
1264                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1265                 tx_space = pba >> 16;
1266                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1267                 pba &= 0xffff;
1268                 /* the tx fifo also stores 16 bytes of information per packet,
1269                  * but we don't count the ethernet FCS because hardware appends it */
1270                 min_tx_space = (adapter->max_frame_size +
1271                                 sizeof(union e1000_adv_tx_desc) -
1272                                 ETH_FCS_LEN) * 2;
1273                 min_tx_space = ALIGN(min_tx_space, 1024);
1274                 min_tx_space >>= 10;
1275                 /* software strips receive CRC, so leave room for it */
1276                 min_rx_space = adapter->max_frame_size;
1277                 min_rx_space = ALIGN(min_rx_space, 1024);
1278                 min_rx_space >>= 10;
1279
1280                 /* If current Tx allocation is less than the min Tx FIFO size,
1281                  * and the min Tx FIFO size is less than the current Rx FIFO
1282                  * allocation, take space away from current Rx allocation */
1283                 if (tx_space < min_tx_space &&
1284                     ((min_tx_space - tx_space) < pba)) {
1285                         pba = pba - (min_tx_space - tx_space);
1286
1287                         /* if short on rx space, rx wins and must trump tx
1288                          * adjustment */
1289                         if (pba < min_rx_space)
1290                                 pba = min_rx_space;
1291                 }
1292                 wr32(E1000_PBA, pba);
1293         }
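        /*
         * Worked example (illustrative, assuming a 9018-byte max frame on
         * an 82575 taking the branch above, with the 16-byte advanced tx
         * descriptor):
         *
         *   min_tx_space = (9018 + 16 - 4) * 2 = 18060
         *                  ALIGN(18060, 1024) = 18432 -> >> 10 -> 18 KB
         *   min_rx_space = ALIGN(9018, 1024)  = 9216  -> >> 10 -> 9 KB
         */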
1294
1295         /* flow control settings */
1296         /* The high water mark must be low enough to fit one full frame
1297          * (or the size used for early receive) above it in the Rx FIFO.
1298          * Set it to the lower of:
1299          * - 90% of the Rx FIFO size, or
1300          * - the full Rx FIFO size minus one full frame */
1301         hwm = min(((pba << 10) * 9 / 10),
1302                         ((pba << 10) - 2 * adapter->max_frame_size));
1303
1304         if (mac->type < e1000_82576) {
1305                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1306                 fc->low_water = fc->high_water - 8;
1307         } else {
1308                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1309                 fc->low_water = fc->high_water - 16;
1310         }
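        /*
         * Worked example (illustrative, assuming pba = 34 KB and a
         * 1522-byte max frame on an 82575):
         *
         *   hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
         *       = min(31334, 31772) = 31334
         *   fc->high_water = 31334 & 0xFFF8 = 31328
         *   fc->low_water  = 31328 - 8     = 31320
         */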
1311         fc->pause_time = 0xFFFF;
1312         fc->send_xon = 1;
1313         fc->current_mode = fc->requested_mode;
1314
1315         /* disable receive for all VFs and wait one second */
1316         if (adapter->vfs_allocated_count) {
1317                 int i;
1318                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1319                         adapter->vf_data[i].clear_to_send = false;
1320
1321                 /* ping all the active vfs to let them know we are going down */
1322                 igb_ping_all_vfs(adapter);
1323
1324                 /* disable transmits and receives */
1325                 wr32(E1000_VFRE, 0);
1326                 wr32(E1000_VFTE, 0);
1327         }
1328
1329         /* Allow time for pending master requests to run */
1330         adapter->hw.mac.ops.reset_hw(&adapter->hw);
1331         wr32(E1000_WUC, 0);
1332
1333         if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1334                 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1335
1336         igb_update_mng_vlan(adapter);
1337
1338         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1339         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1340
1341         igb_reset_adaptive(&adapter->hw);
1342         igb_get_phy_info(&adapter->hw);
1343 }
1344
1345 static const struct net_device_ops igb_netdev_ops = {
1346         .ndo_open               = igb_open,
1347         .ndo_stop               = igb_close,
1348         .ndo_start_xmit         = igb_xmit_frame_adv,
1349         .ndo_get_stats          = igb_get_stats,
1350         .ndo_set_rx_mode        = igb_set_rx_mode,
1351         .ndo_set_multicast_list = igb_set_rx_mode,
1352         .ndo_set_mac_address    = igb_set_mac,
1353         .ndo_change_mtu         = igb_change_mtu,
1354         .ndo_do_ioctl           = igb_ioctl,
1355         .ndo_tx_timeout         = igb_tx_timeout,
1356         .ndo_validate_addr      = eth_validate_addr,
1357         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1358         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1359         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1360 #ifdef CONFIG_NET_POLL_CONTROLLER
1361         .ndo_poll_controller    = igb_netpoll,
1362 #endif
1363 };
1364
1365 /**
1366  * igb_probe - Device Initialization Routine
1367  * @pdev: PCI device information struct
1368  * @ent: entry in igb_pci_tbl
1369  *
1370  * Returns 0 on success, negative on failure
1371  *
1372  * igb_probe initializes an adapter identified by a pci_dev structure.
1373  * The OS initialization, configuring of the adapter private structure,
1374  * and a hardware reset occur.
1375  **/
1376 static int __devinit igb_probe(struct pci_dev *pdev,
1377                                const struct pci_device_id *ent)
1378 {
1379         struct net_device *netdev;
1380         struct igb_adapter *adapter;
1381         struct e1000_hw *hw;
1382         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1383         unsigned long mmio_start, mmio_len;
1384         int err, pci_using_dac;
1385         u16 eeprom_data = 0;
1386         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1387         u32 part_num;
1388
1389         err = pci_enable_device_mem(pdev);
1390         if (err)
1391                 return err;
1392
1393         pci_using_dac = 0;
1394         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1395         if (!err) {
1396                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1397                 if (!err)
1398                         pci_using_dac = 1;
1399         } else {
1400                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1401                 if (err) {
1402                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1403                         if (err) {
1404                                 dev_err(&pdev->dev, "No usable DMA "
1405                                         "configuration, aborting\n");
1406                                 goto err_dma;
1407                         }
1408                 }
1409         }
1410
1411         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1412                                            IORESOURCE_MEM),
1413                                            igb_driver_name);
1414         if (err)
1415                 goto err_pci_reg;
1416
1417         pci_enable_pcie_error_reporting(pdev);
1418
1419         pci_set_master(pdev);
1420         pci_save_state(pdev);
1421
1422         err = -ENOMEM;
1423         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1424                                    IGB_ABS_MAX_TX_QUEUES);
1425         if (!netdev)
1426                 goto err_alloc_etherdev;
1427
1428         SET_NETDEV_DEV(netdev, &pdev->dev);
1429
1430         pci_set_drvdata(pdev, netdev);
1431         adapter = netdev_priv(netdev);
1432         adapter->netdev = netdev;
1433         adapter->pdev = pdev;
1434         hw = &adapter->hw;
1435         hw->back = adapter;
1436         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1437
1438         mmio_start = pci_resource_start(pdev, 0);
1439         mmio_len = pci_resource_len(pdev, 0);
1440
1441         err = -EIO;
1442         hw->hw_addr = ioremap(mmio_start, mmio_len);
1443         if (!hw->hw_addr)
1444                 goto err_ioremap;
1445
1446         netdev->netdev_ops = &igb_netdev_ops;
1447         igb_set_ethtool_ops(netdev);
1448         netdev->watchdog_timeo = 5 * HZ;
1449
1450         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1451
1452         netdev->mem_start = mmio_start;
1453         netdev->mem_end = mmio_start + mmio_len;
1454
1455         /* PCI config space info */
1456         hw->vendor_id = pdev->vendor;
1457         hw->device_id = pdev->device;
1458         hw->revision_id = pdev->revision;
1459         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1460         hw->subsystem_device_id = pdev->subsystem_device;
1461
1462         /* setup the private structure */
1463         hw->back = adapter;
1464         /* Copy the default MAC, PHY and NVM function pointers */
1465         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1466         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1467         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1468         /* Initialize skew-specific constants */
1469         err = ei->get_invariants(hw);
1470         if (err)
1471                 goto err_sw_init;
1472
1473 #ifdef CONFIG_PCI_IOV
1474         /* since iov functionality isn't critical to base device function we
1475          * can accept failure.  If it fails we don't allow iov to be enabled */
1476         if (hw->mac.type == e1000_82576) {
1477                 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1478                 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1479                 int i;
1480                 unsigned char mac_addr[ETH_ALEN];
1481
1482                 if (num_vfs) {
1483                         adapter->vf_data = kcalloc(num_vfs,
1484                                                 sizeof(struct vf_data_storage),
1485                                                 GFP_KERNEL);
1486                         if (!adapter->vf_data) {
1487                                 dev_err(&pdev->dev,
1488                                         "Could not allocate VF private data - "
1489                                         "IOV enable failed\n");
1490                         } else {
1491                                 err = pci_enable_sriov(pdev, num_vfs);
1492                                 if (!err) {
1493                                         adapter->vfs_allocated_count = num_vfs;
1494                                         dev_info(&pdev->dev,
1495                                                  "%d vfs allocated\n",
1496                                                  num_vfs);
1497                                         for (i = 0;
1498                                              i < adapter->vfs_allocated_count;
1499                                              i++) {
1500                                                 random_ether_addr(mac_addr);
1501                                                 igb_set_vf_mac(adapter, i,
1502                                                                mac_addr);
1503                                         }
1504                                 } else {
1505                                         kfree(adapter->vf_data);
1506                                         adapter->vf_data = NULL;
1507                                 }
1508                         }
1509                 }
1510         }
1511
1512 #endif
1513         /* setup the private structure */
1514         err = igb_sw_init(adapter);
1515         if (err)
1516                 goto err_sw_init;
1517
1518         igb_get_bus_info_pcie(hw);
1519
1520         hw->phy.autoneg_wait_to_complete = false;
1521         hw->mac.adaptive_ifs = true;
1522
1523         /* Copper options */
1524         if (hw->phy.media_type == e1000_media_type_copper) {
1525                 hw->phy.mdix = AUTO_ALL_MODES;
1526                 hw->phy.disable_polarity_correction = false;
1527                 hw->phy.ms_type = e1000_ms_hw_default;
1528         }
1529
1530         if (igb_check_reset_block(hw))
1531                 dev_info(&pdev->dev,
1532                         "PHY reset is blocked due to SOL/IDER session.\n");
1533
1534         netdev->features = NETIF_F_SG |
1535                            NETIF_F_IP_CSUM |
1536                            NETIF_F_HW_VLAN_TX |
1537                            NETIF_F_HW_VLAN_RX |
1538                            NETIF_F_HW_VLAN_FILTER;
1539
1540         netdev->features |= NETIF_F_IPV6_CSUM;
1541         netdev->features |= NETIF_F_TSO;
1542         netdev->features |= NETIF_F_TSO6;
1543
1544         netdev->features |= NETIF_F_GRO;
1545
1546         netdev->vlan_features |= NETIF_F_TSO;
1547         netdev->vlan_features |= NETIF_F_TSO6;
1548         netdev->vlan_features |= NETIF_F_IP_CSUM;
1549         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1550         netdev->vlan_features |= NETIF_F_SG;
1551
1552         if (pci_using_dac)
1553                 netdev->features |= NETIF_F_HIGHDMA;
1554
1555         if (adapter->hw.mac.type == e1000_82576)
1556                 netdev->features |= NETIF_F_SCTP_CSUM;
1557
1558         adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1559
1560         /* before reading the NVM, reset the controller to put the device in a
1561          * known good starting state */
1562         hw->mac.ops.reset_hw(hw);
1563
1564         /* make sure the NVM is good */
1565         if (igb_validate_nvm_checksum(hw) < 0) {
1566                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1567                 err = -EIO;
1568                 goto err_eeprom;
1569         }
1570
1571         /* copy the MAC address out of the NVM */
1572         if (hw->mac.ops.read_mac_addr(hw))
1573                 dev_err(&pdev->dev, "NVM Read Error\n");
1574
1575         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1576         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1577
1578         if (!is_valid_ether_addr(netdev->perm_addr)) {
1579                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1580                 err = -EIO;
1581                 goto err_eeprom;
1582         }
1583
1584         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1585                     (unsigned long) adapter);
1586         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1587                     (unsigned long) adapter);
1588
1589         INIT_WORK(&adapter->reset_task, igb_reset_task);
1590         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1591
1592         /* Initialize link properties that are user-changeable */
1593         adapter->fc_autoneg = true;
1594         hw->mac.autoneg = true;
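             /* 0x2f advertises 10/100 half and full duplex plus 1000 full,
              * i.e. every combination except 1000 half */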
1595         hw->phy.autoneg_advertised = 0x2f;
1596
1597         hw->fc.requested_mode = e1000_fc_default;
1598         hw->fc.current_mode = e1000_fc_default;
1599
1600         adapter->itr_setting = IGB_DEFAULT_ITR;
1601         adapter->itr = IGB_START_ITR;
1602
1603         igb_validate_mdi_setting(hw);
1604
1605         /* Initial Wake on LAN setting.  If APM wake is enabled in the
1606          * EEPROM, enable the ACPI Magic Packet filter
1607          */
1608
1609         if (hw->bus.func == 0)
1610                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1611         else if (hw->bus.func == 1)
1612                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1613
1614         if (eeprom_data & eeprom_apme_mask)
1615                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1616
1617         /* now that we have the eeprom settings, apply the special cases where
1618          * the eeprom may be wrong or the board simply won't support wake on
1619          * lan on a particular port */
1620         switch (pdev->device) {
1621         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1622                 adapter->eeprom_wol = 0;
1623                 break;
1624         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1625         case E1000_DEV_ID_82576_FIBER:
1626         case E1000_DEV_ID_82576_SERDES:
1627                 /* Wake events only supported on port A for dual fiber
1628                  * regardless of eeprom setting */
1629                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1630                         adapter->eeprom_wol = 0;
1631                 break;
1632         case E1000_DEV_ID_82576_QUAD_COPPER:
1633                 /* if quad port adapter, disable WoL on all but port A */
1634                 if (global_quad_port_a != 0)
1635                         adapter->eeprom_wol = 0;
1636                 else
1637                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1638                 /* Reset for multiple quad port adapters */
1639                 if (++global_quad_port_a == 4)
1640                         global_quad_port_a = 0;
1641                 break;
1642         }
1643
1644         /* initialize the wol settings based on the eeprom settings */
1645         adapter->wol = adapter->eeprom_wol;
1646         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1647
1648         /* reset the hardware with the new settings */
1649         igb_reset(adapter);
1650
1651         /* let the f/w know that the h/w is now under the control of the
1652          * driver. */
1653         igb_get_hw_control(adapter);
1654
1655         strcpy(netdev->name, "eth%d");
1656         err = register_netdev(netdev);
1657         if (err)
1658                 goto err_register;
1659
1660         /* carrier off reporting is important to ethtool even BEFORE open */
1661         netif_carrier_off(netdev);
1662
1663 #ifdef CONFIG_IGB_DCA
1664         if (dca_add_requester(&pdev->dev) == 0) {
1665                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1666                 dev_info(&pdev->dev, "DCA enabled\n");
1667                 igb_setup_dca(adapter);
1668         }
1669 #endif
1670
1671         /*
1672          * Initialize hardware timer: we keep it running just in case
1673          * some program needs it later on.
1674          */
1675         memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1676         adapter->cycles.read = igb_read_clock;
1677         adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1678         adapter->cycles.mult = 1;
1679         adapter->cycles.shift = IGB_TSYNC_SHIFT;
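             /* TIMINCA: increment period in the top byte, increment value in
              * the low 24 bits (assumption: the 82576 SYSTIM/TIMINCA layout) */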
1680         wr32(E1000_TIMINCA,
1681              (1<<24) |
1682              IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
1683 #if 0
1684         /*
1685          * Avoid rollover while we initialize by resetting the time counter.
1686          */
1687         wr32(E1000_SYSTIML, 0x00000000);
1688         wr32(E1000_SYSTIMH, 0x00000000);
1689 #else
1690         /*
1691          * Set registers so that rollover occurs soon to test this.
1692          */
1693         wr32(E1000_SYSTIML, 0x00000000);
1694         wr32(E1000_SYSTIMH, 0xFF800000);
1695 #endif
1696         wrfl();
1697         timecounter_init(&adapter->clock,
1698                          &adapter->cycles,
1699                          ktime_to_ns(ktime_get_real()));
1700
1701         /*
1702          * Synchronize our NIC clock against the system wall clock.
1703          * Reading a NIC time stamp takes ~3us per sample, and samples
1704          * proved stable even under load, so 10 samples suffice for
1705          * each offset comparison.
1706          */
1707         memset(&adapter->compare, 0, sizeof(adapter->compare));
1708         adapter->compare.source = &adapter->clock;
1709         adapter->compare.target = ktime_get_real;
1710         adapter->compare.num_samples = 10;
1711         timecompare_update(&adapter->compare, 0);
1712
1713 #ifdef DEBUG
1714         {
1715                 char buffer[160];
1716                 printk(KERN_DEBUG
1717                         "igb: %s: hw %p initialized timer\n",
1718                         igb_get_time_str(adapter, buffer),
1719                         &adapter->hw);
1720         }
1721 #endif
1722
1723         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1724         /* print bus type/speed/width info */
1725         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1726                  netdev->name,
1727                  ((hw->bus.speed == e1000_bus_speed_2500)
1728                   ? "2.5Gb/s" : "unknown"),
1729                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1730                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1731                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1732                    "unknown"),
1733                  netdev->dev_addr);
1734
1735         igb_read_part_num(hw, &part_num);
1736         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1737                 (part_num >> 8), (part_num & 0xff));
1738
1739         dev_info(&pdev->dev,
1740                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1741                 adapter->msix_entries ? "MSI-X" :
1742                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1743                 adapter->num_rx_queues, adapter->num_tx_queues);
1744
1745         return 0;
1746
1747 err_register:
1748         igb_release_hw_control(adapter);
1749 err_eeprom:
1750         if (!igb_check_reset_block(hw))
1751                 igb_reset_phy(hw);
1752
1753         if (hw->flash_address)
1754                 iounmap(hw->flash_address);
1755 err_sw_init:
1756         igb_clear_interrupt_scheme(adapter);
1757         iounmap(hw->hw_addr);
1758 err_ioremap:
1759         free_netdev(netdev);
1760 err_alloc_etherdev:
1761         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1762                                      IORESOURCE_MEM));
1763 err_pci_reg:
1764 err_dma:
1765         pci_disable_device(pdev);
1766         return err;
1767 }
1768
1769 /**
1770  * igb_remove - Device Removal Routine
1771  * @pdev: PCI device information struct
1772  *
1773  * igb_remove is called by the PCI subsystem to alert the driver
1774  * that it should release a PCI device.  This could be caused by a
1775  * Hot-Plug event, or because the driver is going to be removed from
1776  * memory.
1777  **/
1778 static void __devexit igb_remove(struct pci_dev *pdev)
1779 {
1780         struct net_device *netdev = pci_get_drvdata(pdev);
1781         struct igb_adapter *adapter = netdev_priv(netdev);
1782         struct e1000_hw *hw = &adapter->hw;
1783
1784         /* flush_scheduled_work() may reschedule our watchdog task, so
1785          * explicitly disable watchdog tasks from being rescheduled */
1786         set_bit(__IGB_DOWN, &adapter->state);
1787         del_timer_sync(&adapter->watchdog_timer);
1788         del_timer_sync(&adapter->phy_info_timer);
1789
1790         flush_scheduled_work();
1791
1792 #ifdef CONFIG_IGB_DCA
1793         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1794                 dev_info(&pdev->dev, "DCA disabled\n");
1795                 dca_remove_requester(&pdev->dev);
1796                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1797                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1798         }
1799 #endif
1800
1801         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1802          * would have already happened in close and is redundant. */
1803         igb_release_hw_control(adapter);
1804
1805         unregister_netdev(netdev);
1806
1807         if (!igb_check_reset_block(&adapter->hw))
1808                 igb_reset_phy(&adapter->hw);
1809
1810         igb_clear_interrupt_scheme(adapter);
1811
1812 #ifdef CONFIG_PCI_IOV
1813         /* reclaim resources allocated to VFs */
1814         if (adapter->vf_data) {
1815                 /* disable iov and allow time for transactions to clear */
1816                 pci_disable_sriov(pdev);
1817                 msleep(500);
1818
1819                 kfree(adapter->vf_data);
1820                 adapter->vf_data = NULL;
1821                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1822                 msleep(100);
1823                 dev_info(&pdev->dev, "IOV Disabled\n");
1824         }
1825 #endif
1826         iounmap(hw->hw_addr);
1827         if (hw->flash_address)
1828                 iounmap(hw->flash_address);
1829         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1830                                      IORESOURCE_MEM));
1831
1832         free_netdev(netdev);
1833
1834         pci_disable_pcie_error_reporting(pdev);
1835
1836         pci_disable_device(pdev);
1837 }
1838
1839 /**
1840  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1841  * @adapter: board private structure to initialize
1842  *
1843  * igb_sw_init initializes the Adapter private data structure.
1844  * Fields are initialized based on PCI device information and
1845  * OS network device settings (MTU size).
1846  **/
1847 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1848 {
1849         struct e1000_hw *hw = &adapter->hw;
1850         struct net_device *netdev = adapter->netdev;
1851         struct pci_dev *pdev = adapter->pdev;
1852
1853         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1854
1855         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1856         adapter->rx_ring_count = IGB_DEFAULT_RXD;
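             /* the MTU excludes the Ethernet header and FCS, so add them
              * back when sizing hardware frame limits */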
1857         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1858         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1859
1860         /* This call may decrease the number of queues depending on
1861          * interrupt mode. */
1862         if (igb_init_interrupt_scheme(adapter)) {
1863                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1864                 return -ENOMEM;
1865         }
1866
1867         /* Explicitly disable IRQ since the NIC can be in any state. */
1868         igb_irq_disable(adapter);
1869
1870         set_bit(__IGB_DOWN, &adapter->state);
1871         return 0;
1872 }
1873
1874 /**
1875  * igb_open - Called when a network interface is made active
1876  * @netdev: network interface device structure
1877  *
1878  * Returns 0 on success, negative value on failure
1879  *
1880  * The open entry point is called when a network interface is made
1881  * active by the system (IFF_UP).  At this point all resources needed
1882  * for transmit and receive operations are allocated, the interrupt
1883  * handler is registered with the OS, the watchdog timer is started,
1884  * and the stack is notified that the interface is ready.
1885  **/
1886 static int igb_open(struct net_device *netdev)
1887 {
1888         struct igb_adapter *adapter = netdev_priv(netdev);
1889         struct e1000_hw *hw = &adapter->hw;
1890         int err;
1891         int i;
1892
1893         /* disallow open during test */
1894         if (test_bit(__IGB_TESTING, &adapter->state))
1895                 return -EBUSY;
1896
1897         netif_carrier_off(netdev);
1898
1899         /* allocate transmit descriptors */
1900         err = igb_setup_all_tx_resources(adapter);
1901         if (err)
1902                 goto err_setup_tx;
1903
1904         /* allocate receive descriptors */
1905         err = igb_setup_all_rx_resources(adapter);
1906         if (err)
1907                 goto err_setup_rx;
1908
1909         /* e1000_power_up_phy(adapter); */
1910
1911         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1912         if ((adapter->hw.mng_cookie.status &
1913              E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1914                 igb_update_mng_vlan(adapter);
1915
1916         /* before we allocate an interrupt, we must be ready to handle it.
1917          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1918          * as soon as we call request_irq, so we have to set up our
1919          * clean_rx handler before we do so.  */
1920         igb_configure(adapter);
1921
1922         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1923
1924         err = igb_request_irq(adapter);
1925         if (err)
1926                 goto err_req_irq;
1927
1928         /* From here on the code is the same as igb_up() */
1929         clear_bit(__IGB_DOWN, &adapter->state);
1930
1931         for (i = 0; i < adapter->num_q_vectors; i++) {
1932                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1933                 napi_enable(&q_vector->napi);
1934         }
1935
1936         /* Clear any pending interrupts. */
1937         rd32(E1000_ICR);
1938
1939         igb_irq_enable(adapter);
1940
1941         /* notify VFs that reset has been completed */
1942         if (adapter->vfs_allocated_count) {
1943                 u32 reg_data = rd32(E1000_CTRL_EXT);
1944                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1945                 wr32(E1000_CTRL_EXT, reg_data);
1946         }
1947
1948         netif_tx_start_all_queues(netdev);
1949
1950         /* Fire a link status change interrupt to start the watchdog. */
1951         wr32(E1000_ICS, E1000_ICS_LSC);
1952
1953         return 0;
1954
1955 err_req_irq:
1956         igb_release_hw_control(adapter);
1957         /* e1000_power_down_phy(adapter); */
1958         igb_free_all_rx_resources(adapter);
1959 err_setup_rx:
1960         igb_free_all_tx_resources(adapter);
1961 err_setup_tx:
1962         igb_reset(adapter);
1963
1964         return err;
1965 }
1966
1967 /**
1968  * igb_close - Disables a network interface
1969  * @netdev: network interface device structure
1970  *
1971  * Returns 0, this is not allowed to fail
1972  *
1973  * The close entry point is called when an interface is de-activated
1974  * by the OS.  The hardware is still under the driver's control, but
1975  * needs to be disabled.  A global MAC reset is issued to stop the
1976  * hardware, and all transmit and receive resources are freed.
1977  **/
1978 static int igb_close(struct net_device *netdev)
1979 {
1980         struct igb_adapter *adapter = netdev_priv(netdev);
1981
1982         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1983         igb_down(adapter);
1984
1985         igb_free_irq(adapter);
1986
1987         igb_free_all_tx_resources(adapter);
1988         igb_free_all_rx_resources(adapter);
1989
1990         /* kill manageability vlan ID if supported, but not if a vlan with
1991          * the same ID is registered on the host OS (let 8021q kill it) */
1992         if ((adapter->hw.mng_cookie.status &
1993                           E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1994              !(adapter->vlgrp &&
1995                vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1996                 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
1997
1998         return 0;
1999 }
2000
2001 /**
2002  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2003  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2004  *
2005  * Return 0 on success, negative on failure
2006  **/
2007 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2008 {
2009         struct pci_dev *pdev = tx_ring->pdev;
2010         int size;
2011
2012         size = sizeof(struct igb_buffer) * tx_ring->count;
2013         tx_ring->buffer_info = vmalloc(size);
2014         if (!tx_ring->buffer_info)
2015                 goto err;
2016         memset(tx_ring->buffer_info, 0, size);
2017
2018         /* round up to nearest 4K */
2019         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2020         tx_ring->size = ALIGN(tx_ring->size, 4096);
2021
2022         tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
2023                                              &tx_ring->dma);
2024
2025         if (!tx_ring->desc)
2026                 goto err;
2027
2028         tx_ring->next_to_use = 0;
2029         tx_ring->next_to_clean = 0;
2030         return 0;
2031
2032 err:
2033         vfree(tx_ring->buffer_info);
2034         dev_err(&pdev->dev,
2035                 "Unable to allocate memory for the transmit descriptor ring\n");
2036         return -ENOMEM;
2037 }
2038
2039 /**
2040  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2041  *                                (Descriptors) for all queues
2042  * @adapter: board private structure
2043  *
2044  * Return 0 on success, negative on failure
2045  **/
2046 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2047 {
2048         int i, err = 0;
2049         int r_idx;
2050
2051         for (i = 0; i < adapter->num_tx_queues; i++) {
2052                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2053                 if (err) {
2054                         dev_err(&adapter->pdev->dev,
2055                                 "Allocation for Tx Queue %u failed\n", i);
2056                         for (i--; i >= 0; i--)
2057                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2058                         break;
2059                 }
2060         }
2061
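             /* spread every possible tx queue index over the rings that were
              * actually allocated (simple round-robin mapping) */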
2062         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2063                 r_idx = i % adapter->num_tx_queues;
2064                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2065         }
2066         return err;
2067 }
2068
2069 /**
2070  * igb_setup_tctl - configure the transmit control registers
2071  * @adapter: Board private structure
2072  **/
2073 void igb_setup_tctl(struct igb_adapter *adapter)
2074 {
2075         struct e1000_hw *hw = &adapter->hw;
2076         u32 tctl;
2077
2078         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2079         wr32(E1000_TXDCTL(0), 0);
2080
2081         /* Program the Transmit Control Register */
2082         tctl = rd32(E1000_TCTL);
2083         tctl &= ~E1000_TCTL_CT;
2084         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2085                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2086
2087         igb_config_collision_dist(hw);
2088
2089         /* Enable transmits */
2090         tctl |= E1000_TCTL_EN;
2091
2092         wr32(E1000_TCTL, tctl);
2093 }
2094
2095 /**
2096  * igb_configure_tx_ring - Configure transmit ring after Reset
2097  * @adapter: board private structure
2098  * @ring: tx ring to configure
2099  *
2100  * Configure a transmit ring after a reset.
2101  **/
2102 void igb_configure_tx_ring(struct igb_adapter *adapter,
2103                            struct igb_ring *ring)
2104 {
2105         struct e1000_hw *hw = &adapter->hw;
2106         u32 txdctl;
2107         u64 tdba = ring->dma;
2108         int reg_idx = ring->reg_idx;
2109
2110         /* disable the queue */
2111         txdctl = rd32(E1000_TXDCTL(reg_idx));
2112         wr32(E1000_TXDCTL(reg_idx),
2113                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2114         wrfl();
2115         mdelay(10);
2116
2117         wr32(E1000_TDLEN(reg_idx),
2118                         ring->count * sizeof(union e1000_adv_tx_desc));
2119         wr32(E1000_TDBAL(reg_idx),
2120                         tdba & 0x00000000ffffffffULL);
2121         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2122
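             /* cache the MMIO addresses of the head/tail registers for the
              * hot path, then reset both so the ring starts out empty */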
2123         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2124         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2125         writel(0, ring->head);
2126         writel(0, ring->tail);
2127
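             /* prefetch, host and write-back thresholds sit in consecutive
              * byte lanes of TXDCTL */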
2128         txdctl |= IGB_TX_PTHRESH;
2129         txdctl |= IGB_TX_HTHRESH << 8;
2130         txdctl |= IGB_TX_WTHRESH << 16;
2131
2132         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2133         wr32(E1000_TXDCTL(reg_idx), txdctl);
2134 }
2135
2136 /**
2137  * igb_configure_tx - Configure transmit Unit after Reset
2138  * @adapter: board private structure
2139  *
2140  * Configure the Tx unit of the MAC after a reset.
2141  **/
2142 static void igb_configure_tx(struct igb_adapter *adapter)
2143 {
2144         int i;
2145
2146         for (i = 0; i < adapter->num_tx_queues; i++)
2147                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2148 }
2149
2150 /**
2151  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2152  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2153  *
2154  * Returns 0 on success, negative on failure
2155  **/
2156 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2157 {
2158         struct pci_dev *pdev = rx_ring->pdev;
2159         int size, desc_len;
2160
2161         size = sizeof(struct igb_buffer) * rx_ring->count;
2162         rx_ring->buffer_info = vmalloc(size);
2163         if (!rx_ring->buffer_info)
2164                 goto err;
2165         memset(rx_ring->buffer_info, 0, size);
2166
2167         desc_len = sizeof(union e1000_adv_rx_desc);
2168
2169         /* Round up to nearest 4K */
2170         rx_ring->size = rx_ring->count * desc_len;
2171         rx_ring->size = ALIGN(rx_ring->size, 4096);
2172
2173         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2174                                              &rx_ring->dma);
2175
2176         if (!rx_ring->desc)
2177                 goto err;
2178
2179         rx_ring->next_to_clean = 0;
2180         rx_ring->next_to_use = 0;
2181
2182         return 0;
2183
2184 err:
2185         vfree(rx_ring->buffer_info);
2186         dev_err(&pdev->dev, "Unable to allocate memory for "
2187                 "the receive descriptor ring\n");
2188         return -ENOMEM;
2189 }
2190
2191 /**
2192  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2193  *                                (Descriptors) for all queues
2194  * @adapter: board private structure
2195  *
2196  * Return 0 on success, negative on failure
2197  **/
2198 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2199 {
2200         int i, err = 0;
2201
2202         for (i = 0; i < adapter->num_rx_queues; i++) {
2203                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2204                 if (err) {
2205                         dev_err(&adapter->pdev->dev,
2206                                 "Allocation for Rx Queue %u failed\n", i);
2207                         for (i--; i >= 0; i--)
2208                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2209                         break;
2210                 }
2211         }
2212
2213         return err;
2214 }
2215
2216 /**
2217  * igb_setup_mrqc - configure the multiple receive queue control registers
2218  * @adapter: Board private structure
2219  **/
2220 static void igb_setup_mrqc(struct igb_adapter *adapter)
2221 {
2222         struct e1000_hw *hw = &adapter->hw;
2223         u32 mrqc, rxcsum;
2224         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2225         union e1000_reta {
2226                 u32 dword;
2227                 u8  bytes[4];
2228         } reta;
2229         static const u8 rsshash[40] = {
2230                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2231                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2232                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2233                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2234
2235         /* Fill out hash function seeds */
2236         for (j = 0; j < 10; j++) {
2237                 u32 rsskey = rsshash[(j * 4)];
2238                 rsskey |= rsshash[(j * 4) + 1] << 8;
2239                 rsskey |= rsshash[(j * 4) + 2] << 16;
2240                 rsskey |= rsshash[(j * 4) + 3] << 24;
2241                 array_wr32(E1000_RSSRK(0), j, rsskey);
2242         }
2243
2244         num_rx_queues = adapter->num_rx_queues;
2245
2246         if (adapter->vfs_allocated_count) {
2247                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2248                 switch (hw->mac.type) {
2249                 case e1000_82576:
2250                         shift = 3;
2251                         num_rx_queues = 2;
2252                         break;
2253                 case e1000_82575:
2254                         shift = 2;
2255                         shift2 = 6;
2256                 default:
2257                         break;
2258                 }
2259         } else {
2260                 if (hw->mac.type == e1000_82575)
2261                         shift = 6;
2262         }
2263
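             /* program the 128-entry redirection table one dword (four
              * one-byte entries) at a time, spreading the hash buckets
              * round-robin across the rx queues */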
2264         for (j = 0; j < (32 * 4); j++) {
2265                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2266                 if (shift2)
2267                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2268                 if ((j & 3) == 3)
2269                         wr32(E1000_RETA(j >> 2), reta.dword);
2270         }
2271
2272         /*
2273          * Disable raw packet checksumming so that RSS hash is placed in
2274          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2275          * offloads as they are enabled by default
2276          */
2277         rxcsum = rd32(E1000_RXCSUM);
2278         rxcsum |= E1000_RXCSUM_PCSD;
2279
2280         if (adapter->hw.mac.type >= e1000_82576)
2281                 /* Enable Receive Checksum Offload for SCTP */
2282                 rxcsum |= E1000_RXCSUM_CRCOFL;
2283
2284         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2285         wr32(E1000_RXCSUM, rxcsum);
2286
2287         /* If VMDq is enabled then we set the appropriate mode for that, else
2288          * we default to RSS so that an RSS hash is calculated per packet even
2289          * if we are only using one queue */
2290         if (adapter->vfs_allocated_count) {
2291                 if (hw->mac.type > e1000_82575) {
2292                         /* Set the default pool for the PF's first queue */
2293                         u32 vtctl = rd32(E1000_VT_CTL);
2294                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2295                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2296                         vtctl |= adapter->vfs_allocated_count <<
2297                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2298                         wr32(E1000_VT_CTL, vtctl);
2299                 }
2300                 if (adapter->num_rx_queues > 1)
2301                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2302                 else
2303                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2304         } else {
2305                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2306         }
2307         igb_vmm_control(adapter);
2308
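             /* hash on IPv4/IPv6 headers plus TCP and UDP port numbers */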
2309         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2310                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2311         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2312                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2313         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2314                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2315         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2316                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2317
2318         wr32(E1000_MRQC, mrqc);
2319 }
2320
2321 /**
2322  * igb_setup_rctl - configure the receive control registers
2323  * @adapter: Board private structure
2324  **/
2325 void igb_setup_rctl(struct igb_adapter *adapter)
2326 {
2327         struct e1000_hw *hw = &adapter->hw;
2328         u32 rctl;
2329
2330         rctl = rd32(E1000_RCTL);
2331
2332         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2333         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2334
2335         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2336                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2337
2338         /*
2339          * enable stripping of CRC. It's unlikely this will break BMC
2340          * redirection as it did with e1000. Newer features require
2341          * that the HW strips the CRC.
2342          */
2343         rctl |= E1000_RCTL_SECRC;
2344
2345         /*
2346          * disable store bad packets and clear size bits.
2347          */
2348         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2349
2350         /* enable LPE to prevent packets larger than max_frame_size */
2351         rctl |= E1000_RCTL_LPE;
2352
2353         /* disable queue 0 to prevent tail write w/o re-config */
2354         wr32(E1000_RXDCTL(0), 0);
2355
2356         /* Attention!!!  For SR-IOV PF driver operations you must enable
2357          * queue drop for all VF and PF queues to prevent head of line blocking
2358          * if an untrusted VF does not provide descriptors to hardware.
2359          */
2360         if (adapter->vfs_allocated_count) {
2361                 u32 vmolr;
2362
2363                 /* set all queue drop enable bits */
2364                 wr32(E1000_QDE, ALL_QUEUES);
2365
2366                 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2367                 if (rctl & E1000_RCTL_LPE)
2368                         vmolr |= E1000_VMOLR_LPE;
2369                 if (adapter->num_rx_queues > 1)
2370                         vmolr |= E1000_VMOLR_RSSE;
2371                 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2372         }
2373
2374         wr32(E1000_RCTL, rctl);
2375 }
2376
2377 /**
2378  * igb_rlpml_set - set maximum receive packet size
2379  * @adapter: board private structure
2380  *
2381  * Configure maximum receivable packet size.
2382  **/
2383 static void igb_rlpml_set(struct igb_adapter *adapter)
2384 {
2385         u32 max_frame_size = adapter->max_frame_size;
2386         struct e1000_hw *hw = &adapter->hw;
2387         u16 pf_id = adapter->vfs_allocated_count;
2388
2389         if (adapter->vlgrp)
2390                 max_frame_size += VLAN_TAG_SIZE;
2391
2392         /* if vfs are enabled we set RLPML to the largest possible request
2393          * size and set the VMOLR RLPML to the size we need */
2394         if (pf_id) {
2395                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2396                 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2397         }
2398
2399         wr32(E1000_RLPML, max_frame_size);
2400 }
2401
2402 /**
2403  * igb_configure_rx_ring - Configure a receive ring after Reset
2404  * @adapter: board private structure
2405  * @ring: receive ring to be configured
2406  *
2407  * Configure the Rx unit of the MAC after a reset.
2408  **/
2409 void igb_configure_rx_ring(struct igb_adapter *adapter,
2410                            struct igb_ring *ring)
2411 {
2412         struct e1000_hw *hw = &adapter->hw;
2413         u64 rdba = ring->dma;
2414         int reg_idx = ring->reg_idx;
2415         u32 srrctl, rxdctl;
2416
2417         /* disable the queue */
2418         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2419         wr32(E1000_RXDCTL(reg_idx),
2420                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2421
2422         /* Set DMA base address registers */
2423         wr32(E1000_RDBAL(reg_idx),
2424              rdba & 0x00000000ffffffffULL);
2425         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2426         wr32(E1000_RDLEN(reg_idx),
2427                        ring->count * sizeof(union e1000_adv_rx_desc));
2428
2429         /* initialize head and tail */
2430         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2431         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2432         writel(0, ring->head);
2433         writel(0, ring->tail);
2434
2435         /* set descriptor configuration */
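             /* small buffers use header split (header in the buffer, payload
              * in a half page); larger buffers get one contiguous buffer */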
2436         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2437                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2438                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2439 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2440                 srrctl |= IGB_RXBUFFER_16384 >>
2441                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2442 #else
2443                 srrctl |= (PAGE_SIZE / 2) >>
2444                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2445 #endif
2446                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2447         } else {
2448                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2449                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2450                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2451         }
2452
2453         wr32(E1000_SRRCTL(reg_idx), srrctl);
2454
2455         /* enable receive descriptor fetching */
2456         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2457         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2458         rxdctl &= 0xFFF00000;
2459         rxdctl |= IGB_RX_PTHRESH;
2460         rxdctl |= IGB_RX_HTHRESH << 8;
2461         rxdctl |= IGB_RX_WTHRESH << 16;
2462         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2463 }
2464
2465 /**
2466  * igb_configure_rx - Configure receive Unit after Reset
2467  * @adapter: board private structure
2468  *
2469  * Configure the Rx unit of the MAC after a reset.
2470  **/
2471 static void igb_configure_rx(struct igb_adapter *adapter)
2472 {
2473         int i;
2474
2475         /* set UTA to appropriate mode */
2476         igb_set_uta(adapter);
2477
2478         /* set the correct pool for the PF default MAC address in entry 0 */
2479         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2480                          adapter->vfs_allocated_count);
2481
2482         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2483          * the Base and Length of the Rx Descriptor Ring */
2484         for (i = 0; i < adapter->num_rx_queues; i++)
2485                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2486 }
2487
2488 /**
2489  * igb_free_tx_resources - Free Tx Resources per Queue
2490  * @tx_ring: Tx descriptor ring for a specific queue
2491  *
2492  * Free all transmit software resources
2493  **/
2494 void igb_free_tx_resources(struct igb_ring *tx_ring)
2495 {
2496         igb_clean_tx_ring(tx_ring);
2497
2498         vfree(tx_ring->buffer_info);
2499         tx_ring->buffer_info = NULL;
2500
2501         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2502                             tx_ring->desc, tx_ring->dma);
2503
2504         tx_ring->desc = NULL;
2505 }
2506
2507 /**
2508  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2509  * @adapter: board private structure
2510  *
2511  * Free all transmit software resources
2512  **/
2513 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2514 {
2515         int i;
2516
2517         for (i = 0; i < adapter->num_tx_queues; i++)
2518                 igb_free_tx_resources(&adapter->tx_ring[i]);
2519 }
2520
2521 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2522                                     struct igb_buffer *buffer_info)
2523 {
2524         buffer_info->dma = 0;
2525         if (buffer_info->skb) {
2526                 skb_dma_unmap(&tx_ring->pdev->dev,
2527                               buffer_info->skb,
2528                               DMA_TO_DEVICE);
2529                 dev_kfree_skb_any(buffer_info->skb);
2530                 buffer_info->skb = NULL;
2531         }
2532         buffer_info->time_stamp = 0;
2533         /* buffer_info must be completely set up in the transmit path */
2534 }
2535
2536 /**
2537  * igb_clean_tx_ring - Free Tx Buffers
2538  * @tx_ring: ring to be cleaned
2539  **/
2540 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2541 {
2542         struct igb_buffer *buffer_info;
2543         unsigned long size;
2544         unsigned int i;
2545
2546         if (!tx_ring->buffer_info)
2547                 return;
2548         /* Free all the Tx ring sk_buffs */
2549
2550         for (i = 0; i < tx_ring->count; i++) {
2551                 buffer_info = &tx_ring->buffer_info[i];
2552                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2553         }
2554
2555         size = sizeof(struct igb_buffer) * tx_ring->count;
2556         memset(tx_ring->buffer_info, 0, size);
2557
2558         /* Zero out the descriptor ring */
2559
2560         memset(tx_ring->desc, 0, tx_ring->size);
2561
2562         tx_ring->next_to_use = 0;
2563         tx_ring->next_to_clean = 0;
2564
2565         writel(0, tx_ring->head);
2566         writel(0, tx_ring->tail);
2567 }
2568
2569 /**
2570  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2571  * @adapter: board private structure
2572  **/
2573 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2574 {
2575         int i;
2576
2577         for (i = 0; i < adapter->num_tx_queues; i++)
2578                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2579 }
2580
2581 /**
2582  * igb_free_rx_resources - Free Rx Resources
2583  * @rx_ring: ring to clean the resources from
2584  *
2585  * Free all receive software resources
2586  **/
2587 void igb_free_rx_resources(struct igb_ring *rx_ring)
2588 {
2589         igb_clean_rx_ring(rx_ring);
2590
2591         vfree(rx_ring->buffer_info);
2592         rx_ring->buffer_info = NULL;
2593
2594         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2595                             rx_ring->desc, rx_ring->dma);
2596
2597         rx_ring->desc = NULL;
2598 }
2599
2600 /**
2601  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2602  * @adapter: board private structure
2603  *
2604  * Free all receive software resources
2605  **/
2606 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2607 {
2608         int i;
2609
2610         for (i = 0; i < adapter->num_rx_queues; i++)
2611                 igb_free_rx_resources(&adapter->rx_ring[i]);
2612 }
2613
2614 /**
2615  * igb_clean_rx_ring - Free Rx Buffers per Queue
2616  * @rx_ring: ring to free buffers from
2617  **/
2618 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2619 {
2620         struct igb_buffer *buffer_info;
2621         unsigned long size;
2622         unsigned int i;
2623
2624         if (!rx_ring->buffer_info)
2625                 return;
2626         /* Free all the Rx ring sk_buffs */
2627         for (i = 0; i < rx_ring->count; i++) {
2628                 buffer_info = &rx_ring->buffer_info[i];
2629                 if (buffer_info->dma) {
2630                         pci_unmap_single(rx_ring->pdev,
2631                                          buffer_info->dma,
2632                                          rx_ring->rx_buffer_len,
2633                                          PCI_DMA_FROMDEVICE);
2634                         buffer_info->dma = 0;
2635                 }
2636
2637                 if (buffer_info->skb) {
2638                         dev_kfree_skb(buffer_info->skb);
2639                         buffer_info->skb = NULL;
2640                 }
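                     /* unmap the half-page payload buffer used for packet
                      * split, if one was attached */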
2641                 if (buffer_info->page_dma) {
2642                         pci_unmap_page(rx_ring->pdev,
2643                                        buffer_info->page_dma,
2644                                        PAGE_SIZE / 2,
2645                                        PCI_DMA_FROMDEVICE);
2646                         buffer_info->page_dma = 0;
2647                 }
2648                 if (buffer_info->page) {
2649                         put_page(buffer_info->page);
2650                         buffer_info->page = NULL;
2651                         buffer_info->page_offset = 0;
2652                 }
2653         }
2654
2655         size = sizeof(struct igb_buffer) * rx_ring->count;
2656         memset(rx_ring->buffer_info, 0, size);
2657
2658         /* Zero out the descriptor ring */
2659         memset(rx_ring->desc, 0, rx_ring->size);
2660
2661         rx_ring->next_to_clean = 0;
2662         rx_ring->next_to_use = 0;
2663
2664         writel(0, rx_ring->head);
2665         writel(0, rx_ring->tail);
2666 }
2667
2668 /**
2669  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2670  * @adapter: board private structure
2671  **/
2672 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2673 {
2674         int i;
2675
2676         for (i = 0; i < adapter->num_rx_queues; i++)
2677                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2678 }
2679
2680 /**
2681  * igb_set_mac - Change the Ethernet Address of the NIC
2682  * @netdev: network interface device structure
2683  * @p: pointer to an address structure
2684  *
2685  * Returns 0 on success, negative on failure
2686  **/
2687 static int igb_set_mac(struct net_device *netdev, void *p)
2688 {
2689         struct igb_adapter *adapter = netdev_priv(netdev);
2690         struct e1000_hw *hw = &adapter->hw;
2691         struct sockaddr *addr = p;
2692
2693         if (!is_valid_ether_addr(addr->sa_data))
2694                 return -EADDRNOTAVAIL;
2695
2696         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2697         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2698
2699         /* set the correct pool for the new PF MAC address in entry 0 */
2700         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2701                          adapter->vfs_allocated_count);
2702
2703         return 0;
2704 }
2705
2706 /**
2707  * igb_write_mc_addr_list - write multicast addresses to MTA
2708  * @netdev: network interface device structure
2709  *
2710  * Writes multicast address list to the MTA hash table.
2711  * Returns: -ENOMEM on failure
2712  *          0 on no addresses written
2713  *          X on writing X addresses to MTA
2714  **/
2715 static int igb_write_mc_addr_list(struct net_device *netdev)
2716 {
2717         struct igb_adapter *adapter = netdev_priv(netdev);
2718         struct e1000_hw *hw = &adapter->hw;
2719         struct dev_mc_list *mc_ptr = netdev->mc_list;
2720         u8  *mta_list;
2721         u32 vmolr = 0;
2722         int i;
2723
2724         if (!netdev->mc_count) {
2725                 /* nothing to program, so clear mc list */
2726                 igb_update_mc_addr_list(hw, NULL, 0);
2727                 igb_restore_vf_multicasts(adapter);
2728                 return 0;
2729         }
2730
2731         mta_list = kzalloc(netdev->mc_count * ETH_ALEN, GFP_ATOMIC);
2732         if (!mta_list)
2733                 return -ENOMEM;
2734
2735         /* set vmolr receive overflow multicast bit */
2736         vmolr |= E1000_VMOLR_ROMPE;
2737
2738         /* The shared function expects a packed array of only addresses. */
2739         mc_ptr = netdev->mc_list;
2740
2741         for (i = 0; i < netdev->mc_count; i++) {
2742                 if (!mc_ptr)
2743                         break;
2744                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2745                 mc_ptr = mc_ptr->next;
2746         }
2747         igb_update_mc_addr_list(hw, mta_list, i);
2748         kfree(mta_list);
2749
2750         return netdev->mc_count;
2751 }
2752
2753 /**
2754  * igb_write_uc_addr_list - write unicast addresses to RAR table
2755  * @netdev: network interface device structure
2756  *
2757  * Writes unicast address list to the RAR table.
2758  * Returns: -ENOMEM on failure/insufficient address space
2759  *          0 on no addresses written
2760  *          X on writing X addresses to the RAR table
2761  **/
2762 static int igb_write_uc_addr_list(struct net_device *netdev)
2763 {
2764         struct igb_adapter *adapter = netdev_priv(netdev);
2765         struct e1000_hw *hw = &adapter->hw;
2766         unsigned int vfn = adapter->vfs_allocated_count;
2767         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2768         int count = 0;
2769
2770         /* return -ENOMEM indicating insufficient memory for addresses */
2771         if (netdev->uc.count > rar_entries)
2772                 return -ENOMEM;
2773
2774         if (netdev->uc.count && rar_entries) {
2775                 struct netdev_hw_addr *ha;
2776                 list_for_each_entry(ha, &netdev->uc.list, list) {
2777                         if (!rar_entries)
2778                                 break;
2779                         igb_rar_set_qsel(adapter, ha->addr,
2780                                          rar_entries--,
2781                                          vfn);
2782                         count++;
2783                 }
2784         }
2785         /* write the addresses in reverse order to avoid write combining */
2786         for (; rar_entries > 0 ; rar_entries--) {
2787                 wr32(E1000_RAH(rar_entries), 0);
2788                 wr32(E1000_RAL(rar_entries), 0);
2789         }
2790         wrfl();
2791
2792         return count;
2793 }
2794
2795 /**
2796  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2797  * @netdev: network interface device structure
2798  *
2799  * The set_rx_mode entry point is called whenever the unicast or multicast
2800  * address lists or the network interface flags are updated.  This routine is
2801  * responsible for configuring the hardware for proper unicast, multicast,
2802  * promiscuous mode, and all-multi behavior.
2803  **/
2804 static void igb_set_rx_mode(struct net_device *netdev)
2805 {
2806         struct igb_adapter *adapter = netdev_priv(netdev);
2807         struct e1000_hw *hw = &adapter->hw;
2808         unsigned int vfn = adapter->vfs_allocated_count;
2809         u32 rctl, vmolr = 0;
2810         int count;
2811
2812         /* Check for Promiscuous and All Multicast modes */
2813         rctl = rd32(E1000_RCTL);
2814
2815         /* clear the affected bits */
2816         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2817
2818         if (netdev->flags & IFF_PROMISC) {
2819                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2820                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2821         } else {
2822                 if (netdev->flags & IFF_ALLMULTI) {
2823                         rctl |= E1000_RCTL_MPE;
2824                         vmolr |= E1000_VMOLR_MPME;
2825                 } else {
2826                         /*
2827                          * Write addresses to the MTA; if the attempt fails,
2828                          * just turn on multicast promiscuous mode so that we
2829                          * can at least receive multicast traffic
2830                          */
2831                         count = igb_write_mc_addr_list(netdev);
2832                         if (count < 0) {
2833                                 rctl |= E1000_RCTL_MPE;
2834                                 vmolr |= E1000_VMOLR_MPME;
2835                         } else if (count) {
2836                                 vmolr |= E1000_VMOLR_ROMPE;
2837                         }
2838                 }
2839                 /*
2840                  * Write addresses to available RAR registers; if there is
2841                  * not sufficient space to store all the addresses, enable
2842                  * unicast promiscuous mode
2843                  */
2844                 count = igb_write_uc_addr_list(netdev);
2845                 if (count < 0) {
2846                         rctl |= E1000_RCTL_UPE;
2847                         vmolr |= E1000_VMOLR_ROPE;
2848                 }
2849                 rctl |= E1000_RCTL_VFE;
2850         }
2851         wr32(E1000_RCTL, rctl);
2852
2853         /*
2854          * In order to support SR-IOV and eventually VMDq it is necessary to set
2855          * the VMOLR to enable the appropriate modes.  Without this workaround
2856          * we will have issues with VLAN tag stripping not being done for frames
2857          * that are only arriving because we are the default pool
2858          */
2859         if (hw->mac.type < e1000_82576)
2860                 return;
2861
2862         vmolr |= rd32(E1000_VMOLR(vfn)) &
2863                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2864         wr32(E1000_VMOLR(vfn), vmolr);
2865         igb_restore_vf_multicasts(adapter);
2866 }
2867
2868 /* Need to wait a few seconds after link up to get diagnostic information from
2869  * the phy */
2870 static void igb_update_phy_info(unsigned long data)
2871 {
2872         struct igb_adapter *adapter = (struct igb_adapter *) data;
2873         igb_get_phy_info(&adapter->hw);
2874 }
2875
2876 /**
2877  * igb_has_link - check shared code for link and determine up/down
2878  * @adapter: pointer to driver private info
2879  **/
2880 static bool igb_has_link(struct igb_adapter *adapter)
2881 {
2882         struct e1000_hw *hw = &adapter->hw;
2883         bool link_active = false;
2884         s32 ret_val = 0;
2885
2886         /* get_link_status is set on LSC (link status) interrupt or
2887          * rx sequence error interrupt.  get_link_status will stay
2888          * set until the check_for_link operation establishes link
2889          * for copper adapters ONLY
2890          */
2891         switch (hw->phy.media_type) {
2892         case e1000_media_type_copper:
2893                 if (hw->mac.get_link_status) {
2894                         ret_val = hw->mac.ops.check_for_link(hw);
2895                         link_active = !hw->mac.get_link_status;
2896                 } else {
2897                         link_active = true;
2898                 }
2899                 break;
2900         case e1000_media_type_internal_serdes:
2901                 ret_val = hw->mac.ops.check_for_link(hw);
2902                 link_active = hw->mac.serdes_has_link;
2903                 break;
2904         default:
2905         case e1000_media_type_unknown:
2906                 break;
2907         }
2908
2909         return link_active;
2910 }
2911
2912 /**
2913  * igb_watchdog - Timer Call-back
2914  * @data: pointer to adapter cast into an unsigned long
2915  **/
2916 static void igb_watchdog(unsigned long data)
2917 {
2918         struct igb_adapter *adapter = (struct igb_adapter *)data;
2919         /* Do the rest outside of interrupt context */
2920         schedule_work(&adapter->watchdog_task);
2921 }
2922
2923 static void igb_watchdog_task(struct work_struct *work)
2924 {
2925         struct igb_adapter *adapter = container_of(work,
2926                                         struct igb_adapter, watchdog_task);
2927         struct e1000_hw *hw = &adapter->hw;
2928         struct net_device *netdev = adapter->netdev;
2929         struct igb_ring *tx_ring = adapter->tx_ring;
2930         u32 link;
2931         int i;
2932
2933         link = igb_has_link(adapter);
2934         if (netif_carrier_ok(netdev) && link)
2935                 goto link_up;
2936
2937         if (link) {
2938                 if (!netif_carrier_ok(netdev)) {
2939                         u32 ctrl;
2940                         hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2941                                                    &adapter->link_speed,
2942                                                    &adapter->link_duplex);
2943
2944                         ctrl = rd32(E1000_CTRL);
2945                         /* Link status message must follow this format */
2946                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2947                                  "Flow Control: %s\n",
2948                                  netdev->name,
2949                                  adapter->link_speed,
2950                                  adapter->link_duplex == FULL_DUPLEX ?
2951                                  "Full Duplex" : "Half Duplex",
2952                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2953                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2954                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2955                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2956
2957                         /* tweak tx_queue_len according to speed/duplex and
2958                          * adjust the timeout factor */
2959                         netdev->tx_queue_len = adapter->tx_queue_len;
2960                         adapter->tx_timeout_factor = 1;
2961                         switch (adapter->link_speed) {
2962                         case SPEED_10:
2963                                 netdev->tx_queue_len = 10;
2964                                 adapter->tx_timeout_factor = 14;
2965                                 break;
2966                         case SPEED_100:
2967                                 netdev->tx_queue_len = 100;
2968                                 /* maybe add some timeout factor ? */
2969                                 break;
2970                         }
2971
2972                         netif_carrier_on(netdev);
2973
2974                         igb_ping_all_vfs(adapter);
2975
2976                         /* link state has changed, schedule phy info update */
2977                         if (!test_bit(__IGB_DOWN, &adapter->state))
2978                                 mod_timer(&adapter->phy_info_timer,
2979                                           round_jiffies(jiffies + 2 * HZ));
2980                 }
2981         } else {
2982                 if (netif_carrier_ok(netdev)) {
2983                         adapter->link_speed = 0;
2984                         adapter->link_duplex = 0;
2985                         /* Link status message must follow this format */
2986                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2987                                netdev->name);
2988                         netif_carrier_off(netdev);
2989
2990                         igb_ping_all_vfs(adapter);
2991
2992                         /* link state has changed, schedule phy info update */
2993                         if (!test_bit(__IGB_DOWN, &adapter->state))
2994                                 mod_timer(&adapter->phy_info_timer,
2995                                           round_jiffies(jiffies + 2 * HZ));
2996                 }
2997         }
2998
2999 link_up:
3000         igb_update_stats(adapter);
3001
3002         hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
3003         adapter->tpt_old = adapter->stats.tpt;
3004         hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
3005         adapter->colc_old = adapter->stats.colc;
3006
3007         adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
3008         adapter->gorc_old = adapter->stats.gorc;
3009         adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
3010         adapter->gotc_old = adapter->stats.gotc;
3011
3012         igb_update_adaptive(&adapter->hw);
3013
3014         if (!netif_carrier_ok(netdev)) {
3015                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3016                         /* We've lost link, so the controller stops DMA,
3017                          * but we've got queued Tx work that's never going
3018                          * to get done, so reset controller to flush Tx.
3019                          * (Do the reset outside of interrupt context). */
3020                         adapter->tx_timeout_count++;
3021                         schedule_work(&adapter->reset_task);
3022                         /* return immediately since reset is imminent */
3023                         return;
3024                 }
3025         }
3026
3027         /* Cause software interrupt to ensure rx ring is cleaned */
3028         if (adapter->msix_entries) {
3029                 u32 eics = 0;
3030                 for (i = 0; i < adapter->num_q_vectors; i++) {
3031                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3032                         eics |= q_vector->eims_value;
3033                 }
3034                 wr32(E1000_EICS, eics);
3035         } else {
3036                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3037         }
3038
3039         /* Force detection of hung controller every watchdog period */
3040         tx_ring->detect_tx_hung = true;
3041
3042         /* Reset the timer */
3043         if (!test_bit(__IGB_DOWN, &adapter->state))
3044                 mod_timer(&adapter->watchdog_timer,
3045                           round_jiffies(jiffies + 2 * HZ));
3046 }
3047
3048 enum latency_range {
3049         lowest_latency = 0,
3050         low_latency = 1,
3051         bulk_latency = 2,
3052         latency_invalid = 255
3053 };
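     /*
      * The itr_val numbers used below appear to be in units of ~250 ns
      * (igb_write_itr masks off the low two bits of the value before
      * writing EITR), which works out to roughly:
      *
      *   ints/sec ~= 4,000,000 / itr_val
      *
      *   itr_val = 980 -> ~4,000 ints/sec  (bulk_latency)
      *   itr_val = 196 -> ~20,000 ints/sec (low_latency)
      *   itr_val = 56  -> ~70,000 ints/sec (lowest_latency)
      */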
3054
3055
3056 /**
3057  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3058  * @q_vector: pointer to q_vector
3059  *
3060  *      Stores a new ITR value based strictly on packet size.  This
3061  *      algorithm is less sophisticated than that used in igb_update_itr,
3062  *      due to the difficulty of synchronizing statistics across multiple
3063  *      receive rings.  The divisors and thresholds used by this function
3064  *      were determined based on theoretical maximum wire speed and testing
3065  *      data, in order to minimize response time while increasing bulk
3066  *      throughput.
3067  *      This functionality is controlled by the InterruptThrottleRate module
3068  *      parameter (see igb_param.c).
3069  *      NOTE:  This function is called only when operating in a multiqueue
3070  *             receive environment.
3071  **/
3072 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3073 {
3074         int new_val = q_vector->itr_val;
3075         int avg_wire_size = 0;
3076         struct igb_adapter *adapter = q_vector->adapter;
3077
3078         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3079          * ints/sec - an ITR value of 976 in ~250 ns units.
3080          */
3081         if (adapter->link_speed != SPEED_1000) {
3082                 new_val = 976;
3083                 goto set_itr_val;
3084         }
3085
3086         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3087                 struct igb_ring *ring = q_vector->rx_ring;
3088                 avg_wire_size = ring->total_bytes / ring->total_packets;
3089         }
3090
3091         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3092                 struct igb_ring *ring = q_vector->tx_ring;
3093                 avg_wire_size = max_t(u32, avg_wire_size,
3094                                       (ring->total_bytes /
3095                                        ring->total_packets));
3096         }
3097
3098         /* if avg_wire_size isn't set no work was done */
3099         if (!avg_wire_size)
3100                 goto clear_counts;
3101
3102         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3103         avg_wire_size += 24;
3104
3105         /* Don't starve jumbo frames */
3106         avg_wire_size = min(avg_wire_size, 3000);
3107
3108         /* Give a little boost to mid-size frames */
3109         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3110                 new_val = avg_wire_size / 3;
3111         else
3112                 new_val = avg_wire_size / 2;
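             /*
              * A worked example, assuming the ~250 ns granularity noted
              * above: 64-byte frames give avg_wire_size 88 -> new_val 44
              * (~90k ints/sec); 600-byte frames give 624 / 3 = 208 (~19k
              * ints/sec); 1500-byte frames give 1524 / 2 = 762 (~5k
              * ints/sec).
              */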
3113
3114 set_itr_val:
3115         if (new_val != q_vector->itr_val) {
3116                 q_vector->itr_val = new_val;
3117                 q_vector->set_itr = 1;
3118         }
3119 clear_counts:
3120         if (q_vector->rx_ring) {
3121                 q_vector->rx_ring->total_bytes = 0;
3122                 q_vector->rx_ring->total_packets = 0;
3123         }
3124         if (q_vector->tx_ring) {
3125                 q_vector->tx_ring->total_bytes = 0;
3126                 q_vector->tx_ring->total_packets = 0;
3127         }
3128 }
3129
3130 /**
3131  * igb_update_itr - update the dynamic ITR value based on statistics
3132  * @adapter: pointer to adapter
3133  * @itr_setting: current q_vector->itr_val
3134  * @packets: the number of packets during this measurement interval
3135  * @bytes: the number of bytes during this measurement interval
3136  *
3137  *      Stores a new ITR value based on packets and byte counts during the
3138  *      last interrupt.  The advantage of per-interrupt computation is
3139  *      faster updates and a more accurate ITR for the current traffic
3140  *      pattern.  Constants in this function were computed based on
3141  *      theoretical maximum wire speed, and thresholds were set based on
3142  *      testing data as well as attempting to minimize response time while
3143  *      increasing bulk throughput.
3144  *      This functionality is controlled by the InterruptThrottleRate
3145  *      module parameter (see igb_param.c).  NOTE:  These calculations are
3146  *      only valid when operating in a single-queue environment.
3147  **/
3148 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3149                                    int packets, int bytes)
3150 {
3151         unsigned int retval = itr_setting;
3152
3153         if (packets == 0)
3154                 goto update_itr_done;
3155
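             /*
              * Roughly: sustained large packets (high bytes/packet ratios)
              * push the state toward bulk_latency, light small-packet
              * traffic falls back toward lowest_latency, and mixed loads
              * settle in low_latency.
              */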
3156         switch (itr_setting) {
3157         case lowest_latency:
3158                 /* handle TSO and jumbo frames */
3159                 if (bytes/packets > 8000)
3160                         retval = bulk_latency;
3161                 else if ((packets < 5) && (bytes > 512))
3162                         retval = low_latency;
3163                 break;
3164         case low_latency:  /* 50 usec aka 20000 ints/s */
3165                 if (bytes > 10000) {
3166                         /* this if handles the TSO accounting */
3167                         if (bytes/packets > 8000) {
3168                                 retval = bulk_latency;
3169                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3170                                 retval = bulk_latency;
3171                         } else if (packets > 35) {
3172                                 retval = lowest_latency;
3173                         }
3174                 } else if (bytes/packets > 2000) {
3175                         retval = bulk_latency;
3176                 } else if (packets <= 2 && bytes < 512) {
3177                         retval = lowest_latency;
3178                 }
3179                 break;
3180         case bulk_latency: /* 250 usec aka 4000 ints/s */
3181                 if (bytes > 25000) {
3182                         if (packets > 35)
3183                                 retval = low_latency;
3184                 } else if (bytes < 1500) {
3185                         retval = low_latency;
3186                 }
3187                 break;
3188         }
3189
3190 update_itr_done:
3191         return retval;
3192 }
3193
3194 static void igb_set_itr(struct igb_adapter *adapter)
3195 {
3196         struct igb_q_vector *q_vector = adapter->q_vector[0];
3197         u16 current_itr;
3198         u32 new_itr = q_vector->itr_val;
3199
3200         /* for non-gigabit speeds, fix the rate at 4000 ints/sec (ITR 980) */
3201         if (adapter->link_speed != SPEED_1000) {
3202                 current_itr = 0;
3203                 new_itr = 980;
3204                 goto set_itr_now;
3205         }
3206
3207         adapter->rx_itr = igb_update_itr(adapter,
3208                                     adapter->rx_itr,
3209                                     adapter->rx_ring->total_packets,
3210                                     adapter->rx_ring->total_bytes);
3211
3212         adapter->tx_itr = igb_update_itr(adapter,
3213                                     adapter->tx_itr,
3214                                     adapter->tx_ring->total_packets,
3215                                     adapter->tx_ring->total_bytes);
3216         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3217
3218         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3219         if (adapter->itr_setting == 3 && current_itr == lowest_latency)
3220                 current_itr = low_latency;
3221
3222         switch (current_itr) {
3223         /* counts and packets in update_itr are dependent on these numbers */
3224         case lowest_latency:
3225                 new_itr = 56;  /* aka 70,000 ints/sec */
3226                 break;
3227         case low_latency:
3228                 new_itr = 196; /* aka 20,000 ints/sec */
3229                 break;
3230         case bulk_latency:
3231                 new_itr = 980; /* aka 4,000 ints/sec */
3232                 break;
3233         default:
3234                 break;
3235         }
3236
3237 set_itr_now:
3238         adapter->rx_ring->total_bytes = 0;
3239         adapter->rx_ring->total_packets = 0;
3240         adapter->tx_ring->total_bytes = 0;
3241         adapter->tx_ring->total_packets = 0;
3242
3243         if (new_itr != q_vector->itr_val) {
3244                 /* this attempts to bias the interrupt rate towards Bulk
3245                  * by adding intermediate steps when interrupt rate is
3246                  * increasing */
3247                 new_itr = new_itr > q_vector->itr_val ?
3248                              max((new_itr * q_vector->itr_val) /
3249                                  (new_itr + (q_vector->itr_val >> 2)),
3250                                  new_itr) :
3251                              new_itr;
3252                 /* Don't write the value here; it resets the adapter's
3253                  * internal timer, and causes us to delay far longer than
3254                  * we should between interrupts.  Instead, we write the ITR
3255                  * value at the beginning of the next interrupt so the timing
3256                  * ends up being correct.
3257                  */
3258                 q_vector->itr_val = new_itr;
3259                 q_vector->set_itr = 1;
3260         }
3261
3262         return;
3263 }
3264
3265 #define IGB_TX_FLAGS_CSUM               0x00000001
3266 #define IGB_TX_FLAGS_VLAN               0x00000002
3267 #define IGB_TX_FLAGS_TSO                0x00000004
3268 #define IGB_TX_FLAGS_IPV4               0x00000008
3269 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3270 #define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
3271 #define IGB_TX_FLAGS_VLAN_SHIFT 16
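     /*
      * The upper 16 bits of tx_flags carry the 802.1Q tag.  A sketch with
      * a hypothetical tag of 0x0065 (VID 101, priority 0):
      *
      *   tx_flags |= IGB_TX_FLAGS_VLAN;
      *   tx_flags |= 0x0065 << IGB_TX_FLAGS_VLAN_SHIFT;  -> 0x00650002
      *
      * and the context descriptor setup later extracts the tag again with
      * (tx_flags & IGB_TX_FLAGS_VLAN_MASK).
      */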
3272
3273 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3274                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3275 {
3276         struct e1000_adv_tx_context_desc *context_desc;
3277         unsigned int i;
3278         int err;
3279         struct igb_buffer *buffer_info;
3280         u32 info = 0, tu_cmd = 0;
3281         u32 mss_l4len_idx, l4len;
3282         *hdr_len = 0;
3283
3284         if (skb_header_cloned(skb)) {
3285                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3286                 if (err)
3287                         return err;
3288         }
3289
3290         l4len = tcp_hdrlen(skb);
3291         *hdr_len += l4len;
3292
3293         if (skb->protocol == htons(ETH_P_IP)) {
3294                 struct iphdr *iph = ip_hdr(skb);
3295                 iph->tot_len = 0;
3296                 iph->check = 0;
3297                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3298                                                          iph->daddr, 0,
3299                                                          IPPROTO_TCP,
3300                                                          0);
3301         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3302                 ipv6_hdr(skb)->payload_len = 0;
3303                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3304                                                        &ipv6_hdr(skb)->daddr,
3305                                                        0, IPPROTO_TCP, 0);
3306         }
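             /*
              * The length and checksum fields above are cleared and seeded
              * with a zero-length pseudo-header checksum because, for TSO,
              * the hardware rewrites the length fields and TCP checksum for
              * each segment it emits.
              */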
3307
3308         i = tx_ring->next_to_use;
3309
3310         buffer_info = &tx_ring->buffer_info[i];
3311         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3312         /* VLAN MACLEN IPLEN */
3313         if (tx_flags & IGB_TX_FLAGS_VLAN)
3314                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3315         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3316         *hdr_len += skb_network_offset(skb);
3317         info |= skb_network_header_len(skb);
3318         *hdr_len += skb_network_header_len(skb);
3319         context_desc->vlan_macip_lens = cpu_to_le32(info);
3320
3321         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3322         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3323
3324         if (skb->protocol == htons(ETH_P_IP))
3325                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3326         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3327
3328         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3329
3330         /* MSS L4LEN IDX */
3331         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3332         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3333
3334         /* For 82575, context index must be unique per ring. */
3335         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3336                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3337
3338         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3339         context_desc->seqnum_seed = 0;
3340
3341         buffer_info->time_stamp = jiffies;
3342         buffer_info->next_to_watch = i;
3343         buffer_info->dma = 0;
3344         i++;
3345         if (i == tx_ring->count)
3346                 i = 0;
3347
3348         tx_ring->next_to_use = i;
3349
3350         return true;
3351 }
3352
3353 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3354                                    struct sk_buff *skb, u32 tx_flags)
3355 {
3356         struct e1000_adv_tx_context_desc *context_desc;
3357         struct pci_dev *pdev = tx_ring->pdev;
3358         struct igb_buffer *buffer_info;
3359         u32 info = 0, tu_cmd = 0;
3360         unsigned int i;
3361
3362         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3363             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3364                 i = tx_ring->next_to_use;
3365                 buffer_info = &tx_ring->buffer_info[i];
3366                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3367
3368                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3369                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3370                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3371                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3372                         info |= skb_network_header_len(skb);
3373
3374                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3375
3376                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3377
3378                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3379                         __be16 protocol;
3380
3381                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3382                                 const struct vlan_ethhdr *vhdr =
3383                                           (const struct vlan_ethhdr*)skb->data;
3384
3385                                 protocol = vhdr->h_vlan_encapsulated_proto;
3386                         } else {
3387                                 protocol = skb->protocol;
3388                         }
3389
3390                         switch (protocol) {
3391                         case cpu_to_be16(ETH_P_IP):
3392                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3393                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3394                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3395                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3396                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3397                                 break;
3398                         case cpu_to_be16(ETH_P_IPV6):
3399                                 /* XXX what about other V6 headers?? */
3400                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3401                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3402                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3403                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3404                                 break;
3405                         default:
3406                                 if (unlikely(net_ratelimit()))
3407                                         dev_warn(&pdev->dev,
3408                                             "partial checksum but proto=%x!\n",
3409                                             skb->protocol);
3410                                 break;
3411                         }
3412                 }
3413
3414                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3415                 context_desc->seqnum_seed = 0;
3416                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3417                         context_desc->mss_l4len_idx =
3418                                 cpu_to_le32(tx_ring->reg_idx << 4);
3419
3420                 buffer_info->time_stamp = jiffies;
3421                 buffer_info->next_to_watch = i;
3422                 buffer_info->dma = 0;
3423
3424                 i++;
3425                 if (i == tx_ring->count)
3426                         i = 0;
3427                 tx_ring->next_to_use = i;
3428
3429                 return true;
3430         }
3431         return false;
3432 }
3433
3434 #define IGB_MAX_TXD_PWR 16
3435 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
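     /* each advanced data descriptor can address at most 1 << 16 = 64 KB;
      * igb_tx_map_adv() below BUG()s if a single mapping reaches that size */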
3436
3437 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3438                                  unsigned int first)
3439 {
3440         struct igb_buffer *buffer_info;
3441         struct pci_dev *pdev = tx_ring->pdev;
3442         unsigned int len = skb_headlen(skb);
3443         unsigned int count = 0, i;
3444         unsigned int f;
3445         dma_addr_t *map;
3446
3447         i = tx_ring->next_to_use;
3448
3449         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3450                 dev_err(&pdev->dev, "TX DMA map failed\n");
3451                 return 0;
3452         }
3453
3454         map = skb_shinfo(skb)->dma_maps;
3455
3456         buffer_info = &tx_ring->buffer_info[i];
3457         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3458         buffer_info->length = len;
3459         /* set time_stamp *before* dma to help avoid a possible race */
3460         buffer_info->time_stamp = jiffies;
3461         buffer_info->next_to_watch = i;
3462         buffer_info->dma = skb_shinfo(skb)->dma_head;
3463
3464         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3465                 struct skb_frag_struct *frag;
3466
3467                 i++;
3468                 if (i == tx_ring->count)
3469                         i = 0;
3470
3471                 frag = &skb_shinfo(skb)->frags[f];
3472                 len = frag->size;
3473
3474                 buffer_info = &tx_ring->buffer_info[i];
3475                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3476                 buffer_info->length = len;
3477                 buffer_info->time_stamp = jiffies;
3478                 buffer_info->next_to_watch = i;
3479                 buffer_info->dma = map[count];
3480                 count++;
3481         }
3482
3483         tx_ring->buffer_info[i].skb = skb;
3484         tx_ring->buffer_info[first].next_to_watch = i;
3485
3486         return count + 1;
3487 }
3488
3489 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3490                                     int tx_flags, int count, u32 paylen,
3491                                     u8 hdr_len)
3492 {
3493         union e1000_adv_tx_desc *tx_desc = NULL;
3494         struct igb_buffer *buffer_info;
3495         u32 olinfo_status = 0, cmd_type_len;
3496         unsigned int i;
3497
3498         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3499                         E1000_ADVTXD_DCMD_DEXT);
3500
3501         if (tx_flags & IGB_TX_FLAGS_VLAN)
3502                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3503
3504         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3505                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3506
3507         if (tx_flags & IGB_TX_FLAGS_TSO) {
3508                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3509
3510                 /* insert tcp checksum */
3511                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3512
3513                 /* insert ip checksum */
3514                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3515                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3516
3517         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3518                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3519         }
3520
3521         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3522             (tx_flags & (IGB_TX_FLAGS_CSUM |
3523                          IGB_TX_FLAGS_TSO |
3524                          IGB_TX_FLAGS_VLAN)))
3525                 olinfo_status |= tx_ring->reg_idx << 4;
3526
3527         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3528
3529         i = tx_ring->next_to_use;
3530         while (count--) {
3531                 buffer_info = &tx_ring->buffer_info[i];
3532                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3533                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3534                 tx_desc->read.cmd_type_len =
3535                         cpu_to_le32(cmd_type_len | buffer_info->length);
3536                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3537                 i++;
3538                 if (i == tx_ring->count)
3539                         i = 0;
3540         }
3541
3542         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3543         /* Force memory writes to complete before letting h/w
3544          * know there are new descriptors to fetch.  (Only
3545          * applicable for weak-ordered memory model archs,
3546          * such as IA-64). */
3547         wmb();
3548
3549         tx_ring->next_to_use = i;
3550         writel(i, tx_ring->tail);
3551         /* we need this if more than one processor can write to our tail
3552          * at a time; it synchronizes IO on IA64/Altix systems */
3553         mmiowb();
3554 }
3555
3556 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3557 {
3558         struct net_device *netdev = tx_ring->netdev;
3559
3560         netif_stop_subqueue(netdev, tx_ring->queue_index);
3561
3562         /* Herbert's original patch had:
3563          *  smp_mb__after_netif_stop_queue();
3564          * but since that doesn't exist yet, just open code it. */
3565         smp_mb();
3566
3567         /* We need to check again in case another CPU has just
3568          * made room available. */
3569         if (igb_desc_unused(tx_ring) < size)
3570                 return -EBUSY;
3571
3572         /* A reprieve! */
3573         netif_wake_subqueue(netdev, tx_ring->queue_index);
3574         tx_ring->tx_stats.restart_queue++;
3575         return 0;
3576 }
3577
3578 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3579 {
3580         if (igb_desc_unused(tx_ring) >= size)
3581                 return 0;
3582         return __igb_maybe_stop_tx(tx_ring, size);
3583 }
3584
3585 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3586                                     struct igb_ring *tx_ring)
3587 {
3588         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3589         unsigned int first;
3590         unsigned int tx_flags = 0;
3591         u8 hdr_len = 0;
3592         int count = 0;
3593         int tso = 0;
3594         union skb_shared_tx *shtx;
3595
3596         /* need: 1 descriptor per page,
3597          *       + 2 desc gap to keep tail from touching head,
3598          *       + 1 desc for skb->data,
3599          *       + 1 desc for context descriptor,
3600          * otherwise try next time */
3601         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3602                 /* this is a hard error */
3603                 return NETDEV_TX_BUSY;
3604         }
3605
3606         /*
3607          * TODO: check that there currently is no other packet with
3608          * time stamping in the queue
3609          *
3610          * When doing time stamping, keep the connection to the socket
3611          * a while longer: it is still needed by skb_hwtstamp_tx(),
3612          * called either in igb_tx_hwtstamp() or by our caller when
3613          * doing software time stamping.
3614          */
3615         shtx = skb_tx(skb);
3616         if (unlikely(shtx->hardware)) {
3617                 shtx->in_progress = 1;
3618                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3619         }
3620
3621         if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3622                 tx_flags |= IGB_TX_FLAGS_VLAN;
3623                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3624         }
3625
3626         if (skb->protocol == htons(ETH_P_IP))
3627                 tx_flags |= IGB_TX_FLAGS_IPV4;
3628
3629         first = tx_ring->next_to_use;
3630         if (skb_is_gso(skb)) {
3631                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3632                 if (tso < 0) {
3633                         dev_kfree_skb_any(skb);
3634                         return NETDEV_TX_OK;
3635                 }
3636         }
3637
3638         if (tso)
3639                 tx_flags |= IGB_TX_FLAGS_TSO;
3640         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3641                  (skb->ip_summed == CHECKSUM_PARTIAL))
3642                 tx_flags |= IGB_TX_FLAGS_CSUM;
3643
3644         /*
3645          * count reflects descriptors mapped; if 0 then a mapping error
3646          * has occurred and we need to rewind the descriptor queue
3647          */
3648         count = igb_tx_map_adv(tx_ring, skb, first);
3649
3650         if (!count) {
3651                 dev_kfree_skb_any(skb);
3652                 tx_ring->buffer_info[first].time_stamp = 0;
3653                 tx_ring->next_to_use = first;
3654                 return NETDEV_TX_OK;
3655         }
3656
3657         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3658
3659         /* Make sure there is space in the ring for the next send. */
3660         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3661
3662         return NETDEV_TX_OK;
3663 }
3664
3665 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3666                                       struct net_device *netdev)
3667 {
3668         struct igb_adapter *adapter = netdev_priv(netdev);
3669         struct igb_ring *tx_ring;
3670         int r_idx = 0;
3671
3672         if (test_bit(__IGB_DOWN, &adapter->state)) {
3673                 dev_kfree_skb_any(skb);
3674                 return NETDEV_TX_OK;
3675         }
3676
3677         if (skb->len <= 0) {
3678                 dev_kfree_skb_any(skb);
3679                 return NETDEV_TX_OK;
3680         }
3681
3682         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3683         tx_ring = adapter->multi_tx_table[r_idx];
3684
3685         /* This goes back to the question of how to logically map a tx queue
3686          * to a flow.  Right now, performance takes a slight hit when
3687          * using multiple tx queues.  If the stack breaks away from a
3688          * single qdisc implementation, we can look at this again. */
3689         return igb_xmit_frame_ring_adv(skb, tx_ring);
3690 }
3691
3692 /**
3693  * igb_tx_timeout - Respond to a Tx Hang
3694  * @netdev: network interface device structure
3695  **/
3696 static void igb_tx_timeout(struct net_device *netdev)
3697 {
3698         struct igb_adapter *adapter = netdev_priv(netdev);
3699         struct e1000_hw *hw = &adapter->hw;
3700
3701         /* Do the reset outside of interrupt context */
3702         adapter->tx_timeout_count++;
3703         schedule_work(&adapter->reset_task);
3704         wr32(E1000_EICS,
3705              (adapter->eims_enable_mask & ~adapter->eims_other));
3706 }
3707
3708 static void igb_reset_task(struct work_struct *work)
3709 {
3710         struct igb_adapter *adapter;
3711         adapter = container_of(work, struct igb_adapter, reset_task);
3712
3713         igb_reinit_locked(adapter);
3714 }
3715
3716 /**
3717  * igb_get_stats - Get System Network Statistics
3718  * @netdev: network interface device structure
3719  *
3720  * Returns the address of the device statistics structure.
3721  * The statistics are actually updated from the timer callback.
3722  **/
3723 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3724 {
3725         /* only return the current stats */
3726         return &netdev->stats;
3727 }
3728
3729 /**
3730  * igb_change_mtu - Change the Maximum Transfer Unit
3731  * @netdev: network interface device structure
3732  * @new_mtu: new value for maximum frame size
3733  *
3734  * Returns 0 on success, negative on failure
3735  **/
3736 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3737 {
3738         struct igb_adapter *adapter = netdev_priv(netdev);
3739         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3740         u32 rx_buffer_len, i;
3741
3742         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3743             (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3744                 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3745                 return -EINVAL;
3746         }
3747
3748         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3749                 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3750                 return -EINVAL;
3751         }
3752
3753         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3754                 msleep(1);
3755
3756         /* igb_down has a dependency on max_frame_size */
3757         adapter->max_frame_size = max_frame;
3758         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3759          * means we reserve 2 more; this pushes us to allocate from the next
3760          * larger slab size.
3761          * i.e. RXBUFFER_2048 --> size-4096 slab
3762          */
3763
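             /*
              * The 128-byte buffer for the largest frames is intentional:
              * above the VLAN-sized standard frame the driver is assumed
              * to use packet-split receives, where the buffer holds only
              * the packet header and the payload lands in attached pages.
              */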
3764         if (max_frame <= IGB_RXBUFFER_1024)
3765                 rx_buffer_len = IGB_RXBUFFER_1024;
3766         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3767                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3768         else
3769                 rx_buffer_len = IGB_RXBUFFER_128;
3770
3771         if (netif_running(netdev))
3772                 igb_down(adapter);
3773
3774         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3775                  netdev->mtu, new_mtu);
3776         netdev->mtu = new_mtu;
3777
3778         for (i = 0; i < adapter->num_rx_queues; i++)
3779                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3780
3781         if (netif_running(netdev))
3782                 igb_up(adapter);
3783         else
3784                 igb_reset(adapter);
3785
3786         clear_bit(__IGB_RESETTING, &adapter->state);
3787
3788         return 0;
3789 }
3790
3791 /**
3792  * igb_update_stats - Update the board statistics counters
3793  * @adapter: board private structure
3794  **/
3795
3796 void igb_update_stats(struct igb_adapter *adapter)
3797 {
3798         struct net_device *netdev = adapter->netdev;
3799         struct e1000_hw *hw = &adapter->hw;
3800         struct pci_dev *pdev = adapter->pdev;
3801         u16 phy_tmp;
3802
3803 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3804
3805         /*
3806          * Prevent stats update while adapter is being reset, or if the pci
3807          * connection is down.
3808          */
3809         if (adapter->link_speed == 0)
3810                 return;
3811         if (pci_channel_offline(pdev))
3812                 return;
3813
3814         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3815         adapter->stats.gprc += rd32(E1000_GPRC);
3816         adapter->stats.gorc += rd32(E1000_GORCL);
3817         rd32(E1000_GORCH); /* clear GORCL */
3818         adapter->stats.bprc += rd32(E1000_BPRC);
3819         adapter->stats.mprc += rd32(E1000_MPRC);
3820         adapter->stats.roc += rd32(E1000_ROC);
3821
3822         adapter->stats.prc64 += rd32(E1000_PRC64);
3823         adapter->stats.prc127 += rd32(E1000_PRC127);
3824         adapter->stats.prc255 += rd32(E1000_PRC255);
3825         adapter->stats.prc511 += rd32(E1000_PRC511);
3826         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3827         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3828         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3829         adapter->stats.sec += rd32(E1000_SEC);
3830
3831         adapter->stats.mpc += rd32(E1000_MPC);
3832         adapter->stats.scc += rd32(E1000_SCC);
3833         adapter->stats.ecol += rd32(E1000_ECOL);
3834         adapter->stats.mcc += rd32(E1000_MCC);
3835         adapter->stats.latecol += rd32(E1000_LATECOL);
3836         adapter->stats.dc += rd32(E1000_DC);
3837         adapter->stats.rlec += rd32(E1000_RLEC);
3838         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3839         adapter->stats.xontxc += rd32(E1000_XONTXC);
3840         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3841         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3842         adapter->stats.fcruc += rd32(E1000_FCRUC);
3843         adapter->stats.gptc += rd32(E1000_GPTC);
3844         adapter->stats.gotc += rd32(E1000_GOTCL);
3845         rd32(E1000_GOTCH); /* clear GOTCL */
3846         adapter->stats.rnbc += rd32(E1000_RNBC);
3847         adapter->stats.ruc += rd32(E1000_RUC);
3848         adapter->stats.rfc += rd32(E1000_RFC);
3849         adapter->stats.rjc += rd32(E1000_RJC);
3850         adapter->stats.tor += rd32(E1000_TORH);
3851         adapter->stats.tot += rd32(E1000_TOTH);
3852         adapter->stats.tpr += rd32(E1000_TPR);
3853
3854         adapter->stats.ptc64 += rd32(E1000_PTC64);
3855         adapter->stats.ptc127 += rd32(E1000_PTC127);
3856         adapter->stats.ptc255 += rd32(E1000_PTC255);
3857         adapter->stats.ptc511 += rd32(E1000_PTC511);
3858         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3859         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3860
3861         adapter->stats.mptc += rd32(E1000_MPTC);
3862         adapter->stats.bptc += rd32(E1000_BPTC);
3863
3864         /* used for adaptive IFS */
3865
3866         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3867         adapter->stats.tpt += hw->mac.tx_packet_delta;
3868         hw->mac.collision_delta = rd32(E1000_COLC);
3869         adapter->stats.colc += hw->mac.collision_delta;
3870
3871         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3872         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3873         adapter->stats.tncrs += rd32(E1000_TNCRS);
3874         adapter->stats.tsctc += rd32(E1000_TSCTC);
3875         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3876
3877         adapter->stats.iac += rd32(E1000_IAC);
3878         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3879         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3880         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3881         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3882         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3883         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3884         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3885         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3886
3887         /* Fill out the OS statistics structure */
3888         netdev->stats.multicast = adapter->stats.mprc;
3889         netdev->stats.collisions = adapter->stats.colc;
3890
3891         /* Rx Errors */
3892
3893         if (hw->mac.type != e1000_82575) {
3894                 u32 rqdpc_tmp;
3895                 u64 rqdpc_total = 0;
3896                 int i;
3897                 /* Read out drop stats per RX queue.  Note that RQDPC
3898                  * (Receive Queue Drop Packet Count) is only incremented
3899                  * if the DROP_EN bit is set (in the SRRCTL register for
3900                  * that queue).  If the DROP_EN bit is NOT set, then a
3901                  * somewhat equivalent count is stored in RNBC (not on a
3902                  * per-queue basis).  Also note that the drop count is due
3903                  * to a lack of available descriptors.
3904                  */
3905                 for (i = 0; i < adapter->num_rx_queues; i++) {
3906                         rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3907                         adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3908                         rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3909                 }
3910                 netdev->stats.rx_fifo_errors = rqdpc_total;
3911         }
3912
3913         /* Note RNBC (Receive No Buffers Count) is not an exact
3914          * drop count, as the hardware FIFO might save the day.  That's
3915          * one of the reasons for saving it in rx_fifo_errors, as it is
3916          * potentially not a true drop.
3917          */
3918         netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3919
3920         /* RLEC on some newer hardware can be incorrect so build
3921          * our own version based on RUC and ROC */
3922         netdev->stats.rx_errors = adapter->stats.rxerrc +
3923                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3924                 adapter->stats.ruc + adapter->stats.roc +
3925                 adapter->stats.cexterr;
3926         netdev->stats.rx_length_errors = adapter->stats.ruc +
3927                                               adapter->stats.roc;
3928         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3929         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3930         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3931
3932         /* Tx Errors */
3933         netdev->stats.tx_errors = adapter->stats.ecol +
3934                                        adapter->stats.latecol;
3935         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3936         netdev->stats.tx_window_errors = adapter->stats.latecol;
3937         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3938
3939         /* Tx Dropped needs to be maintained elsewhere */
3940
3941         /* Phy Stats */
3942         if (hw->phy.media_type == e1000_media_type_copper) {
3943                 if ((adapter->link_speed == SPEED_1000) &&
3944                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3945                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3946                         adapter->phy_stats.idle_errors += phy_tmp;
3947                 }
3948         }
3949
3950         /* Management Stats */
3951         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3952         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3953         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3954 }
3955
3956 static irqreturn_t igb_msix_other(int irq, void *data)
3957 {
3958         struct igb_adapter *adapter = data;
3959         struct e1000_hw *hw = &adapter->hw;
3960         u32 icr = rd32(E1000_ICR);
3961         /* reading ICR causes bit 31 of EICR to be cleared */
3962
3963         if (icr & E1000_ICR_DOUTSYNC) {
3964                 /* HW is reporting DMA is out of sync */
3965                 adapter->stats.doosync++;
3966         }
3967
3968         /* Check for a mailbox event */
3969         if (icr & E1000_ICR_VMMB)
3970                 igb_msg_task(adapter);
3971
3972         if (icr & E1000_ICR_LSC) {
3973                 hw->mac.get_link_status = 1;
3974                 /* guard against interrupt when we're going down */
3975                 if (!test_bit(__IGB_DOWN, &adapter->state))
3976                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3977         }
3978
3979         wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3980         wr32(E1000_EIMS, adapter->eims_other);
3981
3982         return IRQ_HANDLED;
3983 }
3984
3985 static void igb_write_itr(struct igb_q_vector *q_vector)
3986 {
3987         u32 itr_val = q_vector->itr_val & 0x7FFC;
3988
3989         if (!q_vector->set_itr)
3990                 return;
3991
3992         if (!itr_val)
3993                 itr_val = 0x4;
3994
3995         if (q_vector->itr_shift)
3996                 itr_val |= itr_val << q_vector->itr_shift;
3997         else
3998                 itr_val |= 0x8000000;
3999
4000         writel(itr_val, q_vector->itr_register);
4001         q_vector->set_itr = 0;
4002 }
4003
4004 static irqreturn_t igb_msix_ring(int irq, void *data)
4005 {
4006         struct igb_q_vector *q_vector = data;
4007
4008         /* Write the ITR value calculated from the previous interrupt. */
4009         igb_write_itr(q_vector);
4010
4011         napi_schedule(&q_vector->napi);
4012
4013         return IRQ_HANDLED;
4014 }
4015
4016 #ifdef CONFIG_IGB_DCA
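     /*
      * DCA (Direct Cache Access) lets the chipset push descriptor (and
      * optionally header/data) writes toward the cache of the CPU that
      * will service the queue.  Each q_vector therefore re-tags its rings
      * with the current CPU's DCA tag whenever it finds itself running on
      * a different CPU than last time.
      */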
4017 static void igb_update_dca(struct igb_q_vector *q_vector)
4018 {
4019         struct igb_adapter *adapter = q_vector->adapter;
4020         struct e1000_hw *hw = &adapter->hw;
4021         int cpu = get_cpu();
4022
4023         if (q_vector->cpu == cpu)
4024                 goto out_no_update;
4025
4026         if (q_vector->tx_ring) {
4027                 int q = q_vector->tx_ring->reg_idx;
4028                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4029                 if (hw->mac.type == e1000_82575) {
4030                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4031                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4032                 } else {
4033                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4034                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4035                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4036                 }
4037                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4038                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4039         }
4040         if (q_vector->rx_ring) {
4041                 int q = q_vector->rx_ring->reg_idx;
4042                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4043                 if (hw->mac.type == e1000_82575) {
4044                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4045                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4046                 } else {
4047                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4048                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4049                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4050                 }
4051                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4052                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4053                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4054                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4055         }
4056         q_vector->cpu = cpu;
4057 out_no_update:
4058         put_cpu();
4059 }
4060
4061 static void igb_setup_dca(struct igb_adapter *adapter)
4062 {
4063         struct e1000_hw *hw = &adapter->hw;
4064         int i;
4065
4066         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4067                 return;
4068
4069         /* Always use CB2 mode; the difference is masked in the CB driver. */
4070         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4071
4072         for (i = 0; i < adapter->num_q_vectors; i++) {
4073                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4074                 q_vector->cpu = -1;
4075                 igb_update_dca(q_vector);
4076         }
4077 }
4078
4079 static int __igb_notify_dca(struct device *dev, void *data)
4080 {
4081         struct net_device *netdev = dev_get_drvdata(dev);
4082         struct igb_adapter *adapter = netdev_priv(netdev);
4083         struct e1000_hw *hw = &adapter->hw;
4084         unsigned long event = *(unsigned long *)data;
4085
4086         switch (event) {
4087         case DCA_PROVIDER_ADD:
4088                 /* if already enabled, don't do it again */
4089                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4090                         break;
4091                 /* Always use CB2 mode; the difference is masked
4092                  * in the CB driver. */
4093                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4094                 if (dca_add_requester(dev) == 0) {
4095                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4096                         dev_info(&adapter->pdev->dev, "DCA enabled\n");
4097                         igb_setup_dca(adapter);
4098                         break;
4099                 }
4100                 /* Fall Through since DCA is disabled. */
4101         case DCA_PROVIDER_REMOVE:
4102                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4103                         /* without this, a class_device is left
4104                          * hanging around in the sysfs model */
4105                         dca_remove_requester(dev);
4106                         dev_info(&adapter->pdev->dev, "DCA disabled\n");
4107                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4108                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4109                 }
4110                 break;
4111         }
4112
4113         return 0;
4114 }
4115
4116 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4117                           void *p)
4118 {
4119         int ret_val;
4120
4121         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4122                                          __igb_notify_dca);
4123
4124         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4125 }
4126 #endif /* CONFIG_IGB_DCA */
4127
4128 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4129 {
4130         struct e1000_hw *hw = &adapter->hw;
4131         u32 ping;
4132         int i;
4133
4134         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4135                 ping = E1000_PF_CONTROL_MSG;
4136                 if (adapter->vf_data[i].clear_to_send)
4137                         ping |= E1000_VT_MSGTYPE_CTS;
4138                 igb_write_mbx(hw, &ping, 1, i);
4139         }
4140 }
4141
4142 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4143                                   u32 *msgbuf, u32 vf)
4144 {
4145         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4146         u16 *hash_list = (u16 *)&msgbuf[1];
4147         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4148         int i;
4149
4150         /* only up to 30 hash values supported */
4151         if (n > 30)
4152                 n = 30;
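             /*
              * The limit of 30 follows from the mailbox size: the PF/VF
              * mailbox holds 16 32-bit words, and with msgbuf[0] used as
              * the header, the remaining 15 words carry at most 30 16-bit
              * hash values.
              */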
4153
4154         /* salt away the number of multicast addresses assigned
4155          * to this VF for later use, to restore when the PF multicast
4156          * list changes
4157          */
4158         vf_data->num_vf_mc_hashes = n;
4159
4160         /* VFs are limited to using the MTA hash table for their multicast
4161          * addresses */
4162         for (i = 0; i < n; i++)
4163                 vf_data->vf_mc_hashes[i] = hash_list[i];
4164
4165         /* Flush and reset the mta with the new values */
4166         igb_set_rx_mode(adapter->netdev);
4167
4168         return 0;
4169 }
4170
4171 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4172 {
4173         struct e1000_hw *hw = &adapter->hw;
4174         struct vf_data_storage *vf_data;
4175         int i, j;
4176
4177         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4178                 vf_data = &adapter->vf_data[i];
4179                 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4180                         igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4181         }
4182 }
4183
4184 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4185 {
4186         struct e1000_hw *hw = &adapter->hw;
4187         u32 pool_mask, reg, vid;
4188         int i;
4189
4190         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
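             /*
              * VLVF layout, assuming the 82576 definitions: bits 11:0 hold
              * the VLAN ID, the POOLSEL field starting at bit 12 has one
              * enable bit per pool, and bit 31 marks the entry as valid.
              * pool_mask therefore selects this VF's pool bit, e.g. vf 2
              * gives 1 << 14.
              */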
4191
4192         /* Find the vlan filter for this id */
4193         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4194                 reg = rd32(E1000_VLVF(i));
4195
4196                 /* remove the vf from the pool */
4197                 reg &= ~pool_mask;
4198
4199                 /* if pool is empty then remove entry from vfta */
4200                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4201                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4202                         vid = reg & E1000_VLVF_VLANID_MASK;
4203                         igb_vfta_set(hw, vid, false);
4204                         reg = 0;
4205                 }
4206
4207                 wr32(E1000_VLVF(i), reg);
4208         }
4209
4210         adapter->vf_data[vf].vlans_enabled = 0;
4211 }
4212
4213 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4214 {
4215         struct e1000_hw *hw = &adapter->hw;
4216         u32 reg, i;
4217
4218         /* It is an error to call this function when VFs are not enabled */
4219         if (!adapter->vfs_allocated_count)
4220                 return -1;
4221
4222         /* Find the vlan filter for this id */
4223         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4224                 reg = rd32(E1000_VLVF(i));
4225                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4226                     vid == (reg & E1000_VLVF_VLANID_MASK))
4227                         break;
4228         }
4229
4230         if (add) {
4231                 if (i == E1000_VLVF_ARRAY_SIZE) {
4232                         /* Did not find a matching VLAN ID entry that was
4233                          * enabled.  Search for a free filter entry, i.e.
4234                          * one without the enable bit set
4235                          */
4236                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4237                                 reg = rd32(E1000_VLVF(i));
4238                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4239                                         break;
4240                         }
4241                 }
4242                 if (i < E1000_VLVF_ARRAY_SIZE) {
4243                         /* Found an enabled/available entry */
4244                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4245
4246                         /* if !enabled we need to set this up in vfta */
4247                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4248                                 /* add VID to filter table, if bit already set
4249                                  * PF must have added it outside of table */
4250                                 if (igb_vfta_set(hw, vid, true))
4251                                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4252                                                 adapter->vfs_allocated_count);
4253                                 reg |= E1000_VLVF_VLANID_ENABLE;
4254                         }
4255                         reg &= ~E1000_VLVF_VLANID_MASK;
4256                         reg |= vid;
4257
4258                         wr32(E1000_VLVF(i), reg);
4259
4260                         /* do not modify RLPML for PF devices */
4261                         if (vf >= adapter->vfs_allocated_count)
4262                                 return 0;
4263
4264                         if (!adapter->vf_data[vf].vlans_enabled) {
4265                                 u32 size;
4266                                 reg = rd32(E1000_VMOLR(vf));
4267                                 size = reg & E1000_VMOLR_RLPML_MASK;
4268                                 size += 4;
4269                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4270                                 reg |= size;
4271                                 wr32(E1000_VMOLR(vf), reg);
4272                         }
4273                         adapter->vf_data[vf].vlans_enabled++;
4274
4275                         return 0;
4276                 }
4277         } else {
4278                 if (i < E1000_VLVF_ARRAY_SIZE) {
4279                         /* remove vf from the pool */
4280                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4281                         /* if pool is empty then remove entry from vfta */
4282                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4283                                 reg = 0;
4284                                 igb_vfta_set(hw, vid, false);
4285                         }
4286                         wr32(E1000_VLVF(i), reg);
4287
4288                         /* do not modify RLPML for PF devices */
4289                         if (vf >= adapter->vfs_allocated_count)
4290                                 return 0;
4291
4292                         adapter->vf_data[vf].vlans_enabled--;
4293                         if (!adapter->vf_data[vf].vlans_enabled) {
4294                                 u32 size;
4295                                 reg = rd32(E1000_VMOLR(vf));
4296                                 size = reg & E1000_VMOLR_RLPML_MASK;
4297                                 size -= 4;
4298                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4299                                 reg |= size;
4300                                 wr32(E1000_VMOLR(vf), reg);
4301                         }
4302                         return 0;
4303                 }
4304         }
4305         return -1;
4306 }
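
/*
 * Note on the RLPML adjustment in igb_vlvf_set(): VMOLR.RLPML is the
 * per-pool maximum accepted frame size.  The first VLAN enabled for a VF
 * grows the limit by 4 bytes (VLAN_HLEN) to make room for the 802.1Q tag,
 * and removing the VF's last VLAN shrinks it back by the same amount.
 */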
4307
4308 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4309 {
4310         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4311         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4312
4313         return igb_vlvf_set(adapter, vid, add, vf);
4314 }
4315
4316 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4317 {
4318         struct e1000_hw *hw = &adapter->hw;
4319
4320         /* disable mailbox functionality for vf */
4321         adapter->vf_data[vf].clear_to_send = false;
4322
4323         /* reset offloads to defaults */
4324         igb_set_vmolr(hw, vf);
4325
4326         /* reset vlans for device */
4327         igb_clear_vf_vfta(adapter, vf);
4328
4329         /* reset multicast table array for vf */
4330         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4331
4332         /* Flush and reset the mta with the new values */
4333         igb_set_rx_mode(adapter->netdev);
4334 }
4335
4336 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4337 {
4338         struct e1000_hw *hw = &adapter->hw;
4339         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4340         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4341         u32 reg, msgbuf[3];
4342         u8 *addr = (u8 *)(&msgbuf[1]);
4343
4344         /* process all the same items cleared in a function level reset */
4345         igb_vf_reset_event(adapter, vf);
4346
4347         /* set vf mac address */
4348         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4349
4350         /* enable transmit and receive for vf */
4351         reg = rd32(E1000_VFTE);
4352         wr32(E1000_VFTE, reg | (1 << vf));
4353         reg = rd32(E1000_VFRE);
4354         wr32(E1000_VFRE, reg | (1 << vf));
4355
4356         /* enable mailbox functionality for vf */
4357         adapter->vf_data[vf].clear_to_send = true;
4358
4359         /* reply to reset with ack and vf mac address */
4360         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4361         memcpy(addr, vf_mac, 6);
4362         igb_write_mbx(hw, msgbuf, 3, vf);
4363 }
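
/*
 * Reply layout produced above: msgbuf[0] carries the message type plus the
 * ACK/NACK flag bits, and the 6-byte MAC address is packed into
 * msgbuf[1..2].  A VF could decode the reset reply roughly like this
 * (illustrative sketch only, not part of this driver):
 *
 *	if (msgbuf[0] == (E1000_VF_RESET | E1000_VT_MSGTYPE_ACK))
 *		memcpy(hw->mac.addr, (u8 *)&msgbuf[1], 6);
 */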
4364
4365 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4366 {
4367         unsigned char *addr = (unsigned char *)&msg[1];
4368         int err = -1;
4369
4370         if (is_valid_ether_addr(addr))
4371                 err = igb_set_vf_mac(adapter, vf, addr);
4372
4373         return err;
4374
4375 }
4376
4377 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4378 {
4379         struct e1000_hw *hw = &adapter->hw;
4380         u32 msg = E1000_VT_MSGTYPE_NACK;
4381
4382         /* if device isn't clear to send it shouldn't be reading either */
4383         if (!adapter->vf_data[vf].clear_to_send)
4384                 igb_write_mbx(hw, &msg, 1, vf);
4385 }
4386
4387 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf);
4388 static void igb_msg_task(struct igb_adapter *adapter)
4389 {
4390         struct e1000_hw *hw = &adapter->hw;
4391         u32 vf;
4392
4393         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4394                 /* process any reset requests */
4395                 if (!igb_check_for_rst(hw, vf)) {
4396                         adapter->vf_data[vf].clear_to_send = false;
4397                         igb_vf_reset_event(adapter, vf);
4398                 }
4399
4400                 /* process any messages pending */
4401                 if (!igb_check_for_msg(hw, vf))
4402                         igb_rcv_msg_from_vf(adapter, vf);
4403
4404                 /* process any acks */
4405                 if (!igb_check_for_ack(hw, vf))
4406                         igb_rcv_ack_from_vf(adapter, vf);
4407
4408         }
4409 }
4410
4411 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4412 {
4413         u32 mbx_size = E1000_VFMAILBOX_SIZE;
4414         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4415         struct e1000_hw *hw = &adapter->hw;
4416         s32 retval;
4417
4418         retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4419
4420         if (retval)
4421                 dev_err(&adapter->pdev->dev,
4422                         "Error receiving message from VF\n");
4423
4424         /* this is a message we already processed, do nothing */
4425         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4426                 return retval;
4427
4428         /*
4429          * until the vf completes a reset it should not be
4430          * allowed to start any configuration.
4431          */
4432
4433         if (msgbuf[0] == E1000_VF_RESET) {
4434                 igb_vf_reset_msg(adapter, vf);
4435
4436                 return retval;
4437         }
4438
4439         if (!adapter->vf_data[vf].clear_to_send) {
4440                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4441                 igb_write_mbx(hw, msgbuf, 1, vf);
4442                 return retval;
4443         }
4444
4445         switch ((msgbuf[0] & 0xFFFF)) {
4446         case E1000_VF_SET_MAC_ADDR:
4447                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4448                 break;
4449         case E1000_VF_SET_MULTICAST:
4450                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4451                 break;
4452         case E1000_VF_SET_LPE:
4453                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4454                 break;
4455         case E1000_VF_SET_VLAN:
4456                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4457                 break;
4458         default:
4459                 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4460                 retval = -1;
4461                 break;
4462         }
4463
4464         /* notify the VF of the results of what it sent us */
4465         if (retval)
4466                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4467         else
4468                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4469
4470         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4471
4472         igb_write_mbx(hw, msgbuf, 1, vf);
4473
4474         return retval;
4475 }
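
/*
 * Mailbox protocol summary for the handler above: a VF must send
 * E1000_VF_RESET before anything else; until then every request is NACKed.
 * Once reset, each request is answered with the original message type plus
 * E1000_VT_MSGTYPE_ACK or E1000_VT_MSGTYPE_NACK, and E1000_VT_MSGTYPE_CTS
 * is always set in the reply to tell the VF it is clear to send again.
 */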
4476
4477 /**
4478  *  igb_set_uta - Set unicast filter table address
4479  *  @adapter: board private structure
4480  *
4481  *  The unicast table address is a register array of 32-bit registers.
4482  *  The table is meant to be used in a way similar to how the MTA is used
4483  *  however due to certain limitations in the hardware it is necessary to
4484  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4485  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4486  **/
4487 static void igb_set_uta(struct igb_adapter *adapter)
4488 {
4489         struct e1000_hw *hw = &adapter->hw;
4490         int i;
4491
4492         /* The UTA table only exists on 82576 hardware and newer */
4493         if (hw->mac.type < e1000_82576)
4494                 return;
4495
4496         /* we only need to do this if VMDq is enabled */
4497         if (!adapter->vfs_allocated_count)
4498                 return;
4499
4500         for (i = 0; i < hw->mac.uta_reg_count; i++)
4501                 array_wr32(E1000_UTA, i, ~0);
4502 }
4503
4504 /**
4505  * igb_intr_msi - Interrupt Handler
4506  * @irq: interrupt number
4507  * @data: pointer to a network interface device structure
4508  **/
4509 static irqreturn_t igb_intr_msi(int irq, void *data)
4510 {
4511         struct igb_adapter *adapter = data;
4512         struct igb_q_vector *q_vector = adapter->q_vector[0];
4513         struct e1000_hw *hw = &adapter->hw;
4514         /* read ICR disables interrupts using IAM */
4515         u32 icr = rd32(E1000_ICR);
4516
4517         igb_write_itr(q_vector);
4518
4519         if (icr & E1000_ICR_DOUTSYNC) {
4520                 /* HW is reporting DMA is out of sync */
4521                 adapter->stats.doosync++;
4522         }
4523
4524         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4525                 hw->mac.get_link_status = 1;
4526                 if (!test_bit(__IGB_DOWN, &adapter->state))
4527                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4528         }
4529
4530         napi_schedule(&q_vector->napi);
4531
4532         return IRQ_HANDLED;
4533 }
4534
4535 /**
4536  * igb_intr - Legacy Interrupt Handler
4537  * @irq: interrupt number
4538  * @data: pointer to a network interface device structure
4539  **/
4540 static irqreturn_t igb_intr(int irq, void *data)
4541 {
4542         struct igb_adapter *adapter = data;
4543         struct igb_q_vector *q_vector = adapter->q_vector[0];
4544         struct e1000_hw *hw = &adapter->hw;
4545         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4546          * need for the IMC write */
4547         u32 icr = rd32(E1000_ICR);
4548         if (!icr)
4549                 return IRQ_NONE;  /* Not our interrupt */
4550
4551         igb_write_itr(q_vector);
4552
4553         /* IMS will not auto-mask if INT_ASSERTED is not set; if INT_ASSERTED
4554          * is not set, the adapter didn't send this interrupt */
4555         if (!(icr & E1000_ICR_INT_ASSERTED))
4556                 return IRQ_NONE;
4557
4558         if (icr & E1000_ICR_DOUTSYNC) {
4559                 /* HW is reporting DMA is out of sync */
4560                 adapter->stats.doosync++;
4561         }
4562
4563         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4564                 hw->mac.get_link_status = 1;
4565                 /* guard against interrupt when we're going down */
4566                 if (!test_bit(__IGB_DOWN, &adapter->state))
4567                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4568         }
4569
4570         napi_schedule(&q_vector->napi);
4571
4572         return IRQ_HANDLED;
4573 }
4574
4575 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4576 {
4577         struct igb_adapter *adapter = q_vector->adapter;
4578         struct e1000_hw *hw = &adapter->hw;
4579
4580         if (adapter->itr_setting & 3) {
4581                 if (!adapter->msix_entries)
4582                         igb_set_itr(adapter);
4583                 else
4584                         igb_update_ring_itr(q_vector);
4585         }
4586
4587         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4588                 if (adapter->msix_entries)
4589                         wr32(E1000_EIMS, q_vector->eims_value);
4590                 else
4591                         igb_irq_enable(adapter);
4592         }
4593 }
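
/*
 * Re-arm split in the helper above: with MSI-X each q_vector owns a bit in
 * EIMS, so only that vector's interrupt cause is unmasked; in MSI/legacy
 * mode there is a single shared cause register and igb_irq_enable()
 * re-opens everything.
 */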
4594
4595 /**
4596  * igb_poll - NAPI Rx polling callback
4597  * @napi: napi polling structure
4598  * @budget: count of how many packets we should handle
4599  **/
4600 static int igb_poll(struct napi_struct *napi, int budget)
4601 {
4602         struct igb_q_vector *q_vector = container_of(napi,
4603                                                      struct igb_q_vector,
4604                                                      napi);
4605         int tx_clean_complete = 1, work_done = 0;
4606
4607 #ifdef CONFIG_IGB_DCA
4608         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4609                 igb_update_dca(q_vector);
4610 #endif
4611         if (q_vector->tx_ring)
4612                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4613
4614         if (q_vector->rx_ring)
4615                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4616
4617         if (!tx_clean_complete)
4618                 work_done = budget;
4619
4620         /* If not enough Rx work done, exit the polling mode */
4621         if (work_done < budget) {
4622                 napi_complete(napi);
4623                 igb_ring_irq_enable(q_vector);
4624         }
4625
4626         return work_done;
4627 }
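
/*
 * NAPI contract followed by igb_poll(): returning less than the budget
 * together with napi_complete() tells the core this vector is done and its
 * interrupt may be re-enabled; returning the full budget (forced above when
 * Tx cleanup is incomplete) keeps the vector in polling mode so igb_poll()
 * is called again without waiting for another interrupt.
 */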
4628
4629 /**
4630  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4631  * @adapter: board private structure
4632  * @skb: packet that was just sent
4633  *
4634  * If we were asked to do hardware time stamping and such a time stamp is
4635  * available, then it must have been for this skb here, because we allow
4636  * only one such packet into the queue.
4637  */
4638 static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
4639 {
4640         union skb_shared_tx *shtx = skb_tx(skb);
4641         struct e1000_hw *hw = &adapter->hw;
4642
4643         if (unlikely(shtx->hardware)) {
4644                 u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
4645                 if (valid) {
4646                         u64 regval = rd32(E1000_TXSTMPL);
4647                         u64 ns;
4648                         struct skb_shared_hwtstamps shhwtstamps;
4649
4650                         memset(&shhwtstamps, 0, sizeof(shhwtstamps));
4651                         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4652                         ns = timecounter_cyc2time(&adapter->clock,
4653                                                   regval);
4654                         timecompare_update(&adapter->compare, ns);
4655                         shhwtstamps.hwtstamp = ns_to_ktime(ns);
4656                         shhwtstamps.syststamp =
4657                                 timecompare_transform(&adapter->compare, ns);
4658                         skb_tstamp_tx(skb, &shhwtstamps);
4659                 }
4660         }
4661 }
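
/*
 * Ordering assumption in the read above (per the 82576 timestamping model
 * this code follows): TXSTMPL is read before TXSTMPH because reading the
 * high half releases the latch, after which the hardware may capture a
 * time stamp for the next transmitted packet.
 */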
4662
4663 /**
4664  * igb_clean_tx_irq - Reclaim resources after transmit completes
4665  * @q_vector: pointer to q_vector containing needed info
4666  * returns true if ring is completely cleaned
4667  **/
4668 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4669 {
4670         struct igb_adapter *adapter = q_vector->adapter;
4671         struct igb_ring *tx_ring = q_vector->tx_ring;
4672         struct net_device *netdev = tx_ring->netdev;
4673         struct e1000_hw *hw = &adapter->hw;
4674         struct igb_buffer *buffer_info;
4675         struct sk_buff *skb;
4676         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4677         unsigned int total_bytes = 0, total_packets = 0;
4678         unsigned int i, eop, count = 0;
4679         bool cleaned = false;
4680
4681         i = tx_ring->next_to_clean;
4682         eop = tx_ring->buffer_info[i].next_to_watch;
4683         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4684
4685         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4686                (count < tx_ring->count)) {
4687                 for (cleaned = false; !cleaned; count++) {
4688                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4689                         buffer_info = &tx_ring->buffer_info[i];
4690                         cleaned = (i == eop);
4691                         skb = buffer_info->skb;
4692
4693                         if (skb) {
4694                                 unsigned int segs, bytecount;
4695                                 /* gso_segs is currently only valid for tcp */
4696                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4697                                 /* multiply data chunks by size of headers */
4698                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4699                                             skb->len;
4700                                 total_packets += segs;
4701                                 total_bytes += bytecount;
4702
4703                                 igb_tx_hwtstamp(adapter, skb);
4704                         }
4705
4706                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4707                         tx_desc->wb.status = 0;
4708
4709                         i++;
4710                         if (i == tx_ring->count)
4711                                 i = 0;
4712                 }
4713                 eop = tx_ring->buffer_info[i].next_to_watch;
4714                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4715         }
4716
4717         tx_ring->next_to_clean = i;
4718
4719         if (unlikely(count &&
4720                      netif_carrier_ok(netdev) &&
4721                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4722                 /* Make sure that anybody stopping the queue after this
4723                  * sees the new next_to_clean.
4724                  */
4725                 smp_mb();
4726                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4727                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4728                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4729                         tx_ring->tx_stats.restart_queue++;
4730                 }
4731         }
4732
4733         if (tx_ring->detect_tx_hung) {
4734                 /* Detect a transmit hang in hardware; this serializes the
4735                  * check with the clearing of time_stamp and movement of i */
4736                 tx_ring->detect_tx_hung = false;
4737                 if (tx_ring->buffer_info[i].time_stamp &&
4738                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4739                                (adapter->tx_timeout_factor * HZ))
4740                     && !(rd32(E1000_STATUS) &
4741                          E1000_STATUS_TXOFF)) {
4742
4743                         /* detected Tx unit hang */
4744                         dev_err(&tx_ring->pdev->dev,
4745                                 "Detected Tx Unit Hang\n"
4746                                 "  Tx Queue             <%d>\n"
4747                                 "  TDH                  <%x>\n"
4748                                 "  TDT                  <%x>\n"
4749                                 "  next_to_use          <%x>\n"
4750                                 "  next_to_clean        <%x>\n"
4751                                 "buffer_info[next_to_clean]\n"
4752                                 "  time_stamp           <%lx>\n"
4753                                 "  next_to_watch        <%x>\n"
4754                                 "  jiffies              <%lx>\n"
4755                                 "  desc.status          <%x>\n",
4756                                 tx_ring->queue_index,
4757                                 readl(tx_ring->head),
4758                                 readl(tx_ring->tail),
4759                                 tx_ring->next_to_use,
4760                                 tx_ring->next_to_clean,
4761                                 tx_ring->buffer_info[i].time_stamp,
4762                                 eop,
4763                                 jiffies,
4764                                 eop_desc->wb.status);
4765                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4766                 }
4767         }
4768         tx_ring->total_bytes += total_bytes;
4769         tx_ring->total_packets += total_packets;
4770         tx_ring->tx_stats.bytes += total_bytes;
4771         tx_ring->tx_stats.packets += total_packets;
4772         netdev->stats.tx_bytes += total_bytes;
4773         netdev->stats.tx_packets += total_packets;
4774         return (count < tx_ring->count);
4775 }
4776
4777 /**
4778  * igb_receive_skb - helper function to handle rx indications
4779  * @q_vector: structure containing interrupt and ring information
4780  * @skb: packet to send up
4781  * @vlan_tag: vlan tag for packet
4782  **/
4783 static void igb_receive_skb(struct igb_q_vector *q_vector,
4784                             struct sk_buff *skb,
4785                             u16 vlan_tag)
4786 {
4787         struct igb_adapter *adapter = q_vector->adapter;
4788
4789         if (vlan_tag)
4790                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4791                                  vlan_tag, skb);
4792         else
4793                 napi_gro_receive(&q_vector->napi, skb);
4794 }
4795
4796 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4797                                        u32 status_err, struct sk_buff *skb)
4798 {
4799         skb->ip_summed = CHECKSUM_NONE;
4800
4801         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4802         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4803              (status_err & E1000_RXD_STAT_IXSM))
4804                 return;
4805
4806         /* TCP/UDP checksum error bit is set */
4807         if (status_err &
4808             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4809                 /*
4810                  * work around errata with sctp packets where the TCPE aka
4811                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4812                  * packets (aka let the stack check the crc32c)
4813                  */
4814                 if ((skb->len == 60) &&
4815                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4816                         ring->rx_stats.csum_err++;
4817
4818                 /* let the stack verify checksum errors */
4819                 return;
4820         }
4821         /* It must be a TCP or UDP packet with a valid checksum */
4822         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4823                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4824
4825         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4826 }
4827
4828 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4829                                union e1000_adv_rx_desc *rx_desc)
4830 {
4831         /* HW will not DMA in data larger than the given buffer, even if it
4832          * parses the (NFS, of course) header to be larger.  In that case, it
4833          * fills the header buffer and spills the rest into the page.
4834          */
4835         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4836                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4837         if (hlen > rx_ring->rx_buffer_len)
4838                 hlen = rx_ring->rx_buffer_len;
4839         return hlen;
4840 }
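
/*
 * Why the clamp above matters: hdr_info reports the header length the
 * hardware parsed, while DMA never copies more than the configured header
 * buffer.  Trusting an oversized parse result would make the caller's
 * skb_put() claim bytes that actually landed in the half-page fragment
 * attached later in igb_clean_rx_irq_adv().
 */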
4841
4842 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4843                                  int *work_done, int budget)
4844 {
4845         struct igb_adapter *adapter = q_vector->adapter;
4846         struct igb_ring *rx_ring = q_vector->rx_ring;
4847         struct net_device *netdev = rx_ring->netdev;
4848         struct e1000_hw *hw = &adapter->hw;
4849         struct pci_dev *pdev = rx_ring->pdev;
4850         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4851         struct igb_buffer *buffer_info, *next_buffer;
4852         struct sk_buff *skb;
4853         bool cleaned = false;
4854         int cleaned_count = 0;
4855         unsigned int total_bytes = 0, total_packets = 0;
4856         unsigned int i;
4857         u32 staterr;
4858         u16 length;
4859         u16 vlan_tag;
4860
4861         i = rx_ring->next_to_clean;
4862         buffer_info = &rx_ring->buffer_info[i];
4863         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4864         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4865
4866         while (staterr & E1000_RXD_STAT_DD) {
4867                 if (*work_done >= budget)
4868                         break;
4869                 (*work_done)++;
4870
4871                 skb = buffer_info->skb;
4872                 prefetch(skb->data - NET_IP_ALIGN);
4873                 buffer_info->skb = NULL;
4874
4875                 i++;
4876                 if (i == rx_ring->count)
4877                         i = 0;
4878                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4879                 prefetch(next_rxd);
4880                 next_buffer = &rx_ring->buffer_info[i];
4881
4882                 length = le16_to_cpu(rx_desc->wb.upper.length);
4883                 cleaned = true;
4884                 cleaned_count++;
4885
4886                 if (buffer_info->dma) {
4887                         pci_unmap_single(pdev, buffer_info->dma,
4888                                          rx_ring->rx_buffer_len,
4889                                          PCI_DMA_FROMDEVICE);
4890                         buffer_info->dma = 0;
4891                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4892                                 skb_put(skb, length);
4893                                 goto send_up;
4894                         }
4895                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4896                 }
4897
4898                 if (length) {
4899                         pci_unmap_page(pdev, buffer_info->page_dma,
4900                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4901                         buffer_info->page_dma = 0;
4902
4903                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4904                                                 buffer_info->page,
4905                                                 buffer_info->page_offset,
4906                                                 length);
4907
4908                         if (page_count(buffer_info->page) != 1)
4909                                 buffer_info->page = NULL;
4910                         else
4911                                 get_page(buffer_info->page);
4912
4913                         skb->len += length;
4914                         skb->data_len += length;
4915
4916                         skb->truesize += length;
4917                 }
4918
4919                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4920                         buffer_info->skb = next_buffer->skb;
4921                         buffer_info->dma = next_buffer->dma;
4922                         next_buffer->skb = skb;
4923                         next_buffer->dma = 0;
4924                         goto next_desc;
4925                 }
4926 send_up:
4927                 /*
4928                  * If this bit is set, then the RX registers contain
4929                  * the time stamp. No other packet will be time
4930                  * stamped until we read these registers, so read the
4931                  * registers to make them available again. Because
4932                  * only one packet can be time stamped at a time, we
4933                  * know that the register values must belong to this
4934                  * one here and therefore we don't need to compare
4935                  * any of the additional attributes stored for it.
4936                  *
4937                  * If nothing went wrong, then it should have a
4938                  * skb_shared_tx that we can turn into a
4939                  * skb_shared_hwtstamps.
4940                  *
4941                  * TODO: can time stamping be triggered (thus locking
4942                  * the registers) without the packet reaching this point
4943                  * here? In that case RX time stamping would get stuck.
4944                  *
4945                  * TODO: in "time stamp all packets" mode this bit is
4946                  * not set. Need a global flag for this mode and then
4947                  * always read the registers. Cannot be done without
4948                  * a race condition.
4949                  */
4950                 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4951                         u64 regval;
4952                         u64 ns;
4953                         struct skb_shared_hwtstamps *shhwtstamps =
4954                                 skb_hwtstamps(skb);
4955
4956                         WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4957                              "igb: no RX time stamp available for time stamped packet");
4958                         regval = rd32(E1000_RXSTMPL);
4959                         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4960                         ns = timecounter_cyc2time(&adapter->clock, regval);
4961                         timecompare_update(&adapter->compare, ns);
4962                         memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4963                         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4964                         shhwtstamps->syststamp =
4965                                 timecompare_transform(&adapter->compare, ns);
4966                 }
4967
4968                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4969                         dev_kfree_skb_irq(skb);
4970                         goto next_desc;
4971                 }
4972
4973                 total_bytes += skb->len;
4974                 total_packets++;
4975
4976                 igb_rx_checksum_adv(rx_ring, staterr, skb);
4977
4978                 skb->protocol = eth_type_trans(skb, netdev);
4979                 skb_record_rx_queue(skb, rx_ring->queue_index);
4980
4981                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4982                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4983
4984                 igb_receive_skb(q_vector, skb, vlan_tag);
4985
4986 next_desc:
4987                 rx_desc->wb.upper.status_error = 0;
4988
4989                 /* return some buffers to hardware, one at a time is too slow */
4990                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4991                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4992                         cleaned_count = 0;
4993                 }
4994
4995                 /* use prefetched values */
4996                 rx_desc = next_rxd;
4997                 buffer_info = next_buffer;
4998                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4999         }
5000
5001         rx_ring->next_to_clean = i;
5002         cleaned_count = igb_desc_unused(rx_ring);
5003
5004         if (cleaned_count)
5005                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5006
5007         rx_ring->total_packets += total_packets;
5008         rx_ring->total_bytes += total_bytes;
5009         rx_ring->rx_stats.packets += total_packets;
5010         rx_ring->rx_stats.bytes += total_bytes;
5011         netdev->stats.rx_bytes += total_bytes;
5012         netdev->stats.rx_packets += total_packets;
5013         return cleaned;
5014 }
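
/*
 * Packet-split flow above, in short: the parsed header lands in the skb's
 * linear area, payload arrives in half-page fragments, and for frames that
 * span descriptors (EOP clear) the in-progress skb is parked on the next
 * buffer_info so the loop keeps appending until the EOP descriptor shows up.
 */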
5015
5016 /**
5017  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5018  * @rx_ring: address of the receive ring structure to refill
5019  **/
5020 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5021 {
5022         struct net_device *netdev = rx_ring->netdev;
5023         union e1000_adv_rx_desc *rx_desc;
5024         struct igb_buffer *buffer_info;
5025         struct sk_buff *skb;
5026         unsigned int i;
5027         int bufsz;
5028
5029         i = rx_ring->next_to_use;
5030         buffer_info = &rx_ring->buffer_info[i];
5031
5032         bufsz = rx_ring->rx_buffer_len;
5033
5034         while (cleaned_count--) {
5035                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5036
5037                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5038                         if (!buffer_info->page) {
5039                                 buffer_info->page = alloc_page(GFP_ATOMIC);
5040                                 if (!buffer_info->page) {
5041                                         rx_ring->rx_stats.alloc_failed++;
5042                                         goto no_buffers;
5043                                 }
5044                                 buffer_info->page_offset = 0;
5045                         } else {
5046                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5047                         }
5048                         buffer_info->page_dma =
5049                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5050                                              buffer_info->page_offset,
5051                                              PAGE_SIZE / 2,
5052                                              PCI_DMA_FROMDEVICE);
5053                 }
5054
5055                 if (!buffer_info->skb) {
5056                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5057                         if (!skb) {
5058                                 rx_ring->rx_stats.alloc_failed++;
5059                                 goto no_buffers;
5060                         }
5061
5062                         buffer_info->skb = skb;
5063                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5064                                                           skb->data,
5065                                                           bufsz,
5066                                                           PCI_DMA_FROMDEVICE);
5067                 }
5068                 /* Refresh the desc even if buffer_addrs didn't change because
5069                  * each write-back erases this info. */
5070                 if (bufsz < IGB_RXBUFFER_1024) {
5071                         rx_desc->read.pkt_addr =
5072                              cpu_to_le64(buffer_info->page_dma);
5073                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5074                 } else {
5075                         rx_desc->read.pkt_addr =
5076                              cpu_to_le64(buffer_info->dma);
5077                         rx_desc->read.hdr_addr = 0;
5078                 }
5079
5080                 i++;
5081                 if (i == rx_ring->count)
5082                         i = 0;
5083                 buffer_info = &rx_ring->buffer_info[i];
5084         }
5085
5086 no_buffers:
5087         if (rx_ring->next_to_use != i) {
5088                 rx_ring->next_to_use = i;
5089                 if (i == 0)
5090                         i = (rx_ring->count - 1);
5091                 else
5092                         i--;
5093
5094                 /* Force memory writes to complete before letting h/w
5095                  * know there are new descriptors to fetch.  (Only
5096                  * applicable for weak-ordered memory model archs,
5097                  * such as IA-64). */
5098                 wmb();
5099                 writel(i, rx_ring->tail);
5100         }
5101 }
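
/*
 * Note: the no_buffers label above is not an error exit; on an allocation
 * failure whatever descriptors were successfully refilled are still handed
 * to hardware via the tail write, and rx_stats.alloc_failed records the
 * shortfall so a later cleanup pass can try again.
 */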
5102
5103 /**
5104  * igb_mii_ioctl - handle MII ioctls
5105  * @netdev: network interface device structure
5106  * @ifr: interface request structure holding the mii_ioctl_data
5107  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5108  **/
5109 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5110 {
5111         struct igb_adapter *adapter = netdev_priv(netdev);
5112         struct mii_ioctl_data *data = if_mii(ifr);
5113
5114         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5115                 return -EOPNOTSUPP;
5116
5117         switch (cmd) {
5118         case SIOCGMIIPHY:
5119                 data->phy_id = adapter->hw.phy.addr;
5120                 break;
5121         case SIOCGMIIREG:
5122                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5123                                      &data->val_out))
5124                         return -EIO;
5125                 break;
5126         case SIOCSMIIREG:
5127         default:
5128                 return -EOPNOTSUPP;
5129         }
5130         return 0;
5131 }
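
/*
 * Matching userspace sequence for the MII path above, as a minimal sketch
 * (assumes an interface named "eth0" and an open AF_INET socket fd; error
 * handling omitted):
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);	// fills mii->phy_id
 *	mii->reg_num = MII_BMSR;	// basic mode status register
 *	ioctl(fd, SIOCGMIIREG, &ifr);	// result returned in mii->val_out
 */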
5132
5133 /**
5134  * igb_hwtstamp_ioctl - control hardware time stamping
5135  * @netdev: network interface device structure
5136  * @ifr: interface request structure holding the hwtstamp_config
5137  * @cmd: ioctl command (SIOCSHWTSTAMP)
5138  *
5139  * Outgoing time stamping can be enabled and disabled. Play nice and
5140  * disable it when requested, although it shouldn't cause any overhead
5141  * when no packet needs it. At most one packet in the queue may be
5142  * marked for time stamping, otherwise it would be impossible to tell
5143  * for sure to which packet the hardware time stamp belongs.
5144  *
5145  * Incoming time stamping has to be configured via the hardware
5146  * filters. Not all combinations are supported, in particular event
5147  * type has to be specified. Matching the kind of event packet is
5148  * not supported, with the exception of "all V2 events regardless of
5149  * layer 2 or 4".
5150  *
5151  **/
5152 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5153                               struct ifreq *ifr, int cmd)
5154 {
5155         struct igb_adapter *adapter = netdev_priv(netdev);
5156         struct e1000_hw *hw = &adapter->hw;
5157         struct hwtstamp_config config;
5158         u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5159         u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
5160         u32 tsync_rx_ctl_type = 0;
5161         u32 tsync_rx_cfg = 0;
5162         int is_l4 = 0;
5163         int is_l2 = 0;
5164         short port = 319; /* PTP */
5165         u32 regval;
5166
5167         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5168                 return -EFAULT;
5169
5170         /* reserved for future extensions */
5171         if (config.flags)
5172                 return -EINVAL;
5173
5174         switch (config.tx_type) {
5175         case HWTSTAMP_TX_OFF:
5176                 tsync_tx_ctl_bit = 0;
5177                 break;
5178         case HWTSTAMP_TX_ON:
5179                 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5180                 break;
5181         default:
5182                 return -ERANGE;
5183         }
5184
5185         switch (config.rx_filter) {
5186         case HWTSTAMP_FILTER_NONE:
5187                 tsync_rx_ctl_bit = 0;
5188                 break;
5189         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5190         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5191         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5192         case HWTSTAMP_FILTER_ALL:
5193                 /*
5194                  * register TSYNCRXCFG must be set, therefore it is not
5195                  * possible to time stamp both Sync and Delay_Req messages
5196                  * => fall back to time stamping all packets
5197                  */
5198                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
5199                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5200                 break;
5201         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5202                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5203                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5204                 is_l4 = 1;
5205                 break;
5206         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5207                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5208                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5209                 is_l4 = 1;
5210                 break;
5211         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5212         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5213                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5214                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5215                 is_l2 = 1;
5216                 is_l4 = 1;
5217                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5218                 break;
5219         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5220         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5221                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5222                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5223                 is_l2 = 1;
5224                 is_l4 = 1;
5225                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5226                 break;
5227         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5228         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5229         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5230                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5231                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5232                 is_l2 = 1;
5233                 break;
5234         default:
5235                 return -ERANGE;
5236         }
5237
5238         /* enable/disable TX */
5239         regval = rd32(E1000_TSYNCTXCTL);
5240         regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5241         wr32(E1000_TSYNCTXCTL, regval);
5242
5243         /* enable/disable RX, define which PTP packets are time stamped */
5244         regval = rd32(E1000_TSYNCRXCTL);
5245         regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5246         regval = (regval & ~0xE) | tsync_rx_ctl_type;
5247         wr32(E1000_TSYNCRXCTL, regval);
5248         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5249
5250         /*
5251          * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5252          *                                          (Ethertype to filter on)
5253          * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5254          * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5255          */
5256         wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5257
5258         /* L4 Queue Filter[0]: only filter by source and destination port */
5259         wr32(E1000_SPQF0, htons(port));
5260         wr32(E1000_IMIREXT(0), is_l4 ?
5261              ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5262         wr32(E1000_IMIR(0), is_l4 ?
5263              (htons(port)
5264               | (0<<16) /* immediate interrupt disabled */
5265               | 0 /* (1<<17) bit cleared: do not bypass
5266                      destination port check */)
5267                 : 0);
5268         wr32(E1000_FTQF0, is_l4 ?
5269              (0x11 /* UDP */
5270               | (1<<15) /* VF not compared */
5271               | (1<<27) /* Enable Timestamping */
5272               | (7<<28) /* only source port filter enabled,
5273                            source/target address and protocol
5274                            masked */)
5275              : ((1<<15) | (15<<28) /* all mask bits set = filter not
5276                                       enabled */));
5277
5278         wrfl();
5279
5280         adapter->hwtstamp_config = config;
5281
5282         /* clear TX/RX time stamp registers, just to be sure */
5283         regval = rd32(E1000_TXSTMPH);
5284         regval = rd32(E1000_RXSTMPH);
5285
5286         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5287                 -EFAULT : 0;
5288 }
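
/*
 * Userspace side of the SIOCSHWTSTAMP path above, as a minimal sketch
 * (assumes an interface named "eth0" and an open socket fd; error handling
 * omitted):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (char *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter reports what was actually enabled, which may be
 * broader than requested (e.g. HWTSTAMP_FILTER_ALL, as in the fallback
 * above).
 */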
5289
5290 /**
5291  * igb_ioctl - handle device-specific ioctls
5292  * @netdev: network interface device structure
5293  * @ifr: interface request structure
5294  * @cmd: ioctl command
5295  **/
5296 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5297 {
5298         switch (cmd) {
5299         case SIOCGMIIPHY:
5300         case SIOCGMIIREG:
5301         case SIOCSMIIREG:
5302                 return igb_mii_ioctl(netdev, ifr, cmd);
5303         case SIOCSHWTSTAMP:
5304                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5305         default:
5306                 return -EOPNOTSUPP;
5307         }
5308 }
5309
5310 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5311 {
5312         struct igb_adapter *adapter = hw->back;
5313         u16 cap_offset;
5314
5315         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5316         if (!cap_offset)
5317                 return -E1000_ERR_CONFIG;
5318
5319         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5320
5321         return 0;
5322 }
5323
5324 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5325 {
5326         struct igb_adapter *adapter = hw->back;
5327         u16 cap_offset;
5328
5329         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5330         if (!cap_offset)
5331                 return -E1000_ERR_CONFIG;
5332
5333         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5334
5335         return 0;
5336 }
5337
5338 static void igb_vlan_rx_register(struct net_device *netdev,
5339                                  struct vlan_group *grp)
5340 {
5341         struct igb_adapter *adapter = netdev_priv(netdev);
5342         struct e1000_hw *hw = &adapter->hw;
5343         u32 ctrl, rctl;
5344
5345         igb_irq_disable(adapter);
5346         adapter->vlgrp = grp;
5347
5348         if (grp) {
5349                 /* enable VLAN tag insert/strip */
5350                 ctrl = rd32(E1000_CTRL);
5351                 ctrl |= E1000_CTRL_VME;
5352                 wr32(E1000_CTRL, ctrl);
5353
5354                 /* enable VLAN receive filtering */
5355                 rctl = rd32(E1000_RCTL);
5356                 rctl &= ~E1000_RCTL_CFIEN;
5357                 wr32(E1000_RCTL, rctl);
5358                 igb_update_mng_vlan(adapter);
5359         } else {
5360                 /* disable VLAN tag insert/strip */
5361                 ctrl = rd32(E1000_CTRL);
5362                 ctrl &= ~E1000_CTRL_VME;
5363                 wr32(E1000_CTRL, ctrl);
5364
5365                 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5366                         igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5367                         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5368                 }
5369         }
5370
5371         igb_rlpml_set(adapter);
5372
5373         if (!test_bit(__IGB_DOWN, &adapter->state))
5374                 igb_irq_enable(adapter);
5375 }
5376
5377 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5378 {
5379         struct igb_adapter *adapter = netdev_priv(netdev);
5380         struct e1000_hw *hw = &adapter->hw;
5381         int pf_id = adapter->vfs_allocated_count;
5382
5383         if ((hw->mng_cookie.status &
5384              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5385             (vid == adapter->mng_vlan_id))
5386                 return;
5387
5388         /* add vid to vlvf if sr-iov is enabled;
5389          * if that fails add directly to filter table */
5390         if (igb_vlvf_set(adapter, vid, true, pf_id))
5391                 igb_vfta_set(hw, vid, true);
5392
5393 }
5394
5395 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5396 {
5397         struct igb_adapter *adapter = netdev_priv(netdev);
5398         struct e1000_hw *hw = &adapter->hw;
5399         int pf_id = adapter->vfs_allocated_count;
5400
5401         igb_irq_disable(adapter);
5402         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5403
5404         if (!test_bit(__IGB_DOWN, &adapter->state))
5405                 igb_irq_enable(adapter);
5406
5407         if ((adapter->hw.mng_cookie.status &
5408              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5409             (vid == adapter->mng_vlan_id)) {
5410                 /* release control to f/w */
5411                 igb_release_hw_control(adapter);
5412                 return;
5413         }
5414
5415         /* remove vid from vlvf if sr-iov is enabled;
5416          * if not in vlvf remove from vfta */
5417         if (igb_vlvf_set(adapter, vid, false, pf_id))
5418                 igb_vfta_set(hw, vid, false);
5419 }
5420
5421 static void igb_restore_vlan(struct igb_adapter *adapter)
5422 {
5423         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5424
5425         if (adapter->vlgrp) {
5426                 u16 vid;
5427                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5428                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5429                                 continue;
5430                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5431                 }
5432         }
5433 }
5434
5435 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5436 {
5437         struct e1000_mac_info *mac = &adapter->hw.mac;
5438
5439         mac->autoneg = 0;
5440
5441         switch (spddplx) {
5442         case SPEED_10 + DUPLEX_HALF:
5443                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5444                 break;
5445         case SPEED_10 + DUPLEX_FULL:
5446                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5447                 break;
5448         case SPEED_100 + DUPLEX_HALF:
5449                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5450                 break;
5451         case SPEED_100 + DUPLEX_FULL:
5452                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5453                 break;
5454         case SPEED_1000 + DUPLEX_FULL:
5455                 mac->autoneg = 1;
5456                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5457                 break;
5458         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5459         default:
5460                 dev_err(&adapter->pdev->dev,
5461                         "Unsupported Speed/Duplex configuration\n");
5462                 return -EINVAL;
5463         }
5464         return 0;
5465 }
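
/*
 * The spddplx encoding above is simply the sum of the ethtool constants,
 * e.g. SPEED_100 + DUPLEX_FULL == 101; every supported speed/duplex pair
 * yields a distinct sum, and 1000 Mbps half duplex is rejected because the
 * hardware cannot do it.  A caller would use it as:
 *
 *	igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL);
 */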
5466
5467 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5468 {
5469         struct net_device *netdev = pci_get_drvdata(pdev);
5470         struct igb_adapter *adapter = netdev_priv(netdev);
5471         struct e1000_hw *hw = &adapter->hw;
5472         u32 ctrl, rctl, status;
5473         u32 wufc = adapter->wol;
5474 #ifdef CONFIG_PM
5475         int retval = 0;
5476 #endif
5477
5478         netif_device_detach(netdev);
5479
5480         if (netif_running(netdev))
5481                 igb_close(netdev);
5482
5483         igb_clear_interrupt_scheme(adapter);
5484
5485 #ifdef CONFIG_PM
5486         retval = pci_save_state(pdev);
5487         if (retval)
5488                 return retval;
5489 #endif
5490
5491         status = rd32(E1000_STATUS);
5492         if (status & E1000_STATUS_LU)
5493                 wufc &= ~E1000_WUFC_LNKC;
5494
5495         if (wufc) {
5496                 igb_setup_rctl(adapter);
5497                 igb_set_rx_mode(netdev);
5498
5499                 /* turn on all-multi mode if wake on multicast is enabled */
5500                 if (wufc & E1000_WUFC_MC) {
5501                         rctl = rd32(E1000_RCTL);
5502                         rctl |= E1000_RCTL_MPE;
5503                         wr32(E1000_RCTL, rctl);
5504                 }
5505
5506                 ctrl = rd32(E1000_CTRL);
5507                 /* advertise wake from D3Cold */
5508                 #define E1000_CTRL_ADVD3WUC 0x00100000
5509                 /* phy power management enable */
5510                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5511                 ctrl |= E1000_CTRL_ADVD3WUC;
5512                 wr32(E1000_CTRL, ctrl);
5513
5514                 /* Allow time for pending master requests to run */
5515                 igb_disable_pcie_master(&adapter->hw);
5516
5517                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5518                 wr32(E1000_WUFC, wufc);
5519         } else {
5520                 wr32(E1000_WUC, 0);
5521                 wr32(E1000_WUFC, 0);
5522         }
5523
5524         *enable_wake = wufc || adapter->en_mng_pt;
5525         if (!*enable_wake)
5526                 igb_shutdown_serdes_link_82575(hw);
5527
5528         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5529          * would have already happened in close and is redundant. */
5530         igb_release_hw_control(adapter);
5531
5532         pci_disable_device(pdev);
5533
5534         return 0;
5535 }
5536
5537 #ifdef CONFIG_PM
5538 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5539 {
5540         int retval;
5541         bool wake;
5542
5543         retval = __igb_shutdown(pdev, &wake);
5544         if (retval)
5545                 return retval;
5546
5547         if (wake) {
5548                 pci_prepare_to_sleep(pdev);
5549         } else {
5550                 pci_wake_from_d3(pdev, false);
5551                 pci_set_power_state(pdev, PCI_D3hot);
5552         }
5553
5554         return 0;
5555 }
5556
5557 static int igb_resume(struct pci_dev *pdev)
5558 {
5559         struct net_device *netdev = pci_get_drvdata(pdev);
5560         struct igb_adapter *adapter = netdev_priv(netdev);
5561         struct e1000_hw *hw = &adapter->hw;
5562         u32 err;
5563
5564         pci_set_power_state(pdev, PCI_D0);
5565         pci_restore_state(pdev);
5566
5567         err = pci_enable_device_mem(pdev);
5568         if (err) {
5569                 dev_err(&pdev->dev,
5570                         "igb: Cannot enable PCI device from suspend\n");
5571                 return err;
5572         }
5573         pci_set_master(pdev);
5574
5575         pci_enable_wake(pdev, PCI_D3hot, 0);
5576         pci_enable_wake(pdev, PCI_D3cold, 0);
5577
5578         if (igb_init_interrupt_scheme(adapter)) {
5579                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5580                 return -ENOMEM;
5581         }
5582
5583         /* e1000_power_up_phy(adapter); */
5584
5585         igb_reset(adapter);
5586
5587         /* let the f/w know that the h/w is now under the control of the
5588          * driver. */
5589         igb_get_hw_control(adapter);
5590
5591         wr32(E1000_WUS, ~0);
5592
5593         if (netif_running(netdev)) {
5594                 err = igb_open(netdev);
5595                 if (err)
5596                         return err;
5597         }
5598
5599         netif_device_attach(netdev);
5600
5601         return 0;
5602 }
5603 #endif
5604
5605 static void igb_shutdown(struct pci_dev *pdev)
5606 {
5607         bool wake;
5608
5609         __igb_shutdown(pdev, &wake);
5610
5611         if (system_state == SYSTEM_POWER_OFF) {
5612                 pci_wake_from_d3(pdev, wake);
5613                 pci_set_power_state(pdev, PCI_D3hot);
5614         }
5615 }
5616
5617 #ifdef CONFIG_NET_POLL_CONTROLLER
5618 /*
5619  * Polling 'interrupt' - used by things like netconsole to send skbs
5620  * without having to re-enable interrupts. It's not called while
5621  * the interrupt routine is executing.
5622  */
5623 static void igb_netpoll(struct net_device *netdev)
5624 {
5625         struct igb_adapter *adapter = netdev_priv(netdev);
5626         struct e1000_hw *hw = &adapter->hw;
5627         int i;
5628
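             /* legacy or MSI mode has a single vector: mask all interrupts
              * and schedule the one NAPI context */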
5629         if (!adapter->msix_entries) {
5630                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5631                 igb_irq_disable(adapter);
5632                 napi_schedule(&q_vector->napi);
5633                 return;
5634         }
5635
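             /* MSI-X mode: mask each vector individually via EIMC and let
              * its NAPI handler service the associated rings */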
5636         for (i = 0; i < adapter->num_q_vectors; i++) {
5637                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5638                 wr32(E1000_EIMC, q_vector->eims_value);
5639                 napi_schedule(&q_vector->napi);
5640         }
5641 }
5642 #endif /* CONFIG_NET_POLL_CONTROLLER */
5643
5644 /**
5645  * igb_io_error_detected - called when PCI error is detected
5646  * @pdev: Pointer to PCI device
5647  * @state: The current pci connection state
5648  *
5649  * This function is called after a PCI bus error affecting
5650  * this device has been detected.
5651  */
5652 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5653                                               pci_channel_state_t state)
5654 {
5655         struct net_device *netdev = pci_get_drvdata(pdev);
5656         struct igb_adapter *adapter = netdev_priv(netdev);
5657
5658         netif_device_detach(netdev);
5659
5660         if (state == pci_channel_io_perm_failure)
5661                 return PCI_ERS_RESULT_DISCONNECT;
5662
5663         if (netif_running(netdev))
5664                 igb_down(adapter);
5665         pci_disable_device(pdev);
5666
5667         /* Request a slot reset. */
5668         return PCI_ERS_RESULT_NEED_RESET;
5669 }
5670
5671 /**
5672  * igb_io_slot_reset - called after the pci bus has been reset.
5673  * @pdev: Pointer to PCI device
5674  *
5675  * Restart the card from scratch, as if from a cold boot. Implementation
5676  * resembles the first half of the igb_resume routine.
5677  */
5678 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5679 {
5680         struct net_device *netdev = pci_get_drvdata(pdev);
5681         struct igb_adapter *adapter = netdev_priv(netdev);
5682         struct e1000_hw *hw = &adapter->hw;
5683         pci_ers_result_t result;
5684         int err;
5685
5686         if (pci_enable_device_mem(pdev)) {
5687                 dev_err(&pdev->dev,
5688                         "Cannot re-enable PCI device after reset.\n");
5689                 result = PCI_ERS_RESULT_DISCONNECT;
5690         } else {
5691                 pci_set_master(pdev);
5692                 pci_restore_state(pdev);
5693
5694                 pci_enable_wake(pdev, PCI_D3hot, 0);
5695                 pci_enable_wake(pdev, PCI_D3cold, 0);
5696
5697                 igb_reset(adapter);
5698                 wr32(E1000_WUS, ~0);
5699                 result = PCI_ERS_RESULT_RECOVERED;
5700         }
5701
5702         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5703         if (err) {
5704                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5705                         "failed 0x%x\n", err);
5706                 /* non-fatal, continue */
5707         }
5708
5709         return result;
5710 }
5711
5712 /**
5713  * igb_io_resume - called when traffic can start flowing again.
5714  * @pdev: Pointer to PCI device
5715  *
5716  * This callback is called when the error recovery driver tells us that
5717  * it's OK to resume normal operation. Implementation resembles the
5718  * second half of the igb_resume routine.
5719  */
5720 static void igb_io_resume(struct pci_dev *pdev)
5721 {
5722         struct net_device *netdev = pci_get_drvdata(pdev);
5723         struct igb_adapter *adapter = netdev_priv(netdev);
5724
5725         if (netif_running(netdev)) {
5726                 if (igb_up(adapter)) {
5727                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5728                         return;
5729                 }
5730         }
5731
5732         netif_device_attach(netdev);
5733
5734         /* let the f/w know that the h/w is now under the control of the
5735          * driver. */
5736         igb_get_hw_control(adapter);
5737 }
5738
5739 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5740                              u8 qsel)
5741 {
5742         u32 rar_low, rar_high;
5743         struct e1000_hw *hw = &adapter->hw;
5744
5745         /* HW expects these in little endian, so we reverse the byte order
5746          * from network order (big endian) to little endian
5747          */
5748         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5749                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5750         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
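             /* e.g. a (hypothetical) MAC of 00:1b:21:aa:bb:cc packs as
              * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
              * valid and pool bits are OR'd in below */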
5751
5752         /* Indicate to hardware the Address is Valid. */
5753         rar_high |= E1000_RAH_AV;
5754
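             /* the 82575 stores the pool number itself in the RAH pool field,
              * while later MACs treat the field as a one-bit-per-pool mask */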
5755         if (hw->mac.type == e1000_82575)
5756                 rar_high |= E1000_RAH_POOL_1 * qsel;
5757         else
5758                 rar_high |= E1000_RAH_POOL_1 << qsel;
5759
5760         wr32(E1000_RAL(index), rar_low);
5761         wrfl();
5762         wr32(E1000_RAH(index), rar_high);
5763         wrfl();
5764 }
5765
5766 static int igb_set_vf_mac(struct igb_adapter *adapter,
5767                           int vf, unsigned char *mac_addr)
5768 {
5769         struct e1000_hw *hw = &adapter->hw;
5770         /* VF MAC addresses start at the end of the receive addresses and move
5771          * towards the first, so a collision should not be possible */
5772         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
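             /* e.g. if rar_entry_count were 24 (it varies by MAC type), VF 0
              * would use entry 23, VF 1 entry 22, and so on */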
5773
5774         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5775
5776         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5777
5778         return 0;
5779 }
5780
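     /**
      * igb_vmm_control - configure VM-to-VM packet handling
      * @adapter: board private structure
      *
      * Enables VLAN tag stripping on replicated packets and, depending on
      * whether any VFs are allocated, switches VMDq loopback and replication
      * on or off in the PF.
      */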
5781 static void igb_vmm_control(struct igb_adapter *adapter)
5782 {
5783         struct e1000_hw *hw = &adapter->hw;
5784         u32 reg;
5785
5786         /* replication is not supported for 82575 */
5787         if (hw->mac.type == e1000_82575)
5788                 return;
5789
5790         /* enable replication vlan tag stripping */
5791         reg = rd32(E1000_RPLOLR);
5792         reg |= E1000_RPLOLR_STRVLAN;
5793         wr32(E1000_RPLOLR, reg);
5794
5795         /* notify HW that the MAC is adding vlan tags */
5796         reg = rd32(E1000_DTXCTL);
5797         reg |= E1000_DTXCTL_VLAN_ADDED;
5798         wr32(E1000_DTXCTL, reg);
5799
5800         if (adapter->vfs_allocated_count) {
5801                 igb_vmdq_set_loopback_pf(hw, true);
5802                 igb_vmdq_set_replication_pf(hw, true);
5803         } else {
5804                 igb_vmdq_set_loopback_pf(hw, false);
5805                 igb_vmdq_set_replication_pf(hw, false);
5806         }
5807 }
5808
5809 /* igb_main.c */