igb: call pci_save_state after pci_restore_state
[safe/jmp/linux-2.6] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79         /* required last entry */
80         {0, }
81 };
82
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
137 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
138                                int vf, u16 vlan, u8 qos);
139 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
140 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
141                                  struct ifla_vf_info *ivi);
142
143 #ifdef CONFIG_PM
144 static int igb_suspend(struct pci_dev *, pm_message_t);
145 static int igb_resume(struct pci_dev *);
146 #endif
147 static void igb_shutdown(struct pci_dev *);
148 #ifdef CONFIG_IGB_DCA
149 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
150 static struct notifier_block dca_notifier = {
151         .notifier_call  = igb_notify_dca,
152         .next           = NULL,
153         .priority       = 0
154 };
155 #endif
156 #ifdef CONFIG_NET_POLL_CONTROLLER
157 /* for netdump / net console */
158 static void igb_netpoll(struct net_device *);
159 #endif
160 #ifdef CONFIG_PCI_IOV
161 static unsigned int max_vfs = 0;
162 module_param(max_vfs, uint, 0);
163 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
164                  "per physical function");
165 #endif /* CONFIG_PCI_IOV */
166
167 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
168                      pci_channel_state_t);
169 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
170 static void igb_io_resume(struct pci_dev *);
171
172 static struct pci_error_handlers igb_err_handler = {
173         .error_detected = igb_io_error_detected,
174         .slot_reset = igb_io_slot_reset,
175         .resume = igb_io_resume,
176 };
177
178
179 static struct pci_driver igb_driver = {
180         .name     = igb_driver_name,
181         .id_table = igb_pci_tbl,
182         .probe    = igb_probe,
183         .remove   = __devexit_p(igb_remove),
184 #ifdef CONFIG_PM
185         /* Power Management Hooks */
186         .suspend  = igb_suspend,
187         .resume   = igb_resume,
188 #endif
189         .shutdown = igb_shutdown,
190         .err_handler = &igb_err_handler
191 };
192
193 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
194 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
195 MODULE_LICENSE("GPL");
196 MODULE_VERSION(DRV_VERSION);
197
198 /**
199  * igb_read_clock - read raw cycle counter (to be used by time counter)
200  */
201 static cycle_t igb_read_clock(const struct cyclecounter *tc)
202 {
203         struct igb_adapter *adapter =
204                 container_of(tc, struct igb_adapter, cycles);
205         struct e1000_hw *hw = &adapter->hw;
206         u64 stamp = 0;
207         int shift = 0;
208
209         /*
210          * The timestamp latches on the lowest register read. For the 82580
211          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
212          * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
213          */
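            /*
             * For example, on the 82580 (assuming IGB_82580_TSYNC_SHIFT is
             * 24) the value assembled below is SYSTIMH:SYSTIML shifted up by
             * 24 bits, with SYSTIMR[31:8] filling the low 24 bits; on other
             * MACs it is simply SYSTIMH:SYSTIML.
             */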
214         if (hw->mac.type == e1000_82580) {
215                 stamp = rd32(E1000_SYSTIMR) >> 8;
216                 shift = IGB_82580_TSYNC_SHIFT;
217         }
218
219         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
220         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
221         return stamp;
222 }
223
224 #ifdef DEBUG
225 /**
226  * igb_get_hw_dev_name - return device name string
227  * used by hardware layer to print debugging information
228  **/
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
230 {
231         struct igb_adapter *adapter = hw->back;
232         return adapter->netdev->name;
233 }
234
235 /**
236  * igb_get_time_str - format current NIC and system time as string
237  */
238 static char *igb_get_time_str(struct igb_adapter *adapter,
239                               char buffer[160])
240 {
241         cycle_t hw = adapter->cycles.read(&adapter->cycles);
242         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
243         struct timespec sys;
244         struct timespec delta;
245         getnstimeofday(&sys);
246
247         delta = timespec_sub(nic, sys);
248
249         sprintf(buffer,
250                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
251                 hw,
252                 (long)nic.tv_sec, nic.tv_nsec,
253                 (long)sys.tv_sec, sys.tv_nsec,
254                 (long)delta.tv_sec, delta.tv_nsec);
255
256         return buffer;
257 }
258 #endif
259
260 /**
261  * igb_init_module - Driver Registration Routine
262  *
263  * igb_init_module is the first routine called when the driver is
264  * loaded. All it does is register with the PCI subsystem.
265  **/
266 static int __init igb_init_module(void)
267 {
268         int ret;
269         printk(KERN_INFO "%s - version %s\n",
270                igb_driver_string, igb_driver_version);
271
272         printk(KERN_INFO "%s\n", igb_copyright);
273
274 #ifdef CONFIG_IGB_DCA
275         dca_register_notify(&dca_notifier);
276 #endif
277         ret = pci_register_driver(&igb_driver);
278         return ret;
279 }
280
281 module_init(igb_init_module);
282
283 /**
284  * igb_exit_module - Driver Exit Cleanup Routine
285  *
286  * igb_exit_module is called just before the driver is removed
287  * from memory.
288  **/
289 static void __exit igb_exit_module(void)
290 {
291 #ifdef CONFIG_IGB_DCA
292         dca_unregister_notify(&dca_notifier);
293 #endif
294         pci_unregister_driver(&igb_driver);
295 }
296
297 module_exit(igb_exit_module);
298
299 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
300 /**
301  * igb_cache_ring_register - Descriptor ring to register mapping
302  * @adapter: board private structure to initialize
303  *
304  * Once we know the feature-set enabled for the device, we'll cache
305  * the register offset the descriptor ring is assigned to.
306  **/
307 static void igb_cache_ring_register(struct igb_adapter *adapter)
308 {
309         int i = 0, j = 0;
310         u32 rbase_offset = adapter->vfs_allocated_count;
311
312         switch (adapter->hw.mac.type) {
313         case e1000_82576:
314                 /* The queues are allocated for virtualization such that VF 0
315                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
316                  * In order to avoid collision we start at the first free queue
317                  * and continue consuming queues in the same sequence
318                  */
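                    /* For example, with two VFs (rbase_offset = 2) the PF's
                     * rings map to queues 2, 10, 3, 11, ... via Q_IDX_82576,
                     * interleaving with the VF queue pairs described above. */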
319                 if (adapter->vfs_allocated_count) {
320                         for (; i < adapter->rss_queues; i++)
321                                 adapter->rx_ring[i].reg_idx = rbase_offset +
322                                                               Q_IDX_82576(i);
323                         for (; j < adapter->rss_queues; j++)
324                                 adapter->tx_ring[j].reg_idx = rbase_offset +
325                                                               Q_IDX_82576(j);
326                 }
327         case e1000_82575:
328         case e1000_82580:
329         default:
330                 for (; i < adapter->num_rx_queues; i++)
331                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
332                 for (; j < adapter->num_tx_queues; j++)
333                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
334                 break;
335         }
336 }
337
338 static void igb_free_queues(struct igb_adapter *adapter)
339 {
340         kfree(adapter->tx_ring);
341         kfree(adapter->rx_ring);
342
343         adapter->tx_ring = NULL;
344         adapter->rx_ring = NULL;
345
346         adapter->num_rx_queues = 0;
347         adapter->num_tx_queues = 0;
348 }
349
350 /**
351  * igb_alloc_queues - Allocate memory for all rings
352  * @adapter: board private structure to initialize
353  *
354  * We allocate one ring per queue at run-time since we don't know the
355  * number of queues at compile-time.
356  **/
357 static int igb_alloc_queues(struct igb_adapter *adapter)
358 {
359         int i;
360
361         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
362                                    sizeof(struct igb_ring), GFP_KERNEL);
363         if (!adapter->tx_ring)
364                 goto err;
365
366         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
367                                    sizeof(struct igb_ring), GFP_KERNEL);
368         if (!adapter->rx_ring)
369                 goto err;
370
371         for (i = 0; i < adapter->num_tx_queues; i++) {
372                 struct igb_ring *ring = &(adapter->tx_ring[i]);
373                 ring->count = adapter->tx_ring_count;
374                 ring->queue_index = i;
375                 ring->pdev = adapter->pdev;
376                 ring->netdev = adapter->netdev;
377                 /* For 82575, context index must be unique per ring. */
378                 if (adapter->hw.mac.type == e1000_82575)
379                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
380         }
381
382         for (i = 0; i < adapter->num_rx_queues; i++) {
383                 struct igb_ring *ring = &(adapter->rx_ring[i]);
384                 ring->count = adapter->rx_ring_count;
385                 ring->queue_index = i;
386                 ring->pdev = adapter->pdev;
387                 ring->netdev = adapter->netdev;
388                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390                 /* set flag indicating ring supports SCTP checksum offload */
391                 if (adapter->hw.mac.type >= e1000_82576)
392                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393         }
394
395         igb_cache_ring_register(adapter);
396
397         return 0;
398
399 err:
400         igb_free_queues(adapter);
401
402         return -ENOMEM;
403 }
404
405 #define IGB_N0_QUEUE -1
406 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
407 {
408         u32 msixbm = 0;
409         struct igb_adapter *adapter = q_vector->adapter;
410         struct e1000_hw *hw = &adapter->hw;
411         u32 ivar, index;
412         int rx_queue = IGB_N0_QUEUE;
413         int tx_queue = IGB_N0_QUEUE;
414
415         if (q_vector->rx_ring)
416                 rx_queue = q_vector->rx_ring->reg_idx;
417         if (q_vector->tx_ring)
418                 tx_queue = q_vector->tx_ring->reg_idx;
419
420         switch (hw->mac.type) {
421         case e1000_82575:
422                 /* The 82575 assigns vectors using a bitmask, which matches the
423                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
424                    or more queues to a vector, we write the appropriate bits
425                    into the MSIXBM register for that vector. */
426                 if (rx_queue > IGB_N0_QUEUE)
427                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
428                 if (tx_queue > IGB_N0_QUEUE)
429                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
430                 if (!adapter->msix_entries && msix_vector == 0)
431                         msixbm |= E1000_EIMS_OTHER;
432                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
433                 q_vector->eims_value = msixbm;
434                 break;
435         case e1000_82576:
436                 /* 82576 uses a table-based method for assigning vectors.
437                    Each queue has a single entry in the table to which we write
438                    a vector number along with a "valid" bit.  Sadly, the layout
439                    of the table is somewhat counterintuitive. */
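                    /* For example, Rx queue 0 uses the low byte of IVAR0(0)
                     * and Rx queue 8 its third byte, while Tx queue 0 uses the
                     * second byte and Tx queue 8 the high byte, as coded below. */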
440                 if (rx_queue > IGB_N0_QUEUE) {
441                         index = (rx_queue & 0x7);
442                         ivar = array_rd32(E1000_IVAR0, index);
443                         if (rx_queue < 8) {
444                                 /* vector goes into low byte of register */
445                                 ivar = ivar & 0xFFFFFF00;
446                                 ivar |= msix_vector | E1000_IVAR_VALID;
447                         } else {
448                                 /* vector goes into third byte of register */
449                                 ivar = ivar & 0xFF00FFFF;
450                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
451                         }
452                         array_wr32(E1000_IVAR0, index, ivar);
453                 }
454                 if (tx_queue > IGB_N0_QUEUE) {
455                         index = (tx_queue & 0x7);
456                         ivar = array_rd32(E1000_IVAR0, index);
457                         if (tx_queue < 8) {
458                                 /* vector goes into second byte of register */
459                                 ivar = ivar & 0xFFFF00FF;
460                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
461                         } else {
462                                 /* vector goes into high byte of register */
463                                 ivar = ivar & 0x00FFFFFF;
464                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
465                         }
466                         array_wr32(E1000_IVAR0, index, ivar);
467                 }
468                 q_vector->eims_value = 1 << msix_vector;
469                 break;
470         case e1000_82580:
471                 /* 82580 uses the same table-based approach as 82576 but has fewer
472                    entries; as a result we carry over for queues greater than 4. */
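                    /* For example, Rx queues 0 and 1 share IVAR0(0) via its low
                     * and third bytes, while Tx queues 0 and 1 use its second
                     * and high bytes, as coded below. */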
473                 if (rx_queue > IGB_N0_QUEUE) {
474                         index = (rx_queue >> 1);
475                         ivar = array_rd32(E1000_IVAR0, index);
476                         if (rx_queue & 0x1) {
477                                 /* vector goes into third byte of register */
478                                 ivar = ivar & 0xFF00FFFF;
479                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
480                         } else {
481                                 /* vector goes into low byte of register */
482                                 ivar = ivar & 0xFFFFFF00;
483                                 ivar |= msix_vector | E1000_IVAR_VALID;
484                         }
485                         array_wr32(E1000_IVAR0, index, ivar);
486                 }
487                 if (tx_queue > IGB_N0_QUEUE) {
488                         index = (tx_queue >> 1);
489                         ivar = array_rd32(E1000_IVAR0, index);
490                         if (tx_queue & 0x1) {
491                                 /* vector goes into high byte of register */
492                                 ivar = ivar & 0x00FFFFFF;
493                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
494                         } else {
495                                 /* vector goes into second byte of register */
496                                 ivar = ivar & 0xFFFF00FF;
497                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
498                         }
499                         array_wr32(E1000_IVAR0, index, ivar);
500                 }
501                 q_vector->eims_value = 1 << msix_vector;
502                 break;
503         default:
504                 BUG();
505                 break;
506         }
507
508         /* add q_vector eims value to global eims_enable_mask */
509         adapter->eims_enable_mask |= q_vector->eims_value;
510
511         /* configure q_vector to set itr on first interrupt */
512         q_vector->set_itr = 1;
513 }
514
515 /**
516  * igb_configure_msix - Configure MSI-X hardware
517  *
518  * igb_configure_msix sets up the hardware to properly
519  * generate MSI-X interrupts.
520  **/
521 static void igb_configure_msix(struct igb_adapter *adapter)
522 {
523         u32 tmp;
524         int i, vector = 0;
525         struct e1000_hw *hw = &adapter->hw;
526
527         adapter->eims_enable_mask = 0;
528
529         /* set vector for other causes, i.e. link changes */
530         switch (hw->mac.type) {
531         case e1000_82575:
532                 tmp = rd32(E1000_CTRL_EXT);
533                 /* enable MSI-X PBA support */
534                 tmp |= E1000_CTRL_EXT_PBA_CLR;
535
536                 /* Auto-Mask interrupts upon ICR read. */
537                 tmp |= E1000_CTRL_EXT_EIAME;
538                 tmp |= E1000_CTRL_EXT_IRCA;
539
540                 wr32(E1000_CTRL_EXT, tmp);
541
542                 /* enable msix_other interrupt */
543                 array_wr32(E1000_MSIXBM(0), vector++,
544                                       E1000_EIMS_OTHER);
545                 adapter->eims_other = E1000_EIMS_OTHER;
546
547                 break;
548
549         case e1000_82576:
550         case e1000_82580:
551                 /* Turn on MSI-X capability first, or our settings
552                  * won't stick.  And it will take days to debug. */
553                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
554                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
555                                 E1000_GPIE_NSICR);
556
557                 /* enable msix_other interrupt */
558                 adapter->eims_other = 1 << vector;
559                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
560
561                 wr32(E1000_IVAR_MISC, tmp);
562                 break;
563         default:
564                 /* do nothing, since nothing else supports MSI-X */
565                 break;
566         } /* switch (hw->mac.type) */
567
568         adapter->eims_enable_mask |= adapter->eims_other;
569
570         for (i = 0; i < adapter->num_q_vectors; i++)
571                 igb_assign_vector(adapter->q_vector[i], vector++);
572
573         wrfl();
574 }
575
576 /**
577  * igb_request_msix - Initialize MSI-X interrupts
578  *
579  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
580  * kernel.
581  **/
582 static int igb_request_msix(struct igb_adapter *adapter)
583 {
584         struct net_device *netdev = adapter->netdev;
585         struct e1000_hw *hw = &adapter->hw;
586         int i, err = 0, vector = 0;
587
588         err = request_irq(adapter->msix_entries[vector].vector,
589                           igb_msix_other, 0, netdev->name, adapter);
590         if (err)
591                 goto out;
592         vector++;
593
594         for (i = 0; i < adapter->num_q_vectors; i++) {
595                 struct igb_q_vector *q_vector = adapter->q_vector[i];
596
597                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
598
599                 if (q_vector->rx_ring && q_vector->tx_ring)
600                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
601                                 q_vector->rx_ring->queue_index);
602                 else if (q_vector->tx_ring)
603                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
604                                 q_vector->tx_ring->queue_index);
605                 else if (q_vector->rx_ring)
606                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
607                                 q_vector->rx_ring->queue_index);
608                 else
609                         sprintf(q_vector->name, "%s-unused", netdev->name);
610
611                 err = request_irq(adapter->msix_entries[vector].vector,
612                                   igb_msix_ring, 0, q_vector->name,
613                                   q_vector);
614                 if (err)
615                         goto out;
616                 vector++;
617         }
618
619         igb_configure_msix(adapter);
620         return 0;
621 out:
622         return err;
623 }
624
625 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
626 {
627         if (adapter->msix_entries) {
628                 pci_disable_msix(adapter->pdev);
629                 kfree(adapter->msix_entries);
630                 adapter->msix_entries = NULL;
631         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
632                 pci_disable_msi(adapter->pdev);
633         }
634 }
635
636 /**
637  * igb_free_q_vectors - Free memory allocated for interrupt vectors
638  * @adapter: board private structure to initialize
639  *
640  * This function frees the memory allocated to the q_vectors.  In addition if
641  * NAPI is enabled it will delete any references to the NAPI struct prior
642  * to freeing the q_vector.
643  **/
644 static void igb_free_q_vectors(struct igb_adapter *adapter)
645 {
646         int v_idx;
647
648         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
649                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
650                 adapter->q_vector[v_idx] = NULL;
651                 netif_napi_del(&q_vector->napi);
652                 kfree(q_vector);
653         }
654         adapter->num_q_vectors = 0;
655 }
656
657 /**
658  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
659  *
660  * This function resets the device so that it has 0 rx queues, tx queues, and
661  * MSI-X interrupts allocated.
662  */
663 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
664 {
665         igb_free_queues(adapter);
666         igb_free_q_vectors(adapter);
667         igb_reset_interrupt_capability(adapter);
668 }
669
670 /**
671  * igb_set_interrupt_capability - set MSI or MSI-X if supported
672  *
673  * Attempt to configure interrupts using the best available
674  * capabilities of the hardware and kernel.
675  **/
676 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
677 {
678         int err;
679         int numvecs, i;
680
681         /* Number of supported queues. */
682         adapter->num_rx_queues = adapter->rss_queues;
683         adapter->num_tx_queues = adapter->rss_queues;
684
685         /* start with one vector for every rx queue */
686         numvecs = adapter->num_rx_queues;
687
688         /* if tx handler is separate, add 1 for every tx queue */
689         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
690                 numvecs += adapter->num_tx_queues;
691
692         /* store the number of vectors reserved for queues */
693         adapter->num_q_vectors = numvecs;
694
695         /* add 1 vector for link status interrupts */
696         numvecs++;
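            /* For example, with 4 RSS queues and queue pairing disabled this
             * requests 4 Rx + 4 Tx + 1 link-status vector = 9 MSI-X vectors;
             * with pairing enabled it would request 4 + 1 = 5. */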
697         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
698                                         GFP_KERNEL);
699         if (!adapter->msix_entries)
700                 goto msi_only;
701
702         for (i = 0; i < numvecs; i++)
703                 adapter->msix_entries[i].entry = i;
704
705         err = pci_enable_msix(adapter->pdev,
706                               adapter->msix_entries,
707                               numvecs);
708         if (err == 0)
709                 goto out;
710
711         igb_reset_interrupt_capability(adapter);
712
713         /* If we can't do MSI-X, try MSI */
714 msi_only:
715 #ifdef CONFIG_PCI_IOV
716         /* disable SR-IOV for non MSI-X configurations */
717         if (adapter->vf_data) {
718                 struct e1000_hw *hw = &adapter->hw;
719                 /* disable iov and allow time for transactions to clear */
720                 pci_disable_sriov(adapter->pdev);
721                 msleep(500);
722
723                 kfree(adapter->vf_data);
724                 adapter->vf_data = NULL;
725                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
726                 msleep(100);
727                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
728         }
729 #endif
730         adapter->vfs_allocated_count = 0;
731         adapter->rss_queues = 1;
732         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
733         adapter->num_rx_queues = 1;
734         adapter->num_tx_queues = 1;
735         adapter->num_q_vectors = 1;
736         if (!pci_enable_msi(adapter->pdev))
737                 adapter->flags |= IGB_FLAG_HAS_MSI;
738 out:
739         /* Notify the stack of the (possibly) reduced Tx Queue count. */
740         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
741         return;
742 }
743
744 /**
745  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
746  * @adapter: board private structure to initialize
747  *
748  * We allocate one q_vector per queue interrupt.  If allocation fails we
749  * return -ENOMEM.
750  **/
751 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
752 {
753         struct igb_q_vector *q_vector;
754         struct e1000_hw *hw = &adapter->hw;
755         int v_idx;
756
757         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
758                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
759                 if (!q_vector)
760                         goto err_out;
761                 q_vector->adapter = adapter;
762                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
763                 q_vector->itr_val = IGB_START_ITR;
764                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
765                 adapter->q_vector[v_idx] = q_vector;
766         }
767         return 0;
768
769 err_out:
770         while (v_idx) {
771                 v_idx--;
772                 q_vector = adapter->q_vector[v_idx];
773                 netif_napi_del(&q_vector->napi);
774                 kfree(q_vector);
775                 adapter->q_vector[v_idx] = NULL;
776         }
777         return -ENOMEM;
778 }
779
780 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
781                                       int ring_idx, int v_idx)
782 {
783         struct igb_q_vector *q_vector;
784
785         q_vector = adapter->q_vector[v_idx];
786         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
787         q_vector->rx_ring->q_vector = q_vector;
788         q_vector->itr_val = adapter->rx_itr_setting;
789         if (q_vector->itr_val && q_vector->itr_val <= 3)
790                 q_vector->itr_val = IGB_START_ITR;
791 }
792
793 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
794                                       int ring_idx, int v_idx)
795 {
796         struct igb_q_vector *q_vector;
797
798         q_vector = adapter->q_vector[v_idx];
799         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
800         q_vector->tx_ring->q_vector = q_vector;
801         q_vector->itr_val = adapter->tx_itr_setting;
802         if (q_vector->itr_val && q_vector->itr_val <= 3)
803                 q_vector->itr_val = IGB_START_ITR;
804 }
805
806 /**
807  * igb_map_ring_to_vector - maps allocated queues to vectors
808  *
809  * This function maps the recently allocated queues to vectors.
810  **/
811 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
812 {
813         int i;
814         int v_idx = 0;
815
816         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
817             (adapter->num_q_vectors < adapter->num_tx_queues))
818                 return -ENOMEM;
819
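            /* For example, 4 Rx and 4 Tx queues with 8 q_vectors get a
             * dedicated vector per ring (first branch below); with only 4
             * q_vectors each vector handles one Tx/Rx pair (second branch). */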
820         if (adapter->num_q_vectors >=
821             (adapter->num_rx_queues + adapter->num_tx_queues)) {
822                 for (i = 0; i < adapter->num_rx_queues; i++)
823                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
824                 for (i = 0; i < adapter->num_tx_queues; i++)
825                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
826         } else {
827                 for (i = 0; i < adapter->num_rx_queues; i++) {
828                         if (i < adapter->num_tx_queues)
829                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
830                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
831                 }
832                 for (; i < adapter->num_tx_queues; i++)
833                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
834         }
835         return 0;
836 }
837
838 /**
839  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
840  *
841  * This function initializes the interrupts and allocates all of the queues.
842  **/
843 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
844 {
845         struct pci_dev *pdev = adapter->pdev;
846         int err;
847
848         igb_set_interrupt_capability(adapter);
849
850         err = igb_alloc_q_vectors(adapter);
851         if (err) {
852                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
853                 goto err_alloc_q_vectors;
854         }
855
856         err = igb_alloc_queues(adapter);
857         if (err) {
858                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
859                 goto err_alloc_queues;
860         }
861
862         err = igb_map_ring_to_vector(adapter);
863         if (err) {
864                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
865                 goto err_map_queues;
866         }
867
868
869         return 0;
870 err_map_queues:
871         igb_free_queues(adapter);
872 err_alloc_queues:
873         igb_free_q_vectors(adapter);
874 err_alloc_q_vectors:
875         igb_reset_interrupt_capability(adapter);
876         return err;
877 }
878
879 /**
880  * igb_request_irq - initialize interrupts
881  *
882  * Attempts to configure interrupts using the best available
883  * capabilities of the hardware and kernel.
884  **/
885 static int igb_request_irq(struct igb_adapter *adapter)
886 {
887         struct net_device *netdev = adapter->netdev;
888         struct pci_dev *pdev = adapter->pdev;
889         int err = 0;
890
891         if (adapter->msix_entries) {
892                 err = igb_request_msix(adapter);
893                 if (!err)
894                         goto request_done;
895                 /* fall back to MSI */
896                 igb_clear_interrupt_scheme(adapter);
897                 if (!pci_enable_msi(adapter->pdev))
898                         adapter->flags |= IGB_FLAG_HAS_MSI;
899                 igb_free_all_tx_resources(adapter);
900                 igb_free_all_rx_resources(adapter);
901                 adapter->num_tx_queues = 1;
902                 adapter->num_rx_queues = 1;
903                 adapter->num_q_vectors = 1;
904                 err = igb_alloc_q_vectors(adapter);
905                 if (err) {
906                         dev_err(&pdev->dev,
907                                 "Unable to allocate memory for vectors\n");
908                         goto request_done;
909                 }
910                 err = igb_alloc_queues(adapter);
911                 if (err) {
912                         dev_err(&pdev->dev,
913                                 "Unable to allocate memory for queues\n");
914                         igb_free_q_vectors(adapter);
915                         goto request_done;
916                 }
917                 igb_setup_all_tx_resources(adapter);
918                 igb_setup_all_rx_resources(adapter);
919         } else {
920                 igb_assign_vector(adapter->q_vector[0], 0);
921         }
922
923         if (adapter->flags & IGB_FLAG_HAS_MSI) {
924                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
925                                   netdev->name, adapter);
926                 if (!err)
927                         goto request_done;
928
929                 /* fall back to legacy interrupts */
930                 igb_reset_interrupt_capability(adapter);
931                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
932         }
933
934         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
935                           netdev->name, adapter);
936
937         if (err)
938                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
939                         err);
940
941 request_done:
942         return err;
943 }
944
945 static void igb_free_irq(struct igb_adapter *adapter)
946 {
947         if (adapter->msix_entries) {
948                 int vector = 0, i;
949
950                 free_irq(adapter->msix_entries[vector++].vector, adapter);
951
952                 for (i = 0; i < adapter->num_q_vectors; i++) {
953                         struct igb_q_vector *q_vector = adapter->q_vector[i];
954                         free_irq(adapter->msix_entries[vector++].vector,
955                                  q_vector);
956                 }
957         } else {
958                 free_irq(adapter->pdev->irq, adapter);
959         }
960 }
961
962 /**
963  * igb_irq_disable - Mask off interrupt generation on the NIC
964  * @adapter: board private structure
965  **/
966 static void igb_irq_disable(struct igb_adapter *adapter)
967 {
968         struct e1000_hw *hw = &adapter->hw;
969
970         /*
971          * we need to be careful when disabling interrupts.  The VFs are also
972          * mapped into these registers and so clearing the bits can cause
973          * issues on the VF drivers, so we only need to clear what we set
974          */
975         if (adapter->msix_entries) {
976                 u32 regval = rd32(E1000_EIAM);
977                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
978                 wr32(E1000_EIMC, adapter->eims_enable_mask);
979                 regval = rd32(E1000_EIAC);
980                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
981         }
982
983         wr32(E1000_IAM, 0);
984         wr32(E1000_IMC, ~0);
985         wrfl();
986         synchronize_irq(adapter->pdev->irq);
987 }
988
989 /**
990  * igb_irq_enable - Enable default interrupt generation settings
991  * @adapter: board private structure
992  **/
993 static void igb_irq_enable(struct igb_adapter *adapter)
994 {
995         struct e1000_hw *hw = &adapter->hw;
996
997         if (adapter->msix_entries) {
998                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
999                 u32 regval = rd32(E1000_EIAC);
1000                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1001                 regval = rd32(E1000_EIAM);
1002                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1003                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1004                 if (adapter->vfs_allocated_count) {
1005                         wr32(E1000_MBVFIMR, 0xFF);
1006                         ims |= E1000_IMS_VMMB;
1007                 }
1008                 if (adapter->hw.mac.type == e1000_82580)
1009                         ims |= E1000_IMS_DRSTA;
1010
1011                 wr32(E1000_IMS, ims);
1012         } else {
1013                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1014                                 E1000_IMS_DRSTA);
1015                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1016                                 E1000_IMS_DRSTA);
1017         }
1018 }
1019
1020 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1021 {
1022         struct e1000_hw *hw = &adapter->hw;
1023         u16 vid = adapter->hw.mng_cookie.vlan_id;
1024         u16 old_vid = adapter->mng_vlan_id;
1025
1026         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1027                 /* add VID to filter table */
1028                 igb_vfta_set(hw, vid, true);
1029                 adapter->mng_vlan_id = vid;
1030         } else {
1031                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1032         }
1033
1034         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1035             (vid != old_vid) &&
1036             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1037                 /* remove VID from filter table */
1038                 igb_vfta_set(hw, old_vid, false);
1039         }
1040 }
1041
1042 /**
1043  * igb_release_hw_control - release control of the h/w to f/w
1044  * @adapter: address of board private structure
1045  *
1046  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1047  * For ASF and Pass Through versions of f/w this means that the
1048  * driver is no longer loaded.
1049  *
1050  **/
1051 static void igb_release_hw_control(struct igb_adapter *adapter)
1052 {
1053         struct e1000_hw *hw = &adapter->hw;
1054         u32 ctrl_ext;
1055
1056         /* Let firmware take over control of h/w */
1057         ctrl_ext = rd32(E1000_CTRL_EXT);
1058         wr32(E1000_CTRL_EXT,
1059                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1060 }
1061
1062 /**
1063  * igb_get_hw_control - get control of the h/w from f/w
1064  * @adapter: address of board private structure
1065  *
1066  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1067  * For ASF and Pass Through versions of f/w this means that
1068  * the driver is loaded.
1069  *
1070  **/
1071 static void igb_get_hw_control(struct igb_adapter *adapter)
1072 {
1073         struct e1000_hw *hw = &adapter->hw;
1074         u32 ctrl_ext;
1075
1076         /* Let firmware know the driver has taken over */
1077         ctrl_ext = rd32(E1000_CTRL_EXT);
1078         wr32(E1000_CTRL_EXT,
1079                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1080 }
1081
1082 /**
1083  * igb_configure - configure the hardware for RX and TX
1084  * @adapter: private board structure
1085  **/
1086 static void igb_configure(struct igb_adapter *adapter)
1087 {
1088         struct net_device *netdev = adapter->netdev;
1089         int i;
1090
1091         igb_get_hw_control(adapter);
1092         igb_set_rx_mode(netdev);
1093
1094         igb_restore_vlan(adapter);
1095
1096         igb_setup_tctl(adapter);
1097         igb_setup_mrqc(adapter);
1098         igb_setup_rctl(adapter);
1099
1100         igb_configure_tx(adapter);
1101         igb_configure_rx(adapter);
1102
1103         igb_rx_fifo_flush_82575(&adapter->hw);
1104
1105         /* call igb_desc_unused which always leaves
1106          * at least 1 descriptor unused to make sure
1107          * next_to_use != next_to_clean */
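             /* For example, a 256-entry ring is filled with at most 255
              * buffers, so a full ring can never be confused with an empty
              * one. */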
1108         for (i = 0; i < adapter->num_rx_queues; i++) {
1109                 struct igb_ring *ring = &adapter->rx_ring[i];
1110                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1111         }
1112
1113
1114         adapter->tx_queue_len = netdev->tx_queue_len;
1115 }
1116
1117 /**
1118  * igb_power_up_link - Power up the phy/serdes link
1119  * @adapter: address of board private structure
1120  **/
1121 void igb_power_up_link(struct igb_adapter *adapter)
1122 {
1123         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1124                 igb_power_up_phy_copper(&adapter->hw);
1125         else
1126                 igb_power_up_serdes_link_82575(&adapter->hw);
1127 }
1128
1129 /**
1130  * igb_power_down_link - Power down the phy/serdes link
1131  * @adapter: address of board private structure
1132  */
1133 static void igb_power_down_link(struct igb_adapter *adapter)
1134 {
1135         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1136                 igb_power_down_phy_copper_82575(&adapter->hw);
1137         else
1138                 igb_shutdown_serdes_link_82575(&adapter->hw);
1139 }
1140
1141 /**
1142  * igb_up - Open the interface and prepare it to handle traffic
1143  * @adapter: board private structure
1144  **/
1145 int igb_up(struct igb_adapter *adapter)
1146 {
1147         struct e1000_hw *hw = &adapter->hw;
1148         int i;
1149
1150         /* hardware has been reset, we need to reload some things */
1151         igb_configure(adapter);
1152
1153         clear_bit(__IGB_DOWN, &adapter->state);
1154
1155         for (i = 0; i < adapter->num_q_vectors; i++) {
1156                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1157                 napi_enable(&q_vector->napi);
1158         }
1159         if (adapter->msix_entries)
1160                 igb_configure_msix(adapter);
1161         else
1162                 igb_assign_vector(adapter->q_vector[0], 0);
1163
1164         /* Clear any pending interrupts. */
1165         rd32(E1000_ICR);
1166         igb_irq_enable(adapter);
1167
1168         /* notify VFs that reset has been completed */
1169         if (adapter->vfs_allocated_count) {
1170                 u32 reg_data = rd32(E1000_CTRL_EXT);
1171                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1172                 wr32(E1000_CTRL_EXT, reg_data);
1173         }
1174
1175         netif_tx_start_all_queues(adapter->netdev);
1176
1177         /* start the watchdog. */
1178         hw->mac.get_link_status = 1;
1179         schedule_work(&adapter->watchdog_task);
1180
1181         return 0;
1182 }
1183
1184 void igb_down(struct igb_adapter *adapter)
1185 {
1186         struct net_device *netdev = adapter->netdev;
1187         struct e1000_hw *hw = &adapter->hw;
1188         u32 tctl, rctl;
1189         int i;
1190
1191         /* signal that we're down so the interrupt handler does not
1192          * reschedule our watchdog timer */
1193         set_bit(__IGB_DOWN, &adapter->state);
1194
1195         /* disable receives in the hardware */
1196         rctl = rd32(E1000_RCTL);
1197         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1198         /* flush and sleep below */
1199
1200         netif_tx_stop_all_queues(netdev);
1201
1202         /* disable transmits in the hardware */
1203         tctl = rd32(E1000_TCTL);
1204         tctl &= ~E1000_TCTL_EN;
1205         wr32(E1000_TCTL, tctl);
1206         /* flush both disables and wait for them to finish */
1207         wrfl();
1208         msleep(10);
1209
1210         for (i = 0; i < adapter->num_q_vectors; i++) {
1211                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1212                 napi_disable(&q_vector->napi);
1213         }
1214
1215         igb_irq_disable(adapter);
1216
1217         del_timer_sync(&adapter->watchdog_timer);
1218         del_timer_sync(&adapter->phy_info_timer);
1219
1220         netdev->tx_queue_len = adapter->tx_queue_len;
1221         netif_carrier_off(netdev);
1222
1223         /* record the stats before reset */
1224         igb_update_stats(adapter);
1225
1226         adapter->link_speed = 0;
1227         adapter->link_duplex = 0;
1228
1229         if (!pci_channel_offline(adapter->pdev))
1230                 igb_reset(adapter);
1231         igb_clean_all_tx_rings(adapter);
1232         igb_clean_all_rx_rings(adapter);
1233 #ifdef CONFIG_IGB_DCA
1234
1235         /* since we reset the hardware DCA settings were cleared */
1236         igb_setup_dca(adapter);
1237 #endif
1238 }
1239
1240 void igb_reinit_locked(struct igb_adapter *adapter)
1241 {
1242         WARN_ON(in_interrupt());
1243         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1244                 msleep(1);
1245         igb_down(adapter);
1246         igb_up(adapter);
1247         clear_bit(__IGB_RESETTING, &adapter->state);
1248 }
1249
1250 void igb_reset(struct igb_adapter *adapter)
1251 {
1252         struct pci_dev *pdev = adapter->pdev;
1253         struct e1000_hw *hw = &adapter->hw;
1254         struct e1000_mac_info *mac = &hw->mac;
1255         struct e1000_fc_info *fc = &hw->fc;
1256         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1257         u16 hwm;
1258
1259         /* Repartition PBA for an MTU greater than 9k.
1260          * To take effect, CTRL.RST is required.
1261          */
1262         switch (mac->type) {
1263         case e1000_82580:
1264                 pba = rd32(E1000_RXPBS);
1265                 pba = igb_rxpbs_adjust_82580(pba);
1266                 break;
1267         case e1000_82576:
1268                 pba = rd32(E1000_RXPBS);
1269                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1270                 break;
1271         case e1000_82575:
1272         default:
1273                 pba = E1000_PBA_34K;
1274                 break;
1275         }
1276
1277         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1278             (mac->type < e1000_82576)) {
1279                 /* adjust PBA for jumbo frames */
1280                 wr32(E1000_PBA, pba);
1281
1282                 /* To maintain wire speed transmits, the Tx FIFO should be
1283                  * large enough to accommodate two full transmit packets,
1284                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1285                  * the Rx FIFO should be large enough to accommodate at least
1286                  * one full receive packet and is similarly rounded up and
1287                  * expressed in KB. */
1288                 pba = rd32(E1000_PBA);
1289                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1290                 tx_space = pba >> 16;
1291                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1292                 pba &= 0xffff;
1293                 /* the tx fifo also stores 16 bytes of information about the tx packet,
1294                  * but don't include the ethernet FCS because hardware appends it */
1295                 min_tx_space = (adapter->max_frame_size +
1296                                 sizeof(union e1000_adv_tx_desc) -
1297                                 ETH_FCS_LEN) * 2;
1298                 min_tx_space = ALIGN(min_tx_space, 1024);
1299                 min_tx_space >>= 10;
1300                 /* software strips receive CRC, so leave room for it */
1301                 min_rx_space = adapter->max_frame_size;
1302                 min_rx_space = ALIGN(min_rx_space, 1024);
1303                 min_rx_space >>= 10;
1304
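                     /* Worked example, assuming a 9018-byte jumbo frame and a
                      * 16-byte advanced Tx descriptor: min_tx_space =
                      * (9018 + 16 - 4) * 2 = 18060 -> 18432 -> 18 KB, and
                      * min_rx_space = 9018 -> 9216 -> 9 KB. */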
1305                 /* If current Tx allocation is less than the min Tx FIFO size,
1306                  * and the min Tx FIFO size is less than the current Rx FIFO
1307                  * allocation, take space away from current Rx allocation */
1308                 if (tx_space < min_tx_space &&
1309                     ((min_tx_space - tx_space) < pba)) {
1310                         pba = pba - (min_tx_space - tx_space);
1311
1312                         /* if short on rx space, rx wins and must trump tx
1313                          * adjustment */
1314                         if (pba < min_rx_space)
1315                                 pba = min_rx_space;
1316                 }
1317                 wr32(E1000_PBA, pba);
1318         }
1319
1320         /* flow control settings */
1321         /* The high water mark must be low enough to fit one full frame
1322          * (or the size used for early receive) above it in the Rx FIFO.
1323          * Set it to the lower of:
1324          * - 90% of the Rx FIFO size, or
1325          * - the full Rx FIFO size minus one full frame */
1326         hwm = min(((pba << 10) * 9 / 10),
1327                         ((pba << 10) - 2 * adapter->max_frame_size));
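             /* For example, assuming pba = 34 KB and a 1522-byte max frame:
              * min(34816 * 9 / 10, 34816 - 3044) = min(31334, 31772) = 31334,
              * which the 16-byte mask below rounds down to 31328. */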
1328
1329         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1330         fc->low_water = fc->high_water - 16;
1331         fc->pause_time = 0xFFFF;
1332         fc->send_xon = 1;
1333         fc->current_mode = fc->requested_mode;
1334
1335         /* disable receive for all VFs and wait one second */
1336         if (adapter->vfs_allocated_count) {
1337                 int i;
1338                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1339                         adapter->vf_data[i].flags = 0;
1340
1341                 /* ping all the active vfs to let them know we are going down */
1342                 igb_ping_all_vfs(adapter);
1343
1344                 /* disable transmits and receives */
1345                 wr32(E1000_VFRE, 0);
1346                 wr32(E1000_VFTE, 0);
1347         }
1348
1349         /* Allow time for pending master requests to run */
1350         hw->mac.ops.reset_hw(hw);
1351         wr32(E1000_WUC, 0);
1352
1353         if (hw->mac.ops.init_hw(hw))
1354                 dev_err(&pdev->dev, "Hardware Error\n");
1355
1356         if (hw->mac.type == e1000_82580) {
1357                 u32 reg = rd32(E1000_PCIEMISC);
1358                 wr32(E1000_PCIEMISC,
1359                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1360         }
1361         if (!netif_running(adapter->netdev))
1362                 igb_power_down_link(adapter);
1363
1364         igb_update_mng_vlan(adapter);
1365
1366         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1367         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1368
1369         igb_reset_adaptive(hw);
1370         igb_get_phy_info(hw);
1371 }
1372
1373 static const struct net_device_ops igb_netdev_ops = {
1374         .ndo_open               = igb_open,
1375         .ndo_stop               = igb_close,
1376         .ndo_start_xmit         = igb_xmit_frame_adv,
1377         .ndo_get_stats          = igb_get_stats,
1378         .ndo_set_rx_mode        = igb_set_rx_mode,
1379         .ndo_set_multicast_list = igb_set_rx_mode,
1380         .ndo_set_mac_address    = igb_set_mac,
1381         .ndo_change_mtu         = igb_change_mtu,
1382         .ndo_do_ioctl           = igb_ioctl,
1383         .ndo_tx_timeout         = igb_tx_timeout,
1384         .ndo_validate_addr      = eth_validate_addr,
1385         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1386         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1387         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1388         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1389         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1390         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1391         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1392 #ifdef CONFIG_NET_POLL_CONTROLLER
1393         .ndo_poll_controller    = igb_netpoll,
1394 #endif
1395 };
1396
1397 /**
1398  * igb_probe - Device Initialization Routine
1399  * @pdev: PCI device information struct
1400  * @ent: entry in igb_pci_tbl
1401  *
1402  * Returns 0 on success, negative on failure
1403  *
1404  * igb_probe initializes an adapter identified by a pci_dev structure.
1405  * The OS initialization, configuring of the adapter private structure,
1406  * and a hardware reset occur.
1407  **/
1408 static int __devinit igb_probe(struct pci_dev *pdev,
1409                                const struct pci_device_id *ent)
1410 {
1411         struct net_device *netdev;
1412         struct igb_adapter *adapter;
1413         struct e1000_hw *hw;
1414         u16 eeprom_data = 0;
1415         static int global_quad_port_a; /* global quad port a indication */
1416         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1417         unsigned long mmio_start, mmio_len;
1418         int err, pci_using_dac;
1419         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1420         u32 part_num;
1421
1422         err = pci_enable_device_mem(pdev);
1423         if (err)
1424                 return err;
1425
1426         pci_using_dac = 0;
1427         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1428         if (!err) {
1429                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1430                 if (!err)
1431                         pci_using_dac = 1;
1432         } else {
1433                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1434                 if (err) {
1435                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1436                         if (err) {
1437                                 dev_err(&pdev->dev, "No usable DMA "
1438                                         "configuration, aborting\n");
1439                                 goto err_dma;
1440                         }
1441                 }
1442         }
1443
1444         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1445                                            IORESOURCE_MEM),
1446                                            igb_driver_name);
1447         if (err)
1448                 goto err_pci_reg;
1449
1450         pci_enable_pcie_error_reporting(pdev);
1451
1452         pci_set_master(pdev);
1453         pci_save_state(pdev);
1454
1455         err = -ENOMEM;
1456         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1457                                    IGB_ABS_MAX_TX_QUEUES);
1458         if (!netdev)
1459                 goto err_alloc_etherdev;
1460
1461         SET_NETDEV_DEV(netdev, &pdev->dev);
1462
1463         pci_set_drvdata(pdev, netdev);
1464         adapter = netdev_priv(netdev);
1465         adapter->netdev = netdev;
1466         adapter->pdev = pdev;
1467         hw = &adapter->hw;
1468         hw->back = adapter;
1469         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1470
1471         mmio_start = pci_resource_start(pdev, 0);
1472         mmio_len = pci_resource_len(pdev, 0);
1473
1474         err = -EIO;
1475         hw->hw_addr = ioremap(mmio_start, mmio_len);
1476         if (!hw->hw_addr)
1477                 goto err_ioremap;
1478
1479         netdev->netdev_ops = &igb_netdev_ops;
1480         igb_set_ethtool_ops(netdev);
1481         netdev->watchdog_timeo = 5 * HZ;
1482
1483         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1484
1485         netdev->mem_start = mmio_start;
1486         netdev->mem_end = mmio_start + mmio_len;
1487
1488         /* PCI config space info */
1489         hw->vendor_id = pdev->vendor;
1490         hw->device_id = pdev->device;
1491         hw->revision_id = pdev->revision;
1492         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1493         hw->subsystem_device_id = pdev->subsystem_device;
1494
1495         /* Copy the default MAC, PHY and NVM function pointers */
1496         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1497         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1498         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1499         /* Initialize skew-specific constants */
1500         err = ei->get_invariants(hw);
1501         if (err)
1502                 goto err_sw_init;
1503
1504         /* setup the private structure */
1505         err = igb_sw_init(adapter);
1506         if (err)
1507                 goto err_sw_init;
1508
1509         igb_get_bus_info_pcie(hw);
1510
1511         hw->phy.autoneg_wait_to_complete = false;
1512         hw->mac.adaptive_ifs = true;
1513
1514         /* Copper options */
1515         if (hw->phy.media_type == e1000_media_type_copper) {
1516                 hw->phy.mdix = AUTO_ALL_MODES;
1517                 hw->phy.disable_polarity_correction = false;
1518                 hw->phy.ms_type = e1000_ms_hw_default;
1519         }
1520
1521         if (igb_check_reset_block(hw))
1522                 dev_info(&pdev->dev,
1523                         "PHY reset is blocked due to SOL/IDER session.\n");
1524
1525         netdev->features = NETIF_F_SG |
1526                            NETIF_F_IP_CSUM |
1527                            NETIF_F_HW_VLAN_TX |
1528                            NETIF_F_HW_VLAN_RX |
1529                            NETIF_F_HW_VLAN_FILTER;
1530
1531         netdev->features |= NETIF_F_IPV6_CSUM;
1532         netdev->features |= NETIF_F_TSO;
1533         netdev->features |= NETIF_F_TSO6;
1534         netdev->features |= NETIF_F_GRO;
1535
1536         netdev->vlan_features |= NETIF_F_TSO;
1537         netdev->vlan_features |= NETIF_F_TSO6;
1538         netdev->vlan_features |= NETIF_F_IP_CSUM;
1539         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1540         netdev->vlan_features |= NETIF_F_SG;
1541
1542         if (pci_using_dac)
1543                 netdev->features |= NETIF_F_HIGHDMA;
1544
1545         if (hw->mac.type >= e1000_82576)
1546                 netdev->features |= NETIF_F_SCTP_CSUM;
1547
1548         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1549
1550         /* before reading the NVM, reset the controller to put the device in a
1551          * known good starting state */
1552         hw->mac.ops.reset_hw(hw);
1553
1554         /* make sure the NVM is good */
1555         if (igb_validate_nvm_checksum(hw) < 0) {
1556                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1557                 err = -EIO;
1558                 goto err_eeprom;
1559         }
1560
1561         /* copy the MAC address out of the NVM */
1562         if (hw->mac.ops.read_mac_addr(hw))
1563                 dev_err(&pdev->dev, "NVM Read Error\n");
1564
1565         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1566         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1567
1568         if (!is_valid_ether_addr(netdev->perm_addr)) {
1569                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1570                 err = -EIO;
1571                 goto err_eeprom;
1572         }
1573
1574         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1575                     (unsigned long) adapter);
1576         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1577                     (unsigned long) adapter);
1578
1579         INIT_WORK(&adapter->reset_task, igb_reset_task);
1580         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1581
1582         /* Initialize link properties that are user-changeable */
1583         adapter->fc_autoneg = true;
1584         hw->mac.autoneg = true;
1585         hw->phy.autoneg_advertised = 0x2f;
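        /*
         * 0x2f is assumed to be the usual e1000 advertisement mask:
         * 10 half/full, 100 half/full and 1000 full (1000 half is not
         * advertised).
         */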
1586
1587         hw->fc.requested_mode = e1000_fc_default;
1588         hw->fc.current_mode = e1000_fc_default;
1589
1590         igb_validate_mdi_setting(hw);
1591
1592         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1593          * enable the ACPI Magic Packet filter
1594          */
1595
1596         if (hw->bus.func == 0)
1597                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1598         else if (hw->mac.type == e1000_82580)
1599                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1600                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1601                                  &eeprom_data);
1602         else if (hw->bus.func == 1)
1603                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1604
1605         if (eeprom_data & eeprom_apme_mask)
1606                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1607
1608         /* now that we have the eeprom settings, apply the special cases where
1609          * the eeprom may be wrong or the board simply won't support wake on
1610          * lan on a particular port */
1611         switch (pdev->device) {
1612         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1613                 adapter->eeprom_wol = 0;
1614                 break;
1615         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1616         case E1000_DEV_ID_82576_FIBER:
1617         case E1000_DEV_ID_82576_SERDES:
1618                 /* Wake events only supported on port A for dual fiber
1619                  * regardless of eeprom setting */
1620                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1621                         adapter->eeprom_wol = 0;
1622                 break;
1623         case E1000_DEV_ID_82576_QUAD_COPPER:
1624                 /* if quad port adapter, disable WoL on all but port A */
1625                 if (global_quad_port_a != 0)
1626                         adapter->eeprom_wol = 0;
1627                 else
1628                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1629                 /* Reset for multiple quad port adapters */
1630                 if (++global_quad_port_a == 4)
1631                         global_quad_port_a = 0;
1632                 break;
1633         }
1634
1635         /* initialize the wol settings based on the eeprom settings */
1636         adapter->wol = adapter->eeprom_wol;
1637         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1638
1639         /* reset the hardware with the new settings */
1640         igb_reset(adapter);
1641
1642         /* let the f/w know that the h/w is now under the control of the
1643          * driver. */
1644         igb_get_hw_control(adapter);
1645
1646         strcpy(netdev->name, "eth%d");
1647         err = register_netdev(netdev);
1648         if (err)
1649                 goto err_register;
1650
1651         /* carrier off reporting is important to ethtool even BEFORE open */
1652         netif_carrier_off(netdev);
1653
1654 #ifdef CONFIG_IGB_DCA
1655         if (dca_add_requester(&pdev->dev) == 0) {
1656                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1657                 dev_info(&pdev->dev, "DCA enabled\n");
1658                 igb_setup_dca(adapter);
1659         }
1660
1661 #endif
1662         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1663         /* print bus type/speed/width info */
1664         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1665                  netdev->name,
1666                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1667                                                             "unknown"),
1668                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1669                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1670                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1671                    "unknown"),
1672                  netdev->dev_addr);
1673
1674         igb_read_part_num(hw, &part_num);
1675         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1676                 (part_num >> 8), (part_num & 0xff));
1677
1678         dev_info(&pdev->dev,
1679                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1680                 adapter->msix_entries ? "MSI-X" :
1681                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1682                 adapter->num_rx_queues, adapter->num_tx_queues);
1683
1684         return 0;
1685
1686 err_register:
1687         igb_release_hw_control(adapter);
1688 err_eeprom:
1689         if (!igb_check_reset_block(hw))
1690                 igb_reset_phy(hw);
1691
1692         if (hw->flash_address)
1693                 iounmap(hw->flash_address);
1694 err_sw_init:
1695         igb_clear_interrupt_scheme(adapter);
1696         iounmap(hw->hw_addr);
1697 err_ioremap:
1698         free_netdev(netdev);
1699 err_alloc_etherdev:
1700         pci_release_selected_regions(pdev,
1701                                      pci_select_bars(pdev, IORESOURCE_MEM));
1702 err_pci_reg:
1703 err_dma:
1704         pci_disable_device(pdev);
1705         return err;
1706 }
1707
1708 /**
1709  * igb_remove - Device Removal Routine
1710  * @pdev: PCI device information struct
1711  *
1712  * igb_remove is called by the PCI subsystem to alert the driver
1713  * that it should release a PCI device.  This could be caused by a
1714  * Hot-Plug event, or because the driver is going to be removed from
1715  * memory.
1716  **/
1717 static void __devexit igb_remove(struct pci_dev *pdev)
1718 {
1719         struct net_device *netdev = pci_get_drvdata(pdev);
1720         struct igb_adapter *adapter = netdev_priv(netdev);
1721         struct e1000_hw *hw = &adapter->hw;
1722
1723         /* flush_scheduled_work() may reschedule our watchdog task, so
1724          * explicitly disable watchdog tasks from being rescheduled. */
1725         set_bit(__IGB_DOWN, &adapter->state);
1726         del_timer_sync(&adapter->watchdog_timer);
1727         del_timer_sync(&adapter->phy_info_timer);
1728
1729         flush_scheduled_work();
1730
1731 #ifdef CONFIG_IGB_DCA
1732         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1733                 dev_info(&pdev->dev, "DCA disabled\n");
1734                 dca_remove_requester(&pdev->dev);
1735                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1736                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1737         }
1738 #endif
1739
1740         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1741          * would have already happened in close and is redundant. */
1742         igb_release_hw_control(adapter);
1743
1744         unregister_netdev(netdev);
1745
1746         igb_clear_interrupt_scheme(adapter);
1747
1748 #ifdef CONFIG_PCI_IOV
1749         /* reclaim resources allocated to VFs */
1750         if (adapter->vf_data) {
1751                 /* disable iov and allow time for transactions to clear */
1752                 pci_disable_sriov(pdev);
1753                 msleep(500);
1754
1755                 kfree(adapter->vf_data);
1756                 adapter->vf_data = NULL;
1757                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1758                 msleep(100);
1759                 dev_info(&pdev->dev, "IOV Disabled\n");
1760         }
1761 #endif
1762
1763         iounmap(hw->hw_addr);
1764         if (hw->flash_address)
1765                 iounmap(hw->flash_address);
1766         pci_release_selected_regions(pdev,
1767                                      pci_select_bars(pdev, IORESOURCE_MEM));
1768
1769         free_netdev(netdev);
1770
1771         pci_disable_pcie_error_reporting(pdev);
1772
1773         pci_disable_device(pdev);
1774 }
1775
1776 /**
1777  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1778  * @adapter: board private structure to initialize
1779  *
1780  * This function initializes the vf specific data storage and then attempts to
1781  * allocate the VFs.  The reason for ordering it this way is that it is much
1782  * more expensive time wise to disable SR-IOV than it is to allocate and free
1783  * the memory for the VFs.
1784  **/
1785 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1786 {
1787 #ifdef CONFIG_PCI_IOV
1788         struct pci_dev *pdev = adapter->pdev;
1789
1790         if (adapter->vfs_allocated_count > 7)
1791                 adapter->vfs_allocated_count = 7;
1792
1793         if (adapter->vfs_allocated_count) {
1794                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1795                                            sizeof(struct vf_data_storage),
1796                                            GFP_KERNEL);
1797                 /* if allocation failed then we do not support SR-IOV */
1798                 if (!adapter->vf_data) {
1799                         adapter->vfs_allocated_count = 0;
1800                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1801                                 "Data Storage\n");
1802                 }
1803         }
1804
1805         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1806                 kfree(adapter->vf_data);
1807                 adapter->vf_data = NULL;
1808 #endif /* CONFIG_PCI_IOV */
1809                 adapter->vfs_allocated_count = 0;
1810 #ifdef CONFIG_PCI_IOV
1811         } else {
1812                 unsigned char mac_addr[ETH_ALEN];
1813                 int i;
1814                 dev_info(&pdev->dev, "%d vfs allocated\n",
1815                          adapter->vfs_allocated_count);
1816                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1817                         random_ether_addr(mac_addr);
1818                         igb_set_vf_mac(adapter, i, mac_addr);
1819                 }
1820         }
1821 #endif /* CONFIG_PCI_IOV */
1822 }
1823
1824
1825 /**
1826  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1827  * @adapter: board private structure to initialize
1828  *
1829  * igb_init_hw_timer initializes the function pointer and values for the hw
1830  * timer found in hardware.
1831  **/
1832 static void igb_init_hw_timer(struct igb_adapter *adapter)
1833 {
1834         struct e1000_hw *hw = &adapter->hw;
1835
1836         switch (hw->mac.type) {
1837         case e1000_82580:
1838                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1839                 adapter->cycles.read = igb_read_clock;
1840                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1841                 adapter->cycles.mult = 1;
1842                 /*
1843                  * The 82580 timesync updates the system timer in 8ns increments
1844                  * and the value cannot be shifted.  Instead we need to shift
1845                  * the registers to generate a 64bit timer value.  As a result
1846                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1847                  * 24 in order to generate a larger value for synchronization.
1848                  */
1849                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1850                 /* disable system timer temporarily by setting bit 31 */
1851                 wr32(E1000_TSAUXC, 0x80000000);
1852                 wrfl();
1853
1854                 /* Set registers so that rollover occurs soon to test this. */
1855                 wr32(E1000_SYSTIMR, 0x00000000);
1856                 wr32(E1000_SYSTIML, 0x80000000);
1857                 wr32(E1000_SYSTIMH, 0x000000FF);
1858                 wrfl();
1859
1860                 /* enable system timer by clearing bit 31 */
1861                 wr32(E1000_TSAUXC, 0x0);
1862                 wrfl();
1863
1864                 timecounter_init(&adapter->clock,
1865                                  &adapter->cycles,
1866                                  ktime_to_ns(ktime_get_real()));
1867                 /*
1868                  * Synchronize our NIC clock against system wall clock. NIC
1869                  * time stamp reading requires ~3us per sample, and each
1870                  * sample was stable even under load, so only 10 samples
1871                  * are needed for each offset comparison.
1872                  */
1873                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1874                 adapter->compare.source = &adapter->clock;
1875                 adapter->compare.target = ktime_get_real;
1876                 adapter->compare.num_samples = 10;
1877                 timecompare_update(&adapter->compare, 0);
1878                 break;
1879         case e1000_82576:
1880                 /*
1881                  * Initialize hardware timer: we keep it running just in case
1882                  * that some program needs it later on.
1883                  */
1884                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1885                 adapter->cycles.read = igb_read_clock;
1886                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1887                 adapter->cycles.mult = 1;
1888                 /*
1889                  * Scale the NIC clock cycle by a large factor so that
1890                  * relatively small clock corrections can be added or
1891                  * subtracted at each clock tick. The drawbacks of a large
1892                  * factor are a) that the clock register overflows more quickly
1893                  * (not such a big deal) and b) that the increment per tick has
1894                  * to fit into 24 bits.  As a result we need to use a shift of
1895                  * 19 so we can fit a value of 16 into the TIMINCA register.
1896                  */
1897                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1898                 wr32(E1000_TIMINCA,
1899                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1900                                 (16 << IGB_82576_TSYNC_SHIFT));
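                /*
                 * For illustration, assuming E1000_TIMINCA_16NS_SHIFT is 24
                 * and IGB_82576_TSYNC_SHIFT is 19, the value written above
                 * is (1 << 24) | (16 << 19) = 0x01800000.
                 */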
1901
1902                 /* Set registers so that rollover occurs soon to test this. */
1903                 wr32(E1000_SYSTIML, 0x00000000);
1904                 wr32(E1000_SYSTIMH, 0xFF800000);
1905                 wrfl();
1906
1907                 timecounter_init(&adapter->clock,
1908                                  &adapter->cycles,
1909                                  ktime_to_ns(ktime_get_real()));
1910                 /*
1911                  * Synchronize our NIC clock against system wall clock. NIC
1912                  * time stamp reading requires ~3us per sample, and each
1913                  * sample was stable even under load, so only 10 samples
1914                  * are needed for each offset comparison.
1915                  */
1916                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1917                 adapter->compare.source = &adapter->clock;
1918                 adapter->compare.target = ktime_get_real;
1919                 adapter->compare.num_samples = 10;
1920                 timecompare_update(&adapter->compare, 0);
1921                 break;
1922         case e1000_82575:
1923                 /* 82575 does not support timesync */
1924         default:
1925                 break;
1926         }
1927
1928 }
1929
1930 /**
1931  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1932  * @adapter: board private structure to initialize
1933  *
1934  * igb_sw_init initializes the Adapter private data structure.
1935  * Fields are initialized based on PCI device information and
1936  * OS network device settings (MTU size).
1937  **/
1938 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1939 {
1940         struct e1000_hw *hw = &adapter->hw;
1941         struct net_device *netdev = adapter->netdev;
1942         struct pci_dev *pdev = adapter->pdev;
1943
1944         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1945
1946         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1947         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1948         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1949         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1950
1951         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1952         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
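        /* e.g. with the default 1500-byte MTU: max_frame_size = 1500 + 14 +
         * 4 = 1518 bytes, and min_frame_size = 60 + 4 = 64 bytes. */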
1953
1954 #ifdef CONFIG_PCI_IOV
1955         if (hw->mac.type == e1000_82576)
1956                 adapter->vfs_allocated_count = max_vfs;
1957
1958 #endif /* CONFIG_PCI_IOV */
1959         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1960
1961         /*
1962          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1963          * then we should combine the queues into a queue pair in order to
1964          * conserve interrupts due to limited supply
1965          */
1966         if ((adapter->rss_queues > 4) ||
1967             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1968                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
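        /*
         * Rough illustration, assuming one MSI-X vector per q_vector: eight
         * unpaired Rx queues plus eight Tx queues would need 16 vectors,
         * while pairing each Tx ring with an Rx ring halves that to eight.
         */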
1969
1970         /* This call may decrease the number of queues */
1971         if (igb_init_interrupt_scheme(adapter)) {
1972                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1973                 return -ENOMEM;
1974         }
1975
1976         igb_init_hw_timer(adapter);
1977         igb_probe_vfs(adapter);
1978
1979         /* Explicitly disable IRQ since the NIC can be in any state. */
1980         igb_irq_disable(adapter);
1981
1982         set_bit(__IGB_DOWN, &adapter->state);
1983         return 0;
1984 }
1985
1986 /**
1987  * igb_open - Called when a network interface is made active
1988  * @netdev: network interface device structure
1989  *
1990  * Returns 0 on success, negative value on failure
1991  *
1992  * The open entry point is called when a network interface is made
1993  * active by the system (IFF_UP).  At this point all resources needed
1994  * for transmit and receive operations are allocated, the interrupt
1995  * handler is registered with the OS, the watchdog timer is started,
1996  * and the stack is notified that the interface is ready.
1997  **/
1998 static int igb_open(struct net_device *netdev)
1999 {
2000         struct igb_adapter *adapter = netdev_priv(netdev);
2001         struct e1000_hw *hw = &adapter->hw;
2002         int err;
2003         int i;
2004
2005         /* disallow open during test */
2006         if (test_bit(__IGB_TESTING, &adapter->state))
2007                 return -EBUSY;
2008
2009         netif_carrier_off(netdev);
2010
2011         /* allocate transmit descriptors */
2012         err = igb_setup_all_tx_resources(adapter);
2013         if (err)
2014                 goto err_setup_tx;
2015
2016         /* allocate receive descriptors */
2017         err = igb_setup_all_rx_resources(adapter);
2018         if (err)
2019                 goto err_setup_rx;
2020
2021         igb_power_up_link(adapter);
2022
2023         /* before we allocate an interrupt, we must be ready to handle it.
2024          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2025          * as soon as we request the interrupt, so we have to set up our
2026          * clean_rx handler before we do so.  */
2027         igb_configure(adapter);
2028
2029         err = igb_request_irq(adapter);
2030         if (err)
2031                 goto err_req_irq;
2032
2033         /* From here on the code is the same as igb_up() */
2034         clear_bit(__IGB_DOWN, &adapter->state);
2035
2036         for (i = 0; i < adapter->num_q_vectors; i++) {
2037                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2038                 napi_enable(&q_vector->napi);
2039         }
2040
2041         /* Clear any pending interrupts. */
2042         rd32(E1000_ICR);
2043
2044         igb_irq_enable(adapter);
2045
2046         /* notify VFs that reset has been completed */
2047         if (adapter->vfs_allocated_count) {
2048                 u32 reg_data = rd32(E1000_CTRL_EXT);
2049                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2050                 wr32(E1000_CTRL_EXT, reg_data);
2051         }
2052
2053         netif_tx_start_all_queues(netdev);
2054
2055         /* start the watchdog. */
2056         hw->mac.get_link_status = 1;
2057         schedule_work(&adapter->watchdog_task);
2058
2059         return 0;
2060
2061 err_req_irq:
2062         igb_release_hw_control(adapter);
2063         igb_power_down_link(adapter);
2064         igb_free_all_rx_resources(adapter);
2065 err_setup_rx:
2066         igb_free_all_tx_resources(adapter);
2067 err_setup_tx:
2068         igb_reset(adapter);
2069
2070         return err;
2071 }
2072
2073 /**
2074  * igb_close - Disables a network interface
2075  * @netdev: network interface device structure
2076  *
2077  * Returns 0, this is not allowed to fail
2078  *
2079  * The close entry point is called when an interface is de-activated
2080  * by the OS.  The hardware is still under the driver's control, but
2081  * needs to be disabled.  A global MAC reset is issued to stop the
2082  * hardware, and all transmit and receive resources are freed.
2083  **/
2084 static int igb_close(struct net_device *netdev)
2085 {
2086         struct igb_adapter *adapter = netdev_priv(netdev);
2087
2088         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2089         igb_down(adapter);
2090
2091         igb_free_irq(adapter);
2092
2093         igb_free_all_tx_resources(adapter);
2094         igb_free_all_rx_resources(adapter);
2095
2096         return 0;
2097 }
2098
2099 /**
2100  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2101  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2102  *
2103  * Return 0 on success, negative on failure
2104  **/
2105 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2106 {
2107         struct pci_dev *pdev = tx_ring->pdev;
2108         int size;
2109
2110         size = sizeof(struct igb_buffer) * tx_ring->count;
2111         tx_ring->buffer_info = vmalloc(size);
2112         if (!tx_ring->buffer_info)
2113                 goto err;
2114         memset(tx_ring->buffer_info, 0, size);
2115
2116         /* round up to nearest 4K */
2117         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2118         tx_ring->size = ALIGN(tx_ring->size, 4096);
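        /* e.g. a 256-entry ring of 16-byte advanced descriptors is exactly
         * 4096 bytes, so the ALIGN() above is a no-op; other counts are
         * rounded up to the next 4K boundary. */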
2119
2120         tx_ring->desc = pci_alloc_consistent(pdev,
2121                                              tx_ring->size,
2122                                              &tx_ring->dma);
2123
2124         if (!tx_ring->desc)
2125                 goto err;
2126
2127         tx_ring->next_to_use = 0;
2128         tx_ring->next_to_clean = 0;
2129         return 0;
2130
2131 err:
2132         vfree(tx_ring->buffer_info);
2133         dev_err(&pdev->dev,
2134                 "Unable to allocate memory for the transmit descriptor ring\n");
2135         return -ENOMEM;
2136 }
2137
2138 /**
2139  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2140  *                                (Descriptors) for all queues
2141  * @adapter: board private structure
2142  *
2143  * Return 0 on success, negative on failure
2144  **/
2145 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2146 {
2147         struct pci_dev *pdev = adapter->pdev;
2148         int i, err = 0;
2149
2150         for (i = 0; i < adapter->num_tx_queues; i++) {
2151                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2152                 if (err) {
2153                         dev_err(&pdev->dev,
2154                                 "Allocation for Tx Queue %u failed\n", i);
2155                         for (i--; i >= 0; i--)
2156                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2157                         break;
2158                 }
2159         }
2160
2161         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2162                 int r_idx = i % adapter->num_tx_queues;
2163                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2164         }
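        /*
         * The loop above fills multi_tx_table round-robin; e.g. with four
         * Tx queues allocated, successive slots point at rings
         * 0, 1, 2, 3, 0, 1, ...
         */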
2165         return err;
2166 }
2167
2168 /**
2169  * igb_setup_tctl - configure the transmit control registers
2170  * @adapter: Board private structure
2171  **/
2172 void igb_setup_tctl(struct igb_adapter *adapter)
2173 {
2174         struct e1000_hw *hw = &adapter->hw;
2175         u32 tctl;
2176
2177         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2178         wr32(E1000_TXDCTL(0), 0);
2179
2180         /* Program the Transmit Control Register */
2181         tctl = rd32(E1000_TCTL);
2182         tctl &= ~E1000_TCTL_CT;
2183         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2184                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2185
2186         igb_config_collision_dist(hw);
2187
2188         /* Enable transmits */
2189         tctl |= E1000_TCTL_EN;
2190
2191         wr32(E1000_TCTL, tctl);
2192 }
2193
2194 /**
2195  * igb_configure_tx_ring - Configure transmit ring after Reset
2196  * @adapter: board private structure
2197  * @ring: tx ring to configure
2198  *
2199  * Configure a transmit ring after a reset.
2200  **/
2201 void igb_configure_tx_ring(struct igb_adapter *adapter,
2202                            struct igb_ring *ring)
2203 {
2204         struct e1000_hw *hw = &adapter->hw;
2205         u32 txdctl;
2206         u64 tdba = ring->dma;
2207         int reg_idx = ring->reg_idx;
2208
2209         /* disable the queue */
2210         txdctl = rd32(E1000_TXDCTL(reg_idx));
2211         wr32(E1000_TXDCTL(reg_idx),
2212                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2213         wrfl();
2214         mdelay(10);
2215
2216         wr32(E1000_TDLEN(reg_idx),
2217                         ring->count * sizeof(union e1000_adv_tx_desc));
2218         wr32(E1000_TDBAL(reg_idx),
2219                         tdba & 0x00000000ffffffffULL);
2220         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2221
2222         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2223         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2224         writel(0, ring->head);
2225         writel(0, ring->tail);
2226
2227         txdctl |= IGB_TX_PTHRESH;
2228         txdctl |= IGB_TX_HTHRESH << 8;
2229         txdctl |= IGB_TX_WTHRESH << 16;
2230
2231         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2232         wr32(E1000_TXDCTL(reg_idx), txdctl);
2233 }
2234
2235 /**
2236  * igb_configure_tx - Configure transmit Unit after Reset
2237  * @adapter: board private structure
2238  *
2239  * Configure the Tx unit of the MAC after a reset.
2240  **/
2241 static void igb_configure_tx(struct igb_adapter *adapter)
2242 {
2243         int i;
2244
2245         for (i = 0; i < adapter->num_tx_queues; i++)
2246                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2247 }
2248
2249 /**
2250  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2251  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2252  *
2253  * Returns 0 on success, negative on failure
2254  **/
2255 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2256 {
2257         struct pci_dev *pdev = rx_ring->pdev;
2258         int size, desc_len;
2259
2260         size = sizeof(struct igb_buffer) * rx_ring->count;
2261         rx_ring->buffer_info = vmalloc(size);
2262         if (!rx_ring->buffer_info)
2263                 goto err;
2264         memset(rx_ring->buffer_info, 0, size);
2265
2266         desc_len = sizeof(union e1000_adv_rx_desc);
2267
2268         /* Round up to nearest 4K */
2269         rx_ring->size = rx_ring->count * desc_len;
2270         rx_ring->size = ALIGN(rx_ring->size, 4096);
2271
2272         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2273                                              &rx_ring->dma);
2274
2275         if (!rx_ring->desc)
2276                 goto err;
2277
2278         rx_ring->next_to_clean = 0;
2279         rx_ring->next_to_use = 0;
2280
2281         return 0;
2282
2283 err:
2284         vfree(rx_ring->buffer_info);
2285         rx_ring->buffer_info = NULL;
2286         dev_err(&pdev->dev, "Unable to allocate memory for "
2287                 "the receive descriptor ring\n");
2288         return -ENOMEM;
2289 }
2290
2291 /**
2292  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2293  *                                (Descriptors) for all queues
2294  * @adapter: board private structure
2295  *
2296  * Return 0 on success, negative on failure
2297  **/
2298 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2299 {
2300         struct pci_dev *pdev = adapter->pdev;
2301         int i, err = 0;
2302
2303         for (i = 0; i < adapter->num_rx_queues; i++) {
2304                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2305                 if (err) {
2306                         dev_err(&pdev->dev,
2307                                 "Allocation for Rx Queue %u failed\n", i);
2308                         for (i--; i >= 0; i--)
2309                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2310                         break;
2311                 }
2312         }
2313
2314         return err;
2315 }
2316
2317 /**
2318  * igb_setup_mrqc - configure the multiple receive queue control registers
2319  * @adapter: Board private structure
2320  **/
2321 static void igb_setup_mrqc(struct igb_adapter *adapter)
2322 {
2323         struct e1000_hw *hw = &adapter->hw;
2324         u32 mrqc, rxcsum;
2325         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2326         union e1000_reta {
2327                 u32 dword;
2328                 u8  bytes[4];
2329         } reta;
2330         static const u8 rsshash[40] = {
2331                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2332                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2333                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2334                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2335
2336         /* Fill out hash function seeds */
2337         for (j = 0; j < 10; j++) {
2338                 u32 rsskey = rsshash[(j * 4)];
2339                 rsskey |= rsshash[(j * 4) + 1] << 8;
2340                 rsskey |= rsshash[(j * 4) + 2] << 16;
2341                 rsskey |= rsshash[(j * 4) + 3] << 24;
2342                 array_wr32(E1000_RSSRK(0), j, rsskey);
2343         }
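        /* e.g. for j = 0 the dword written above is
         * 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 = 0xda565a6d. */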
2344
2345         num_rx_queues = adapter->rss_queues;
2346
2347         if (adapter->vfs_allocated_count) {
2348                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2349                 switch (hw->mac.type) {
2350                 case e1000_82580:
2351                         num_rx_queues = 1;
2352                         shift = 0;
2353                         break;
2354                 case e1000_82576:
2355                         shift = 3;
2356                         num_rx_queues = 2;
2357                         break;
2358                 case e1000_82575:
2359                         shift = 2;
2360                         shift2 = 6;
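                        /* fall through */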
2361                 default:
2362                         break;
2363                 }
2364         } else {
2365                 if (hw->mac.type == e1000_82575)
2366                         shift = 6;
2367         }
2368
2369         for (j = 0; j < (32 * 4); j++) {
2370                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2371                 if (shift2)
2372                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2373                 if ((j & 3) == 3)
2374                         wr32(E1000_RETA(j >> 2), reta.dword);
2375         }
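        /*
         * Example: with num_rx_queues = 4 and shift = 0 the redirection
         * table bytes repeat 0, 1, 2, 3, ... so flows spread evenly over
         * the four queues; a non-zero shift just moves the queue index to
         * the bit position that MAC type expects.
         */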
2376
2377         /*
2378          * Disable raw packet checksumming so that RSS hash is placed in
2379          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2380          * offloads as they are enabled by default
2381          */
2382         rxcsum = rd32(E1000_RXCSUM);
2383         rxcsum |= E1000_RXCSUM_PCSD;
2384
2385         if (adapter->hw.mac.type >= e1000_82576)
2386                 /* Enable Receive Checksum Offload for SCTP */
2387                 rxcsum |= E1000_RXCSUM_CRCOFL;
2388
2389         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2390         wr32(E1000_RXCSUM, rxcsum);
2391
2392         /* If VMDq is enabled then we set the appropriate mode for that, else
2393          * we default to RSS so that an RSS hash is calculated per packet even
2394          * if we are only using one queue */
2395         if (adapter->vfs_allocated_count) {
2396                 if (hw->mac.type > e1000_82575) {
2397                         /* Set the default pool for the PF's first queue */
2398                         u32 vtctl = rd32(E1000_VT_CTL);
2399                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2400                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2401                         vtctl |= adapter->vfs_allocated_count <<
2402                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2403                         wr32(E1000_VT_CTL, vtctl);
2404                 }
2405                 if (adapter->rss_queues > 1)
2406                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2407                 else
2408                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2409         } else {
2410                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2411         }
2412         igb_vmm_control(adapter);
2413
2414         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2415                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2416         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2417                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2418         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2419                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2420         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2421                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2422
2423         wr32(E1000_MRQC, mrqc);
2424 }
2425
2426 /**
2427  * igb_setup_rctl - configure the receive control registers
2428  * @adapter: Board private structure
2429  **/
2430 void igb_setup_rctl(struct igb_adapter *adapter)
2431 {
2432         struct e1000_hw *hw = &adapter->hw;
2433         u32 rctl;
2434
2435         rctl = rd32(E1000_RCTL);
2436
2437         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2438         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2439
2440         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2441                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2442
2443         /*
2444          * enable stripping of CRC. It's unlikely this will break BMC
2445          * redirection as it did with e1000. Newer features require
2446          * that the HW strips the CRC.
2447          */
2448         rctl |= E1000_RCTL_SECRC;
2449
2450         /* disable store bad packets and clear size bits. */
2451         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2452
2453         /* enable LPE to prevent packets larger than max_frame_size */
2454         rctl |= E1000_RCTL_LPE;
2455
2456         /* disable queue 0 to prevent tail write w/o re-config */
2457         wr32(E1000_RXDCTL(0), 0);
2458
2459         /* Attention!!!  For SR-IOV PF driver operations you must enable
2460          * queue drop for all VF and PF queues to prevent head of line blocking
2461          * if an un-trusted VF does not provide descriptors to hardware.
2462          */
2463         if (adapter->vfs_allocated_count) {
2464                 /* set all queue drop enable bits */
2465                 wr32(E1000_QDE, ALL_QUEUES);
2466         }
2467
2468         wr32(E1000_RCTL, rctl);
2469 }
2470
2471 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2472                                    int vfn)
2473 {
2474         struct e1000_hw *hw = &adapter->hw;
2475         u32 vmolr;
2476
2477         /* if it isn't the PF, check to see if VFs are enabled and
2478          * increase the size to support vlan tags */
2479         if (vfn < adapter->vfs_allocated_count &&
2480             adapter->vf_data[vfn].vlans_enabled)
2481                 size += VLAN_TAG_SIZE;
2482
2483         vmolr = rd32(E1000_VMOLR(vfn));
2484         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2485         vmolr |= size | E1000_VMOLR_LPE;
2486         wr32(E1000_VMOLR(vfn), vmolr);
2487
2488         return 0;
2489 }
2490
2491 /**
2492  * igb_rlpml_set - set maximum receive packet size
2493  * @adapter: board private structure
2494  *
2495  * Configure maximum receivable packet size.
2496  **/
2497 static void igb_rlpml_set(struct igb_adapter *adapter)
2498 {
2499         u32 max_frame_size = adapter->max_frame_size;
2500         struct e1000_hw *hw = &adapter->hw;
2501         u16 pf_id = adapter->vfs_allocated_count;
2502
2503         if (adapter->vlgrp)
2504                 max_frame_size += VLAN_TAG_SIZE;
2505
2506         /* if vfs are enabled we set RLPML to the largest possible request
2507          * size and set the VMOLR RLPML to the size we need */
2508         if (pf_id) {
2509                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2510                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2511         }
2512
2513         wr32(E1000_RLPML, max_frame_size);
2514 }
2515
2516 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2517                                  int vfn, bool aupe)
2518 {
2519         struct e1000_hw *hw = &adapter->hw;
2520         u32 vmolr;
2521
2522         /*
2523          * This register exists only on 82576 and newer, so if we are on an
2524          * older device just exit and do nothing
2525          */
2526         if (hw->mac.type < e1000_82576)
2527                 return;
2528
2529         vmolr = rd32(E1000_VMOLR(vfn));
2530         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2531         if (aupe)
2532                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2533         else
2534                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2535
2536         /* clear all bits that might not be set */
2537         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2538
2539         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2540                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2541         /*
2542          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2543          * multicast packets
2544          */
2545         if (vfn <= adapter->vfs_allocated_count)
2546                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2547
2548         wr32(E1000_VMOLR(vfn), vmolr);
2549 }
2550
2551 /**
2552  * igb_configure_rx_ring - Configure a receive ring after Reset
2553  * @adapter: board private structure
2554  * @ring: receive ring to be configured
2555  *
2556  * Configure the Rx unit of the MAC after a reset.
2557  **/
2558 void igb_configure_rx_ring(struct igb_adapter *adapter,
2559                            struct igb_ring *ring)
2560 {
2561         struct e1000_hw *hw = &adapter->hw;
2562         u64 rdba = ring->dma;
2563         int reg_idx = ring->reg_idx;
2564         u32 srrctl, rxdctl;
2565
2566         /* disable the queue */
2567         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2568         wr32(E1000_RXDCTL(reg_idx),
2569                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2570
2571         /* Set DMA base address registers */
2572         wr32(E1000_RDBAL(reg_idx),
2573              rdba & 0x00000000ffffffffULL);
2574         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2575         wr32(E1000_RDLEN(reg_idx),
2576                        ring->count * sizeof(union e1000_adv_rx_desc));
2577
2578         /* initialize head and tail */
2579         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2580         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2581         writel(0, ring->head);
2582         writel(0, ring->tail);
2583
2584         /* set descriptor configuration */
2585         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2586                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2587                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2588 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2589                 srrctl |= IGB_RXBUFFER_16384 >>
2590                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2591 #else
2592                 srrctl |= (PAGE_SIZE / 2) >>
2593                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2594 #endif
2595                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2596         } else {
2597                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2598                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2599                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2600         }
2601
2602         wr32(E1000_SRRCTL(reg_idx), srrctl);
2603
2604         /* set filtering for VMDQ pools */
2605         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2606
2607         /* enable receive descriptor fetching */
2608         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2610         rxdctl &= 0xFFF00000;
2611         rxdctl |= IGB_RX_PTHRESH;
2612         rxdctl |= IGB_RX_HTHRESH << 8;
2613         rxdctl |= IGB_RX_WTHRESH << 16;
2614         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2615 }
2616
2617 /**
2618  * igb_configure_rx - Configure receive Unit after Reset
2619  * @adapter: board private structure
2620  *
2621  * Configure the Rx unit of the MAC after a reset.
2622  **/
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2624 {
2625         int i;
2626
2627         /* set UTA to appropriate mode */
2628         igb_set_uta(adapter);
2629
2630         /* set the correct pool for the PF default MAC address in entry 0 */
2631         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632                          adapter->vfs_allocated_count);
2633
2634         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635          * the Base and Length of the Rx Descriptor Ring */
2636         for (i = 0; i < adapter->num_rx_queues; i++)
2637                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2638 }
2639
2640 /**
2641  * igb_free_tx_resources - Free Tx Resources per Queue
2642  * @tx_ring: Tx descriptor ring for a specific queue
2643  *
2644  * Free all transmit software resources
2645  **/
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2647 {
2648         igb_clean_tx_ring(tx_ring);
2649
2650         vfree(tx_ring->buffer_info);
2651         tx_ring->buffer_info = NULL;
2652
2653         /* if not set, then don't free */
2654         if (!tx_ring->desc)
2655                 return;
2656
2657         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658                             tx_ring->desc, tx_ring->dma);
2659
2660         tx_ring->desc = NULL;
2661 }
2662
2663 /**
2664  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665  * @adapter: board private structure
2666  *
2667  * Free all transmit software resources
2668  **/
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2670 {
2671         int i;
2672
2673         for (i = 0; i < adapter->num_tx_queues; i++)
2674                 igb_free_tx_resources(&adapter->tx_ring[i]);
2675 }
2676
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678                                     struct igb_buffer *buffer_info)
2679 {
2680         if (buffer_info->dma) {
2681                 if (buffer_info->mapped_as_page)
2682                         pci_unmap_page(tx_ring->pdev,
2683                                         buffer_info->dma,
2684                                         buffer_info->length,
2685                                         PCI_DMA_TODEVICE);
2686                 else
2687                         pci_unmap_single(tx_ring->pdev,
2688                                         buffer_info->dma,
2689                                         buffer_info->length,
2690                                         PCI_DMA_TODEVICE);
2691                 buffer_info->dma = 0;
2692         }
2693         if (buffer_info->skb) {
2694                 dev_kfree_skb_any(buffer_info->skb);
2695                 buffer_info->skb = NULL;
2696         }
2697         buffer_info->time_stamp = 0;
2698         buffer_info->length = 0;
2699         buffer_info->next_to_watch = 0;
2700         buffer_info->mapped_as_page = false;
2701 }
2702
2703 /**
2704  * igb_clean_tx_ring - Free Tx Buffers
2705  * @tx_ring: ring to be cleaned
2706  **/
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2708 {
2709         struct igb_buffer *buffer_info;
2710         unsigned long size;
2711         unsigned int i;
2712
2713         if (!tx_ring->buffer_info)
2714                 return;
2715         /* Free all the Tx ring sk_buffs */
2716
2717         for (i = 0; i < tx_ring->count; i++) {
2718                 buffer_info = &tx_ring->buffer_info[i];
2719                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2720         }
2721
2722         size = sizeof(struct igb_buffer) * tx_ring->count;
2723         memset(tx_ring->buffer_info, 0, size);
2724
2725         /* Zero out the descriptor ring */
2726         memset(tx_ring->desc, 0, tx_ring->size);
2727
2728         tx_ring->next_to_use = 0;
2729         tx_ring->next_to_clean = 0;
2730 }
2731
2732 /**
2733  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734  * @adapter: board private structure
2735  **/
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2737 {
2738         int i;
2739
2740         for (i = 0; i < adapter->num_tx_queues; i++)
2741                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2742 }
2743
2744 /**
2745  * igb_free_rx_resources - Free Rx Resources
2746  * @rx_ring: ring to clean the resources from
2747  *
2748  * Free all receive software resources
2749  **/
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2751 {
2752         igb_clean_rx_ring(rx_ring);
2753
2754         vfree(rx_ring->buffer_info);
2755         rx_ring->buffer_info = NULL;
2756
2757         /* if not set, then don't free */
2758         if (!rx_ring->desc)
2759                 return;
2760
2761         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762                             rx_ring->desc, rx_ring->dma);
2763
2764         rx_ring->desc = NULL;
2765 }
2766
2767 /**
2768  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769  * @adapter: board private structure
2770  *
2771  * Free all receive software resources
2772  **/
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2774 {
2775         int i;
2776
2777         for (i = 0; i < adapter->num_rx_queues; i++)
2778                 igb_free_rx_resources(&adapter->rx_ring[i]);
2779 }
2780
2781 /**
2782  * igb_clean_rx_ring - Free Rx Buffers per Queue
2783  * @rx_ring: ring to free buffers from
2784  **/
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2786 {
2787         struct igb_buffer *buffer_info;
2788         unsigned long size;
2789         unsigned int i;
2790
2791         if (!rx_ring->buffer_info)
2792                 return;
2793
2794         /* Free all the Rx ring sk_buffs */
2795         for (i = 0; i < rx_ring->count; i++) {
2796                 buffer_info = &rx_ring->buffer_info[i];
2797                 if (buffer_info->dma) {
2798                         pci_unmap_single(rx_ring->pdev,
2799                                          buffer_info->dma,
2800                                          rx_ring->rx_buffer_len,
2801                                          PCI_DMA_FROMDEVICE);
2802                         buffer_info->dma = 0;
2803                 }
2804
2805                 if (buffer_info->skb) {
2806                         dev_kfree_skb(buffer_info->skb);
2807                         buffer_info->skb = NULL;
2808                 }
2809                 if (buffer_info->page_dma) {
2810                         pci_unmap_page(rx_ring->pdev,
2811                                        buffer_info->page_dma,
2812                                        PAGE_SIZE / 2,
2813                                        PCI_DMA_FROMDEVICE);
2814                         buffer_info->page_dma = 0;
2815                 }
2816                 if (buffer_info->page) {
2817                         put_page(buffer_info->page);
2818                         buffer_info->page = NULL;
2819                         buffer_info->page_offset = 0;
2820                 }
2821         }
2822
2823         size = sizeof(struct igb_buffer) * rx_ring->count;
2824         memset(rx_ring->buffer_info, 0, size);
2825
2826         /* Zero out the descriptor ring */
2827         memset(rx_ring->desc, 0, rx_ring->size);
2828
2829         rx_ring->next_to_clean = 0;
2830         rx_ring->next_to_use = 0;
2831 }
2832
2833 /**
2834  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835  * @adapter: board private structure
2836  **/
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2838 {
2839         int i;
2840
2841         for (i = 0; i < adapter->num_rx_queues; i++)
2842                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2843 }
2844
2845 /**
2846  * igb_set_mac - Change the Ethernet Address of the NIC
2847  * @netdev: network interface device structure
2848  * @p: pointer to an address structure
2849  *
2850  * Returns 0 on success, negative on failure
2851  **/
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2853 {
2854         struct igb_adapter *adapter = netdev_priv(netdev);
2855         struct e1000_hw *hw = &adapter->hw;
2856         struct sockaddr *addr = p;
2857
2858         if (!is_valid_ether_addr(addr->sa_data))
2859                 return -EADDRNOTAVAIL;
2860
2861         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2863
2864         /* set the correct pool for the new PF MAC address in entry 0 */
2865         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866                          adapter->vfs_allocated_count);
2867
2868         return 0;
2869 }
2870
2871 /**
2872  * igb_write_mc_addr_list - write multicast addresses to MTA
2873  * @netdev: network interface device structure
2874  *
2875  * Writes multicast address list to the MTA hash table.
2876  * Returns: -ENOMEM on failure
2877  *                0 on no addresses written
2878  *                X on writing X addresses to MTA
2879  **/
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2881 {
2882         struct igb_adapter *adapter = netdev_priv(netdev);
2883         struct e1000_hw *hw = &adapter->hw;
2884         struct dev_mc_list *mc_ptr = netdev->mc_list;
2885         u8  *mta_list;
2886         u32 vmolr = 0;
2887         int i;
2888
2889         if (netdev_mc_empty(netdev)) {
2890                 /* nothing to program, so clear mc list */
2891                 igb_update_mc_addr_list(hw, NULL, 0);
2892                 igb_restore_vf_multicasts(adapter);
2893                 return 0;
2894         }
2895
2896         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2897         if (!mta_list)
2898                 return -ENOMEM;
2899
2900         /* set vmolr receive overflow multicast bit */
2901         vmolr |= E1000_VMOLR_ROMPE;
2902
2903         /* The shared function expects a packed array of only addresses. */
2904         mc_ptr = netdev->mc_list;
2905
2906         for (i = 0; i < netdev_mc_count(netdev); i++) {
2907                 if (!mc_ptr)
2908                         break;
2909                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2910                 mc_ptr = mc_ptr->next;
2911         }
2912         igb_update_mc_addr_list(hw, mta_list, i);
2913         kfree(mta_list);
2914
2915         return netdev_mc_count(netdev);
2916 }
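/*
 * For illustration: with three addresses on the multicast list,
 * mta_list is an 18-byte buffer (3 * ETH_ALEN) holding the addresses
 * back to back at offsets 0, 6 and 12, igb_update_mc_addr_list() is
 * handed an entry count of 3, and 3 is also returned to the caller.
 */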
2917
2918 /**
2919  * igb_write_uc_addr_list - write unicast addresses to RAR table
2920  * @netdev: network interface device structure
2921  *
2922  * Writes unicast address list to the RAR table.
2923  * Returns: -ENOMEM on failure/insufficient address space
2924  *                0 on no addresses written
2925  *                X on writing X addresses to the RAR table
2926  **/
2927 static int igb_write_uc_addr_list(struct net_device *netdev)
2928 {
2929         struct igb_adapter *adapter = netdev_priv(netdev);
2930         struct e1000_hw *hw = &adapter->hw;
2931         unsigned int vfn = adapter->vfs_allocated_count;
2932         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2933         int count = 0;
2934
2935         /* return -ENOMEM indicating insufficient memory for addresses */
2936         if (netdev_uc_count(netdev) > rar_entries)
2937                 return -ENOMEM;
2938
2939         if (!netdev_uc_empty(netdev) && rar_entries) {
2940                 struct netdev_hw_addr *ha;
2941
2942                 netdev_for_each_uc_addr(ha, netdev) {
2943                         if (!rar_entries)
2944                                 break;
2945                         igb_rar_set_qsel(adapter, ha->addr,
2946                                          rar_entries--,
2947                                          vfn);
2948                         count++;
2949                 }
2950         }
2951         /* write the addresses in reverse order to avoid write combining */
2952         for (; rar_entries > 0 ; rar_entries--) {
2953                 wr32(E1000_RAH(rar_entries), 0);
2954                 wr32(E1000_RAL(rar_entries), 0);
2955         }
2956         wrfl();
2957
2958         return count;
2959 }
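/*
 * For illustration, assuming a RAR table of 24 entries with 7 VFs
 * allocated: rar_entries starts at 24 - (7 + 1) = 16, so secondary
 * unicast addresses are written from entry 16 downward, any entries
 * left between the last one written and entry 1 are cleared, and
 * entry 0 is untouched since it holds the PF MAC address.
 */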
2960
2961 /**
2962  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2963  * @netdev: network interface device structure
2964  *
2965  * The set_rx_mode entry point is called whenever the unicast or multicast
2966  * address lists or the network interface flags are updated.  This routine is
2967  * responsible for configuring the hardware for proper unicast, multicast,
2968  * promiscuous mode, and all-multi behavior.
2969  **/
2970 static void igb_set_rx_mode(struct net_device *netdev)
2971 {
2972         struct igb_adapter *adapter = netdev_priv(netdev);
2973         struct e1000_hw *hw = &adapter->hw;
2974         unsigned int vfn = adapter->vfs_allocated_count;
2975         u32 rctl, vmolr = 0;
2976         int count;
2977
2978         /* Check for Promiscuous and All Multicast modes */
2979         rctl = rd32(E1000_RCTL);
2980
2981         /* clear the affected bits */
2982         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2983
2984         if (netdev->flags & IFF_PROMISC) {
2985                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2986                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2987         } else {
2988                 if (netdev->flags & IFF_ALLMULTI) {
2989                         rctl |= E1000_RCTL_MPE;
2990                         vmolr |= E1000_VMOLR_MPME;
2991                 } else {
2992                         /*
2993                          * Write addresses to the MTA; if the attempt fails,
2994                          * then we should just turn on promiscuous mode so
2995                          * that we can at least receive multicast traffic
2996                          */
2997                         count = igb_write_mc_addr_list(netdev);
2998                         if (count < 0) {
2999                                 rctl |= E1000_RCTL_MPE;
3000                                 vmolr |= E1000_VMOLR_MPME;
3001                         } else if (count) {
3002                                 vmolr |= E1000_VMOLR_ROMPE;
3003                         }
3004                 }
3005                 /*
3006                  * Write addresses to available RAR registers; if there is not
3007                  * sufficient space to store all the addresses, then enable
3008                  * unicast promiscuous mode
3009                  */
3010                 count = igb_write_uc_addr_list(netdev);
3011                 if (count < 0) {
3012                         rctl |= E1000_RCTL_UPE;
3013                         vmolr |= E1000_VMOLR_ROPE;
3014                 }
3015                 rctl |= E1000_RCTL_VFE;
3016         }
3017         wr32(E1000_RCTL, rctl);
3018
3019         /*
3020          * In order to support SR-IOV and eventually VMDq it is necessary to set
3021          * the VMOLR to enable the appropriate modes.  Without this workaround
3022          * we will have issues with VLAN tag stripping not being done for frames
3023          * that are only arriving because we are the default pool
3024          */
3025         if (hw->mac.type < e1000_82576)
3026                 return;
3027
3028         vmolr |= rd32(E1000_VMOLR(vfn)) &
3029                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3030         wr32(E1000_VMOLR(vfn), vmolr);
3031         igb_restore_vf_multicasts(adapter);
3032 }
3033
3034 /* Need to wait a few seconds after link up to get diagnostic information from
3035  * the phy */
3036 static void igb_update_phy_info(unsigned long data)
3037 {
3038         struct igb_adapter *adapter = (struct igb_adapter *) data;
3039         igb_get_phy_info(&adapter->hw);
3040 }
3041
3042 /**
3043  * igb_has_link - check shared code for link and determine up/down
3044  * @adapter: pointer to driver private info
3045  **/
3046 bool igb_has_link(struct igb_adapter *adapter)
3047 {
3048         struct e1000_hw *hw = &adapter->hw;
3049         bool link_active = false;
3050         s32 ret_val = 0;
3051
3052         /* get_link_status is set on LSC (link status) interrupt or
3053          * rx sequence error interrupt.  get_link_status will stay
3054          * set until the e1000_check_for_link establishes link
3055          * for copper adapters ONLY
3056          */
3057         switch (hw->phy.media_type) {
3058         case e1000_media_type_copper:
3059                 if (hw->mac.get_link_status) {
3060                         ret_val = hw->mac.ops.check_for_link(hw);
3061                         link_active = !hw->mac.get_link_status;
3062                 } else {
3063                         link_active = true;
3064                 }
3065                 break;
3066         case e1000_media_type_internal_serdes:
3067                 ret_val = hw->mac.ops.check_for_link(hw);
3068                 link_active = hw->mac.serdes_has_link;
3069                 break;
3070         default:
3071         case e1000_media_type_unknown:
3072                 break;
3073         }
3074
3075         return link_active;
3076 }
3077
3078 /**
3079  * igb_watchdog - Timer Call-back
3080  * @data: pointer to adapter cast into an unsigned long
3081  **/
3082 static void igb_watchdog(unsigned long data)
3083 {
3084         struct igb_adapter *adapter = (struct igb_adapter *)data;
3085         /* Do the rest outside of interrupt context */
3086         schedule_work(&adapter->watchdog_task);
3087 }
3088
3089 static void igb_watchdog_task(struct work_struct *work)
3090 {
3091         struct igb_adapter *adapter = container_of(work,
3092                                                    struct igb_adapter,
3093                                                    watchdog_task);
3094         struct e1000_hw *hw = &adapter->hw;
3095         struct net_device *netdev = adapter->netdev;
3096         u32 link;
3097         int i;
3098
3099         link = igb_has_link(adapter);
3100         if (link) {
3101                 if (!netif_carrier_ok(netdev)) {
3102                         u32 ctrl;
3103                         hw->mac.ops.get_speed_and_duplex(hw,
3104                                                          &adapter->link_speed,
3105                                                          &adapter->link_duplex);
3106
3107                         ctrl = rd32(E1000_CTRL);
3108                         /* Link status message must follow this format */
3109                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3110                                  "Flow Control: %s\n",
3111                                netdev->name,
3112                                adapter->link_speed,
3113                                adapter->link_duplex == FULL_DUPLEX ?
3114                                  "Full Duplex" : "Half Duplex",
3115                                ((ctrl & E1000_CTRL_TFCE) &&
3116                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3117                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3118                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3119
3120                         /* tweak tx_queue_len according to speed/duplex and
3121                          * adjust the timeout factor */
3122                         netdev->tx_queue_len = adapter->tx_queue_len;
3123                         adapter->tx_timeout_factor = 1;
3124                         switch (adapter->link_speed) {
3125                         case SPEED_10:
3126                                 netdev->tx_queue_len = 10;
3127                                 adapter->tx_timeout_factor = 14;
3128                                 break;
3129                         case SPEED_100:
3130                                 netdev->tx_queue_len = 100;
3131                                 /* maybe add some timeout factor ? */
3132                                 break;
3133                         }
3134
3135                         netif_carrier_on(netdev);
3136
3137                         igb_ping_all_vfs(adapter);
3138
3139                         /* link state has changed, schedule phy info update */
3140                         if (!test_bit(__IGB_DOWN, &adapter->state))
3141                                 mod_timer(&adapter->phy_info_timer,
3142                                           round_jiffies(jiffies + 2 * HZ));
3143                 }
3144         } else {
3145                 if (netif_carrier_ok(netdev)) {
3146                         adapter->link_speed = 0;
3147                         adapter->link_duplex = 0;
3148                         /* Link status message must follow this format */
3149                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3150                                netdev->name);
3151                         netif_carrier_off(netdev);
3152
3153                         igb_ping_all_vfs(adapter);
3154
3155                         /* link state has changed, schedule phy info update */
3156                         if (!test_bit(__IGB_DOWN, &adapter->state))
3157                                 mod_timer(&adapter->phy_info_timer,
3158                                           round_jiffies(jiffies + 2 * HZ));
3159                 }
3160         }
3161
3162         igb_update_stats(adapter);
3163         igb_update_adaptive(hw);
3164
3165         for (i = 0; i < adapter->num_tx_queues; i++) {
3166                 struct igb_ring *tx_ring = &adapter->tx_ring[i];
3167                 if (!netif_carrier_ok(netdev)) {
3168                         /* We've lost link, so the controller stops DMA,
3169                          * but we've got queued Tx work that's never going
3170                          * to get done, so reset controller to flush Tx.
3171                          * (Do the reset outside of interrupt context). */
3172                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3173                                 adapter->tx_timeout_count++;
3174                                 schedule_work(&adapter->reset_task);
3175                                 /* return immediately since reset is imminent */
3176                                 return;
3177                         }
3178                 }
3179
3180                 /* Force detection of hung controller every watchdog period */
3181                 tx_ring->detect_tx_hung = true;
3182         }
3183
3184         /* Cause software interrupt to ensure rx ring is cleaned */
3185         if (adapter->msix_entries) {
3186                 u32 eics = 0;
3187                 for (i = 0; i < adapter->num_q_vectors; i++) {
3188                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3189                         eics |= q_vector->eims_value;
3190                 }
3191                 wr32(E1000_EICS, eics);
3192         } else {
3193                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3194         }
3195
3196         /* Reset the timer */
3197         if (!test_bit(__IGB_DOWN, &adapter->state))
3198                 mod_timer(&adapter->watchdog_timer,
3199                           round_jiffies(jiffies + 2 * HZ));
3200 }
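/*
 * For illustration of the tx hang check above: on a 256-descriptor
 * ring, igb_desc_unused() returning 200 means 200 + 1 < 256, i.e.
 * descriptors are still outstanding; if the carrier is down at that
 * point, the controller is reset to flush the queued Tx work.
 */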
3201
3202 enum latency_range {
3203         lowest_latency = 0,
3204         low_latency = 1,
3205         bulk_latency = 2,
3206         latency_invalid = 255
3207 };
3208
3209 /**
3210  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3211  *
3212  *      Stores a new ITR value based strictly on packet size.  This
3213  *      algorithm is less sophisticated than that used in igb_update_itr,
3214  *      due to the difficulty of synchronizing statistics across multiple
3215  *      receive rings.  The divisors and thresholds used by this function
3216  *      were determined based on theoretical maximum wire speed and testing
3217  *      data, in order to minimize response time while increasing bulk
3218  *      throughput.
3219  *      This functionality is controlled by the InterruptThrottleRate module
3220  *      parameter (see igb_param.c)
3221  *      NOTE:  This function is called only when operating in a multiqueue
3222  *             receive environment.
3223  * @q_vector: pointer to q_vector
3224  **/
3225 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3226 {
3227         int new_val = q_vector->itr_val;
3228         int avg_wire_size = 0;
3229         struct igb_adapter *adapter = q_vector->adapter;
3230
3231         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3232          * ints/sec - ITR timer value of 120 ticks.
3233          */
3234         if (adapter->link_speed != SPEED_1000) {
3235                 new_val = 976;
3236                 goto set_itr_val;
3237         }
3238
3239         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3240                 struct igb_ring *ring = q_vector->rx_ring;
3241                 avg_wire_size = ring->total_bytes / ring->total_packets;
3242         }
3243
3244         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3245                 struct igb_ring *ring = q_vector->tx_ring;
3246                 avg_wire_size = max_t(u32, avg_wire_size,
3247                                       (ring->total_bytes /
3248                                        ring->total_packets));
3249         }
3250
3251         /* if avg_wire_size isn't set no work was done */
3252         if (!avg_wire_size)
3253                 goto clear_counts;
3254
3255         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3256         avg_wire_size += 24;
3257
3258         /* Don't starve jumbo frames */
3259         avg_wire_size = min(avg_wire_size, 3000);
3260
3261         /* Give a little boost to mid-size frames */
3262         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3263                 new_val = avg_wire_size / 3;
3264         else
3265                 new_val = avg_wire_size / 2;
3266
3267 set_itr_val:
3268         if (new_val != q_vector->itr_val) {
3269                 q_vector->itr_val = new_val;
3270                 q_vector->set_itr = 1;
3271         }
3272 clear_counts:
3273         if (q_vector->rx_ring) {
3274                 q_vector->rx_ring->total_bytes = 0;
3275                 q_vector->rx_ring->total_packets = 0;
3276         }
3277         if (q_vector->tx_ring) {
3278                 q_vector->tx_ring->total_bytes = 0;
3279                 q_vector->tx_ring->total_packets = 0;
3280         }
3281 }
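/*
 * For illustration: a receive ring averaging 1000 bytes per packet
 * over the last interval gives avg_wire_size = 1000 + 24 = 1024,
 * which lands in the mid-size boost range (300 < 1024 < 1200), so
 * the new ITR value becomes 1024 / 3 = 341; an average of 1500 bytes
 * gives (1500 + 24) / 2 = 762 instead.
 */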
3282
3283 /**
3284  * igb_update_itr - update the dynamic ITR value based on statistics
3285  *      Stores a new ITR value based on packets and byte
3286  *      counts during the last interrupt.  The advantage of per interrupt
3287  *      computation is faster updates and more accurate ITR for the current
3288  *      traffic pattern.  Constants in this function were computed
3289  *      based on theoretical maximum wire speed and thresholds were set based
3290  *      on testing data as well as attempting to minimize response time
3291  *      while increasing bulk throughput.
3292  *      This functionality is controlled by the InterruptThrottleRate module
3293  *      parameter (see igb_param.c)
3294  *      NOTE:  These calculations are only valid when operating in a single-
3295  *             queue environment.
3296  * @adapter: pointer to adapter
3297  * @itr_setting: current q_vector->itr_val
3298  * @packets: the number of packets during this measurement interval
3299  * @bytes: the number of bytes during this measurement interval
3300  **/
3301 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3302                                    int packets, int bytes)
3303 {
3304         unsigned int retval = itr_setting;
3305
3306         if (packets == 0)
3307                 goto update_itr_done;
3308
3309         switch (itr_setting) {
3310         case lowest_latency:
3311                 /* handle TSO and jumbo frames */
3312                 if (bytes/packets > 8000)
3313                         retval = bulk_latency;
3314                 else if ((packets < 5) && (bytes > 512))
3315                         retval = low_latency;
3316                 break;
3317         case low_latency:  /* 50 usec aka 20000 ints/s */
3318                 if (bytes > 10000) {
3319                         /* this if handles the TSO accounting */
3320                         if (bytes/packets > 8000) {
3321                                 retval = bulk_latency;
3322                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3323                                 retval = bulk_latency;
3324                         } else if ((packets > 35)) {
3325                                 retval = lowest_latency;
3326                         }
3327                 } else if (bytes/packets > 2000) {
3328                         retval = bulk_latency;
3329                 } else if (packets <= 2 && bytes < 512) {
3330                         retval = lowest_latency;
3331                 }
3332                 break;
3333         case bulk_latency: /* 250 usec aka 4000 ints/s */
3334                 if (bytes > 25000) {
3335                         if (packets > 35)
3336                                 retval = low_latency;
3337                 } else if (bytes < 1500) {
3338                         retval = low_latency;
3339                 }
3340                 break;
3341         }
3342
3343 update_itr_done:
3344         return retval;
3345 }
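/*
 * For illustration: starting from low_latency with 40 packets and
 * 12000 bytes in the interval, bytes > 10000 and bytes/packets == 300,
 * so neither TSO branch applies, but packets > 35 moves the result to
 * lowest_latency; the same 12000 bytes spread over only 4 packets
 * (3000 bytes each) would instead return bulk_latency.
 */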
3346
3347 static void igb_set_itr(struct igb_adapter *adapter)
3348 {
3349         struct igb_q_vector *q_vector = adapter->q_vector[0];
3350         u16 current_itr;
3351         u32 new_itr = q_vector->itr_val;
3352
3353         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3354         if (adapter->link_speed != SPEED_1000) {
3355                 current_itr = 0;
3356                 new_itr = 4000;
3357                 goto set_itr_now;
3358         }
3359
3360         adapter->rx_itr = igb_update_itr(adapter,
3361                                     adapter->rx_itr,
3362                                     adapter->rx_ring->total_packets,
3363                                     adapter->rx_ring->total_bytes);
3364
3365         adapter->tx_itr = igb_update_itr(adapter,
3366                                     adapter->tx_itr,
3367                                     adapter->tx_ring->total_packets,
3368                                     adapter->tx_ring->total_bytes);
3369         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3370
3371         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3372         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3373                 current_itr = low_latency;
3374
3375         switch (current_itr) {
3376         /* counts and packets in update_itr are dependent on these numbers */
3377         case lowest_latency:
3378                 new_itr = 56;  /* aka 70,000 ints/sec */
3379                 break;
3380         case low_latency:
3381                 new_itr = 196; /* aka 20,000 ints/sec */
3382                 break;
3383         case bulk_latency:
3384                 new_itr = 980; /* aka 4,000 ints/sec */
3385                 break;
3386         default:
3387                 break;
3388         }
3389
3390 set_itr_now:
3391         adapter->rx_ring->total_bytes = 0;
3392         adapter->rx_ring->total_packets = 0;
3393         adapter->tx_ring->total_bytes = 0;
3394         adapter->tx_ring->total_packets = 0;
3395
3396         if (new_itr != q_vector->itr_val) {
3397                 /* this attempts to bias the interrupt rate towards Bulk
3398                  * by adding intermediate steps when interrupt rate is
3399                  * increasing */
3400                 new_itr = new_itr > q_vector->itr_val ?
3401                              max((new_itr * q_vector->itr_val) /
3402                                  (new_itr + (q_vector->itr_val >> 2)),
3403                                  new_itr) :
3404                              new_itr;
3405                 /* Don't write the value here; it resets the adapter's
3406                  * internal timer, and causes us to delay far longer than
3407                  * we should between interrupts.  Instead, we write the ITR
3408                  * value at the beginning of the next interrupt so the timing
3409                  * ends up being correct.
3410                  */
3411                 q_vector->itr_val = new_itr;
3412                 q_vector->set_itr = 1;
3413         }
3414
3415         return;
3416 }
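/*
 * For illustration: if the last interval scored the rx ring as
 * bulk_latency and the tx ring as lowest_latency, current_itr is the
 * larger of the two (bulk_latency) and new_itr becomes 980; note the
 * register is not written here, the value is only latched in
 * q_vector->itr_val and written by igb_write_itr() at the start of
 * the next interrupt.
 */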
3417
3418 #define IGB_TX_FLAGS_CSUM               0x00000001
3419 #define IGB_TX_FLAGS_VLAN               0x00000002
3420 #define IGB_TX_FLAGS_TSO                0x00000004
3421 #define IGB_TX_FLAGS_IPV4               0x00000008
3422 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3423 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3424 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3425
3426 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3427                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3428 {
3429         struct e1000_adv_tx_context_desc *context_desc;
3430         unsigned int i;
3431         int err;
3432         struct igb_buffer *buffer_info;
3433         u32 info = 0, tu_cmd = 0;
3434         u32 mss_l4len_idx, l4len;
3435         *hdr_len = 0;
3436
3437         if (skb_header_cloned(skb)) {
3438                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3439                 if (err)
3440                         return err;
3441         }
3442
3443         l4len = tcp_hdrlen(skb);
3444         *hdr_len += l4len;
3445
3446         if (skb->protocol == htons(ETH_P_IP)) {
3447                 struct iphdr *iph = ip_hdr(skb);
3448                 iph->tot_len = 0;
3449                 iph->check = 0;
3450                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3451                                                          iph->daddr, 0,
3452                                                          IPPROTO_TCP,
3453                                                          0);
3454         } else if (skb_is_gso_v6(skb)) {
3455                 ipv6_hdr(skb)->payload_len = 0;
3456                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3457                                                        &ipv6_hdr(skb)->daddr,
3458                                                        0, IPPROTO_TCP, 0);
3459         }
3460
3461         i = tx_ring->next_to_use;
3462
3463         buffer_info = &tx_ring->buffer_info[i];
3464         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3465         /* VLAN MACLEN IPLEN */
3466         if (tx_flags & IGB_TX_FLAGS_VLAN)
3467                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3468         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3469         *hdr_len += skb_network_offset(skb);
3470         info |= skb_network_header_len(skb);
3471         *hdr_len += skb_network_header_len(skb);
3472         context_desc->vlan_macip_lens = cpu_to_le32(info);
3473
3474         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3475         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3476
3477         if (skb->protocol == htons(ETH_P_IP))
3478                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3479         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3480
3481         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3482
3483         /* MSS L4LEN IDX */
3484         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3485         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3486
3487         /* For 82575, context index must be unique per ring. */
3488         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3489                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3490
3491         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3492         context_desc->seqnum_seed = 0;
3493
3494         buffer_info->time_stamp = jiffies;
3495         buffer_info->next_to_watch = i;
3496         buffer_info->dma = 0;
3497         i++;
3498         if (i == tx_ring->count)
3499                 i = 0;
3500
3501         tx_ring->next_to_use = i;
3502
3503         return true;
3504 }
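/*
 * For illustration: for a TSO frame carrying plain TCP over IPv4 on
 * Ethernet with no options, l4len is 20, skb_network_offset() is 14
 * and skb_network_header_len() is 20, so *hdr_len comes back as 54
 * and the payload length placed in olinfo later is skb->len - 54.
 */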
3505
3506 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3507                                    struct sk_buff *skb, u32 tx_flags)
3508 {
3509         struct e1000_adv_tx_context_desc *context_desc;
3510         struct pci_dev *pdev = tx_ring->pdev;
3511         struct igb_buffer *buffer_info;
3512         u32 info = 0, tu_cmd = 0;
3513         unsigned int i;
3514
3515         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3516             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3517                 i = tx_ring->next_to_use;
3518                 buffer_info = &tx_ring->buffer_info[i];
3519                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3520
3521                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3522                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3523
3524                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3525                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3526                         info |= skb_network_header_len(skb);
3527
3528                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3529
3530                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3531
3532                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3533                         __be16 protocol;
3534
3535                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3536                                 const struct vlan_ethhdr *vhdr =
3537                                           (const struct vlan_ethhdr*)skb->data;
3538
3539                                 protocol = vhdr->h_vlan_encapsulated_proto;
3540                         } else {
3541                                 protocol = skb->protocol;
3542                         }
3543
3544                         switch (protocol) {
3545                         case cpu_to_be16(ETH_P_IP):
3546                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3547                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3548                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3549                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3550                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3551                                 break;
3552                         case cpu_to_be16(ETH_P_IPV6):
3553                                 /* XXX what about other V6 headers?? */
3554                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3555                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3556                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3557                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3558                                 break;
3559                         default:
3560                                 if (unlikely(net_ratelimit()))
3561                                         dev_warn(&pdev->dev,
3562                                             "partial checksum but proto=%x!\n",
3563                                             skb->protocol);
3564                                 break;
3565                         }
3566                 }
3567
3568                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3569                 context_desc->seqnum_seed = 0;
3570                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3571                         context_desc->mss_l4len_idx =
3572                                 cpu_to_le32(tx_ring->reg_idx << 4);
3573
3574                 buffer_info->time_stamp = jiffies;
3575                 buffer_info->next_to_watch = i;
3576                 buffer_info->dma = 0;
3577
3578                 i++;
3579                 if (i == tx_ring->count)
3580                         i = 0;
3581                 tx_ring->next_to_use = i;
3582
3583                 return true;
3584         }
3585         return false;
3586 }
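/*
 * For illustration: a VLAN-tagged frame with ip_summed other than
 * CHECKSUM_PARTIAL still enters the branch above, so a context
 * descriptor is built carrying only the VLAN tag and MAC length and
 * the function returns true; the caller still skips IGB_TX_FLAGS_CSUM
 * in that case because it also checks for CHECKSUM_PARTIAL.
 */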
3587
3588 #define IGB_MAX_TXD_PWR 16
3589 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3590
3591 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3592                                  unsigned int first)
3593 {
3594         struct igb_buffer *buffer_info;
3595         struct pci_dev *pdev = tx_ring->pdev;
3596         unsigned int len = skb_headlen(skb);
3597         unsigned int count = 0, i;
3598         unsigned int f;
3599
3600         i = tx_ring->next_to_use;
3601
3602         buffer_info = &tx_ring->buffer_info[i];
3603         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3604         buffer_info->length = len;
3605         /* set time_stamp *before* dma to help avoid a possible race */
3606         buffer_info->time_stamp = jiffies;
3607         buffer_info->next_to_watch = i;
3608         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3609                                           PCI_DMA_TODEVICE);
3610         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3611                 goto dma_error;
3612
3613         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3614                 struct skb_frag_struct *frag;
3615
3616                 count++;
3617                 i++;
3618                 if (i == tx_ring->count)
3619                         i = 0;
3620
3621                 frag = &skb_shinfo(skb)->frags[f];
3622                 len = frag->size;
3623
3624                 buffer_info = &tx_ring->buffer_info[i];
3625                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3626                 buffer_info->length = len;
3627                 buffer_info->time_stamp = jiffies;
3628                 buffer_info->next_to_watch = i;
3629                 buffer_info->mapped_as_page = true;
3630                 buffer_info->dma = pci_map_page(pdev,
3631                                                 frag->page,
3632                                                 frag->page_offset,
3633                                                 len,
3634                                                 PCI_DMA_TODEVICE);
3635                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3636                         goto dma_error;
3637
3638         }
3639
3640         tx_ring->buffer_info[i].skb = skb;
3641         tx_ring->buffer_info[first].next_to_watch = i;
3642
3643         return ++count;
3644
3645 dma_error:
3646         dev_err(&pdev->dev, "TX DMA map failed\n");
3647
3648         /* clear timestamp and dma mappings for failed buffer_info mapping */
3649         buffer_info->dma = 0;
3650         buffer_info->time_stamp = 0;
3651         buffer_info->length = 0;
3652         buffer_info->next_to_watch = 0;
3653         buffer_info->mapped_as_page = false;
3654         count--;
3655
3656         /* clear timestamp and dma mappings for remaining portion of packet */
3657         while (count >= 0) {
3658                 count--;
3659                 i--;
3660                 if (i < 0)
3661                         i += tx_ring->count;
3662                 buffer_info = &tx_ring->buffer_info[i];
3663                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3664         }
3665
3666         return 0;
3667 }
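/*
 * For illustration: an skb with a linear head and three page fragments
 * consumes four data descriptors, so this returns 4 (the final ++count
 * plus one increment per fragment); a DMA mapping failure anywhere in
 * the walk unwinds every mapping made so far and returns 0 so the
 * caller can rewind next_to_use.
 */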
3668
3669 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3670                                     int tx_flags, int count, u32 paylen,
3671                                     u8 hdr_len)
3672 {
3673         union e1000_adv_tx_desc *tx_desc;
3674         struct igb_buffer *buffer_info;
3675         u32 olinfo_status = 0, cmd_type_len;
3676         unsigned int i = tx_ring->next_to_use;
3677
3678         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3679                         E1000_ADVTXD_DCMD_DEXT);
3680
3681         if (tx_flags & IGB_TX_FLAGS_VLAN)
3682                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3683
3684         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3685                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3686
3687         if (tx_flags & IGB_TX_FLAGS_TSO) {
3688                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3689
3690                 /* insert tcp checksum */
3691                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3692
3693                 /* insert ip checksum */
3694                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3695                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3696
3697         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3698                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3699         }
3700
3701         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3702             (tx_flags & (IGB_TX_FLAGS_CSUM |
3703                          IGB_TX_FLAGS_TSO |
3704                          IGB_TX_FLAGS_VLAN)))
3705                 olinfo_status |= tx_ring->reg_idx << 4;
3706
3707         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3708
3709         do {
3710                 buffer_info = &tx_ring->buffer_info[i];
3711                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3712                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3713                 tx_desc->read.cmd_type_len =
3714                         cpu_to_le32(cmd_type_len | buffer_info->length);
3715                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3716                 count--;
3717                 i++;
3718                 if (i == tx_ring->count)
3719                         i = 0;
3720         } while (count > 0);
3721
3722         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3723         /* Force memory writes to complete before letting h/w
3724          * know there are new descriptors to fetch.  (Only
3725          * applicable for weak-ordered memory model archs,
3726          * such as IA-64). */
3727         wmb();
3728
3729         tx_ring->next_to_use = i;
3730         writel(i, tx_ring->tail);
3731         /* we need this if more than one processor can write to our tail
3732          * at a time, it synchronizes IO on IA64/Altix systems */
3733         mmiowb();
3734 }
3735
3736 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3737 {
3738         struct net_device *netdev = tx_ring->netdev;
3739
3740         netif_stop_subqueue(netdev, tx_ring->queue_index);
3741
3742         /* Herbert's original patch had:
3743          *  smp_mb__after_netif_stop_queue();
3744          * but since that doesn't exist yet, just open code it. */
3745         smp_mb();
3746
3747         /* We need to check again in a case another CPU has just
3748          * made room available. */
3749         if (igb_desc_unused(tx_ring) < size)
3750                 return -EBUSY;
3751
3752         /* A reprieve! */
3753         netif_wake_subqueue(netdev, tx_ring->queue_index);
3754         tx_ring->tx_stats.restart_queue++;
3755         return 0;
3756 }
3757
3758 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3759 {
3760         if (igb_desc_unused(tx_ring) >= size)
3761                 return 0;
3762         return __igb_maybe_stop_tx(tx_ring, size);
3763 }
3764
3765 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3766                                     struct igb_ring *tx_ring)
3767 {
3768         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3769         unsigned int first;
3770         unsigned int tx_flags = 0;
3771         u8 hdr_len = 0;
3772         int tso = 0, count;
3773         union skb_shared_tx *shtx = skb_tx(skb);
3774
3775         /* need: 1 descriptor per page,
3776          *       + 2 desc gap to keep tail from touching head,
3777          *       + 1 desc for skb->data,
3778          *       + 1 desc for context descriptor,
3779          * otherwise try next time */
3780         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3781                 /* this is a hard error */
3782                 return NETDEV_TX_BUSY;
3783         }
3784
3785         if (unlikely(shtx->hardware)) {
3786                 shtx->in_progress = 1;
3787                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3788         }
3789
3790         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3791                 tx_flags |= IGB_TX_FLAGS_VLAN;
3792                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3793         }
3794
3795         if (skb->protocol == htons(ETH_P_IP))
3796                 tx_flags |= IGB_TX_FLAGS_IPV4;
3797
3798         first = tx_ring->next_to_use;
3799         if (skb_is_gso(skb)) {
3800                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3801
3802                 if (tso < 0) {
3803                         dev_kfree_skb_any(skb);
3804                         return NETDEV_TX_OK;
3805                 }
3806         }
3807
3808         if (tso)
3809                 tx_flags |= IGB_TX_FLAGS_TSO;
3810         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3811                  (skb->ip_summed == CHECKSUM_PARTIAL))
3812                 tx_flags |= IGB_TX_FLAGS_CSUM;
3813
3814         /*
3815          * count reflects descriptors mapped; if 0 or less, then a mapping
3816          * error has occurred and we need to rewind the descriptor queue
3817          */
3818         count = igb_tx_map_adv(tx_ring, skb, first);
3819         if (!count) {
3820                 dev_kfree_skb_any(skb);
3821                 tx_ring->buffer_info[first].time_stamp = 0;
3822                 tx_ring->next_to_use = first;
3823                 return NETDEV_TX_OK;
3824         }
3825
3826         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3827
3828         /* Make sure there is space in the ring for the next send. */
3829         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3830
3831         return NETDEV_TX_OK;
3832 }
3833
3834 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3835                                       struct net_device *netdev)
3836 {
3837         struct igb_adapter *adapter = netdev_priv(netdev);
3838         struct igb_ring *tx_ring;
3839         int r_idx = 0;
3840
3841         if (test_bit(__IGB_DOWN, &adapter->state)) {
3842                 dev_kfree_skb_any(skb);
3843                 return NETDEV_TX_OK;
3844         }
3845
3846         if (skb->len <= 0) {
3847                 dev_kfree_skb_any(skb);
3848                 return NETDEV_TX_OK;
3849         }
3850
3851         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3852         tx_ring = adapter->multi_tx_table[r_idx];
3853
3854         /* This goes back to the question of how to logically map a tx queue
3855          * to a flow.  Right now, performance is impacted slightly negatively
3856          * if using multiple tx queues.  If the stack breaks away from a
3857          * single qdisc implementation, we can look at this again. */
3858         return igb_xmit_frame_ring_adv(skb, tx_ring);
3859 }
3860
3861 /**
3862  * igb_tx_timeout - Respond to a Tx Hang
3863  * @netdev: network interface device structure
3864  **/
3865 static void igb_tx_timeout(struct net_device *netdev)
3866 {
3867         struct igb_adapter *adapter = netdev_priv(netdev);
3868         struct e1000_hw *hw = &adapter->hw;
3869
3870         /* Do the reset outside of interrupt context */
3871         adapter->tx_timeout_count++;
3872
3873         if (hw->mac.type == e1000_82580)
3874                 hw->dev_spec._82575.global_device_reset = true;
3875
3876         schedule_work(&adapter->reset_task);
3877         wr32(E1000_EICS,
3878              (adapter->eims_enable_mask & ~adapter->eims_other));
3879 }
3880
3881 static void igb_reset_task(struct work_struct *work)
3882 {
3883         struct igb_adapter *adapter;
3884         adapter = container_of(work, struct igb_adapter, reset_task);
3885
3886         igb_reinit_locked(adapter);
3887 }
3888
3889 /**
3890  * igb_get_stats - Get System Network Statistics
3891  * @netdev: network interface device structure
3892  *
3893  * Returns the address of the device statistics structure.
3894  * The statistics are actually updated from the timer callback.
3895  **/
3896 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3897 {
3898         /* only return the current stats */
3899         return &netdev->stats;
3900 }
3901
3902 /**
3903  * igb_change_mtu - Change the Maximum Transfer Unit
3904  * @netdev: network interface device structure
3905  * @new_mtu: new value for maximum frame size
3906  *
3907  * Returns 0 on success, negative on failure
3908  **/
3909 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3910 {
3911         struct igb_adapter *adapter = netdev_priv(netdev);
3912         struct pci_dev *pdev = adapter->pdev;
3913         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3914         u32 rx_buffer_len, i;
3915
3916         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3917                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3918                 return -EINVAL;
3919         }
3920
3921         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3922                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3923                 return -EINVAL;
3924         }
3925
3926         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3927                 msleep(1);
3928
3929         /* igb_down has a dependency on max_frame_size */
3930         adapter->max_frame_size = max_frame;
3931
3932         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3933          * means we reserve 2 more; this pushes us to allocate from the next
3934          * larger slab size.
3935          * i.e. RXBUFFER_2048 --> size-4096 slab
3936          */
3937
3938         if (max_frame <= IGB_RXBUFFER_1024)
3939                 rx_buffer_len = IGB_RXBUFFER_1024;
3940         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3941                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3942         else
3943                 rx_buffer_len = IGB_RXBUFFER_128;
3944
3945         if (netif_running(netdev))
3946                 igb_down(adapter);
3947
3948         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3949                  netdev->mtu, new_mtu);
3950         netdev->mtu = new_mtu;
3951
3952         for (i = 0; i < adapter->num_rx_queues; i++)
3953                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3954
3955         if (netif_running(netdev))
3956                 igb_up(adapter);
3957         else
3958                 igb_reset(adapter);
3959
3960         clear_bit(__IGB_RESETTING, &adapter->state);
3961
3962         return 0;
3963 }
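/*
 * For illustration, assuming the usual constants (IGB_RXBUFFER_1024 of
 * 1024 bytes, MAXIMUM_ETHERNET_VLAN_SIZE of 1522): an MTU of 1500 gives
 * max_frame = 1500 + 14 + 4 = 1518, larger than 1024 but still within a
 * VLAN-tagged standard frame, so every rx ring gets rx_buffer_len of
 * 1522; a jumbo MTU falls through to IGB_RXBUFFER_128, with the bulk of
 * such frames landing in the page-based receive buffers.
 */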
3964
3965 /**
3966  * igb_update_stats - Update the board statistics counters
3967  * @adapter: board private structure
3968  **/
3969
3970 void igb_update_stats(struct igb_adapter *adapter)
3971 {
3972         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3973         struct e1000_hw *hw = &adapter->hw;
3974         struct pci_dev *pdev = adapter->pdev;
3975         u32 rnbc;
3976         u16 phy_tmp;
3977         int i;
3978         u64 bytes, packets;
3979
3980 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3981
3982         /*
3983          * Prevent stats update while adapter is being reset, or if the pci
3984          * connection is down.
3985          */
3986         if (adapter->link_speed == 0)
3987                 return;
3988         if (pci_channel_offline(pdev))
3989                 return;
3990
3991         bytes = 0;
3992         packets = 0;
3993         for (i = 0; i < adapter->num_rx_queues; i++) {
3994                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3995                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3996                 net_stats->rx_fifo_errors += rqdpc_tmp;
3997                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3998                 packets += adapter->rx_ring[i].rx_stats.packets;
3999         }
4000
4001         net_stats->rx_bytes = bytes;
4002         net_stats->rx_packets = packets;
4003
4004         bytes = 0;
4005         packets = 0;
4006         for (i = 0; i < adapter->num_tx_queues; i++) {
4007                 bytes += adapter->tx_ring[i].tx_stats.bytes;
4008                 packets += adapter->tx_ring[i].tx_stats.packets;
4009         }
4010         net_stats->tx_bytes = bytes;
4011         net_stats->tx_packets = packets;
4012
4013         /* read stats registers */
4014         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4015         adapter->stats.gprc += rd32(E1000_GPRC);
4016         adapter->stats.gorc += rd32(E1000_GORCL);
4017         rd32(E1000_GORCH); /* clear GORCL */
4018         adapter->stats.bprc += rd32(E1000_BPRC);
4019         adapter->stats.mprc += rd32(E1000_MPRC);
4020         adapter->stats.roc += rd32(E1000_ROC);
4021
4022         adapter->stats.prc64 += rd32(E1000_PRC64);
4023         adapter->stats.prc127 += rd32(E1000_PRC127);
4024         adapter->stats.prc255 += rd32(E1000_PRC255);
4025         adapter->stats.prc511 += rd32(E1000_PRC511);
4026         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4027         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4028         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4029         adapter->stats.sec += rd32(E1000_SEC);
4030
4031         adapter->stats.mpc += rd32(E1000_MPC);
4032         adapter->stats.scc += rd32(E1000_SCC);
4033         adapter->stats.ecol += rd32(E1000_ECOL);
4034         adapter->stats.mcc += rd32(E1000_MCC);
4035         adapter->stats.latecol += rd32(E1000_LATECOL);
4036         adapter->stats.dc += rd32(E1000_DC);
4037         adapter->stats.rlec += rd32(E1000_RLEC);
4038         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4039         adapter->stats.xontxc += rd32(E1000_XONTXC);
4040         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4041         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4042         adapter->stats.fcruc += rd32(E1000_FCRUC);
4043         adapter->stats.gptc += rd32(E1000_GPTC);
4044         adapter->stats.gotc += rd32(E1000_GOTCL);
4045         rd32(E1000_GOTCH); /* clear GOTCL */
4046         rnbc = rd32(E1000_RNBC);
4047         adapter->stats.rnbc += rnbc;
4048         net_stats->rx_fifo_errors += rnbc;
4049         adapter->stats.ruc += rd32(E1000_RUC);
4050         adapter->stats.rfc += rd32(E1000_RFC);
4051         adapter->stats.rjc += rd32(E1000_RJC);
4052         adapter->stats.tor += rd32(E1000_TORH);
4053         adapter->stats.tot += rd32(E1000_TOTH);
4054         adapter->stats.tpr += rd32(E1000_TPR);
4055
4056         adapter->stats.ptc64 += rd32(E1000_PTC64);
4057         adapter->stats.ptc127 += rd32(E1000_PTC127);
4058         adapter->stats.ptc255 += rd32(E1000_PTC255);
4059         adapter->stats.ptc511 += rd32(E1000_PTC511);
4060         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4061         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4062
4063         adapter->stats.mptc += rd32(E1000_MPTC);
4064         adapter->stats.bptc += rd32(E1000_BPTC);
4065
4066         /* used for adaptive IFS */
4067         hw->mac.tx_packet_delta = rd32(E1000_TPT);
4068         adapter->stats.tpt += hw->mac.tx_packet_delta;
4069         hw->mac.collision_delta = rd32(E1000_COLC);
4070         adapter->stats.colc += hw->mac.collision_delta;
4071
4072         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4073         adapter->stats.rxerrc += rd32(E1000_RXERRC);
4074         adapter->stats.tncrs += rd32(E1000_TNCRS);
4075         adapter->stats.tsctc += rd32(E1000_TSCTC);
4076         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4077
4078         adapter->stats.iac += rd32(E1000_IAC);
4079         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4080         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4081         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4082         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4083         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4084         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4085         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4086         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4087
4088         /* Fill out the OS statistics structure */
4089         net_stats->multicast = adapter->stats.mprc;
4090         net_stats->collisions = adapter->stats.colc;
4091
4092         /* Rx Errors */
4093
4094         /* RLEC on some newer hardware can be incorrect so build
4095          * our own version based on RUC and ROC */
4096         net_stats->rx_errors = adapter->stats.rxerrc +
4097                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4098                 adapter->stats.ruc + adapter->stats.roc +
4099                 adapter->stats.cexterr;
4100         net_stats->rx_length_errors = adapter->stats.ruc +
4101                                       adapter->stats.roc;
4102         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4103         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4104         net_stats->rx_missed_errors = adapter->stats.mpc;
4105
4106         /* Tx Errors */
4107         net_stats->tx_errors = adapter->stats.ecol +
4108                                adapter->stats.latecol;
4109         net_stats->tx_aborted_errors = adapter->stats.ecol;
4110         net_stats->tx_window_errors = adapter->stats.latecol;
4111         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4112
4113         /* Tx Dropped needs to be maintained elsewhere */
4114
4115         /* Phy Stats */
4116         if (hw->phy.media_type == e1000_media_type_copper) {
4117                 if ((adapter->link_speed == SPEED_1000) &&
4118                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4119                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4120                         adapter->phy_stats.idle_errors += phy_tmp;
4121                 }
4122         }
4123
4124         /* Management Stats */
4125         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4126         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4127         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4128 }
4129
4130 static irqreturn_t igb_msix_other(int irq, void *data)
4131 {
4132         struct igb_adapter *adapter = data;
4133         struct e1000_hw *hw = &adapter->hw;
4134         u32 icr = rd32(E1000_ICR);
4135         /* reading ICR causes bit 31 of EICR to be cleared */
4136
4137         if (icr & E1000_ICR_DRSTA)
4138                 schedule_work(&adapter->reset_task);
4139
4140         if (icr & E1000_ICR_DOUTSYNC) {
4141                 /* HW is reporting DMA is out of sync */
4142                 adapter->stats.doosync++;
4143         }
4144
4145         /* Check for a mailbox event */
4146         if (icr & E1000_ICR_VMMB)
4147                 igb_msg_task(adapter);
4148
4149         if (icr & E1000_ICR_LSC) {
4150                 hw->mac.get_link_status = 1;
4151                 /* guard against interrupt when we're going down */
4152                 if (!test_bit(__IGB_DOWN, &adapter->state))
4153                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4154         }
4155
4156         if (adapter->vfs_allocated_count)
4157                 wr32(E1000_IMS, E1000_IMS_LSC |
4158                                 E1000_IMS_VMMB |
4159                                 E1000_IMS_DOUTSYNC);
4160         else
4161                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4162         wr32(E1000_EIMS, adapter->eims_other);
4163
4164         return IRQ_HANDLED;
4165 }
4166
4167 static void igb_write_itr(struct igb_q_vector *q_vector)
4168 {
4169         struct igb_adapter *adapter = q_vector->adapter;
4170         u32 itr_val = q_vector->itr_val & 0x7FFC;
4171
4172         if (!q_vector->set_itr)
4173                 return;
4174
4175         if (!itr_val)
4176                 itr_val = 0x4;
4177
4178         if (adapter->hw.mac.type == e1000_82575)
4179                 itr_val |= itr_val << 16;
4180         else
4181                 itr_val |= 0x8000000;
4182
4183         writel(itr_val, q_vector->itr_register);
4184         q_vector->set_itr = 0;
4185 }
4186
4187 static irqreturn_t igb_msix_ring(int irq, void *data)
4188 {
4189         struct igb_q_vector *q_vector = data;
4190
4191         /* Write the ITR value calculated from the previous interrupt. */
4192         igb_write_itr(q_vector);
4193
4194         napi_schedule(&q_vector->napi);
4195
4196         return IRQ_HANDLED;
4197 }
4198
4199 #ifdef CONFIG_IGB_DCA
4200 static void igb_update_dca(struct igb_q_vector *q_vector)
4201 {
4202         struct igb_adapter *adapter = q_vector->adapter;
4203         struct e1000_hw *hw = &adapter->hw;
4204         int cpu = get_cpu();
4205
4206         if (q_vector->cpu == cpu)
4207                 goto out_no_update;
4208
4209         if (q_vector->tx_ring) {
4210                 int q = q_vector->tx_ring->reg_idx;
4211                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4212                 if (hw->mac.type == e1000_82575) {
4213                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4214                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4215                 } else {
4216                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4217                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4218                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4219                 }
4220                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4221                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4222         }
4223         if (q_vector->rx_ring) {
4224                 int q = q_vector->rx_ring->reg_idx;
4225                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4226                 if (hw->mac.type == e1000_82575) {
4227                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4228                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4229                 } else {
4230                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4231                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4232                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4233                 }
4234                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4235                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4236                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4237                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4238         }
4239         q_vector->cpu = cpu;
4240 out_no_update:
4241         put_cpu();
4242 }
4243
4244 static void igb_setup_dca(struct igb_adapter *adapter)
4245 {
4246         struct e1000_hw *hw = &adapter->hw;
4247         int i;
4248
4249         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4250                 return;
4251
4252         /* Always use CB2 mode, difference is masked in the CB driver. */
4253         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4254
4255         for (i = 0; i < adapter->num_q_vectors; i++) {
4256                 adapter->q_vector[i]->cpu = -1;
4257                 igb_update_dca(adapter->q_vector[i]);
4258         }
4259 }
4260
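/* __igb_notify_dca - handle a DCA provider add/remove event for one netdev */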
4261 static int __igb_notify_dca(struct device *dev, void *data)
4262 {
4263         struct net_device *netdev = dev_get_drvdata(dev);
4264         struct igb_adapter *adapter = netdev_priv(netdev);
4265         struct pci_dev *pdev = adapter->pdev;
4266         struct e1000_hw *hw = &adapter->hw;
4267         unsigned long event = *(unsigned long *)data;
4268
4269         switch (event) {
4270         case DCA_PROVIDER_ADD:
4271                 /* if already enabled, don't do it again */
4272                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4273                         break;
4274                 if (dca_add_requester(dev) == 0) {
4275                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4276                         dev_info(&pdev->dev, "DCA enabled\n");
4277                         igb_setup_dca(adapter);
4278                         break;
4279                 }
4280                 /* Fall Through since DCA is disabled. */
4281         case DCA_PROVIDER_REMOVE:
4282                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4283                         /* without this a class_device is left
4284                          * hanging around in the sysfs model */
4285                         dca_remove_requester(dev);
4286                         dev_info(&pdev->dev, "DCA disabled\n");
4287                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4288                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4289                 }
4290                 break;
4291         }
4292
4293         return 0;
4294 }
4295
4296 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4297                           void *p)
4298 {
4299         int ret_val;
4300
4301         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4302                                          __igb_notify_dca);
4303
4304         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4305 }
4306 #endif /* CONFIG_IGB_DCA */
4307
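/* igb_ping_all_vfs - send a PF control message to every allocated VF mailbox */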
4308 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4309 {
4310         struct e1000_hw *hw = &adapter->hw;
4311         u32 ping;
4312         int i;
4313
4314         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4315                 ping = E1000_PF_CONTROL_MSG;
4316                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4317                         ping |= E1000_VT_MSGTYPE_CTS;
4318                 igb_write_mbx(hw, &ping, 1, i);
4319         }
4320 }
4321
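/*
 * igb_set_vf_promisc - handle a VF request to change its promiscuous state
 *
 * Only multicast promiscuous mode is granted here.  When it is cleared the
 * VF's saved multicast hashes are written back to the MTA (or MPME is kept
 * when more than 30 hashes are in use); any unrecognised flag bits cause
 * the request to be rejected.
 */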
4322 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4323 {
4324         struct e1000_hw *hw = &adapter->hw;
4325         u32 vmolr = rd32(E1000_VMOLR(vf));
4326         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4327
4328         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4329                             IGB_VF_FLAG_MULTI_PROMISC);
4330         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4331
4332         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4333                 vmolr |= E1000_VMOLR_MPME;
4334                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4335         } else {
4336                 /*
4337                  * if we have hashes and we are clearing a multicast promisc
4338                  * flag we need to write the hashes to the MTA as this step
4339                  * was previously skipped
4340                  */
4341                 if (vf_data->num_vf_mc_hashes > 30) {
4342                         vmolr |= E1000_VMOLR_MPME;
4343                 } else if (vf_data->num_vf_mc_hashes) {
4344                         int j;
4345                         vmolr |= E1000_VMOLR_ROMPE;
4346                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4347                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4348                 }
4349         }
4350
4351         wr32(E1000_VMOLR(vf), vmolr);
4352
4353         /* there are flags left unprocessed, likely not supported */
4354         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4355                 return -EINVAL;
4356
4357         return 0;
4358
4359 }
4360
4361 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4362                                   u32 *msgbuf, u32 vf)
4363 {
4364         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4365         u16 *hash_list = (u16 *)&msgbuf[1];
4366         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4367         int i;
4368
4369         /* salt away the number of multicast addresses assigned
4370          * to this VF for later use to restore when the PF multicast
4371          * list changes
4372          */
4373         vf_data->num_vf_mc_hashes = n;
4374
4375         /* only up to 30 hash values supported */
4376         if (n > 30)
4377                 n = 30;
4378
4379         /* store the hashes for later use */
4380         for (i = 0; i < n; i++)
4381                 vf_data->vf_mc_hashes[i] = hash_list[i];
4382
4383         /* Flush and reset the mta with the new values */
4384         igb_set_rx_mode(adapter->netdev);
4385
4386         return 0;
4387 }
4388
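/* igb_restore_vf_multicasts - re-apply each VF's stored multicast filtering
 * state (MTA hashes or multicast promiscuous) */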
4389 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4390 {
4391         struct e1000_hw *hw = &adapter->hw;
4392         struct vf_data_storage *vf_data;
4393         int i, j;
4394
4395         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4396                 u32 vmolr = rd32(E1000_VMOLR(i));
4397                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4398
4399                 vf_data = &adapter->vf_data[i];
4400
4401                 if ((vf_data->num_vf_mc_hashes > 30) ||
4402                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4403                         vmolr |= E1000_VMOLR_MPME;
4404                 } else if (vf_data->num_vf_mc_hashes) {
4405                         vmolr |= E1000_VMOLR_ROMPE;
4406                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4407                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4408                 }
4409                 wr32(E1000_VMOLR(i), vmolr);
4410         }
4411 }
4412
4413 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4414 {
4415         struct e1000_hw *hw = &adapter->hw;
4416         u32 pool_mask, reg, vid;
4417         int i;
4418
4419         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4420
4421         /* Find the vlan filter for this id */
4422         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4423                 reg = rd32(E1000_VLVF(i));
4424
4425                 /* remove the vf from the pool */
4426                 reg &= ~pool_mask;
4427
4428                 /* if pool is empty then remove entry from vfta */
4429                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4430                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4431                         vid = reg & E1000_VLVF_VLANID_MASK;
4432                         reg = 0;
4433                         igb_vfta_set(hw, vid, false);
4434                 }
4435
4436                 wr32(E1000_VLVF(i), reg);
4437         }
4438
4439         adapter->vf_data[vf].vlans_enabled = 0;
4440 }
4441
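/*
 * igb_vlvf_set - add or remove a pool from a shared VLAN filter (VLVF) entry
 *
 * When adding, an existing entry for the VLAN ID is reused or a free one is
 * allocated, and the VFTA is updated the first time the VLAN is enabled.
 * When removing, the entry (and the VFTA bit) is released once no pool
 * references it.  For VFs the VMOLR.RLPML limit is grown by 4 bytes when
 * their first VLAN is enabled and shrunk again when the last one is removed,
 * presumably to leave room for the VLAN tag.
 */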
4442 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4443 {
4444         struct e1000_hw *hw = &adapter->hw;
4445         u32 reg, i;
4446
4447         /* The vlvf table only exists on 82576 hardware and newer */
4448         if (hw->mac.type < e1000_82576)
4449                 return -1;
4450
4451         /* we only need to do this if VMDq is enabled */
4452         if (!adapter->vfs_allocated_count)
4453                 return -1;
4454
4455         /* Find the vlan filter for this id */
4456         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4457                 reg = rd32(E1000_VLVF(i));
4458                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4459                     vid == (reg & E1000_VLVF_VLANID_MASK))
4460                         break;
4461         }
4462
4463         if (add) {
4464                 if (i == E1000_VLVF_ARRAY_SIZE) {
4465                         /* Did not find a matching VLAN ID entry that was
4466                          * enabled.  Search for a free filter entry, i.e.
4467                          * one without the enable bit set
4468                          */
4469                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4470                                 reg = rd32(E1000_VLVF(i));
4471                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4472                                         break;
4473                         }
4474                 }
4475                 if (i < E1000_VLVF_ARRAY_SIZE) {
4476                         /* Found an enabled/available entry */
4477                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4478
4479                         /* if !enabled we need to set this up in vfta */
4480                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4481                                 /* add VID to filter table */
4482                                 igb_vfta_set(hw, vid, true);
4483                                 reg |= E1000_VLVF_VLANID_ENABLE;
4484                         }
4485                         reg &= ~E1000_VLVF_VLANID_MASK;
4486                         reg |= vid;
4487                         wr32(E1000_VLVF(i), reg);
4488
4489                         /* do not modify RLPML for PF devices */
4490                         if (vf >= adapter->vfs_allocated_count)
4491                                 return 0;
4492
4493                         if (!adapter->vf_data[vf].vlans_enabled) {
4494                                 u32 size;
4495                                 reg = rd32(E1000_VMOLR(vf));
4496                                 size = reg & E1000_VMOLR_RLPML_MASK;
4497                                 size += 4;
4498                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4499                                 reg |= size;
4500                                 wr32(E1000_VMOLR(vf), reg);
4501                         }
4502
4503                         adapter->vf_data[vf].vlans_enabled++;
4504                         return 0;
4505                 }
4506         } else {
4507                 if (i < E1000_VLVF_ARRAY_SIZE) {
4508                         /* remove vf from the pool */
4509                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4510                         /* if pool is empty then remove entry from vfta */
4511                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4512                                 reg = 0;
4513                                 igb_vfta_set(hw, vid, false);
4514                         }
4515                         wr32(E1000_VLVF(i), reg);
4516
4517                         /* do not modify RLPML for PF devices */
4518                         if (vf >= adapter->vfs_allocated_count)
4519                                 return 0;
4520
4521                         adapter->vf_data[vf].vlans_enabled--;
4522                         if (!adapter->vf_data[vf].vlans_enabled) {
4523                                 u32 size;
4524                                 reg = rd32(E1000_VMOLR(vf));
4525                                 size = reg & E1000_VMOLR_RLPML_MASK;
4526                                 size -= 4;
4527                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4528                                 reg |= size;
4529                                 wr32(E1000_VMOLR(vf), reg);
4530                         }
4531                 }
4532         }
4533         return 0;
4534 }
4535
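/* igb_set_vmvir - program (or clear) the default port VLAN inserted on a VF's traffic */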
4536 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4537 {
4538         struct e1000_hw *hw = &adapter->hw;
4539
4540         if (vid)
4541                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4542         else
4543                 wr32(E1000_VMVIR(vf), 0);
4544 }
4545
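/*
 * igb_ndo_set_vf_vlan - ndo_set_vf_vlan handler
 *
 * Sets or clears an administratively assigned VLAN and priority for a VF:
 * the VLAN is added to the shared filters, inserted by default via VMVIR,
 * and remembered as pf_vlan/pf_qos so VF-requested VLAN changes can be
 * refused while it is in force.
 */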
4546 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4547                                int vf, u16 vlan, u8 qos)
4548 {
4549         int err = 0;
4550         struct igb_adapter *adapter = netdev_priv(netdev);
4551
4552         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4553                 return -EINVAL;
4554         if (vlan || qos) {
4555                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4556                 if (err)
4557                         goto out;
4558                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4559                 igb_set_vmolr(adapter, vf, !vlan);
4560                 adapter->vf_data[vf].pf_vlan = vlan;
4561                 adapter->vf_data[vf].pf_qos = qos;
4562                 dev_info(&adapter->pdev->dev,
4563                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4564                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4565                         dev_warn(&adapter->pdev->dev,
4566                                  "The VF VLAN has been set,"
4567                                  " but the PF device is not up.\n");
4568                         dev_warn(&adapter->pdev->dev,
4569                                  "Bring the PF device up before"
4570                                  " attempting to use the VF device.\n");
4571                 }
4572         } else {
4573                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4574                                    false, vf);
4575                 igb_set_vmvir(adapter, vlan, vf);
4576                 igb_set_vmolr(adapter, vf, true);
4577                 adapter->vf_data[vf].pf_vlan = 0;
4578                 adapter->vf_data[vf].pf_qos = 0;
4579         }
4580 out:
4581         return err;
4582 }
4583
4584 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4585 {
4586         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4587         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4588
4589         return igb_vlvf_set(adapter, vid, add, vf);
4590 }
4591
4592 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4593 {
4594         /* clear flags */
4595         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4596         adapter->vf_data[vf].last_nack = jiffies;
4597
4598         /* reset offloads to defaults */
4599         igb_set_vmolr(adapter, vf, true);
4600
4601         /* reset vlans for device */
4602         igb_clear_vf_vfta(adapter, vf);
4603         if (adapter->vf_data[vf].pf_vlan)
4604                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4605                                     adapter->vf_data[vf].pf_vlan,
4606                                     adapter->vf_data[vf].pf_qos);
4607         else
4608                 igb_clear_vf_vfta(adapter, vf);
4609
4610         /* reset multicast table array for vf */
4611         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4612
4613         /* Flush and reset the mta with the new values */
4614         igb_set_rx_mode(adapter->netdev);
4615 }
4616
4617 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4618 {
4619         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4620
4621         /* generate a new mac address as we were hotplug removed/added */
4622         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4623                 random_ether_addr(vf_mac);
4624
4625         /* process remaining reset events */
4626         igb_vf_reset(adapter, vf);
4627 }
4628
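/*
 * igb_vf_reset_msg - finish a reset requested by a VF
 *
 * Reapplies the common per-VF reset work, reprograms the VF's MAC filter
 * (VF addresses occupy the top of the RAR table), re-enables its Tx/Rx
 * queues in VFTE/VFRE, and ACKs the reset with the MAC address attached.
 */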
4629 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4630 {
4631         struct e1000_hw *hw = &adapter->hw;
4632         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4633         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4634         u32 reg, msgbuf[3];
4635         u8 *addr = (u8 *)(&msgbuf[1]);
4636
4637         /* process all the same items cleared in a function level reset */
4638         igb_vf_reset(adapter, vf);
4639
4640         /* set vf mac address */
4641         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4642
4643         /* enable transmit and receive for vf */
4644         reg = rd32(E1000_VFTE);
4645         wr32(E1000_VFTE, reg | (1 << vf));
4646         reg = rd32(E1000_VFRE);
4647         wr32(E1000_VFRE, reg | (1 << vf));
4648
4649         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4650
4651         /* reply to reset with ack and vf mac address */
4652         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4653         memcpy(addr, vf_mac, 6);
4654         igb_write_mbx(hw, msgbuf, 3, vf);
4655 }
4656
4657 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4658 {
4659         unsigned char *addr = (unsigned char *)&msg[1];
4660         int err = -1;
4661
4662         if (is_valid_ether_addr(addr))
4663                 err = igb_set_vf_mac(adapter, vf, addr);
4664
4665         return err;
4666 }
4667
4668 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4669 {
4670         struct e1000_hw *hw = &adapter->hw;
4671         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4672         u32 msg = E1000_VT_MSGTYPE_NACK;
4673
4674         /* if device isn't clear to send it shouldn't be reading either */
4675         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4676             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4677                 igb_write_mbx(hw, &msg, 1, vf);
4678                 vf_data->last_nack = jiffies;
4679         }
4680 }
4681
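/*
 * igb_rcv_msg_from_vf - read and dispatch one mailbox request from a VF
 *
 * A VF that has not completed the reset handshake (CTS not set) is NACKed
 * for anything other than E1000_VF_RESET; otherwise the request is handled
 * by message type and the result is returned to the VF as an ACK or NACK.
 */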
4682 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4683 {
4684         struct pci_dev *pdev = adapter->pdev;
4685         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4686         struct e1000_hw *hw = &adapter->hw;
4687         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4688         s32 retval;
4689
4690         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4691
4692         if (retval) {
4693                 /* if receive failed revoke VF CTS stats and restart init */
4694                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4695                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4696                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4697                         return;
4698                 goto out;
4699         }
4700
4701         /* this is a message we already processed, do nothing */
4702         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4703                 return;
4704
4705         /*
4706          * until the vf completes a reset it should not be
4707          * allowed to start any configuration.
4708          */
4709
4710         if (msgbuf[0] == E1000_VF_RESET) {
4711                 igb_vf_reset_msg(adapter, vf);
4712                 return;
4713         }
4714
4715         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4716                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4717                         return;
4718                 retval = -1;
4719                 goto out;
4720         }
4721
4722         switch ((msgbuf[0] & 0xFFFF)) {
4723         case E1000_VF_SET_MAC_ADDR:
4724                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4725                 break;
4726         case E1000_VF_SET_PROMISC:
4727                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4728                 break;
4729         case E1000_VF_SET_MULTICAST:
4730                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4731                 break;
4732         case E1000_VF_SET_LPE:
4733                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4734                 break;
4735         case E1000_VF_SET_VLAN:
4736                 if (adapter->vf_data[vf].pf_vlan)
4737                         retval = -1;
4738                 else
4739                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4740                 break;
4741         default:
4742                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4743                 retval = -1;
4744                 break;
4745         }
4746
4747         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4748 out:
4749         /* notify the VF of the results of what it sent us */
4750         if (retval)
4751                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4752         else
4753                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4754
4755         igb_write_mbx(hw, msgbuf, 1, vf);
4756 }
4757
4758 static void igb_msg_task(struct igb_adapter *adapter)
4759 {
4760         struct e1000_hw *hw = &adapter->hw;
4761         u32 vf;
4762
4763         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4764                 /* process any reset requests */
4765                 if (!igb_check_for_rst(hw, vf))
4766                         igb_vf_reset_event(adapter, vf);
4767
4768                 /* process any messages pending */
4769                 if (!igb_check_for_msg(hw, vf))
4770                         igb_rcv_msg_from_vf(adapter, vf);
4771
4772                 /* process any acks */
4773                 if (!igb_check_for_ack(hw, vf))
4774                         igb_rcv_ack_from_vf(adapter, vf);
4775         }
4776 }
4777
4778 /**
4779  *  igb_set_uta - Set unicast filter table address
4780  *  @adapter: board private structure
4781  *
4782  *  The unicast table address is a register array of 32-bit registers.
4783  *  The table is meant to be used in a way similar to how the MTA is used,
4784  *  however due to certain limitations in the hardware it is necessary to
4785  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4786  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
4787  **/
4788 static void igb_set_uta(struct igb_adapter *adapter)
4789 {
4790         struct e1000_hw *hw = &adapter->hw;
4791         int i;
4792
4793         /* The UTA table only exists on 82576 hardware and newer */
4794         if (hw->mac.type < e1000_82576)
4795                 return;
4796
4797         /* we only need to do this if VMDq is enabled */
4798         if (!adapter->vfs_allocated_count)
4799                 return;
4800
4801         for (i = 0; i < hw->mac.uta_reg_count; i++)
4802                 array_wr32(E1000_UTA, i, ~0);
4803 }
4804
4805 /**
4806  * igb_intr_msi - Interrupt Handler
4807  * @irq: interrupt number
4808  * @data: pointer to a network interface device structure
4809  **/
4810 static irqreturn_t igb_intr_msi(int irq, void *data)
4811 {
4812         struct igb_adapter *adapter = data;
4813         struct igb_q_vector *q_vector = adapter->q_vector[0];
4814         struct e1000_hw *hw = &adapter->hw;
4815         /* read ICR disables interrupts using IAM */
4816         u32 icr = rd32(E1000_ICR);
4817
4818         igb_write_itr(q_vector);
4819
4820         if (icr & E1000_ICR_DRSTA)
4821                 schedule_work(&adapter->reset_task);
4822
4823         if (icr & E1000_ICR_DOUTSYNC) {
4824                 /* HW is reporting DMA is out of sync */
4825                 adapter->stats.doosync++;
4826         }
4827
4828         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4829                 hw->mac.get_link_status = 1;
4830                 if (!test_bit(__IGB_DOWN, &adapter->state))
4831                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4832         }
4833
4834         napi_schedule(&q_vector->napi);
4835
4836         return IRQ_HANDLED;
4837 }
4838
4839 /**
4840  * igb_intr - Legacy Interrupt Handler
4841  * @irq: interrupt number
4842  * @data: pointer to a network interface device structure
4843  **/
4844 static irqreturn_t igb_intr(int irq, void *data)
4845 {
4846         struct igb_adapter *adapter = data;
4847         struct igb_q_vector *q_vector = adapter->q_vector[0];
4848         struct e1000_hw *hw = &adapter->hw;
4849         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4850          * need for the IMC write */
4851         u32 icr = rd32(E1000_ICR);
4852         if (!icr)
4853                 return IRQ_NONE;  /* Not our interrupt */
4854
4855         igb_write_itr(q_vector);
4856
4857         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4858          * not set, then the adapter didn't send an interrupt */
4859         if (!(icr & E1000_ICR_INT_ASSERTED))
4860                 return IRQ_NONE;
4861
4862         if (icr & E1000_ICR_DRSTA)
4863                 schedule_work(&adapter->reset_task);
4864
4865         if (icr & E1000_ICR_DOUTSYNC) {
4866                 /* HW is reporting DMA is out of sync */
4867                 adapter->stats.doosync++;
4868         }
4869
4870         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4871                 hw->mac.get_link_status = 1;
4872                 /* guard against interrupt when we're going down */
4873                 if (!test_bit(__IGB_DOWN, &adapter->state))
4874                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4875         }
4876
4877         napi_schedule(&q_vector->napi);
4878
4879         return IRQ_HANDLED;
4880 }
4881
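/*
 * igb_ring_irq_enable - re-enable interrupts for a q_vector after NAPI poll
 *
 * Updates the adaptive ITR first (adapter-wide for legacy/MSI, per ring for
 * MSI-X) and then unmasks this vector's EIMS bit or the shared interrupt,
 * unless the adapter is being brought down.
 */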
4882 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4883 {
4884         struct igb_adapter *adapter = q_vector->adapter;
4885         struct e1000_hw *hw = &adapter->hw;
4886
4887         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4888             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4889                 if (!adapter->msix_entries)
4890                         igb_set_itr(adapter);
4891                 else
4892                         igb_update_ring_itr(q_vector);
4893         }
4894
4895         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4896                 if (adapter->msix_entries)
4897                         wr32(E1000_EIMS, q_vector->eims_value);
4898                 else
4899                         igb_irq_enable(adapter);
4900         }
4901 }
4902
4903 /**
4904  * igb_poll - NAPI Rx polling callback
4905  * @napi: napi polling structure
4906  * @budget: count of how many packets we should handle
4907  **/
4908 static int igb_poll(struct napi_struct *napi, int budget)
4909 {
4910         struct igb_q_vector *q_vector = container_of(napi,
4911                                                      struct igb_q_vector,
4912                                                      napi);
4913         int tx_clean_complete = 1, work_done = 0;
4914
4915 #ifdef CONFIG_IGB_DCA
4916         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4917                 igb_update_dca(q_vector);
4918 #endif
4919         if (q_vector->tx_ring)
4920                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4921
4922         if (q_vector->rx_ring)
4923                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4924
4925         if (!tx_clean_complete)
4926                 work_done = budget;
4927
4928         /* If not enough Rx work done, exit the polling mode */
4929         if (work_done < budget) {
4930                 napi_complete(napi);
4931                 igb_ring_irq_enable(q_vector);
4932         }
4933
4934         return work_done;
4935 }
4936
4937 /**
4938  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4939  * @adapter: board private structure
4940  * @shhwtstamps: timestamp structure to update
4941  * @regval: unsigned 64bit system time value.
4942  *
4943  * We need to convert the system time value stored in the RX/TXSTMP registers
4944  * into a hwtstamp which can be used by the upper level timestamping functions
4945  */
4946 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4947                                    struct skb_shared_hwtstamps *shhwtstamps,
4948                                    u64 regval)
4949 {
4950         u64 ns;
4951
4952         /*
4953          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4954          * 24 to match clock shift we setup earlier.
4955          */
4956         if (adapter->hw.mac.type == e1000_82580)
4957                 regval <<= IGB_82580_TSYNC_SHIFT;
4958
4959         ns = timecounter_cyc2time(&adapter->clock, regval);
4960         timecompare_update(&adapter->compare, ns);
4961         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4962         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4963         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4964 }
4965
4966 /**
4967  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4968  * @q_vector: pointer to q_vector containing needed info
4969  * @skb: packet that was just sent
4970  *
4971  * If we were asked to do hardware stamping and such a time stamp is
4972  * available, then it must have been for this skb here because we only
4973  * allow one such packet into the queue.
4974  */
4975 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4976 {
4977         struct igb_adapter *adapter = q_vector->adapter;
4978         union skb_shared_tx *shtx = skb_tx(skb);
4979         struct e1000_hw *hw = &adapter->hw;
4980         struct skb_shared_hwtstamps shhwtstamps;
4981         u64 regval;
4982
4983         /* if skb does not support hw timestamp or TX stamp not valid exit */
4984         if (likely(!shtx->hardware) ||
4985             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4986                 return;
4987
4988         regval = rd32(E1000_TXSTMPL);
4989         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4990
4991         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4992         skb_tstamp_tx(skb, &shhwtstamps);
4993 }
4994
4995 /**
4996  * igb_clean_tx_irq - Reclaim resources after transmit completes
4997  * @q_vector: pointer to q_vector containing needed info
4998  * returns true if ring is completely cleaned
4999  **/
5000 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5001 {
5002         struct igb_adapter *adapter = q_vector->adapter;
5003         struct igb_ring *tx_ring = q_vector->tx_ring;
5004         struct net_device *netdev = tx_ring->netdev;
5005         struct e1000_hw *hw = &adapter->hw;
5006         struct igb_buffer *buffer_info;
5007         struct sk_buff *skb;
5008         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5009         unsigned int total_bytes = 0, total_packets = 0;
5010         unsigned int i, eop, count = 0;
5011         bool cleaned = false;
5012
5013         i = tx_ring->next_to_clean;
5014         eop = tx_ring->buffer_info[i].next_to_watch;
5015         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5016
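        /*
         * Walk completed descriptors: the outer loop advances one packet at
         * a time (next_to_watch marks each packet's last descriptor), while
         * the inner loop unmaps and frees every buffer in that packet.
         */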
5017         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5018                (count < tx_ring->count)) {
5019                 for (cleaned = false; !cleaned; count++) {
5020                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5021                         buffer_info = &tx_ring->buffer_info[i];
5022                         cleaned = (i == eop);
5023                         skb = buffer_info->skb;
5024
5025                         if (skb) {
5026                                 unsigned int segs, bytecount;
5027                                 /* gso_segs is currently only valid for tcp */
5028                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
5029                                 /* multiply data chunks by size of headers */
5030                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5031                                             skb->len;
5032                                 total_packets += segs;
5033                                 total_bytes += bytecount;
5034
5035                                 igb_tx_hwtstamp(q_vector, skb);
5036                         }
5037
5038                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5039                         tx_desc->wb.status = 0;
5040
5041                         i++;
5042                         if (i == tx_ring->count)
5043                                 i = 0;
5044                 }
5045                 eop = tx_ring->buffer_info[i].next_to_watch;
5046                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5047         }
5048
5049         tx_ring->next_to_clean = i;
5050
5051         if (unlikely(count &&
5052                      netif_carrier_ok(netdev) &&
5053                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5054                 /* Make sure that anybody stopping the queue after this
5055                  * sees the new next_to_clean.
5056                  */
5057                 smp_mb();
5058                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5059                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5060                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5061                         tx_ring->tx_stats.restart_queue++;
5062                 }
5063         }
5064
5065         if (tx_ring->detect_tx_hung) {
5066                 /* Detect a transmit hang in hardware; this serializes the
5067                  * check with the clearing of time_stamp and movement of i */
5068                 tx_ring->detect_tx_hung = false;
5069                 if (tx_ring->buffer_info[i].time_stamp &&
5070                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5071                                (adapter->tx_timeout_factor * HZ)) &&
5072                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5073
5074                         /* detected Tx unit hang */
5075                         dev_err(&tx_ring->pdev->dev,
5076                                 "Detected Tx Unit Hang\n"
5077                                 "  Tx Queue             <%d>\n"
5078                                 "  TDH                  <%x>\n"
5079                                 "  TDT                  <%x>\n"
5080                                 "  next_to_use          <%x>\n"
5081                                 "  next_to_clean        <%x>\n"
5082                                 "buffer_info[next_to_clean]\n"
5083                                 "  time_stamp           <%lx>\n"
5084                                 "  next_to_watch        <%x>\n"
5085                                 "  jiffies              <%lx>\n"
5086                                 "  desc.status          <%x>\n",
5087                                 tx_ring->queue_index,
5088                                 readl(tx_ring->head),
5089                                 readl(tx_ring->tail),
5090                                 tx_ring->next_to_use,
5091                                 tx_ring->next_to_clean,
5092                                 tx_ring->buffer_info[eop].time_stamp,
5093                                 eop,
5094                                 jiffies,
5095                                 eop_desc->wb.status);
5096                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5097                 }
5098         }
5099         tx_ring->total_bytes += total_bytes;
5100         tx_ring->total_packets += total_packets;
5101         tx_ring->tx_stats.bytes += total_bytes;
5102         tx_ring->tx_stats.packets += total_packets;
5103         return (count < tx_ring->count);
5104 }
5105
5106 /**
5107  * igb_receive_skb - helper function to handle rx indications
5108  * @q_vector: structure containing interrupt and ring information
5109  * @skb: packet to send up
5110  * @vlan_tag: vlan tag for packet
5111  **/
5112 static void igb_receive_skb(struct igb_q_vector *q_vector,
5113                             struct sk_buff *skb,
5114                             u16 vlan_tag)
5115 {
5116         struct igb_adapter *adapter = q_vector->adapter;
5117
5118         if (vlan_tag)
5119                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5120                                  vlan_tag, skb);
5121         else
5122                 napi_gro_receive(&q_vector->napi, skb);
5123 }
5124
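/* igb_rx_checksum_adv - set skb->ip_summed from the Rx descriptor checksum status */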
5125 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5126                                        u32 status_err, struct sk_buff *skb)
5127 {
5128         skb->ip_summed = CHECKSUM_NONE;
5129
5130         /* skip if Ignore Checksum is set or checksum is disabled through ethtool */
5131         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5132              (status_err & E1000_RXD_STAT_IXSM))
5133                 return;
5134
5135         /* TCP/UDP checksum error bit is set */
5136         if (status_err &
5137             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5138                 /*
5139                  * work around errata with sctp packets where the TCPE aka
5140                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5141                  * packets, (aka let the stack check the crc32c)
5142                  */
5143                 if ((skb->len == 60) &&
5144                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5145                         ring->rx_stats.csum_err++;
5146
5147                 /* let the stack verify checksum errors */
5148                 return;
5149         }
5150         /* It must be a TCP or UDP packet with a valid checksum */
5151         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5152                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5153
5154         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5155 }
5156
5157 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5158                                    struct sk_buff *skb)
5159 {
5160         struct igb_adapter *adapter = q_vector->adapter;
5161         struct e1000_hw *hw = &adapter->hw;
5162         u64 regval;
5163
5164         /*
5165          * If this bit is set, then the RX registers contain the time stamp. No
5166          * other packet will be time stamped until we read these registers, so
5167          * read the registers to make them available again. Because only one
5168          * packet can be time stamped at a time, we know that the register
5169          * values must belong to this one here and therefore we don't need to
5170          * compare any of the additional attributes stored for it.
5171          *
5172          * If nothing went wrong, then it should have a skb_shared_tx that we
5173          * can turn into a skb_shared_hwtstamps.
5174          */
5175         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5176                 return;
5177         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5178                 return;
5179
5180         regval = rd32(E1000_RXSTMPL);
5181         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5182
5183         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5184 }
5185 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5186                                union e1000_adv_rx_desc *rx_desc)
5187 {
5188         /* HW will not DMA in data larger than the given buffer, even if it
5189          * parses the (NFS, of course) header to be larger.  In that case, it
5190          * fills the header buffer and spills the rest into the page.
5191          */
5192         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5193                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5194         if (hlen > rx_ring->rx_buffer_len)
5195                 hlen = rx_ring->rx_buffer_len;
5196         return hlen;
5197 }
5198
5199 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5200                                  int *work_done, int budget)
5201 {
5202         struct igb_ring *rx_ring = q_vector->rx_ring;
5203         struct net_device *netdev = rx_ring->netdev;
5204         struct pci_dev *pdev = rx_ring->pdev;
5205         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5206         struct igb_buffer *buffer_info, *next_buffer;
5207         struct sk_buff *skb;
5208         bool cleaned = false;
5209         int cleaned_count = 0;
5210         int current_node = numa_node_id();
5211         unsigned int total_bytes = 0, total_packets = 0;
5212         unsigned int i;
5213         u32 staterr;
5214         u16 length;
5215         u16 vlan_tag;
5216
5217         i = rx_ring->next_to_clean;
5218         buffer_info = &rx_ring->buffer_info[i];
5219         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5220         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5221
5222         while (staterr & E1000_RXD_STAT_DD) {
5223                 if (*work_done >= budget)
5224                         break;
5225                 (*work_done)++;
5226
5227                 skb = buffer_info->skb;
5228                 prefetch(skb->data - NET_IP_ALIGN);
5229                 buffer_info->skb = NULL;
5230
5231                 i++;
5232                 if (i == rx_ring->count)
5233                         i = 0;
5234
5235                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5236                 prefetch(next_rxd);
5237                 next_buffer = &rx_ring->buffer_info[i];
5238
5239                 length = le16_to_cpu(rx_desc->wb.upper.length);
5240                 cleaned = true;
5241                 cleaned_count++;
5242
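                /*
                 * Packet-split receive: the header (or, with large single
                 * buffers, the whole frame) is in the skb's data buffer;
                 * any remaining payload sits in the attached half page.
                 */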
5243                 if (buffer_info->dma) {
5244                         pci_unmap_single(pdev, buffer_info->dma,
5245                                          rx_ring->rx_buffer_len,
5246                                          PCI_DMA_FROMDEVICE);
5247                         buffer_info->dma = 0;
5248                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5249                                 skb_put(skb, length);
5250                                 goto send_up;
5251                         }
5252                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5253                 }
5254
5255                 if (length) {
5256                         pci_unmap_page(pdev, buffer_info->page_dma,
5257                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5258                         buffer_info->page_dma = 0;
5259
5260                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5261                                                 buffer_info->page,
5262                                                 buffer_info->page_offset,
5263                                                 length);
5264
5265                         if ((page_count(buffer_info->page) != 1) ||
5266                             (page_to_nid(buffer_info->page) != current_node))
5267                                 buffer_info->page = NULL;
5268                         else
5269                                 get_page(buffer_info->page);
5270
5271                         skb->len += length;
5272                         skb->data_len += length;
5273                         skb->truesize += length;
5274                 }
5275
5276                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5277                         buffer_info->skb = next_buffer->skb;
5278                         buffer_info->dma = next_buffer->dma;
5279                         next_buffer->skb = skb;
5280                         next_buffer->dma = 0;
5281                         goto next_desc;
5282                 }
5283 send_up:
5284                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5285                         dev_kfree_skb_irq(skb);
5286                         goto next_desc;
5287                 }
5288
5289                 igb_rx_hwtstamp(q_vector, staterr, skb);
5290                 total_bytes += skb->len;
5291                 total_packets++;
5292
5293                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5294
5295                 skb->protocol = eth_type_trans(skb, netdev);
5296                 skb_record_rx_queue(skb, rx_ring->queue_index);
5297
5298                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5299                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5300
5301                 igb_receive_skb(q_vector, skb, vlan_tag);
5302
5303 next_desc:
5304                 rx_desc->wb.upper.status_error = 0;
5305
5306                 /* return some buffers to hardware, one at a time is too slow */
5307                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5308                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5309                         cleaned_count = 0;
5310                 }
5311
5312                 /* use prefetched values */
5313                 rx_desc = next_rxd;
5314                 buffer_info = next_buffer;
5315                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5316         }
5317
5318         rx_ring->next_to_clean = i;
5319         cleaned_count = igb_desc_unused(rx_ring);
5320
5321         if (cleaned_count)
5322                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5323
5324         rx_ring->total_packets += total_packets;
5325         rx_ring->total_bytes += total_bytes;
5326         rx_ring->rx_stats.packets += total_packets;
5327         rx_ring->rx_stats.bytes += total_bytes;
5328         return cleaned;
5329 }
5330
5331 /**
5332  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5333  * @rx_ring: Rx descriptor ring to place buffers on
 * @cleaned_count: number of buffers to replace
5334  **/
5335 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5336 {
5337         struct net_device *netdev = rx_ring->netdev;
5338         union e1000_adv_rx_desc *rx_desc;
5339         struct igb_buffer *buffer_info;
5340         struct sk_buff *skb;
5341         unsigned int i;
5342         int bufsz;
5343
5344         i = rx_ring->next_to_use;
5345         buffer_info = &rx_ring->buffer_info[i];
5346
5347         bufsz = rx_ring->rx_buffer_len;
5348
5349         while (cleaned_count--) {
5350                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5351
5352                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5353                         if (!buffer_info->page) {
5354                                 buffer_info->page = netdev_alloc_page(netdev);
5355                                 if (!buffer_info->page) {
5356                                         rx_ring->rx_stats.alloc_failed++;
5357                                         goto no_buffers;
5358                                 }
5359                                 buffer_info->page_offset = 0;
5360                         } else {
5361                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5362                         }
5363                         buffer_info->page_dma =
5364                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5365                                              buffer_info->page_offset,
5366                                              PAGE_SIZE / 2,
5367                                              PCI_DMA_FROMDEVICE);
5368                         if (pci_dma_mapping_error(rx_ring->pdev,
5369                                                   buffer_info->page_dma)) {
5370                                 buffer_info->page_dma = 0;
5371                                 rx_ring->rx_stats.alloc_failed++;
5372                                 goto no_buffers;
5373                         }
5374                 }
5375
5376                 skb = buffer_info->skb;
5377                 if (!skb) {
5378                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5379                         if (!skb) {
5380                                 rx_ring->rx_stats.alloc_failed++;
5381                                 goto no_buffers;
5382                         }
5383
5384                         buffer_info->skb = skb;
5385                 }
5386                 if (!buffer_info->dma) {
5387                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5388                                                           skb->data,
5389                                                           bufsz,
5390                                                           PCI_DMA_FROMDEVICE);
5391                         if (pci_dma_mapping_error(rx_ring->pdev,
5392                                                   buffer_info->dma)) {
5393                                 buffer_info->dma = 0;
5394                                 rx_ring->rx_stats.alloc_failed++;
5395                                 goto no_buffers;
5396                         }
5397                 }
5398                 /* Refresh the desc even if buffer_addrs didn't change because
5399                  * each write-back erases this info. */
5400                 if (bufsz < IGB_RXBUFFER_1024) {
5401                         rx_desc->read.pkt_addr =
5402                              cpu_to_le64(buffer_info->page_dma);
5403                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5404                 } else {
5405                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5406                         rx_desc->read.hdr_addr = 0;
5407                 }
5408
5409                 i++;
5410                 if (i == rx_ring->count)
5411                         i = 0;
5412                 buffer_info = &rx_ring->buffer_info[i];
5413         }
5414
5415 no_buffers:
5416         if (rx_ring->next_to_use != i) {
5417                 rx_ring->next_to_use = i;
5418                 if (i == 0)
5419                         i = (rx_ring->count - 1);
5420                 else
5421                         i--;
5422
5423                 /* Force memory writes to complete before letting h/w
5424                  * know there are new descriptors to fetch.  (Only
5425                  * applicable for weak-ordered memory model archs,
5426                  * such as IA-64). */
5427                 wmb();
5428                 writel(i, rx_ring->tail);
5429         }
5430 }
5431
5432 /**
5433  * igb_mii_ioctl - handle MII register ioctls for copper PHYs
5434  * @netdev: network interface device structure
5435  * @ifr: interface request holding the MII register data
5436  * @cmd: ioctl command (only SIOCGMIIPHY and SIOCGMIIREG are supported)
5437  **/
5438 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5439 {
5440         struct igb_adapter *adapter = netdev_priv(netdev);
5441         struct mii_ioctl_data *data = if_mii(ifr);
5442
5443         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5444                 return -EOPNOTSUPP;
5445
5446         switch (cmd) {
5447         case SIOCGMIIPHY:
5448                 data->phy_id = adapter->hw.phy.addr;
5449                 break;
5450         case SIOCGMIIREG:
5451                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5452                                      &data->val_out))
5453                         return -EIO;
5454                 break;
5455         case SIOCSMIIREG:
5456         default:
5457                 return -EOPNOTSUPP;
5458         }
5459         return 0;
5460 }
5461
5462 /**
5463  * igb_hwtstamp_ioctl - control hardware time stamping
5464  * @netdev: network interface device structure
5465  * @ifr: interface request holding the hwtstamp_config
5466  * @cmd: ioctl command
5467  *
5468  * Outgoing time stamping can be enabled and disabled. Play nice and
5469  * disable it when requested, although it shouldn't cause any overhead
5470  * when no packet needs it. At most one packet in the queue may be
5471  * marked for time stamping, otherwise it would be impossible to tell
5472  * for sure to which packet the hardware time stamp belongs.
5473  *
5474  * Incoming time stamping has to be configured via the hardware
5475  * filters. Not all combinations are supported, in particular event
5476  * type has to be specified. Matching the kind of event packet is
5477  * not supported, with the exception of "all V2 events regardless of
5478  * level 2 or 4".
5479  *
5480  **/
5481 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5482                               struct ifreq *ifr, int cmd)
5483 {
5484         struct igb_adapter *adapter = netdev_priv(netdev);
5485         struct e1000_hw *hw = &adapter->hw;
5486         struct hwtstamp_config config;
5487         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5488         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5489         u32 tsync_rx_cfg = 0;
5490         bool is_l4 = false;
5491         bool is_l2 = false;
5492         u32 regval;
5493
5494         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5495                 return -EFAULT;
5496
5497         /* reserved for future extensions */
5498         if (config.flags)
5499                 return -EINVAL;
5500
5501         switch (config.tx_type) {
5502         case HWTSTAMP_TX_OFF:
5503                 tsync_tx_ctl = 0;
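                /* fall through */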
5504         case HWTSTAMP_TX_ON:
5505                 break;
5506         default:
5507                 return -ERANGE;
5508         }
5509
5510         switch (config.rx_filter) {
5511         case HWTSTAMP_FILTER_NONE:
5512                 tsync_rx_ctl = 0;
5513                 break;
5514         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5515         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5516         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5517         case HWTSTAMP_FILTER_ALL:
5518                 /*
5519                  * register TSYNCRXCFG must be set, therefore it is not
5520                  * possible to time stamp both Sync and Delay_Req messages
5521                  * => fall back to time stamping all packets
5522                  */
5523                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5524                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5525                 break;
5526         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5527                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5528                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5529                 is_l4 = true;
5530                 break;
5531         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5532                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5533                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5534                 is_l4 = true;
5535                 break;
5536         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5537         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5538                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5539                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5540                 is_l2 = true;
5541                 is_l4 = true;
5542                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5543                 break;
5544         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5545         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5546                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5547                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5548                 is_l2 = true;
5549                 is_l4 = true;
5550                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5551                 break;
5552         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5553         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5554         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5555                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5556                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5557                 is_l2 = true;
5558                 break;
5559         default:
5560                 return -ERANGE;
5561         }
5562
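        /* hardware time stamping is not available on the 82575, so only a
         * request to disable it can succeed */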
5563         if (hw->mac.type == e1000_82575) {
5564                 if (tsync_rx_ctl | tsync_tx_ctl)
5565                         return -EINVAL;
5566                 return 0;
5567         }
5568
5569         /* enable/disable TX */
5570         regval = rd32(E1000_TSYNCTXCTL);
5571         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5572         regval |= tsync_tx_ctl;
5573         wr32(E1000_TSYNCTXCTL, regval);
5574
5575         /* enable/disable RX */
5576         regval = rd32(E1000_TSYNCRXCTL);
5577         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5578         regval |= tsync_rx_ctl;
5579         wr32(E1000_TSYNCRXCTL, regval);
5580
5581         /* define which PTP packets are time stamped */
5582         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5583
5584         /* define ethertype filter for timestamped packets */
5585         if (is_l2)
5586                 wr32(E1000_ETQF(3),
5587                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5588                                  E1000_ETQF_1588 | /* enable timestamping */
5589                                  ETH_P_1588));     /* 1588 eth protocol type */
5590         else
5591                 wr32(E1000_ETQF(3), 0);
5592
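/* UDP destination port used by PTP (IEEE 1588) event messages */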
5593 #define PTP_PORT 319
5594         /* L4 Queue Filter[3]: filter by destination port and protocol */
5595         if (is_l4) {
5596                 u32 ftqf = (IPPROTO_UDP /* UDP */
5597                         | E1000_FTQF_VF_BP /* VF not compared */
5598                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5599                         | E1000_FTQF_MASK); /* mask all inputs */
5600                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5601
5602                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5603                 wr32(E1000_IMIREXT(3),
5604                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5605                 if (hw->mac.type == e1000_82576) {
5606                         /* enable source port check */
5607                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5608                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5609                 }
5610                 wr32(E1000_FTQF(3), ftqf);
5611         } else {
5612                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5613         }
5614         wrfl();
5615
5616         adapter->hwtstamp_config = config;
5617
5618         /* clear TX/RX time stamp registers, just to be sure */
5619         regval = rd32(E1000_TXSTMPH);
5620         regval = rd32(E1000_RXSTMPH);
5621
5622         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5623                 -EFAULT : 0;
5624 }
5625
5626 /**
5627  * igb_ioctl - handle device-specific ioctl requests
5628  * @netdev: network interface device structure
5629  * @ifr: interface request data
5630  * @cmd: ioctl command
5631  **/
5632 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5633 {
5634         switch (cmd) {
5635         case SIOCGMIIPHY:
5636         case SIOCGMIIREG:
5637         case SIOCSMIIREG:
5638                 return igb_mii_ioctl(netdev, ifr, cmd);
5639         case SIOCSHWTSTAMP:
5640                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5641         default:
5642                 return -EOPNOTSUPP;
5643         }
5644 }
5645
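/* read a 16-bit register from the device's PCI Express capability structure;
 * 'reg' is an offset from the start of that capability */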
5646 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5647 {
5648         struct igb_adapter *adapter = hw->back;
5649         u16 cap_offset;
5650
5651         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5652         if (!cap_offset)
5653                 return -E1000_ERR_CONFIG;
5654
5655         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5656
5657         return 0;
5658 }
5659
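/* write a 16-bit register in the device's PCI Express capability structure;
 * 'reg' is an offset from the start of that capability */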
5660 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5661 {
5662         struct igb_adapter *adapter = hw->back;
5663         u16 cap_offset;
5664
5665         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5666         if (!cap_offset)
5667                 return -E1000_ERR_CONFIG;
5668
5669         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5670
5671         return 0;
5672 }
5673
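/* called by the VLAN layer when a vlan group is attached or detached;
 * enables or disables hardware VLAN tag insertion/stripping accordingly */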
5674 static void igb_vlan_rx_register(struct net_device *netdev,
5675                                  struct vlan_group *grp)
5676 {
5677         struct igb_adapter *adapter = netdev_priv(netdev);
5678         struct e1000_hw *hw = &adapter->hw;
5679         u32 ctrl, rctl;
5680
5681         igb_irq_disable(adapter);
5682         adapter->vlgrp = grp;
5683
5684         if (grp) {
5685                 /* enable VLAN tag insert/strip */
5686                 ctrl = rd32(E1000_CTRL);
5687                 ctrl |= E1000_CTRL_VME;
5688                 wr32(E1000_CTRL, ctrl);
5689
5690                 /* Disable CFI check */
5691                 rctl = rd32(E1000_RCTL);
5692                 rctl &= ~E1000_RCTL_CFIEN;
5693                 wr32(E1000_RCTL, rctl);
5694         } else {
5695                 /* disable VLAN tag insert/strip */
5696                 ctrl = rd32(E1000_CTRL);
5697                 ctrl &= ~E1000_CTRL_VME;
5698                 wr32(E1000_CTRL, ctrl);
5699         }
5700
5701         igb_rlpml_set(adapter);
5702
5703         if (!test_bit(__IGB_DOWN, &adapter->state))
5704                 igb_irq_enable(adapter);
5705 }
5706
5707 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5708 {
5709         struct igb_adapter *adapter = netdev_priv(netdev);
5710         struct e1000_hw *hw = &adapter->hw;
5711         int pf_id = adapter->vfs_allocated_count;
5712
5713         /* attempt to add filter to vlvf array */
5714         igb_vlvf_set(adapter, vid, true, pf_id);
5715
5716         /* add the filter since PF can receive vlans w/o entry in vlvf */
5717         igb_vfta_set(hw, vid, true);
5718 }
5719
5720 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5721 {
5722         struct igb_adapter *adapter = netdev_priv(netdev);
5723         struct e1000_hw *hw = &adapter->hw;
5724         int pf_id = adapter->vfs_allocated_count;
5725         s32 err;
5726
5727         igb_irq_disable(adapter);
5728         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5729
5730         if (!test_bit(__IGB_DOWN, &adapter->state))
5731                 igb_irq_enable(adapter);
5732
5733         /* remove vlan from VLVF table array */
5734         err = igb_vlvf_set(adapter, vid, false, pf_id);
5735
5736         /* if vid was not present in VLVF just remove it from table */
5737         if (err)
5738                 igb_vfta_set(hw, vid, false);
5739 }
5740
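/* re-program the hardware VLAN registers and per-VID filters after a reset */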
5741 static void igb_restore_vlan(struct igb_adapter *adapter)
5742 {
5743         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5744
5745         if (adapter->vlgrp) {
5746                 u16 vid;
5747                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5748                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5749                                 continue;
5750                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5751                 }
5752         }
5753 }
5754
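/* force link speed and duplex; spddplx is the sum of a SPEED_* and a DUPLEX_* value */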
5755 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5756 {
5757         struct pci_dev *pdev = adapter->pdev;
5758         struct e1000_mac_info *mac = &adapter->hw.mac;
5759
5760         mac->autoneg = 0;
5761
5762         switch (spddplx) {
5763         case SPEED_10 + DUPLEX_HALF:
5764                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5765                 break;
5766         case SPEED_10 + DUPLEX_FULL:
5767                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5768                 break;
5769         case SPEED_100 + DUPLEX_HALF:
5770                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5771                 break;
5772         case SPEED_100 + DUPLEX_FULL:
5773                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5774                 break;
5775         case SPEED_1000 + DUPLEX_FULL:
5776                 mac->autoneg = 1;
5777                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5778                 break;
5779         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5780         default:
5781                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5782                 return -EINVAL;
5783         }
5784         return 0;
5785 }
5786
5787 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5788 {
5789         struct net_device *netdev = pci_get_drvdata(pdev);
5790         struct igb_adapter *adapter = netdev_priv(netdev);
5791         struct e1000_hw *hw = &adapter->hw;
5792         u32 ctrl, rctl, status;
5793         u32 wufc = adapter->wol;
5794 #ifdef CONFIG_PM
5795         int retval = 0;
5796 #endif
5797
5798         netif_device_detach(netdev);
5799
5800         if (netif_running(netdev))
5801                 igb_close(netdev);
5802
5803         igb_clear_interrupt_scheme(adapter);
5804
5805 #ifdef CONFIG_PM
5806         retval = pci_save_state(pdev);
5807         if (retval)
5808                 return retval;
5809 #endif
5810
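        /* if the link is already up, do not arm the wake-on-link-status-change filter */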
5811         status = rd32(E1000_STATUS);
5812         if (status & E1000_STATUS_LU)
5813                 wufc &= ~E1000_WUFC_LNKC;
5814
5815         if (wufc) {
5816                 igb_setup_rctl(adapter);
5817                 igb_set_rx_mode(netdev);
5818
5819                 /* turn on all-multi mode if wake on multicast is enabled */
5820                 if (wufc & E1000_WUFC_MC) {
5821                         rctl = rd32(E1000_RCTL);
5822                         rctl |= E1000_RCTL_MPE;
5823                         wr32(E1000_RCTL, rctl);
5824                 }
5825
5826                 ctrl = rd32(E1000_CTRL);
5827                 /* advertise wake from D3Cold */
5828                 #define E1000_CTRL_ADVD3WUC 0x00100000
5829                 /* phy power management enable */
5830                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5831                 ctrl |= E1000_CTRL_ADVD3WUC;
5832                 wr32(E1000_CTRL, ctrl);
5833
5834                 /* Allow time for pending master requests to run */
5835                 igb_disable_pcie_master(hw);
5836
5837                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5838                 wr32(E1000_WUFC, wufc);
5839         } else {
5840                 wr32(E1000_WUC, 0);
5841                 wr32(E1000_WUFC, 0);
5842         }
5843
5844         *enable_wake = wufc || adapter->en_mng_pt;
5845         if (!*enable_wake)
5846                 igb_power_down_link(adapter);
5847         else
5848                 igb_power_up_link(adapter);
5849
5850         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5851          * would have already happened in close and is redundant. */
5852         igb_release_hw_control(adapter);
5853
5854         pci_disable_device(pdev);
5855
5856         return 0;
5857 }
5858
5859 #ifdef CONFIG_PM
5860 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5861 {
5862         int retval;
5863         bool wake;
5864
5865         retval = __igb_shutdown(pdev, &wake);
5866         if (retval)
5867                 return retval;
5868
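        /* if wake-up is armed, let the PCI core pick the low-power state and
         * enable PME; otherwise disable wake and go straight to D3hot */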
5869         if (wake) {
5870                 pci_prepare_to_sleep(pdev);
5871         } else {
5872                 pci_wake_from_d3(pdev, false);
5873                 pci_set_power_state(pdev, PCI_D3hot);
5874         }
5875
5876         return 0;
5877 }
5878
5879 static int igb_resume(struct pci_dev *pdev)
5880 {
5881         struct net_device *netdev = pci_get_drvdata(pdev);
5882         struct igb_adapter *adapter = netdev_priv(netdev);
5883         struct e1000_hw *hw = &adapter->hw;
5884         u32 err;
5885
5886         pci_set_power_state(pdev, PCI_D0);
5887         pci_restore_state(pdev);
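        /* restoring the config space invalidates the saved copy, so save it
         * again for the next suspend or error-recovery cycle */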
5888         pci_save_state(pdev);
5889
5890         err = pci_enable_device_mem(pdev);
5891         if (err) {
5892                 dev_err(&pdev->dev,
5893                         "igb: Cannot enable PCI device from suspend\n");
5894                 return err;
5895         }
5896         pci_set_master(pdev);
5897
5898         pci_enable_wake(pdev, PCI_D3hot, 0);
5899         pci_enable_wake(pdev, PCI_D3cold, 0);
5900
5901         if (igb_init_interrupt_scheme(adapter)) {
5902                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5903                 return -ENOMEM;
5904         }
5905
5906         igb_reset(adapter);
5907
5908         /* let the f/w know that the h/w is now under the control of the
5909          * driver. */
5910         igb_get_hw_control(adapter);
5911
5912         wr32(E1000_WUS, ~0);
5913
5914         if (netif_running(netdev)) {
5915                 err = igb_open(netdev);
5916                 if (err)
5917                         return err;
5918         }
5919
5920         netif_device_attach(netdev);
5921
5922         return 0;
5923 }
5924 #endif
5925
5926 static void igb_shutdown(struct pci_dev *pdev)
5927 {
5928         bool wake;
5929
5930         __igb_shutdown(pdev, &wake);
5931
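        /* only arm wake-up and drop to D3hot when the system is powering off */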
5932         if (system_state == SYSTEM_POWER_OFF) {
5933                 pci_wake_from_d3(pdev, wake);
5934                 pci_set_power_state(pdev, PCI_D3hot);
5935         }
5936 }
5937
5938 #ifdef CONFIG_NET_POLL_CONTROLLER
5939 /*
5940  * Polling 'interrupt' - used by things like netconsole to send skbs
5941  * without having to re-enable interrupts. It's not called while
5942  * the interrupt routine is executing.
5943  */
5944 static void igb_netpoll(struct net_device *netdev)
5945 {
5946         struct igb_adapter *adapter = netdev_priv(netdev);
5947         struct e1000_hw *hw = &adapter->hw;
5948         int i;
5949
5950         if (!adapter->msix_entries) {
5951                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5952                 igb_irq_disable(adapter);
5953                 napi_schedule(&q_vector->napi);
5954                 return;
5955         }
5956
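        /* MSI-X: mask each vector's interrupt and schedule its NAPI poll */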
5957         for (i = 0; i < adapter->num_q_vectors; i++) {
5958                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5959                 wr32(E1000_EIMC, q_vector->eims_value);
5960                 napi_schedule(&q_vector->napi);
5961         }
5962 }
5963 #endif /* CONFIG_NET_POLL_CONTROLLER */
5964
5965 /**
5966  * igb_io_error_detected - called when PCI error is detected
5967  * @pdev: Pointer to PCI device
5968  * @state: The current pci connection state
5969  *
5970  * This function is called after a PCI bus error affecting
5971  * this device has been detected.
5972  */
5973 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5974                                               pci_channel_state_t state)
5975 {
5976         struct net_device *netdev = pci_get_drvdata(pdev);
5977         struct igb_adapter *adapter = netdev_priv(netdev);
5978
5979         netif_device_detach(netdev);
5980
5981         if (state == pci_channel_io_perm_failure)
5982                 return PCI_ERS_RESULT_DISCONNECT;
5983
5984         if (netif_running(netdev))
5985                 igb_down(adapter);
5986         pci_disable_device(pdev);
5987
5988         /* Request a slot reset. */
5989         return PCI_ERS_RESULT_NEED_RESET;
5990 }
5991
5992 /**
5993  * igb_io_slot_reset - called after the pci bus has been reset.
5994  * @pdev: Pointer to PCI device
5995  *
5996  * Restart the card from scratch, as if from a cold-boot. Implementation
5997  * resembles the first-half of the igb_resume routine.
5998  */
5999 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6000 {
6001         struct net_device *netdev = pci_get_drvdata(pdev);
6002         struct igb_adapter *adapter = netdev_priv(netdev);
6003         struct e1000_hw *hw = &adapter->hw;
6004         pci_ers_result_t result;
6005         int err;
6006
6007         if (pci_enable_device_mem(pdev)) {
6008                 dev_err(&pdev->dev,
6009                         "Cannot re-enable PCI device after reset.\n");
6010                 result = PCI_ERS_RESULT_DISCONNECT;
6011         } else {
6012                 pci_set_master(pdev);
6013                 pci_restore_state(pdev);
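                /* restoring the config space invalidates the saved copy, so
                 * save it again for any later restore */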
6014                 pci_save_state(pdev);
6015
6016                 pci_enable_wake(pdev, PCI_D3hot, 0);
6017                 pci_enable_wake(pdev, PCI_D3cold, 0);
6018
6019                 igb_reset(adapter);
6020                 wr32(E1000_WUS, ~0);
6021                 result = PCI_ERS_RESULT_RECOVERED;
6022         }
6023
6024         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6025         if (err) {
6026                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6027                         "failed 0x%x\n", err);
6028                 /* non-fatal, continue */
6029         }
6030
6031         return result;
6032 }
6033
6034 /**
6035  * igb_io_resume - called when traffic can start flowing again.
6036  * @pdev: Pointer to PCI device
6037  *
6038  * This callback is called when the error recovery driver tells us that
6039  * its OK to resume normal operation. Implementation resembles the
6040  * second-half of the igb_resume routine.
6041  */
6042 static void igb_io_resume(struct pci_dev *pdev)
6043 {
6044         struct net_device *netdev = pci_get_drvdata(pdev);
6045         struct igb_adapter *adapter = netdev_priv(netdev);
6046
6047         if (netif_running(netdev)) {
6048                 if (igb_up(adapter)) {
6049                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6050                         return;
6051                 }
6052         }
6053
6054         netif_device_attach(netdev);
6055
6056         /* let the f/w know that the h/w is now under the control of the
6057          * driver. */
6058         igb_get_hw_control(adapter);
6059 }
6060
6061 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6062                              u8 qsel)
6063 {
6064         u32 rar_low, rar_high;
6065         struct e1000_hw *hw = &adapter->hw;
6066
6067         /* HW expects these in little endian so we reverse the byte order
6068          * from network order (big endian) to little endian
6069          */
6070         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6071                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6072         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6073
6074         /* Indicate to hardware the Address is Valid. */
6075         rar_high |= E1000_RAH_AV;
6076
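        /* select the destination pool/queue: the 82575 encodes it as a multiple
         * of the pool bit, later parts shift the pool bit by the queue select */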
6077         if (hw->mac.type == e1000_82575)
6078                 rar_high |= E1000_RAH_POOL_1 * qsel;
6079         else
6080                 rar_high |= E1000_RAH_POOL_1 << qsel;
6081
6082         wr32(E1000_RAL(index), rar_low);
6083         wrfl();
6084         wr32(E1000_RAH(index), rar_high);
6085         wrfl();
6086 }
6087
6088 static int igb_set_vf_mac(struct igb_adapter *adapter,
6089                           int vf, unsigned char *mac_addr)
6090 {
6091         struct e1000_hw *hw = &adapter->hw;
6092         /* VF MAC addresses start at the end of the receive address registers
6093          * and move towards the first, so a collision should not be possible */
6094         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6095
6096         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6097
6098         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6099
6100         return 0;
6101 }
6102
6103 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6104 {
6105         struct igb_adapter *adapter = netdev_priv(netdev);
6106         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6107                 return -EINVAL;
6108         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6109         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6110         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6111                                       " change effective.\n");
6112         if (test_bit(__IGB_DOWN, &adapter->state)) {
6113                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6114                          " but the PF device is not up.\n");
6115                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6116                          " attempting to use the VF device.\n");
6117         }
6118         return igb_set_vf_mac(adapter, vf, mac);
6119 }
6120
6121 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6122 {
6123         return -EOPNOTSUPP;
6124 }
6125
6126 static int igb_ndo_get_vf_config(struct net_device *netdev,
6127                                  int vf, struct ifla_vf_info *ivi)
6128 {
6129         struct igb_adapter *adapter = netdev_priv(netdev);
6130         if (vf >= adapter->vfs_allocated_count)
6131                 return -EINVAL;
6132         ivi->vf = vf;
6133         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6134         ivi->tx_rate = 0;
6135         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6136         ivi->qos = adapter->vf_data[vf].pf_qos;
6137         return 0;
6138 }
6139
6140 static void igb_vmm_control(struct igb_adapter *adapter)
6141 {
6142         struct e1000_hw *hw = &adapter->hw;
6143         u32 reg;
6144
6145         /* replication is not supported for 82575 */
6146         if (hw->mac.type == e1000_82575)
6147                 return;
6148
6149         /* enable replication vlan tag stripping */
6150         reg = rd32(E1000_RPLOLR);
6151         reg |= E1000_RPLOLR_STRVLAN;
6152         wr32(E1000_RPLOLR, reg);
6153
6154         /* notify HW that the MAC is adding vlan tags */
6155         reg = rd32(E1000_DTXCTL);
6156         reg |= E1000_DTXCTL_VLAN_ADDED;
6157         wr32(E1000_DTXCTL, reg);
6158
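        /* enable VMDq loopback and replication only while VFs are allocated */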
6159         if (adapter->vfs_allocated_count) {
6160                 igb_vmdq_set_loopback_pf(hw, true);
6161                 igb_vmdq_set_replication_pf(hw, true);
6162         } else {
6163                 igb_vmdq_set_loopback_pf(hw, false);
6164                 igb_vmdq_set_replication_pf(hw, false);
6165         }
6166 }
6167
6168 /* igb_main.c */