igb: add per-packet timestamping
[safe/jmp/linux-2.6] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
84         /* required last entry */
85         {0, }
86 };
87
88 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
89
90 void igb_reset(struct igb_adapter *);
91 static int igb_setup_all_tx_resources(struct igb_adapter *);
92 static int igb_setup_all_rx_resources(struct igb_adapter *);
93 static void igb_free_all_tx_resources(struct igb_adapter *);
94 static void igb_free_all_rx_resources(struct igb_adapter *);
95 static void igb_setup_mrqc(struct igb_adapter *);
96 void igb_update_stats(struct igb_adapter *);
97 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
98 static void __devexit igb_remove(struct pci_dev *pdev);
99 static int igb_sw_init(struct igb_adapter *);
100 static int igb_open(struct net_device *);
101 static int igb_close(struct net_device *);
102 static void igb_configure_tx(struct igb_adapter *);
103 static void igb_configure_rx(struct igb_adapter *);
104 static void igb_clean_all_tx_rings(struct igb_adapter *);
105 static void igb_clean_all_rx_rings(struct igb_adapter *);
106 static void igb_clean_tx_ring(struct igb_ring *);
107 static void igb_clean_rx_ring(struct igb_ring *);
108 static void igb_set_rx_mode(struct net_device *);
109 static void igb_update_phy_info(unsigned long);
110 static void igb_watchdog(unsigned long);
111 static void igb_watchdog_task(struct work_struct *);
112 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
113 static struct net_device_stats *igb_get_stats(struct net_device *);
114 static int igb_change_mtu(struct net_device *, int);
115 static int igb_set_mac(struct net_device *, void *);
116 static void igb_set_uta(struct igb_adapter *adapter);
117 static irqreturn_t igb_intr(int irq, void *);
118 static irqreturn_t igb_intr_msi(int irq, void *);
119 static irqreturn_t igb_msix_other(int irq, void *);
120 static irqreturn_t igb_msix_ring(int irq, void *);
121 #ifdef CONFIG_IGB_DCA
122 static void igb_update_dca(struct igb_q_vector *);
123 static void igb_setup_dca(struct igb_adapter *);
124 #endif /* CONFIG_IGB_DCA */
125 static bool igb_clean_tx_irq(struct igb_q_vector *);
126 static int igb_poll(struct napi_struct *, int);
127 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
128 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
129 static void igb_tx_timeout(struct net_device *);
130 static void igb_reset_task(struct work_struct *);
131 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
132 static void igb_vlan_rx_add_vid(struct net_device *, u16);
133 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
134 static void igb_restore_vlan(struct igb_adapter *);
135 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
136 static void igb_ping_all_vfs(struct igb_adapter *);
137 static void igb_msg_task(struct igb_adapter *);
138 static void igb_vmm_control(struct igb_adapter *);
139 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
140 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
141 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
142 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
143                                int vf, u16 vlan, u8 qos);
144 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
145 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
146                                  struct ifla_vf_info *ivi);
147
148 #ifdef CONFIG_PM
149 static int igb_suspend(struct pci_dev *, pm_message_t);
150 static int igb_resume(struct pci_dev *);
151 #endif
152 static void igb_shutdown(struct pci_dev *);
153 #ifdef CONFIG_IGB_DCA
154 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
155 static struct notifier_block dca_notifier = {
156         .notifier_call  = igb_notify_dca,
157         .next           = NULL,
158         .priority       = 0
159 };
160 #endif
161 #ifdef CONFIG_NET_POLL_CONTROLLER
162 /* for netdump / net console */
163 static void igb_netpoll(struct net_device *);
164 #endif
165 #ifdef CONFIG_PCI_IOV
166 static unsigned int max_vfs = 0;
167 module_param(max_vfs, uint, 0);
168 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
169                  "per physical function");
170 #endif /* CONFIG_PCI_IOV */
171
172 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
173                      pci_channel_state_t);
174 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
175 static void igb_io_resume(struct pci_dev *);
176
177 static struct pci_error_handlers igb_err_handler = {
178         .error_detected = igb_io_error_detected,
179         .slot_reset = igb_io_slot_reset,
180         .resume = igb_io_resume,
181 };
182
183
184 static struct pci_driver igb_driver = {
185         .name     = igb_driver_name,
186         .id_table = igb_pci_tbl,
187         .probe    = igb_probe,
188         .remove   = __devexit_p(igb_remove),
189 #ifdef CONFIG_PM
190         /* Power Management Hooks */
191         .suspend  = igb_suspend,
192         .resume   = igb_resume,
193 #endif
194         .shutdown = igb_shutdown,
195         .err_handler = &igb_err_handler
196 };
197
198 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
199 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
200 MODULE_LICENSE("GPL");
201 MODULE_VERSION(DRV_VERSION);
202
203 /**
204  * igb_read_clock - read raw cycle counter (to be used by time counter)
205  */
206 static cycle_t igb_read_clock(const struct cyclecounter *tc)
207 {
208         struct igb_adapter *adapter =
209                 container_of(tc, struct igb_adapter, cycles);
210         struct e1000_hw *hw = &adapter->hw;
211         u64 stamp = 0;
212         int shift = 0;
213
214         /*
215          * The timestamp latches on lowest register read. For the 82580
216          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
217          * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
218          */
219         if (hw->mac.type == e1000_82580) {
220                 stamp = rd32(E1000_SYSTIMR) >> 8;
221                 shift = IGB_82580_TSYNC_SHIFT;
222         }
223
224         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
225         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
226         return stamp;
227 }
228
229 /**
230  * igb_get_hw_dev - return device
231  * used by hardware layer to print debugging information
232  **/
233 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
234 {
235         struct igb_adapter *adapter = hw->back;
236         return adapter->netdev;
237 }
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
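/*
 * Q_IDX_82576(i) maps ring index i to its 82576 queue register index:
 * even rings use queues 0-7 (i >> 1) and odd rings use queues 8-15
 * (8 + (i >> 1)), matching the VF queue pairing described in
 * igb_cache_ring_register() below.
 */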
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->rss_queues; i++)
300                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
301                                                                Q_IDX_82576(i);
302                         for (; j < adapter->rss_queues; j++)
303                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
304                                                                Q_IDX_82576(j);
305                 }
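                /* fall through - remaining queues get contiguous reg_idx values */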
306         case e1000_82575:
307         case e1000_82580:
308         case e1000_i350:
309         default:
310                 for (; i < adapter->num_rx_queues; i++)
311                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
312                 for (; j < adapter->num_tx_queues; j++)
313                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
314                 break;
315         }
316 }
317
318 static void igb_free_queues(struct igb_adapter *adapter)
319 {
320         int i;
321
322         for (i = 0; i < adapter->num_tx_queues; i++) {
323                 kfree(adapter->tx_ring[i]);
324                 adapter->tx_ring[i] = NULL;
325         }
326         for (i = 0; i < adapter->num_rx_queues; i++) {
327                 kfree(adapter->rx_ring[i]);
328                 adapter->rx_ring[i] = NULL;
329         }
330         adapter->num_rx_queues = 0;
331         adapter->num_tx_queues = 0;
332 }
333
334 /**
335  * igb_alloc_queues - Allocate memory for all rings
336  * @adapter: board private structure to initialize
337  *
338  * We allocate one ring per queue at run-time since we don't know the
339  * number of queues at compile-time.
340  **/
341 static int igb_alloc_queues(struct igb_adapter *adapter)
342 {
343         struct igb_ring *ring;
344         int i;
345
346         for (i = 0; i < adapter->num_tx_queues; i++) {
347                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
348                 if (!ring)
349                         goto err;
350                 ring->count = adapter->tx_ring_count;
351                 ring->queue_index = i;
352                 ring->pdev = adapter->pdev;
353                 ring->netdev = adapter->netdev;
354                 /* For 82575, context index must be unique per ring. */
355                 if (adapter->hw.mac.type == e1000_82575)
356                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
357                 adapter->tx_ring[i] = ring;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
362                 if (!ring)
363                         goto err;
364                 ring->count = adapter->rx_ring_count;
365                 ring->queue_index = i;
366                 ring->pdev = adapter->pdev;
367                 ring->netdev = adapter->netdev;
368                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
369                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
370                 /* set flag indicating ring supports SCTP checksum offload */
371                 if (adapter->hw.mac.type >= e1000_82576)
372                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
373                 adapter->rx_ring[i] = ring;
374         }
375
376         igb_cache_ring_register(adapter);
377
378         return 0;
379
380 err:
381         igb_free_queues(adapter);
382
383         return -ENOMEM;
384 }
385
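/* sentinel reg_idx value meaning no queue is mapped to this vector */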
386 #define IGB_N0_QUEUE -1
387 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
388 {
389         u32 msixbm = 0;
390         struct igb_adapter *adapter = q_vector->adapter;
391         struct e1000_hw *hw = &adapter->hw;
392         u32 ivar, index;
393         int rx_queue = IGB_N0_QUEUE;
394         int tx_queue = IGB_N0_QUEUE;
395
396         if (q_vector->rx_ring)
397                 rx_queue = q_vector->rx_ring->reg_idx;
398         if (q_vector->tx_ring)
399                 tx_queue = q_vector->tx_ring->reg_idx;
400
401         switch (hw->mac.type) {
402         case e1000_82575:
403                 /* The 82575 assigns vectors using a bitmask, which matches the
404                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
405                    or more queues to a vector, we write the appropriate bits
406                    into the MSIXBM register for that vector. */
407                 if (rx_queue > IGB_N0_QUEUE)
408                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
409                 if (tx_queue > IGB_N0_QUEUE)
410                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
411                 if (!adapter->msix_entries && msix_vector == 0)
412                         msixbm |= E1000_EIMS_OTHER;
413                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
414                 q_vector->eims_value = msixbm;
415                 break;
416         case e1000_82576:
417                 /* 82576 uses a table-based method for assigning vectors.
418                    Each queue has a single entry in the table to which we write
419                    a vector number along with a "valid" bit.  Sadly, the layout
420                    of the table is somewhat counterintuitive. */
421                 if (rx_queue > IGB_N0_QUEUE) {
422                         index = (rx_queue & 0x7);
423                         ivar = array_rd32(E1000_IVAR0, index);
424                         if (rx_queue < 8) {
425                                 /* vector goes into low byte of register */
426                                 ivar = ivar & 0xFFFFFF00;
427                                 ivar |= msix_vector | E1000_IVAR_VALID;
428                         } else {
429                                 /* vector goes into third byte of register */
430                                 ivar = ivar & 0xFF00FFFF;
431                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
432                         }
433                         array_wr32(E1000_IVAR0, index, ivar);
434                 }
435                 if (tx_queue > IGB_N0_QUEUE) {
436                         index = (tx_queue & 0x7);
437                         ivar = array_rd32(E1000_IVAR0, index);
438                         if (tx_queue < 8) {
439                                 /* vector goes into second byte of register */
440                                 ivar = ivar & 0xFFFF00FF;
441                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
442                         } else {
443                                 /* vector goes into high byte of register */
444                                 ivar = ivar & 0x00FFFFFF;
445                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
446                         }
447                         array_wr32(E1000_IVAR0, index, ivar);
448                 }
449                 q_vector->eims_value = 1 << msix_vector;
450                 break;
451         case e1000_82580:
452         case e1000_i350:
453                 /* 82580 uses the same table-based approach as 82576 but has fewer
454                    entries; as a result we carry over for queues greater than 4. */
455                 if (rx_queue > IGB_N0_QUEUE) {
456                         index = (rx_queue >> 1);
457                         ivar = array_rd32(E1000_IVAR0, index);
458                         if (rx_queue & 0x1) {
459                                 /* vector goes into third byte of register */
460                                 ivar = ivar & 0xFF00FFFF;
461                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
462                         } else {
463                                 /* vector goes into low byte of register */
464                                 ivar = ivar & 0xFFFFFF00;
465                                 ivar |= msix_vector | E1000_IVAR_VALID;
466                         }
467                         array_wr32(E1000_IVAR0, index, ivar);
468                 }
469                 if (tx_queue > IGB_N0_QUEUE) {
470                         index = (tx_queue >> 1);
471                         ivar = array_rd32(E1000_IVAR0, index);
472                         if (tx_queue & 0x1) {
473                                 /* vector goes into high byte of register */
474                                 ivar = ivar & 0x00FFFFFF;
475                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
476                         } else {
477                                 /* vector goes into second byte of register */
478                                 ivar = ivar & 0xFFFF00FF;
479                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
480                         }
481                         array_wr32(E1000_IVAR0, index, ivar);
482                 }
483                 q_vector->eims_value = 1 << msix_vector;
484                 break;
485         default:
486                 BUG();
487                 break;
488         }
489
490         /* add q_vector eims value to global eims_enable_mask */
491         adapter->eims_enable_mask |= q_vector->eims_value;
492
493         /* configure q_vector to set itr on first interrupt */
494         q_vector->set_itr = 1;
495 }
496
497 /**
498  * igb_configure_msix - Configure MSI-X hardware
499  *
500  * igb_configure_msix sets up the hardware to properly
501  * generate MSI-X interrupts.
502  **/
503 static void igb_configure_msix(struct igb_adapter *adapter)
504 {
505         u32 tmp;
506         int i, vector = 0;
507         struct e1000_hw *hw = &adapter->hw;
508
509         adapter->eims_enable_mask = 0;
510
511         /* set vector for other causes, i.e. link changes */
512         switch (hw->mac.type) {
513         case e1000_82575:
514                 tmp = rd32(E1000_CTRL_EXT);
515                 /* enable MSI-X PBA support */
516                 tmp |= E1000_CTRL_EXT_PBA_CLR;
517
518                 /* Auto-Mask interrupts upon ICR read. */
519                 tmp |= E1000_CTRL_EXT_EIAME;
520                 tmp |= E1000_CTRL_EXT_IRCA;
521
522                 wr32(E1000_CTRL_EXT, tmp);
523
524                 /* enable msix_other interrupt */
525                 array_wr32(E1000_MSIXBM(0), vector++,
526                                       E1000_EIMS_OTHER);
527                 adapter->eims_other = E1000_EIMS_OTHER;
528
529                 break;
530
531         case e1000_82576:
532         case e1000_82580:
533         case e1000_i350:
534                 /* Turn on MSI-X capability first, or our settings
535                  * won't stick.  And it will take days to debug. */
536                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
537                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
538                                 E1000_GPIE_NSICR);
539
540                 /* enable msix_other interrupt */
541                 adapter->eims_other = 1 << vector;
542                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
543
544                 wr32(E1000_IVAR_MISC, tmp);
545                 break;
546         default:
547                 /* do nothing, since nothing else supports MSI-X */
548                 break;
549         } /* switch (hw->mac.type) */
550
551         adapter->eims_enable_mask |= adapter->eims_other;
552
553         for (i = 0; i < adapter->num_q_vectors; i++)
554                 igb_assign_vector(adapter->q_vector[i], vector++);
555
556         wrfl();
557 }
558
559 /**
560  * igb_request_msix - Initialize MSI-X interrupts
561  *
562  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
563  * kernel.
564  **/
565 static int igb_request_msix(struct igb_adapter *adapter)
566 {
567         struct net_device *netdev = adapter->netdev;
568         struct e1000_hw *hw = &adapter->hw;
569         int i, err = 0, vector = 0;
570
571         err = request_irq(adapter->msix_entries[vector].vector,
572                           igb_msix_other, 0, netdev->name, adapter);
573         if (err)
574                 goto out;
575         vector++;
576
577         for (i = 0; i < adapter->num_q_vectors; i++) {
578                 struct igb_q_vector *q_vector = adapter->q_vector[i];
579
580                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
581
582                 if (q_vector->rx_ring && q_vector->tx_ring)
583                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
584                                 q_vector->rx_ring->queue_index);
585                 else if (q_vector->tx_ring)
586                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
587                                 q_vector->tx_ring->queue_index);
588                 else if (q_vector->rx_ring)
589                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
590                                 q_vector->rx_ring->queue_index);
591                 else
592                         sprintf(q_vector->name, "%s-unused", netdev->name);
593
594                 err = request_irq(adapter->msix_entries[vector].vector,
595                                   igb_msix_ring, 0, q_vector->name,
596                                   q_vector);
597                 if (err)
598                         goto out;
599                 vector++;
600         }
601
602         igb_configure_msix(adapter);
603         return 0;
604 out:
605         return err;
606 }
607
608 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
609 {
610         if (adapter->msix_entries) {
611                 pci_disable_msix(adapter->pdev);
612                 kfree(adapter->msix_entries);
613                 adapter->msix_entries = NULL;
614         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
615                 pci_disable_msi(adapter->pdev);
616         }
617 }
618
619 /**
620  * igb_free_q_vectors - Free memory allocated for interrupt vectors
621  * @adapter: board private structure to initialize
622  *
623  * This function frees the memory allocated to the q_vectors.  In addition, if
624  * NAPI is enabled, it will delete any references to the NAPI struct prior
625  * to freeing the q_vector.
626  **/
627 static void igb_free_q_vectors(struct igb_adapter *adapter)
628 {
629         int v_idx;
630
631         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
632                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
633                 adapter->q_vector[v_idx] = NULL;
634                 if (!q_vector)
635                         continue;
636                 netif_napi_del(&q_vector->napi);
637                 kfree(q_vector);
638         }
639         adapter->num_q_vectors = 0;
640 }
641
642 /**
643  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
644  *
645  * This function resets the device so that it has no Rx queues, Tx queues, or
646  * MSI-X interrupts allocated.
647  */
648 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
649 {
650         igb_free_queues(adapter);
651         igb_free_q_vectors(adapter);
652         igb_reset_interrupt_capability(adapter);
653 }
654
655 /**
656  * igb_set_interrupt_capability - set MSI or MSI-X if supported
657  *
658  * Attempt to configure interrupts using the best available
659  * capabilities of the hardware and kernel.
660  **/
661 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
662 {
663         int err;
664         int numvecs, i;
665
666         /* Number of supported queues. */
667         adapter->num_rx_queues = adapter->rss_queues;
668         adapter->num_tx_queues = adapter->rss_queues;
669
670         /* start with one vector for every rx queue */
671         numvecs = adapter->num_rx_queues;
672
673         /* if tx handler is separate add 1 for every tx queue */
674         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
675                 numvecs += adapter->num_tx_queues;
676
677         /* store the number of vectors reserved for queues */
678         adapter->num_q_vectors = numvecs;
679
680         /* add 1 vector for link status interrupts */
681         numvecs++;
682         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
683                                         GFP_KERNEL);
684         if (!adapter->msix_entries)
685                 goto msi_only;
686
687         for (i = 0; i < numvecs; i++)
688                 adapter->msix_entries[i].entry = i;
689
690         err = pci_enable_msix(adapter->pdev,
691                               adapter->msix_entries,
692                               numvecs);
693         if (err == 0)
694                 goto out;
695
696         igb_reset_interrupt_capability(adapter);
697
698         /* If we can't do MSI-X, try MSI */
699 msi_only:
700 #ifdef CONFIG_PCI_IOV
701         /* disable SR-IOV for non MSI-X configurations */
702         if (adapter->vf_data) {
703                 struct e1000_hw *hw = &adapter->hw;
704                 /* disable iov and allow time for transactions to clear */
705                 pci_disable_sriov(adapter->pdev);
706                 msleep(500);
707
708                 kfree(adapter->vf_data);
709                 adapter->vf_data = NULL;
710                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
711                 msleep(100);
712                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
713         }
714 #endif
715         adapter->vfs_allocated_count = 0;
716         adapter->rss_queues = 1;
717         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
718         adapter->num_rx_queues = 1;
719         adapter->num_tx_queues = 1;
720         adapter->num_q_vectors = 1;
721         if (!pci_enable_msi(adapter->pdev))
722                 adapter->flags |= IGB_FLAG_HAS_MSI;
723 out:
724         /* Notify the stack of the (possibly) reduced Tx Queue count. */
725         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
726         return;
727 }
728
729 /**
730  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
731  * @adapter: board private structure to initialize
732  *
733  * We allocate one q_vector per queue interrupt.  If allocation fails we
734  * return -ENOMEM.
735  **/
736 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
737 {
738         struct igb_q_vector *q_vector;
739         struct e1000_hw *hw = &adapter->hw;
740         int v_idx;
741
742         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
743                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
744                 if (!q_vector)
745                         goto err_out;
746                 q_vector->adapter = adapter;
747                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
748                 q_vector->itr_val = IGB_START_ITR;
749                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
750                 adapter->q_vector[v_idx] = q_vector;
751         }
752         return 0;
753
754 err_out:
755         igb_free_q_vectors(adapter);
756         return -ENOMEM;
757 }
758
759 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
760                                       int ring_idx, int v_idx)
761 {
762         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
763
764         q_vector->rx_ring = adapter->rx_ring[ring_idx];
765         q_vector->rx_ring->q_vector = q_vector;
766         q_vector->itr_val = adapter->rx_itr_setting;
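        /* itr_setting values of 1-3 select a dynamic ITR mode, not a real interval */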
767         if (q_vector->itr_val && q_vector->itr_val <= 3)
768                 q_vector->itr_val = IGB_START_ITR;
769 }
770
771 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
772                                       int ring_idx, int v_idx)
773 {
774         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
775
776         q_vector->tx_ring = adapter->tx_ring[ring_idx];
777         q_vector->tx_ring->q_vector = q_vector;
778         q_vector->itr_val = adapter->tx_itr_setting;
779         if (q_vector->itr_val && q_vector->itr_val <= 3)
780                 q_vector->itr_val = IGB_START_ITR;
781 }
782
783 /**
784  * igb_map_ring_to_vector - maps allocated queues to vectors
785  *
786  * This function maps the recently allocated queues to vectors.
787  **/
788 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
789 {
790         int i;
791         int v_idx = 0;
792
793         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
794             (adapter->num_q_vectors < adapter->num_tx_queues))
795                 return -ENOMEM;
796
797         if (adapter->num_q_vectors >=
798             (adapter->num_rx_queues + adapter->num_tx_queues)) {
799                 for (i = 0; i < adapter->num_rx_queues; i++)
800                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
801                 for (i = 0; i < adapter->num_tx_queues; i++)
802                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
803         } else {
804                 for (i = 0; i < adapter->num_rx_queues; i++) {
805                         if (i < adapter->num_tx_queues)
806                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
807                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
808                 }
809                 for (; i < adapter->num_tx_queues; i++)
810                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
811         }
812         return 0;
813 }
814
815 /**
816  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
817  *
818  * This function initializes the interrupts and allocates all of the queues.
819  **/
820 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
821 {
822         struct pci_dev *pdev = adapter->pdev;
823         int err;
824
825         igb_set_interrupt_capability(adapter);
826
827         err = igb_alloc_q_vectors(adapter);
828         if (err) {
829                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
830                 goto err_alloc_q_vectors;
831         }
832
833         err = igb_alloc_queues(adapter);
834         if (err) {
835                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
836                 goto err_alloc_queues;
837         }
838
839         err = igb_map_ring_to_vector(adapter);
840         if (err) {
841                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
842                 goto err_map_queues;
843         }
844
845
846         return 0;
847 err_map_queues:
848         igb_free_queues(adapter);
849 err_alloc_queues:
850         igb_free_q_vectors(adapter);
851 err_alloc_q_vectors:
852         igb_reset_interrupt_capability(adapter);
853         return err;
854 }
855
856 /**
857  * igb_request_irq - initialize interrupts
858  *
859  * Attempts to configure interrupts using the best available
860  * capabilities of the hardware and kernel.
861  **/
862 static int igb_request_irq(struct igb_adapter *adapter)
863 {
864         struct net_device *netdev = adapter->netdev;
865         struct pci_dev *pdev = adapter->pdev;
866         int err = 0;
867
868         if (adapter->msix_entries) {
869                 err = igb_request_msix(adapter);
870                 if (!err)
871                         goto request_done;
872                 /* fall back to MSI */
873                 igb_clear_interrupt_scheme(adapter);
874                 if (!pci_enable_msi(adapter->pdev))
875                         adapter->flags |= IGB_FLAG_HAS_MSI;
876                 igb_free_all_tx_resources(adapter);
877                 igb_free_all_rx_resources(adapter);
878                 adapter->num_tx_queues = 1;
879                 adapter->num_rx_queues = 1;
880                 adapter->num_q_vectors = 1;
881                 err = igb_alloc_q_vectors(adapter);
882                 if (err) {
883                         dev_err(&pdev->dev,
884                                 "Unable to allocate memory for vectors\n");
885                         goto request_done;
886                 }
887                 err = igb_alloc_queues(adapter);
888                 if (err) {
889                         dev_err(&pdev->dev,
890                                 "Unable to allocate memory for queues\n");
891                         igb_free_q_vectors(adapter);
892                         goto request_done;
893                 }
894                 igb_setup_all_tx_resources(adapter);
895                 igb_setup_all_rx_resources(adapter);
896         } else {
897                 igb_assign_vector(adapter->q_vector[0], 0);
898         }
899
900         if (adapter->flags & IGB_FLAG_HAS_MSI) {
901                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
902                                   netdev->name, adapter);
903                 if (!err)
904                         goto request_done;
905
906                 /* fall back to legacy interrupts */
907                 igb_reset_interrupt_capability(adapter);
908                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
909         }
910
911         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
912                           netdev->name, adapter);
913
914         if (err)
915                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
916                         err);
917
918 request_done:
919         return err;
920 }
921
922 static void igb_free_irq(struct igb_adapter *adapter)
923 {
924         if (adapter->msix_entries) {
925                 int vector = 0, i;
926
927                 free_irq(adapter->msix_entries[vector++].vector, adapter);
928
929                 for (i = 0; i < adapter->num_q_vectors; i++) {
930                         struct igb_q_vector *q_vector = adapter->q_vector[i];
931                         free_irq(adapter->msix_entries[vector++].vector,
932                                  q_vector);
933                 }
934         } else {
935                 free_irq(adapter->pdev->irq, adapter);
936         }
937 }
938
939 /**
940  * igb_irq_disable - Mask off interrupt generation on the NIC
941  * @adapter: board private structure
942  **/
943 static void igb_irq_disable(struct igb_adapter *adapter)
944 {
945         struct e1000_hw *hw = &adapter->hw;
946
947         /*
948          * we need to be careful when disabling interrupts.  The VFs are also
949          * mapped into these registers, so clearing the bits can cause
950          * issues for the VF drivers; we only need to clear what we set
951          */
952         if (adapter->msix_entries) {
953                 u32 regval = rd32(E1000_EIAM);
954                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
955                 wr32(E1000_EIMC, adapter->eims_enable_mask);
956                 regval = rd32(E1000_EIAC);
957                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
958         }
959
960         wr32(E1000_IAM, 0);
961         wr32(E1000_IMC, ~0);
962         wrfl();
963         synchronize_irq(adapter->pdev->irq);
964 }
965
966 /**
967  * igb_irq_enable - Enable default interrupt generation settings
968  * @adapter: board private structure
969  **/
970 static void igb_irq_enable(struct igb_adapter *adapter)
971 {
972         struct e1000_hw *hw = &adapter->hw;
973
974         if (adapter->msix_entries) {
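                /* start the IMS mask with link status change and DMA out-of-sync causes */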
975                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
976                 u32 regval = rd32(E1000_EIAC);
977                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
978                 regval = rd32(E1000_EIAM);
979                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
980                 wr32(E1000_EIMS, adapter->eims_enable_mask);
981                 if (adapter->vfs_allocated_count) {
982                         wr32(E1000_MBVFIMR, 0xFF);
983                         ims |= E1000_IMS_VMMB;
984                 }
985                 if (adapter->hw.mac.type == e1000_82580)
986                         ims |= E1000_IMS_DRSTA;
987
988                 wr32(E1000_IMS, ims);
989         } else {
990                 wr32(E1000_IMS, IMS_ENABLE_MASK |
991                                 E1000_IMS_DRSTA);
992                 wr32(E1000_IAM, IMS_ENABLE_MASK |
993                                 E1000_IMS_DRSTA);
994         }
995 }
996
997 static void igb_update_mng_vlan(struct igb_adapter *adapter)
998 {
999         struct e1000_hw *hw = &adapter->hw;
1000         u16 vid = adapter->hw.mng_cookie.vlan_id;
1001         u16 old_vid = adapter->mng_vlan_id;
1002
1003         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1004                 /* add VID to filter table */
1005                 igb_vfta_set(hw, vid, true);
1006                 adapter->mng_vlan_id = vid;
1007         } else {
1008                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1009         }
1010
1011         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1012             (vid != old_vid) &&
1013             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1014                 /* remove VID from filter table */
1015                 igb_vfta_set(hw, old_vid, false);
1016         }
1017 }
1018
1019 /**
1020  * igb_release_hw_control - release control of the h/w to f/w
1021  * @adapter: address of board private structure
1022  *
1023  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1024  * For ASF and Pass Through versions of f/w this means that the
1025  * driver is no longer loaded.
1026  *
1027  **/
1028 static void igb_release_hw_control(struct igb_adapter *adapter)
1029 {
1030         struct e1000_hw *hw = &adapter->hw;
1031         u32 ctrl_ext;
1032
1033         /* Let firmware take over control of h/w */
1034         ctrl_ext = rd32(E1000_CTRL_EXT);
1035         wr32(E1000_CTRL_EXT,
1036                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1037 }
1038
1039 /**
1040  * igb_get_hw_control - get control of the h/w from f/w
1041  * @adapter: address of board private structure
1042  *
1043  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1044  * For ASF and Pass Through versions of f/w this means that
1045  * the driver is loaded.
1046  *
1047  **/
1048 static void igb_get_hw_control(struct igb_adapter *adapter)
1049 {
1050         struct e1000_hw *hw = &adapter->hw;
1051         u32 ctrl_ext;
1052
1053         /* Let firmware know the driver has taken over */
1054         ctrl_ext = rd32(E1000_CTRL_EXT);
1055         wr32(E1000_CTRL_EXT,
1056                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1057 }
1058
1059 /**
1060  * igb_configure - configure the hardware for RX and TX
1061  * @adapter: private board structure
1062  **/
1063 static void igb_configure(struct igb_adapter *adapter)
1064 {
1065         struct net_device *netdev = adapter->netdev;
1066         int i;
1067
1068         igb_get_hw_control(adapter);
1069         igb_set_rx_mode(netdev);
1070
1071         igb_restore_vlan(adapter);
1072
1073         igb_setup_tctl(adapter);
1074         igb_setup_mrqc(adapter);
1075         igb_setup_rctl(adapter);
1076
1077         igb_configure_tx(adapter);
1078         igb_configure_rx(adapter);
1079
1080         igb_rx_fifo_flush_82575(&adapter->hw);
1081
1082         /* call igb_desc_unused which always leaves
1083          * at least 1 descriptor unused to make sure
1084          * next_to_use != next_to_clean */
1085         for (i = 0; i < adapter->num_rx_queues; i++) {
1086                 struct igb_ring *ring = adapter->rx_ring[i];
1087                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1088         }
1089
1090
1091         adapter->tx_queue_len = netdev->tx_queue_len;
1092 }
1093
1094 /**
1095  * igb_power_up_link - Power up the phy/serdes link
1096  * @adapter: address of board private structure
1097  **/
1098 void igb_power_up_link(struct igb_adapter *adapter)
1099 {
1100         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1101                 igb_power_up_phy_copper(&adapter->hw);
1102         else
1103                 igb_power_up_serdes_link_82575(&adapter->hw);
1104 }
1105
1106 /**
1107  * igb_power_down_link - Power down the phy/serdes link
1108  * @adapter: address of board private structure
1109  */
1110 static void igb_power_down_link(struct igb_adapter *adapter)
1111 {
1112         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1113                 igb_power_down_phy_copper_82575(&adapter->hw);
1114         else
1115                 igb_shutdown_serdes_link_82575(&adapter->hw);
1116 }
1117
1118 /**
1119  * igb_up - Open the interface and prepare it to handle traffic
1120  * @adapter: board private structure
1121  **/
1122 int igb_up(struct igb_adapter *adapter)
1123 {
1124         struct e1000_hw *hw = &adapter->hw;
1125         int i;
1126
1127         /* hardware has been reset, we need to reload some things */
1128         igb_configure(adapter);
1129
1130         clear_bit(__IGB_DOWN, &adapter->state);
1131
1132         for (i = 0; i < adapter->num_q_vectors; i++) {
1133                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1134                 napi_enable(&q_vector->napi);
1135         }
1136         if (adapter->msix_entries)
1137                 igb_configure_msix(adapter);
1138         else
1139                 igb_assign_vector(adapter->q_vector[0], 0);
1140
1141         /* Clear any pending interrupts. */
1142         rd32(E1000_ICR);
1143         igb_irq_enable(adapter);
1144
1145         /* notify VFs that reset has been completed */
1146         if (adapter->vfs_allocated_count) {
1147                 u32 reg_data = rd32(E1000_CTRL_EXT);
1148                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1149                 wr32(E1000_CTRL_EXT, reg_data);
1150         }
1151
1152         netif_tx_start_all_queues(adapter->netdev);
1153
1154         /* start the watchdog. */
1155         hw->mac.get_link_status = 1;
1156         schedule_work(&adapter->watchdog_task);
1157
1158         return 0;
1159 }
1160
1161 void igb_down(struct igb_adapter *adapter)
1162 {
1163         struct net_device *netdev = adapter->netdev;
1164         struct e1000_hw *hw = &adapter->hw;
1165         u32 tctl, rctl;
1166         int i;
1167
1168         /* signal that we're down so the interrupt handler does not
1169          * reschedule our watchdog timer */
1170         set_bit(__IGB_DOWN, &adapter->state);
1171
1172         /* disable receives in the hardware */
1173         rctl = rd32(E1000_RCTL);
1174         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1175         /* flush and sleep below */
1176
1177         netif_tx_stop_all_queues(netdev);
1178
1179         /* disable transmits in the hardware */
1180         tctl = rd32(E1000_TCTL);
1181         tctl &= ~E1000_TCTL_EN;
1182         wr32(E1000_TCTL, tctl);
1183         /* flush both disables and wait for them to finish */
1184         wrfl();
1185         msleep(10);
1186
1187         for (i = 0; i < adapter->num_q_vectors; i++) {
1188                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1189                 napi_disable(&q_vector->napi);
1190         }
1191
1192         igb_irq_disable(adapter);
1193
1194         del_timer_sync(&adapter->watchdog_timer);
1195         del_timer_sync(&adapter->phy_info_timer);
1196
1197         netdev->tx_queue_len = adapter->tx_queue_len;
1198         netif_carrier_off(netdev);
1199
1200         /* record the stats before reset */
1201         igb_update_stats(adapter);
1202
1203         adapter->link_speed = 0;
1204         adapter->link_duplex = 0;
1205
1206         if (!pci_channel_offline(adapter->pdev))
1207                 igb_reset(adapter);
1208         igb_clean_all_tx_rings(adapter);
1209         igb_clean_all_rx_rings(adapter);
1210 #ifdef CONFIG_IGB_DCA
1211
1212         /* since we reset the hardware DCA settings were cleared */
1213         igb_setup_dca(adapter);
1214 #endif
1215 }
1216
1217 void igb_reinit_locked(struct igb_adapter *adapter)
1218 {
1219         WARN_ON(in_interrupt());
1220         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1221                 msleep(1);
1222         igb_down(adapter);
1223         igb_up(adapter);
1224         clear_bit(__IGB_RESETTING, &adapter->state);
1225 }
1226
1227 void igb_reset(struct igb_adapter *adapter)
1228 {
1229         struct pci_dev *pdev = adapter->pdev;
1230         struct e1000_hw *hw = &adapter->hw;
1231         struct e1000_mac_info *mac = &hw->mac;
1232         struct e1000_fc_info *fc = &hw->fc;
1233         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1234         u16 hwm;
1235
1236         /* Repartition PBA for MTUs greater than 9k.
1237          * CTRL.RST is required for the change to take effect.
1238          */
1239         switch (mac->type) {
1240         case e1000_i350:
1241         case e1000_82580:
1242                 pba = rd32(E1000_RXPBS);
1243                 pba = igb_rxpbs_adjust_82580(pba);
1244                 break;
1245         case e1000_82576:
1246                 pba = rd32(E1000_RXPBS);
1247                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1248                 break;
1249         case e1000_82575:
1250         default:
1251                 pba = E1000_PBA_34K;
1252                 break;
1253         }
1254
1255         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1256             (mac->type < e1000_82576)) {
1257                 /* adjust PBA for jumbo frames */
1258                 wr32(E1000_PBA, pba);
1259
1260                 /* To maintain wire speed transmits, the Tx FIFO should be
1261                  * large enough to accommodate two full transmit packets,
1262                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1263                  * the Rx FIFO should be large enough to accommodate at least
1264                  * one full receive packet and is similarly rounded up and
1265                  * expressed in KB. */
1266                 pba = rd32(E1000_PBA);
1267                 /* upper 16 bits hold the Tx packet buffer allocation size in KB */
1268                 tx_space = pba >> 16;
1269                 /* lower 16 bits hold the Rx packet buffer allocation size in KB */
1270                 pba &= 0xffff;
1271                 /* the Tx FIFO also stores 16 bytes of information about the Tx,
1272                  * but we don't include the Ethernet FCS because hardware appends it */
1273                 min_tx_space = (adapter->max_frame_size +
1274                                 sizeof(union e1000_adv_tx_desc) -
1275                                 ETH_FCS_LEN) * 2;
1276                 min_tx_space = ALIGN(min_tx_space, 1024);
1277                 min_tx_space >>= 10;
1278                 /* software strips receive CRC, so leave room for it */
1279                 min_rx_space = adapter->max_frame_size;
1280                 min_rx_space = ALIGN(min_rx_space, 1024);
1281                 min_rx_space >>= 10;
1282
1283                 /* If current Tx allocation is less than the min Tx FIFO size,
1284                  * and the min Tx FIFO size is less than the current Rx FIFO
1285                  * allocation, take space away from current Rx allocation */
1286                 if (tx_space < min_tx_space &&
1287                     ((min_tx_space - tx_space) < pba)) {
1288                         pba = pba - (min_tx_space - tx_space);
1289
1290                         /* if short on rx space, rx wins and must trump tx
1291                          * adjustment */
1292                         if (pba < min_rx_space)
1293                                 pba = min_rx_space;
1294                 }
1295                 wr32(E1000_PBA, pba);
1296         }
1297
1298         /* flow control settings */
1299         /* The high water mark must be low enough to fit one full frame
1300          * (or the size used for early receive) above it in the Rx FIFO.
1301          * Set it to the lower of:
1302          * - 90% of the Rx FIFO size, or
1303          * - the full Rx FIFO size minus two full frames */
1304         hwm = min(((pba << 10) * 9 / 10),
1305                         ((pba << 10) - 2 * adapter->max_frame_size));
1306
1307         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1308         fc->low_water = fc->high_water - 16;
1309         fc->pause_time = 0xFFFF;
1310         fc->send_xon = 1;
1311         fc->current_mode = fc->requested_mode;
1312
1313         /* disable transmits and receives for all VFs */
1314         if (adapter->vfs_allocated_count) {
1315                 int i;
1316                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1317                         adapter->vf_data[i].flags = 0;
1318
1319                 /* ping all the active vfs to let them know we are going down */
1320                 igb_ping_all_vfs(adapter);
1321
1322                 /* disable transmits and receives */
1323                 wr32(E1000_VFRE, 0);
1324                 wr32(E1000_VFTE, 0);
1325         }
1326
1327         /* Allow time for pending master requests to run */
1328         hw->mac.ops.reset_hw(hw);
1329         wr32(E1000_WUC, 0);
1330
1331         if (hw->mac.ops.init_hw(hw))
1332                 dev_err(&pdev->dev, "Hardware Error\n");
1333
1334         if (hw->mac.type == e1000_82580) {
1335                 u32 reg = rd32(E1000_PCIEMISC);
1336                 wr32(E1000_PCIEMISC,
1337                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1338         }
1339         if (!netif_running(adapter->netdev))
1340                 igb_power_down_link(adapter);
1341
1342         igb_update_mng_vlan(adapter);
1343
1344         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1345         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1346
1347         igb_get_phy_info(hw);
1348 }
1349
1350 static const struct net_device_ops igb_netdev_ops = {
1351         .ndo_open               = igb_open,
1352         .ndo_stop               = igb_close,
1353         .ndo_start_xmit         = igb_xmit_frame_adv,
1354         .ndo_get_stats          = igb_get_stats,
1355         .ndo_set_rx_mode        = igb_set_rx_mode,
1356         .ndo_set_multicast_list = igb_set_rx_mode,
1357         .ndo_set_mac_address    = igb_set_mac,
1358         .ndo_change_mtu         = igb_change_mtu,
1359         .ndo_do_ioctl           = igb_ioctl,
1360         .ndo_tx_timeout         = igb_tx_timeout,
1361         .ndo_validate_addr      = eth_validate_addr,
1362         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1363         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1364         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1365         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1366         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1367         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1368         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1369 #ifdef CONFIG_NET_POLL_CONTROLLER
1370         .ndo_poll_controller    = igb_netpoll,
1371 #endif
1372 };
1373
1374 /**
1375  * igb_probe - Device Initialization Routine
1376  * @pdev: PCI device information struct
1377  * @ent: entry in igb_pci_tbl
1378  *
1379  * Returns 0 on success, negative on failure
1380  *
1381  * igb_probe initializes an adapter identified by a pci_dev structure.
1382  * The OS initialization, configuring of the adapter private structure,
1383  * and a hardware reset occur.
1384  **/
1385 static int __devinit igb_probe(struct pci_dev *pdev,
1386                                const struct pci_device_id *ent)
1387 {
1388         struct net_device *netdev;
1389         struct igb_adapter *adapter;
1390         struct e1000_hw *hw;
1391         u16 eeprom_data = 0;
1392         static int global_quad_port_a; /* global quad port a indication */
1393         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1394         unsigned long mmio_start, mmio_len;
1395         int err, pci_using_dac;
1396         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1397         u32 part_num;
1398
1399         err = pci_enable_device_mem(pdev);
1400         if (err)
1401                 return err;
1402
1403         pci_using_dac = 0;
1404         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1405         if (!err) {
1406                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1407                 if (!err)
1408                         pci_using_dac = 1;
1409         } else {
1410                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1411                 if (err) {
1412                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1413                         if (err) {
1414                                 dev_err(&pdev->dev, "No usable DMA "
1415                                         "configuration, aborting\n");
1416                                 goto err_dma;
1417                         }
1418                 }
1419         }
1420
1421         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1422                                            IORESOURCE_MEM),
1423                                            igb_driver_name);
1424         if (err)
1425                 goto err_pci_reg;
1426
1427         pci_enable_pcie_error_reporting(pdev);
1428
1429         pci_set_master(pdev);
1430         pci_save_state(pdev);
1431
1432         err = -ENOMEM;
1433         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1434                                    IGB_ABS_MAX_TX_QUEUES);
1435         if (!netdev)
1436                 goto err_alloc_etherdev;
1437
1438         SET_NETDEV_DEV(netdev, &pdev->dev);
1439
1440         pci_set_drvdata(pdev, netdev);
1441         adapter = netdev_priv(netdev);
1442         adapter->netdev = netdev;
1443         adapter->pdev = pdev;
1444         hw = &adapter->hw;
1445         hw->back = adapter;
1446         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1447
1448         mmio_start = pci_resource_start(pdev, 0);
1449         mmio_len = pci_resource_len(pdev, 0);
1450
1451         err = -EIO;
1452         hw->hw_addr = ioremap(mmio_start, mmio_len);
1453         if (!hw->hw_addr)
1454                 goto err_ioremap;
1455
1456         netdev->netdev_ops = &igb_netdev_ops;
1457         igb_set_ethtool_ops(netdev);
1458         netdev->watchdog_timeo = 5 * HZ;
1459
1460         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1461
1462         netdev->mem_start = mmio_start;
1463         netdev->mem_end = mmio_start + mmio_len;
1464
1465         /* PCI config space info */
1466         hw->vendor_id = pdev->vendor;
1467         hw->device_id = pdev->device;
1468         hw->revision_id = pdev->revision;
1469         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1470         hw->subsystem_device_id = pdev->subsystem_device;
1471
1472         /* Copy the default MAC, PHY and NVM function pointers */
1473         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1474         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1475         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1476         /* Initialize skew-specific constants */
1477         err = ei->get_invariants(hw);
1478         if (err)
1479                 goto err_sw_init;
1480
1481         /* setup the private structure */
1482         err = igb_sw_init(adapter);
1483         if (err)
1484                 goto err_sw_init;
1485
1486         igb_get_bus_info_pcie(hw);
1487
1488         hw->phy.autoneg_wait_to_complete = false;
1489
1490         /* Copper options */
1491         if (hw->phy.media_type == e1000_media_type_copper) {
1492                 hw->phy.mdix = AUTO_ALL_MODES;
1493                 hw->phy.disable_polarity_correction = false;
1494                 hw->phy.ms_type = e1000_ms_hw_default;
1495         }
1496
1497         if (igb_check_reset_block(hw))
1498                 dev_info(&pdev->dev,
1499                         "PHY reset is blocked due to SOL/IDER session.\n");
1500
1501         netdev->features = NETIF_F_SG |
1502                            NETIF_F_IP_CSUM |
1503                            NETIF_F_HW_VLAN_TX |
1504                            NETIF_F_HW_VLAN_RX |
1505                            NETIF_F_HW_VLAN_FILTER;
1506
1507         netdev->features |= NETIF_F_IPV6_CSUM;
1508         netdev->features |= NETIF_F_TSO;
1509         netdev->features |= NETIF_F_TSO6;
1510         netdev->features |= NETIF_F_GRO;
1511
1512         netdev->vlan_features |= NETIF_F_TSO;
1513         netdev->vlan_features |= NETIF_F_TSO6;
1514         netdev->vlan_features |= NETIF_F_IP_CSUM;
1515         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1516         netdev->vlan_features |= NETIF_F_SG;
1517
1518         if (pci_using_dac)
1519                 netdev->features |= NETIF_F_HIGHDMA;
1520
1521         if (hw->mac.type >= e1000_82576)
1522                 netdev->features |= NETIF_F_SCTP_CSUM;
1523
1524         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1525
1526         /* before reading the NVM, reset the controller to put the device in a
1527          * known good starting state */
1528         hw->mac.ops.reset_hw(hw);
1529
1530         /* make sure the NVM is good */
1531         if (igb_validate_nvm_checksum(hw) < 0) {
1532                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1533                 err = -EIO;
1534                 goto err_eeprom;
1535         }
1536
1537         /* copy the MAC address out of the NVM */
1538         if (hw->mac.ops.read_mac_addr(hw))
1539                 dev_err(&pdev->dev, "NVM Read Error\n");
1540
1541         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1542         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1543
1544         if (!is_valid_ether_addr(netdev->perm_addr)) {
1545                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1546                 err = -EIO;
1547                 goto err_eeprom;
1548         }
1549
1550         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1551                     (unsigned long) adapter);
1552         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1553                     (unsigned long) adapter);
1554
1555         INIT_WORK(&adapter->reset_task, igb_reset_task);
1556         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1557
1558         /* Initialize link properties that are user-changeable */
1559         adapter->fc_autoneg = true;
1560         hw->mac.autoneg = true;
1561         hw->phy.autoneg_advertised = 0x2f;
1562
1563         hw->fc.requested_mode = e1000_fc_default;
1564         hw->fc.current_mode = e1000_fc_default;
1565
1566         igb_validate_mdi_setting(hw);
1567
1568         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1569          * enable the ACPI Magic Packet filter
1570          */
1571
1572         if (hw->bus.func == 0)
1573                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1574         else if (hw->mac.type == e1000_82580)
1575                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1576                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1577                                  &eeprom_data);
1578         else if (hw->bus.func == 1)
1579                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1580
1581         if (eeprom_data & eeprom_apme_mask)
1582                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1583
1584         /* now that we have the eeprom settings, apply the special cases where
1585          * the eeprom may be wrong or the board simply won't support wake on
1586          * lan on a particular port */
1587         switch (pdev->device) {
1588         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1589                 adapter->eeprom_wol = 0;
1590                 break;
1591         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1592         case E1000_DEV_ID_82576_FIBER:
1593         case E1000_DEV_ID_82576_SERDES:
1594                 /* Wake events are only supported on port A for dual fiber
1595                  * regardless of eeprom setting */
1596                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1597                         adapter->eeprom_wol = 0;
1598                 break;
1599         case E1000_DEV_ID_82576_QUAD_COPPER:
1600                 /* if quad port adapter, disable WoL on all but port A */
1601                 if (global_quad_port_a != 0)
1602                         adapter->eeprom_wol = 0;
1603                 else
1604                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1605                 /* Reset for multiple quad port adapters */
1606                 if (++global_quad_port_a == 4)
1607                         global_quad_port_a = 0;
1608                 break;
1609         }
1610
1611         /* initialize the wol settings based on the eeprom settings */
1612         adapter->wol = adapter->eeprom_wol;
1613         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1614
1615         /* reset the hardware with the new settings */
1616         igb_reset(adapter);
1617
1618         /* let the f/w know that the h/w is now under the control of the
1619          * driver. */
1620         igb_get_hw_control(adapter);
1621
1622         strcpy(netdev->name, "eth%d");
1623         err = register_netdev(netdev);
1624         if (err)
1625                 goto err_register;
1626
1627         /* carrier off reporting is important to ethtool even BEFORE open */
1628         netif_carrier_off(netdev);
1629
1630 #ifdef CONFIG_IGB_DCA
1631         if (dca_add_requester(&pdev->dev) == 0) {
1632                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1633                 dev_info(&pdev->dev, "DCA enabled\n");
1634                 igb_setup_dca(adapter);
1635         }
1636
1637 #endif
1638         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1639         /* print bus type/speed/width info */
1640         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1641                  netdev->name,
1642                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1643                                                             "unknown"),
1644                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1645                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1646                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1647                    "unknown"),
1648                  netdev->dev_addr);
1649
1650         igb_read_part_num(hw, &part_num);
1651         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1652                 (part_num >> 8), (part_num & 0xff));
1653
1654         dev_info(&pdev->dev,
1655                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1656                 adapter->msix_entries ? "MSI-X" :
1657                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1658                 adapter->num_rx_queues, adapter->num_tx_queues);
1659
1660         return 0;
1661
1662 err_register:
1663         igb_release_hw_control(adapter);
1664 err_eeprom:
1665         if (!igb_check_reset_block(hw))
1666                 igb_reset_phy(hw);
1667
1668         if (hw->flash_address)
1669                 iounmap(hw->flash_address);
1670 err_sw_init:
1671         igb_clear_interrupt_scheme(adapter);
1672         iounmap(hw->hw_addr);
1673 err_ioremap:
1674         free_netdev(netdev);
1675 err_alloc_etherdev:
1676         pci_release_selected_regions(pdev,
1677                                      pci_select_bars(pdev, IORESOURCE_MEM));
1678 err_pci_reg:
1679 err_dma:
1680         pci_disable_device(pdev);
1681         return err;
1682 }
1683
1684 /**
1685  * igb_remove - Device Removal Routine
1686  * @pdev: PCI device information struct
1687  *
1688  * igb_remove is called by the PCI subsystem to alert the driver
1689  * that it should release a PCI device.  This could be caused by a
1690  * Hot-Plug event, or because the driver is going to be removed from
1691  * memory.
1692  **/
1693 static void __devexit igb_remove(struct pci_dev *pdev)
1694 {
1695         struct net_device *netdev = pci_get_drvdata(pdev);
1696         struct igb_adapter *adapter = netdev_priv(netdev);
1697         struct e1000_hw *hw = &adapter->hw;
1698
1699         /* flush_scheduled_work() may reschedule our watchdog task, so
1700          * explicitly disable watchdog tasks from being rescheduled  */
1701         set_bit(__IGB_DOWN, &adapter->state);
1702         del_timer_sync(&adapter->watchdog_timer);
1703         del_timer_sync(&adapter->phy_info_timer);
1704
1705         flush_scheduled_work();
1706
1707 #ifdef CONFIG_IGB_DCA
1708         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1709                 dev_info(&pdev->dev, "DCA disabled\n");
1710                 dca_remove_requester(&pdev->dev);
1711                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1712                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1713         }
1714 #endif
1715
1716         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1717          * would have already happened in close and is redundant. */
1718         igb_release_hw_control(adapter);
1719
1720         unregister_netdev(netdev);
1721
1722         igb_clear_interrupt_scheme(adapter);
1723
1724 #ifdef CONFIG_PCI_IOV
1725         /* reclaim resources allocated to VFs */
1726         if (adapter->vf_data) {
1727                 /* disable iov and allow time for transactions to clear */
1728                 pci_disable_sriov(pdev);
1729                 msleep(500);
1730
1731                 kfree(adapter->vf_data);
1732                 adapter->vf_data = NULL;
1733                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1734                 msleep(100);
1735                 dev_info(&pdev->dev, "IOV Disabled\n");
1736         }
1737 #endif
1738
1739         iounmap(hw->hw_addr);
1740         if (hw->flash_address)
1741                 iounmap(hw->flash_address);
1742         pci_release_selected_regions(pdev,
1743                                      pci_select_bars(pdev, IORESOURCE_MEM));
1744
1745         free_netdev(netdev);
1746
1747         pci_disable_pcie_error_reporting(pdev);
1748
1749         pci_disable_device(pdev);
1750 }
1751
1752 /**
1753  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1754  * @adapter: board private structure to initialize
1755  *
1756  * This function initializes the vf specific data storage and then attempts to
1757  * allocate the VFs.  The reason for this ordering is that it is much more
1758  * expensive time-wise to disable SR-IOV than it is to allocate and free
1759  * the memory for the VFs.
1760  **/
1761 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1762 {
1763 #ifdef CONFIG_PCI_IOV
1764         struct pci_dev *pdev = adapter->pdev;
1765
1766         if (adapter->vfs_allocated_count > 7)
1767                 adapter->vfs_allocated_count = 7;
1768
1769         if (adapter->vfs_allocated_count) {
1770                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1771                                            sizeof(struct vf_data_storage),
1772                                            GFP_KERNEL);
1773                 /* if allocation failed then we do not support SR-IOV */
1774                 if (!adapter->vf_data) {
1775                         adapter->vfs_allocated_count = 0;
1776                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1777                                 "Data Storage\n");
1778                 }
1779         }
1780
1781         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1782                 kfree(adapter->vf_data);
1783                 adapter->vf_data = NULL;
1784 #endif /* CONFIG_PCI_IOV */
1785                 adapter->vfs_allocated_count = 0;
1786 #ifdef CONFIG_PCI_IOV
1787         } else {
1788                 unsigned char mac_addr[ETH_ALEN];
1789                 int i;
1790                 dev_info(&pdev->dev, "%d vfs allocated\n",
1791                          adapter->vfs_allocated_count);
1792                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1793                         random_ether_addr(mac_addr);
1794                         igb_set_vf_mac(adapter, i, mac_addr);
1795                 }
1796         }
1797 #endif /* CONFIG_PCI_IOV */
1798 }
1799
1800
1801 /**
1802  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1803  * @adapter: board private structure to initialize
1804  *
1805  * igb_init_hw_timer initializes the function pointer and values for the hw
1806  * timer found in hardware.
1807  **/
1808 static void igb_init_hw_timer(struct igb_adapter *adapter)
1809 {
1810         struct e1000_hw *hw = &adapter->hw;
1811
1812         switch (hw->mac.type) {
1813         case e1000_i350:
1814         case e1000_82580:
1815                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1816                 adapter->cycles.read = igb_read_clock;
1817                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1818                 adapter->cycles.mult = 1;
1819                 /*
1820                  * The 82580 timesync updates the system timer by 8ns every 8ns
1821                  * and the value cannot be shifted.  Instead we need to shift
1822                  * the registers to generate a 64bit timer value.  As a result
1823                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1824                  * 24 in order to generate a larger value for synchronization.
1825                  */
1826                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1827                 /* disable system timer temporarily by setting bit 31 */
1828                 wr32(E1000_TSAUXC, 0x80000000);
1829                 wrfl();
1830
1831                 /* Set registers so that rollover occurs soon to test this. */
1832                 wr32(E1000_SYSTIMR, 0x00000000);
1833                 wr32(E1000_SYSTIML, 0x80000000);
1834                 wr32(E1000_SYSTIMH, 0x000000FF);
1835                 wrfl();
1836
1837                 /* enable system timer by clearing bit 31 */
1838                 wr32(E1000_TSAUXC, 0x0);
1839                 wrfl();
1840
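                /*
                 * timecounter_init() latches the current SYSTIM reading and
                 * pairs it with the wall-clock time, so that later cycle
                 * deltas can be converted into nanoseconds.
                 */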
1841                 timecounter_init(&adapter->clock,
1842                                  &adapter->cycles,
1843                                  ktime_to_ns(ktime_get_real()));
1844                 /*
1845                  * Synchronize our NIC clock against system wall clock. NIC
1846                  * time stamp reading requires ~3us per sample and each sample
1847                  * was pretty stable even under load, so only 10 samples are
1848                  * required for each offset comparison.
1849                  */
1850                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1851                 adapter->compare.source = &adapter->clock;
1852                 adapter->compare.target = ktime_get_real;
1853                 adapter->compare.num_samples = 10;
1854                 timecompare_update(&adapter->compare, 0);
1855                 break;
1856         case e1000_82576:
1857                 /*
1858                  * Initialize hardware timer: we keep it running just in case
1859                  * that some program needs it later on.
1860                  */
1861                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1862                 adapter->cycles.read = igb_read_clock;
1863                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1864                 adapter->cycles.mult = 1;
1865                 /*
1866                  * Scale the NIC clock cycle by a large factor so that
1867                  * relatively small clock corrections can be added or
1868                  * subtracted at each clock tick. The drawbacks of a large
1869                  * factor are a) that the clock register overflows more quickly
1870                  * (not such a big deal) and b) that the increment per tick has
1871                  * to fit into 24 bits.  As a result we need to use a shift of
1872                  * 19 so we can fit a value of 16 into the TIMINCA register.
1873                  */
1874                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1875                 wr32(E1000_TIMINCA,
1876                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1877                                 (16 << IGB_82576_TSYNC_SHIFT));
1878
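                /*
                 * Worked example (assuming the usual cyclecounter conversion
                 * ns = (cycles * mult) >> shift and a 16ns increment period,
                 * as the TIMINCA macro name suggests): each tick adds
                 * 16 << 19 to SYSTIM, and with mult = 1 and shift = 19 that
                 * converts back to exactly 16ns, leaving the low 19 bits
                 * available for fine-grained corrections.
                 */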
1879                 /* Set registers so that rollover occurs soon to test this. */
1880                 wr32(E1000_SYSTIML, 0x00000000);
1881                 wr32(E1000_SYSTIMH, 0xFF800000);
1882                 wrfl();
1883
1884                 timecounter_init(&adapter->clock,
1885                                  &adapter->cycles,
1886                                  ktime_to_ns(ktime_get_real()));
1887                 /*
1888                  * Synchronize our NIC clock against system wall clock. NIC
1889                  * time stamp reading requires ~3us per sample and each sample
1890                  * was pretty stable even under load, so only 10 samples are
1891                  * required for each offset comparison.
1892                  */
1893                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1894                 adapter->compare.source = &adapter->clock;
1895                 adapter->compare.target = ktime_get_real;
1896                 adapter->compare.num_samples = 10;
1897                 timecompare_update(&adapter->compare, 0);
1898                 break;
1899         case e1000_82575:
1900                 /* 82575 does not support timesync */
1901         default:
1902                 break;
1903         }
1904
1905 }
1906
1907 /**
1908  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1909  * @adapter: board private structure to initialize
1910  *
1911  * igb_sw_init initializes the Adapter private data structure.
1912  * Fields are initialized based on PCI device information and
1913  * OS network device settings (MTU size).
1914  **/
1915 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1916 {
1917         struct e1000_hw *hw = &adapter->hw;
1918         struct net_device *netdev = adapter->netdev;
1919         struct pci_dev *pdev = adapter->pdev;
1920
1921         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1922
1923         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1924         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1925         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1926         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1927
1928         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1929         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1930
1931 #ifdef CONFIG_PCI_IOV
1932         if (hw->mac.type == e1000_82576)
1933                 adapter->vfs_allocated_count = max_vfs;
1934
1935 #endif /* CONFIG_PCI_IOV */
1936         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1937
1938         /*
1939          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1940          * then we should combine the queues into a queue pair in order to
1941          * conserve interrupts due to limited supply
1942          */
1943         if ((adapter->rss_queues > 4) ||
1944             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1945                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1946
1947         /* This call may decrease the number of queues */
1948         if (igb_init_interrupt_scheme(adapter)) {
1949                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1950                 return -ENOMEM;
1951         }
1952
1953         igb_init_hw_timer(adapter);
1954         igb_probe_vfs(adapter);
1955
1956         /* Explicitly disable IRQ since the NIC can be in any state. */
1957         igb_irq_disable(adapter);
1958
1959         set_bit(__IGB_DOWN, &adapter->state);
1960         return 0;
1961 }
1962
1963 /**
1964  * igb_open - Called when a network interface is made active
1965  * @netdev: network interface device structure
1966  *
1967  * Returns 0 on success, negative value on failure
1968  *
1969  * The open entry point is called when a network interface is made
1970  * active by the system (IFF_UP).  At this point all resources needed
1971  * for transmit and receive operations are allocated, the interrupt
1972  * handler is registered with the OS, the watchdog timer is started,
1973  * and the stack is notified that the interface is ready.
1974  **/
1975 static int igb_open(struct net_device *netdev)
1976 {
1977         struct igb_adapter *adapter = netdev_priv(netdev);
1978         struct e1000_hw *hw = &adapter->hw;
1979         int err;
1980         int i;
1981
1982         /* disallow open during test */
1983         if (test_bit(__IGB_TESTING, &adapter->state))
1984                 return -EBUSY;
1985
1986         netif_carrier_off(netdev);
1987
1988         /* allocate transmit descriptors */
1989         err = igb_setup_all_tx_resources(adapter);
1990         if (err)
1991                 goto err_setup_tx;
1992
1993         /* allocate receive descriptors */
1994         err = igb_setup_all_rx_resources(adapter);
1995         if (err)
1996                 goto err_setup_rx;
1997
1998         igb_power_up_link(adapter);
1999
2000         /* before we allocate an interrupt, we must be ready to handle it.
2001          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2002          * as soon as we call request_irq, so we have to set up our
2003          * clean_rx handler before we do so.  */
2004         igb_configure(adapter);
2005
2006         err = igb_request_irq(adapter);
2007         if (err)
2008                 goto err_req_irq;
2009
2010         /* From here on the code is the same as igb_up() */
2011         clear_bit(__IGB_DOWN, &adapter->state);
2012
2013         for (i = 0; i < adapter->num_q_vectors; i++) {
2014                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2015                 napi_enable(&q_vector->napi);
2016         }
2017
2018         /* Clear any pending interrupts. */
2019         rd32(E1000_ICR);
2020
2021         igb_irq_enable(adapter);
2022
2023         /* notify VFs that reset has been completed */
2024         if (adapter->vfs_allocated_count) {
2025                 u32 reg_data = rd32(E1000_CTRL_EXT);
2026                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2027                 wr32(E1000_CTRL_EXT, reg_data);
2028         }
2029
2030         netif_tx_start_all_queues(netdev);
2031
2032         /* start the watchdog. */
2033         hw->mac.get_link_status = 1;
2034         schedule_work(&adapter->watchdog_task);
2035
2036         return 0;
2037
2038 err_req_irq:
2039         igb_release_hw_control(adapter);
2040         igb_power_down_link(adapter);
2041         igb_free_all_rx_resources(adapter);
2042 err_setup_rx:
2043         igb_free_all_tx_resources(adapter);
2044 err_setup_tx:
2045         igb_reset(adapter);
2046
2047         return err;
2048 }
2049
2050 /**
2051  * igb_close - Disables a network interface
2052  * @netdev: network interface device structure
2053  *
2054  * Returns 0, this is not allowed to fail
2055  *
2056  * The close entry point is called when an interface is de-activated
2057  * by the OS.  The hardware is still under the driver's control, but
2058  * needs to be disabled.  A global MAC reset is issued to stop the
2059  * hardware, and all transmit and receive resources are freed.
2060  **/
2061 static int igb_close(struct net_device *netdev)
2062 {
2063         struct igb_adapter *adapter = netdev_priv(netdev);
2064
2065         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2066         igb_down(adapter);
2067
2068         igb_free_irq(adapter);
2069
2070         igb_free_all_tx_resources(adapter);
2071         igb_free_all_rx_resources(adapter);
2072
2073         return 0;
2074 }
2075
2076 /**
2077  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2078  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2079  *
2080  * Return 0 on success, negative on failure
2081  **/
2082 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2083 {
2084         struct pci_dev *pdev = tx_ring->pdev;
2085         int size;
2086
2087         size = sizeof(struct igb_buffer) * tx_ring->count;
2088         tx_ring->buffer_info = vmalloc(size);
2089         if (!tx_ring->buffer_info)
2090                 goto err;
2091         memset(tx_ring->buffer_info, 0, size);
2092
2093         /* round up to nearest 4K */
2094         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2095         tx_ring->size = ALIGN(tx_ring->size, 4096);
2096
2097         tx_ring->desc = pci_alloc_consistent(pdev,
2098                                              tx_ring->size,
2099                                              &tx_ring->dma);
2100
2101         if (!tx_ring->desc)
2102                 goto err;
2103
2104         tx_ring->next_to_use = 0;
2105         tx_ring->next_to_clean = 0;
2106         return 0;
2107
2108 err:
2109         vfree(tx_ring->buffer_info);
2110         dev_err(&pdev->dev,
2111                 "Unable to allocate memory for the transmit descriptor ring\n");
2112         return -ENOMEM;
2113 }
2114
2115 /**
2116  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2117  *                                (Descriptors) for all queues
2118  * @adapter: board private structure
2119  *
2120  * Return 0 on success, negative on failure
2121  **/
2122 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2123 {
2124         struct pci_dev *pdev = adapter->pdev;
2125         int i, err = 0;
2126
2127         for (i = 0; i < adapter->num_tx_queues; i++) {
2128                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2129                 if (err) {
2130                         dev_err(&pdev->dev,
2131                                 "Allocation for Tx Queue %u failed\n", i);
2132                         for (i--; i >= 0; i--)
2133                                 igb_free_tx_resources(adapter->tx_ring[i]);
2134                         break;
2135                 }
2136         }
2137
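        /*
         * Map every possible queue index (up to IGB_ABS_MAX_TX_QUEUES) onto
         * one of the rings that were actually allocated, so that queue
         * selection simply wraps around the available Tx rings.
         */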
2138         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2139                 int r_idx = i % adapter->num_tx_queues;
2140                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2141         }
2142         return err;
2143 }
2144
2145 /**
2146  * igb_setup_tctl - configure the transmit control registers
2147  * @adapter: Board private structure
2148  **/
2149 void igb_setup_tctl(struct igb_adapter *adapter)
2150 {
2151         struct e1000_hw *hw = &adapter->hw;
2152         u32 tctl;
2153
2154         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2155         wr32(E1000_TXDCTL(0), 0);
2156
2157         /* Program the Transmit Control Register */
2158         tctl = rd32(E1000_TCTL);
2159         tctl &= ~E1000_TCTL_CT;
2160         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2161                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2162
2163         igb_config_collision_dist(hw);
2164
2165         /* Enable transmits */
2166         tctl |= E1000_TCTL_EN;
2167
2168         wr32(E1000_TCTL, tctl);
2169 }
2170
2171 /**
2172  * igb_configure_tx_ring - Configure transmit ring after Reset
2173  * @adapter: board private structure
2174  * @ring: tx ring to configure
2175  *
2176  * Configure a transmit ring after a reset.
2177  **/
2178 void igb_configure_tx_ring(struct igb_adapter *adapter,
2179                            struct igb_ring *ring)
2180 {
2181         struct e1000_hw *hw = &adapter->hw;
2182         u32 txdctl;
2183         u64 tdba = ring->dma;
2184         int reg_idx = ring->reg_idx;
2185
2186         /* disable the queue */
2187         txdctl = rd32(E1000_TXDCTL(reg_idx));
2188         wr32(E1000_TXDCTL(reg_idx),
2189                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2190         wrfl();
2191         mdelay(10);
2192
2193         wr32(E1000_TDLEN(reg_idx),
2194                         ring->count * sizeof(union e1000_adv_tx_desc));
2195         wr32(E1000_TDBAL(reg_idx),
2196                         tdba & 0x00000000ffffffffULL);
2197         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2198
2199         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2200         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2201         writel(0, ring->head);
2202         writel(0, ring->tail);
2203
2204         txdctl |= IGB_TX_PTHRESH;
2205         txdctl |= IGB_TX_HTHRESH << 8;
2206         txdctl |= IGB_TX_WTHRESH << 16;
2207
2208         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2209         wr32(E1000_TXDCTL(reg_idx), txdctl);
2210 }
2211
2212 /**
2213  * igb_configure_tx - Configure transmit Unit after Reset
2214  * @adapter: board private structure
2215  *
2216  * Configure the Tx unit of the MAC after a reset.
2217  **/
2218 static void igb_configure_tx(struct igb_adapter *adapter)
2219 {
2220         int i;
2221
2222         for (i = 0; i < adapter->num_tx_queues; i++)
2223                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2224 }
2225
2226 /**
2227  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2228  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2229  *
2230  * Returns 0 on success, negative on failure
2231  **/
2232 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2233 {
2234         struct pci_dev *pdev = rx_ring->pdev;
2235         int size, desc_len;
2236
2237         size = sizeof(struct igb_buffer) * rx_ring->count;
2238         rx_ring->buffer_info = vmalloc(size);
2239         if (!rx_ring->buffer_info)
2240                 goto err;
2241         memset(rx_ring->buffer_info, 0, size);
2242
2243         desc_len = sizeof(union e1000_adv_rx_desc);
2244
2245         /* Round up to nearest 4K */
2246         rx_ring->size = rx_ring->count * desc_len;
2247         rx_ring->size = ALIGN(rx_ring->size, 4096);
2248
2249         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2250                                              &rx_ring->dma);
2251
2252         if (!rx_ring->desc)
2253                 goto err;
2254
2255         rx_ring->next_to_clean = 0;
2256         rx_ring->next_to_use = 0;
2257
2258         return 0;
2259
2260 err:
2261         vfree(rx_ring->buffer_info);
2262         rx_ring->buffer_info = NULL;
2263         dev_err(&pdev->dev, "Unable to allocate memory for "
2264                 "the receive descriptor ring\n");
2265         return -ENOMEM;
2266 }
2267
2268 /**
2269  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2270  *                                (Descriptors) for all queues
2271  * @adapter: board private structure
2272  *
2273  * Return 0 on success, negative on failure
2274  **/
2275 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2276 {
2277         struct pci_dev *pdev = adapter->pdev;
2278         int i, err = 0;
2279
2280         for (i = 0; i < adapter->num_rx_queues; i++) {
2281                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2282                 if (err) {
2283                         dev_err(&pdev->dev,
2284                                 "Allocation for Rx Queue %u failed\n", i);
2285                         for (i--; i >= 0; i--)
2286                                 igb_free_rx_resources(adapter->rx_ring[i]);
2287                         break;
2288                 }
2289         }
2290
2291         return err;
2292 }
2293
2294 /**
2295  * igb_setup_mrqc - configure the multiple receive queue control registers
2296  * @adapter: Board private structure
2297  **/
2298 static void igb_setup_mrqc(struct igb_adapter *adapter)
2299 {
2300         struct e1000_hw *hw = &adapter->hw;
2301         u32 mrqc, rxcsum;
2302         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2303         union e1000_reta {
2304                 u32 dword;
2305                 u8  bytes[4];
2306         } reta;
2307         static const u8 rsshash[40] = {
2308                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2309                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2310                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2311                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2312
2313         /* Fill out hash function seeds */
2314         for (j = 0; j < 10; j++) {
2315                 u32 rsskey = rsshash[(j * 4)];
2316                 rsskey |= rsshash[(j * 4) + 1] << 8;
2317                 rsskey |= rsshash[(j * 4) + 2] << 16;
2318                 rsskey |= rsshash[(j * 4) + 3] << 24;
2319                 array_wr32(E1000_RSSRK(0), j, rsskey);
2320         }
2321
2322         num_rx_queues = adapter->rss_queues;
2323
2324         if (adapter->vfs_allocated_count) {
2325                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2326                 switch (hw->mac.type) {
2327                 case e1000_i350:
2328                 case e1000_82580:
2329                         num_rx_queues = 1;
2330                         shift = 0;
2331                         break;
2332                 case e1000_82576:
2333                         shift = 3;
2334                         num_rx_queues = 2;
2335                         break;
2336                 case e1000_82575:
2337                         shift = 2;
2338                         shift2 = 6;
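                        /* fall through */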
2339                 default:
2340                         break;
2341                 }
2342         } else {
2343                 if (hw->mac.type == e1000_82575)
2344                         shift = 6;
2345         }
2346
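        /*
         * Program the 128-entry redirection table one 32-bit register (four
         * entries) at a time, spreading flows round-robin across
         * num_rx_queues; the shift values chosen above position the queue
         * number within each entry.
         */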
2347         for (j = 0; j < (32 * 4); j++) {
2348                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2349                 if (shift2)
2350                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2351                 if ((j & 3) == 3)
2352                         wr32(E1000_RETA(j >> 2), reta.dword);
2353         }
2354
2355         /*
2356          * Disable raw packet checksumming so that RSS hash is placed in
2357          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2358          * offloads as they are enabled by default
2359          */
2360         rxcsum = rd32(E1000_RXCSUM);
2361         rxcsum |= E1000_RXCSUM_PCSD;
2362
2363         if (adapter->hw.mac.type >= e1000_82576)
2364                 /* Enable Receive Checksum Offload for SCTP */
2365                 rxcsum |= E1000_RXCSUM_CRCOFL;
2366
2367         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2368         wr32(E1000_RXCSUM, rxcsum);
2369
2370         /* If VMDq is enabled then we set the appropriate mode for that, else
2371          * we default to RSS so that an RSS hash is calculated per packet even
2372          * if we are only using one queue */
2373         if (adapter->vfs_allocated_count) {
2374                 if (hw->mac.type > e1000_82575) {
2375                         /* Set the default pool for the PF's first queue */
2376                         u32 vtctl = rd32(E1000_VT_CTL);
2377                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2378                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2379                         vtctl |= adapter->vfs_allocated_count <<
2380                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2381                         wr32(E1000_VT_CTL, vtctl);
2382                 }
2383                 if (adapter->rss_queues > 1)
2384                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2385                 else
2386                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2387         } else {
2388                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2389         }
2390         igb_vmm_control(adapter);
2391
2392         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2393                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2394         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2395                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2396         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2397                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2398         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2399                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2400
2401         wr32(E1000_MRQC, mrqc);
2402 }
2403
2404 /**
2405  * igb_setup_rctl - configure the receive control registers
2406  * @adapter: Board private structure
2407  **/
2408 void igb_setup_rctl(struct igb_adapter *adapter)
2409 {
2410         struct e1000_hw *hw = &adapter->hw;
2411         u32 rctl;
2412
2413         rctl = rd32(E1000_RCTL);
2414
2415         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2416         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2417
2418         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2419                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2420
2421         /*
2422          * enable stripping of CRC. It's unlikely this will break BMC
2423          * redirection as it did with e1000. Newer features require
2424          * that the HW strips the CRC.
2425          */
2426         rctl |= E1000_RCTL_SECRC;
2427
2428         /* disable store bad packets and clear size bits. */
2429         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2430
2431         /* enable LPE to prevent packets larger than max_frame_size */
2432         rctl |= E1000_RCTL_LPE;
2433
2434         /* disable queue 0 to prevent tail write w/o re-config */
2435         wr32(E1000_RXDCTL(0), 0);
2436
2437         /* Attention!!!  For SR-IOV PF driver operations you must enable
2438          * queue drop for all VF and PF queues to prevent head of line blocking
2439          * if an un-trusted VF does not provide descriptors to hardware.
2440          */
2441         if (adapter->vfs_allocated_count) {
2442                 /* set all queue drop enable bits */
2443                 wr32(E1000_QDE, ALL_QUEUES);
2444         }
2445
2446         wr32(E1000_RCTL, rctl);
2447 }
2448
2449 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2450                                    int vfn)
2451 {
2452         struct e1000_hw *hw = &adapter->hw;
2453         u32 vmolr;
2454
2455         /* if it isn't the PF, check to see if VFs are enabled and
2456          * increase the size to support vlan tags */
2457         if (vfn < adapter->vfs_allocated_count &&
2458             adapter->vf_data[vfn].vlans_enabled)
2459                 size += VLAN_TAG_SIZE;
2460
2461         vmolr = rd32(E1000_VMOLR(vfn));
2462         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2463         vmolr |= size | E1000_VMOLR_LPE;
2464         wr32(E1000_VMOLR(vfn), vmolr);
2465
2466         return 0;
2467 }
2468
2469 /**
2470  * igb_rlpml_set - set maximum receive packet size
2471  * @adapter: board private structure
2472  *
2473  * Configure maximum receivable packet size.
2474  **/
2475 static void igb_rlpml_set(struct igb_adapter *adapter)
2476 {
2477         u32 max_frame_size = adapter->max_frame_size;
2478         struct e1000_hw *hw = &adapter->hw;
2479         u16 pf_id = adapter->vfs_allocated_count;
2480
2481         if (adapter->vlgrp)
2482                 max_frame_size += VLAN_TAG_SIZE;
2483
2484         /* if vfs are enabled we set RLPML to the largest possible request
2485          * size and set the VMOLR RLPML to the size we need */
2486         if (pf_id) {
2487                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2488                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2489         }
2490
2491         wr32(E1000_RLPML, max_frame_size);
2492 }
2493
2494 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2495                                  int vfn, bool aupe)
2496 {
2497         struct e1000_hw *hw = &adapter->hw;
2498         u32 vmolr;
2499
2500         /*
2501          * This register exists only on 82576 and newer, so if the MAC is older
2502          * than that we should exit and do nothing
2503          */
2504         if (hw->mac.type < e1000_82576)
2505                 return;
2506
2507         vmolr = rd32(E1000_VMOLR(vfn));
2508         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2509         if (aupe)
2510                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2511         else
2512                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2513
2514         /* clear all bits that might not be set */
2515         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2516
2517         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2518                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2519         /*
2520          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2521          * multicast packets
2522          */
2523         if (vfn <= adapter->vfs_allocated_count)
2524                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2525
2526         wr32(E1000_VMOLR(vfn), vmolr);
2527 }
2528
2529 /**
2530  * igb_configure_rx_ring - Configure a receive ring after Reset
2531  * @adapter: board private structure
2532  * @ring: receive ring to be configured
2533  *
2534  * Configure the Rx unit of the MAC after a reset.
2535  **/
2536 void igb_configure_rx_ring(struct igb_adapter *adapter,
2537                            struct igb_ring *ring)
2538 {
2539         struct e1000_hw *hw = &adapter->hw;
2540         u64 rdba = ring->dma;
2541         int reg_idx = ring->reg_idx;
2542         u32 srrctl, rxdctl;
2543
2544         /* disable the queue */
2545         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2546         wr32(E1000_RXDCTL(reg_idx),
2547                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2548
2549         /* Set DMA base address registers */
2550         wr32(E1000_RDBAL(reg_idx),
2551              rdba & 0x00000000ffffffffULL);
2552         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2553         wr32(E1000_RDLEN(reg_idx),
2554                        ring->count * sizeof(union e1000_adv_rx_desc));
2555
2556         /* initialize head and tail */
2557         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2558         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2559         writel(0, ring->head);
2560         writel(0, ring->tail);
2561
2562         /* set descriptor configuration */
2563         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2564                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2565                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2566 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2567                 srrctl |= IGB_RXBUFFER_16384 >>
2568                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2569 #else
2570                 srrctl |= (PAGE_SIZE / 2) >>
2571                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2572 #endif
2573                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2574         } else {
2575                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2576                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2577                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2578         }
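        /*
         * On 82580 the SRRCTL timestamp bit enables per-packet Rx
         * timestamping, so the timestamp travels with the packet buffer
         * rather than only being latched in the shared RXSTMPL/H registers.
         */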
2579         if (hw->mac.type == e1000_82580)
2580                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2581         /* Only set Drop Enable if we are supporting multiple queues */
2582         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2583                 srrctl |= E1000_SRRCTL_DROP_EN;
2584
2585         wr32(E1000_SRRCTL(reg_idx), srrctl);
2586
2587         /* set filtering for VMDQ pools */
2588         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2589
2590         /* enable receive descriptor fetching */
2591         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2592         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2593         rxdctl &= 0xFFF00000;
2594         rxdctl |= IGB_RX_PTHRESH;
2595         rxdctl |= IGB_RX_HTHRESH << 8;
2596         rxdctl |= IGB_RX_WTHRESH << 16;
2597         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2598 }
2599
2600 /**
2601  * igb_configure_rx - Configure receive Unit after Reset
2602  * @adapter: board private structure
2603  *
2604  * Configure the Rx unit of the MAC after a reset.
2605  **/
2606 static void igb_configure_rx(struct igb_adapter *adapter)
2607 {
2608         int i;
2609
2610         /* set UTA to appropriate mode */
2611         igb_set_uta(adapter);
2612
2613         /* set the correct pool for the PF default MAC address in entry 0 */
2614         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2615                          adapter->vfs_allocated_count);
2616
2617         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2618          * the Base and Length of the Rx Descriptor Ring */
2619         for (i = 0; i < adapter->num_rx_queues; i++)
2620                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2621 }
2622
2623 /**
2624  * igb_free_tx_resources - Free Tx Resources per Queue
2625  * @tx_ring: Tx descriptor ring for a specific queue
2626  *
2627  * Free all transmit software resources
2628  **/
2629 void igb_free_tx_resources(struct igb_ring *tx_ring)
2630 {
2631         igb_clean_tx_ring(tx_ring);
2632
2633         vfree(tx_ring->buffer_info);
2634         tx_ring->buffer_info = NULL;
2635
2636         /* if not set, then don't free */
2637         if (!tx_ring->desc)
2638                 return;
2639
2640         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2641                             tx_ring->desc, tx_ring->dma);
2642
2643         tx_ring->desc = NULL;
2644 }
2645
2646 /**
2647  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2648  * @adapter: board private structure
2649  *
2650  * Free all transmit software resources
2651  **/
2652 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2653 {
2654         int i;
2655
2656         for (i = 0; i < adapter->num_tx_queues; i++)
2657                 igb_free_tx_resources(adapter->tx_ring[i]);
2658 }
2659
2660 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2661                                     struct igb_buffer *buffer_info)
2662 {
2663         if (buffer_info->dma) {
2664                 if (buffer_info->mapped_as_page)
2665                         pci_unmap_page(tx_ring->pdev,
2666                                         buffer_info->dma,
2667                                         buffer_info->length,
2668                                         PCI_DMA_TODEVICE);
2669                 else
2670                         pci_unmap_single(tx_ring->pdev,
2671                                         buffer_info->dma,
2672                                         buffer_info->length,
2673                                         PCI_DMA_TODEVICE);
2674                 buffer_info->dma = 0;
2675         }
2676         if (buffer_info->skb) {
2677                 dev_kfree_skb_any(buffer_info->skb);
2678                 buffer_info->skb = NULL;
2679         }
2680         buffer_info->time_stamp = 0;
2681         buffer_info->length = 0;
2682         buffer_info->next_to_watch = 0;
2683         buffer_info->mapped_as_page = false;
2684 }
2685
2686 /**
2687  * igb_clean_tx_ring - Free Tx Buffers
2688  * @tx_ring: ring to be cleaned
2689  **/
2690 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2691 {
2692         struct igb_buffer *buffer_info;
2693         unsigned long size;
2694         unsigned int i;
2695
2696         if (!tx_ring->buffer_info)
2697                 return;
2698         /* Free all the Tx ring sk_buffs */
2699
2700         for (i = 0; i < tx_ring->count; i++) {
2701                 buffer_info = &tx_ring->buffer_info[i];
2702                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2703         }
2704
2705         size = sizeof(struct igb_buffer) * tx_ring->count;
2706         memset(tx_ring->buffer_info, 0, size);
2707
2708         /* Zero out the descriptor ring */
2709         memset(tx_ring->desc, 0, tx_ring->size);
2710
2711         tx_ring->next_to_use = 0;
2712         tx_ring->next_to_clean = 0;
2713 }
2714
2715 /**
2716  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2717  * @adapter: board private structure
2718  **/
2719 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2720 {
2721         int i;
2722
2723         for (i = 0; i < adapter->num_tx_queues; i++)
2724                 igb_clean_tx_ring(adapter->tx_ring[i]);
2725 }
2726
2727 /**
2728  * igb_free_rx_resources - Free Rx Resources
2729  * @rx_ring: ring to clean the resources from
2730  *
2731  * Free all receive software resources
2732  **/
2733 void igb_free_rx_resources(struct igb_ring *rx_ring)
2734 {
2735         igb_clean_rx_ring(rx_ring);
2736
2737         vfree(rx_ring->buffer_info);
2738         rx_ring->buffer_info = NULL;
2739
2740         /* if not set, then don't free */
2741         if (!rx_ring->desc)
2742                 return;
2743
2744         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2745                             rx_ring->desc, rx_ring->dma);
2746
2747         rx_ring->desc = NULL;
2748 }
2749
2750 /**
2751  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2752  * @adapter: board private structure
2753  *
2754  * Free all receive software resources
2755  **/
2756 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2757 {
2758         int i;
2759
2760         for (i = 0; i < adapter->num_rx_queues; i++)
2761                 igb_free_rx_resources(adapter->rx_ring[i]);
2762 }
2763
2764 /**
2765  * igb_clean_rx_ring - Free Rx Buffers per Queue
2766  * @rx_ring: ring to free buffers from
2767  **/
2768 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2769 {
2770         struct igb_buffer *buffer_info;
2771         unsigned long size;
2772         unsigned int i;
2773
2774         if (!rx_ring->buffer_info)
2775                 return;
2776
2777         /* Free all the Rx ring sk_buffs */
2778         for (i = 0; i < rx_ring->count; i++) {
2779                 buffer_info = &rx_ring->buffer_info[i];
2780                 if (buffer_info->dma) {
2781                         pci_unmap_single(rx_ring->pdev,
2782                                          buffer_info->dma,
2783                                          rx_ring->rx_buffer_len,
2784                                          PCI_DMA_FROMDEVICE);
2785                         buffer_info->dma = 0;
2786                 }
2787
2788                 if (buffer_info->skb) {
2789                         dev_kfree_skb(buffer_info->skb);
2790                         buffer_info->skb = NULL;
2791                 }
2792                 if (buffer_info->page_dma) {
2793                         pci_unmap_page(rx_ring->pdev,
2794                                        buffer_info->page_dma,
2795                                        PAGE_SIZE / 2,
2796                                        PCI_DMA_FROMDEVICE);
2797                         buffer_info->page_dma = 0;
2798                 }
2799                 if (buffer_info->page) {
2800                         put_page(buffer_info->page);
2801                         buffer_info->page = NULL;
2802                         buffer_info->page_offset = 0;
2803                 }
2804         }
2805
2806         size = sizeof(struct igb_buffer) * rx_ring->count;
2807         memset(rx_ring->buffer_info, 0, size);
2808
2809         /* Zero out the descriptor ring */
2810         memset(rx_ring->desc, 0, rx_ring->size);
2811
2812         rx_ring->next_to_clean = 0;
2813         rx_ring->next_to_use = 0;
2814 }
2815
2816 /**
2817  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2818  * @adapter: board private structure
2819  **/
2820 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2821 {
2822         int i;
2823
2824         for (i = 0; i < adapter->num_rx_queues; i++)
2825                 igb_clean_rx_ring(adapter->rx_ring[i]);
2826 }
2827
2828 /**
2829  * igb_set_mac - Change the Ethernet Address of the NIC
2830  * @netdev: network interface device structure
2831  * @p: pointer to an address structure
2832  *
2833  * Returns 0 on success, negative on failure
2834  **/
2835 static int igb_set_mac(struct net_device *netdev, void *p)
2836 {
2837         struct igb_adapter *adapter = netdev_priv(netdev);
2838         struct e1000_hw *hw = &adapter->hw;
2839         struct sockaddr *addr = p;
2840
2841         if (!is_valid_ether_addr(addr->sa_data))
2842                 return -EADDRNOTAVAIL;
2843
2844         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2845         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2846
2847         /* set the correct pool for the new PF MAC address in entry 0 */
2848         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2849                          adapter->vfs_allocated_count);
2850
2851         return 0;
2852 }
2853
2854 /**
2855  * igb_write_mc_addr_list - write multicast addresses to MTA
2856  * @netdev: network interface device structure
2857  *
2858  * Writes multicast address list to the MTA hash table.
2859  * Returns: -ENOMEM on failure
2860  *                0 on no addresses written
2861  *                X on writing X addresses to MTA
2862  **/
2863 static int igb_write_mc_addr_list(struct net_device *netdev)
2864 {
2865         struct igb_adapter *adapter = netdev_priv(netdev);
2866         struct e1000_hw *hw = &adapter->hw;
2867         struct dev_mc_list *mc_ptr;
2868         u8  *mta_list;
2869         int i;
2870
2871         if (netdev_mc_empty(netdev)) {
2872                 /* nothing to program, so clear mc list */
2873                 igb_update_mc_addr_list(hw, NULL, 0);
2874                 igb_restore_vf_multicasts(adapter);
2875                 return 0;
2876         }
2877
2878         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2879         if (!mta_list)
2880                 return -ENOMEM;
2881
2882         /* The shared function expects a packed array of only addresses. */
2883         i = 0;
2884         netdev_for_each_mc_addr(mc_ptr, netdev)
2885                 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2886
2887         igb_update_mc_addr_list(hw, mta_list, i);
2888         kfree(mta_list);
2889
2890         return netdev_mc_count(netdev);
2891 }
2892
2893 /**
2894  * igb_write_uc_addr_list - write unicast addresses to RAR table
2895  * @netdev: network interface device structure
2896  *
2897  * Writes unicast address list to the RAR table.
2898  * Returns: -ENOMEM on failure/insufficient address space
2899  *                0 on no addresses written
2900  *                X on writing X addresses to the RAR table
2901  **/
2902 static int igb_write_uc_addr_list(struct net_device *netdev)
2903 {
2904         struct igb_adapter *adapter = netdev_priv(netdev);
2905         struct e1000_hw *hw = &adapter->hw;
2906         unsigned int vfn = adapter->vfs_allocated_count;
2907         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2908         int count = 0;
2909
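        /*
         * Entry 0 of the RAR table holds the PF MAC (see igb_set_mac()) and
         * one entry per allocated VF is reserved as well, which leaves
         * rar_entries slots for additional unicast filters.
         */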
2910         /* return ENOMEM indicating insufficient memory for addresses */
2911         if (netdev_uc_count(netdev) > rar_entries)
2912                 return -ENOMEM;
2913
2914         if (!netdev_uc_empty(netdev) && rar_entries) {
2915                 struct netdev_hw_addr *ha;
2916
2917                 netdev_for_each_uc_addr(ha, netdev) {
2918                         if (!rar_entries)
2919                                 break;
2920                         igb_rar_set_qsel(adapter, ha->addr,
2921                                          rar_entries--,
2922                                          vfn);
2923                         count++;
2924                 }
2925         }
2926         /* write the addresses in reverse order to avoid write combining */
2927         for (; rar_entries > 0 ; rar_entries--) {
2928                 wr32(E1000_RAH(rar_entries), 0);
2929                 wr32(E1000_RAL(rar_entries), 0);
2930         }
2931         wrfl();
2932
2933         return count;
2934 }
2935
2936 /**
2937  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2938  * @netdev: network interface device structure
2939  *
2940  * The set_rx_mode entry point is called whenever the unicast or multicast
2941  * address lists or the network interface flags are updated.  This routine is
2942  * responsible for configuring the hardware for proper unicast, multicast,
2943  * promiscuous mode, and all-multi behavior.
2944  **/
2945 static void igb_set_rx_mode(struct net_device *netdev)
2946 {
2947         struct igb_adapter *adapter = netdev_priv(netdev);
2948         struct e1000_hw *hw = &adapter->hw;
2949         unsigned int vfn = adapter->vfs_allocated_count;
2950         u32 rctl, vmolr = 0;
2951         int count;
2952
2953         /* Check for Promiscuous and All Multicast modes */
2954         rctl = rd32(E1000_RCTL);
2955
2956         /* clear the affected bits */
2957         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2958
2959         if (netdev->flags & IFF_PROMISC) {
2960                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2961                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2962         } else {
2963                 if (netdev->flags & IFF_ALLMULTI) {
2964                         rctl |= E1000_RCTL_MPE;
2965                         vmolr |= E1000_VMOLR_MPME;
2966                 } else {
2967                         /*
2968                          * Write addresses to the MTA; if the attempt fails
2969                          * then we should just turn on promiscuous mode so
2970                          * that we can at least receive multicast traffic
2971                          */
2972                         count = igb_write_mc_addr_list(netdev);
2973                         if (count < 0) {
2974                                 rctl |= E1000_RCTL_MPE;
2975                                 vmolr |= E1000_VMOLR_MPME;
2976                         } else if (count) {
2977                                 vmolr |= E1000_VMOLR_ROMPE;
2978                         }
2979                 }
2980                 /*
2981                  * Write addresses to available RAR registers; if there is not
2982                  * sufficient space to store all the addresses then enable
2983                  * unicast promiscuous mode
2984                  */
2985                 count = igb_write_uc_addr_list(netdev);
2986                 if (count < 0) {
2987                         rctl |= E1000_RCTL_UPE;
2988                         vmolr |= E1000_VMOLR_ROPE;
2989                 }
2990                 rctl |= E1000_RCTL_VFE;
2991         }
2992         wr32(E1000_RCTL, rctl);
2993
2994         /*
2995          * In order to support SR-IOV and eventually VMDq it is necessary to set
2996          * the VMOLR to enable the appropriate modes.  Without this workaround
2997          * we will have issues with VLAN tag stripping not being done for frames
2998          * that are only arriving because we are the default pool
2999          */
3000         if (hw->mac.type < e1000_82576)
3001                 return;
3002
3003         vmolr |= rd32(E1000_VMOLR(vfn)) &
3004                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3005         wr32(E1000_VMOLR(vfn), vmolr);
3006         igb_restore_vf_multicasts(adapter);
3007 }
3008
3009 /* Need to wait a few seconds after link up to get diagnostic information from
3010  * the phy */
3011 static void igb_update_phy_info(unsigned long data)
3012 {
3013         struct igb_adapter *adapter = (struct igb_adapter *) data;
3014         igb_get_phy_info(&adapter->hw);
3015 }
3016
3017 /**
3018  * igb_has_link - check shared code for link and determine up/down
3019  * @adapter: pointer to driver private info
3020  **/
3021 bool igb_has_link(struct igb_adapter *adapter)
3022 {
3023         struct e1000_hw *hw = &adapter->hw;
3024         bool link_active = false;
3025         s32 ret_val = 0;
3026
3027         /* get_link_status is set on LSC (link status) interrupt or
3028          * rx sequence error interrupt.  link_active will stay
3029          * false until the e1000_check_for_link establishes link
3030          * for copper adapters ONLY
3031          */
3032         switch (hw->phy.media_type) {
3033         case e1000_media_type_copper:
3034                 if (hw->mac.get_link_status) {
3035                         ret_val = hw->mac.ops.check_for_link(hw);
3036                         link_active = !hw->mac.get_link_status;
3037                 } else {
3038                         link_active = true;
3039                 }
3040                 break;
3041         case e1000_media_type_internal_serdes:
3042                 ret_val = hw->mac.ops.check_for_link(hw);
3043                 link_active = hw->mac.serdes_has_link;
3044                 break;
3045         default:
3046         case e1000_media_type_unknown:
3047                 break;
3048         }
3049
3050         return link_active;
3051 }
3052
3053 /**
3054  * igb_watchdog - Timer Call-back
3055  * @data: pointer to adapter cast into an unsigned long
3056  **/
3057 static void igb_watchdog(unsigned long data)
3058 {
3059         struct igb_adapter *adapter = (struct igb_adapter *)data;
3060         /* Do the rest outside of interrupt context */
3061         schedule_work(&adapter->watchdog_task);
3062 }
3063
3064 static void igb_watchdog_task(struct work_struct *work)
3065 {
3066         struct igb_adapter *adapter = container_of(work,
3067                                                    struct igb_adapter,
3068                                                    watchdog_task);
3069         struct e1000_hw *hw = &adapter->hw;
3070         struct net_device *netdev = adapter->netdev;
3071         u32 link;
3072         int i;
3073
3074         link = igb_has_link(adapter);
3075         if (link) {
3076                 if (!netif_carrier_ok(netdev)) {
3077                         u32 ctrl;
3078                         hw->mac.ops.get_speed_and_duplex(hw,
3079                                                          &adapter->link_speed,
3080                                                          &adapter->link_duplex);
3081
3082                         ctrl = rd32(E1000_CTRL);
3083                         /* Link status message must follow this format */
3084                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3085                                  "Flow Control: %s\n",
3086                                netdev->name,
3087                                adapter->link_speed,
3088                                adapter->link_duplex == FULL_DUPLEX ?
3089                                  "Full Duplex" : "Half Duplex",
3090                                ((ctrl & E1000_CTRL_TFCE) &&
3091                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3092                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3093                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3094
3095                         /* tweak tx_queue_len according to speed/duplex and
3096                          * adjust the timeout factor */
3097                         netdev->tx_queue_len = adapter->tx_queue_len;
3098                         adapter->tx_timeout_factor = 1;
3099                         switch (adapter->link_speed) {
3100                         case SPEED_10:
3101                                 netdev->tx_queue_len = 10;
3102                                 adapter->tx_timeout_factor = 14;
3103                                 break;
3104                         case SPEED_100:
3105                                 netdev->tx_queue_len = 100;
3106                                 /* maybe add some timeout factor ? */
3107                                 break;
3108                         }
3109
3110                         netif_carrier_on(netdev);
3111
3112                         igb_ping_all_vfs(adapter);
3113
3114                         /* link state has changed, schedule phy info update */
3115                         if (!test_bit(__IGB_DOWN, &adapter->state))
3116                                 mod_timer(&adapter->phy_info_timer,
3117                                           round_jiffies(jiffies + 2 * HZ));
3118                 }
3119         } else {
3120                 if (netif_carrier_ok(netdev)) {
3121                         adapter->link_speed = 0;
3122                         adapter->link_duplex = 0;
3123                         /* Link status message must follow this format */
3124                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3125                                netdev->name);
3126                         netif_carrier_off(netdev);
3127
3128                         igb_ping_all_vfs(adapter);
3129
3130                         /* link state has changed, schedule phy info update */
3131                         if (!test_bit(__IGB_DOWN, &adapter->state))
3132                                 mod_timer(&adapter->phy_info_timer,
3133                                           round_jiffies(jiffies + 2 * HZ));
3134                 }
3135         }
3136
3137         igb_update_stats(adapter);
3138
3139         for (i = 0; i < adapter->num_tx_queues; i++) {
3140                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3141                 if (!netif_carrier_ok(netdev)) {
3142                         /* We've lost link, so the controller stops DMA,
3143                          * but we've got queued Tx work that's never going
3144                          * to get done, so reset controller to flush Tx.
3145                          * (Do the reset outside of interrupt context). */
3146                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3147                                 adapter->tx_timeout_count++;
3148                                 schedule_work(&adapter->reset_task);
3149                                 /* return immediately since reset is imminent */
3150                                 return;
3151                         }
3152                 }
3153
3154                 /* Force detection of hung controller every watchdog period */
3155                 tx_ring->detect_tx_hung = true;
3156         }
3157
3158         /* Cause software interrupt to ensure rx ring is cleaned */
3159         if (adapter->msix_entries) {
3160                 u32 eics = 0;
3161                 for (i = 0; i < adapter->num_q_vectors; i++) {
3162                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3163                         eics |= q_vector->eims_value;
3164                 }
3165                 wr32(E1000_EICS, eics);
3166         } else {
3167                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3168         }
3169
3170         /* Reset the timer */
3171         if (!test_bit(__IGB_DOWN, &adapter->state))
3172                 mod_timer(&adapter->watchdog_timer,
3173                           round_jiffies(jiffies + 2 * HZ));
3174 }
3175
3176 enum latency_range {
3177         lowest_latency = 0,
3178         low_latency = 1,
3179         bulk_latency = 2,
3180         latency_invalid = 255
3181 };
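
/*
 * These latency ranges correspond to the ITR values programmed in
 * igb_set_itr() below: lowest_latency -> 56 (~70,000 ints/sec),
 * low_latency -> 196 (~20,000 ints/sec) and bulk_latency -> 980
 * (~4,000 ints/sec).
 */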
3182
3183 /**
3184  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3185  *
3186  *      Stores a new ITR value based strictly on packet size.  This
3187  *      algorithm is less sophisticated than that used in igb_update_itr,
3188  *      due to the difficulty of synchronizing statistics across multiple
3189  *      receive rings.  The divisors and thresholds used by this function
3190  *      were determined based on theoretical maximum wire speed and testing
3191  *      data, in order to minimize response time while increasing bulk
3192  *      throughput.
3193  *      This functionality is controlled by the InterruptThrottleRate module
3194  *      parameter (see igb_param.c)
3195  *      NOTE:  This function is called only when operating in a multiqueue
3196  *             receive environment.
3197  * @q_vector: pointer to q_vector
3198  **/
3199 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3200 {
3201         int new_val = q_vector->itr_val;
3202         int avg_wire_size = 0;
3203         struct igb_adapter *adapter = q_vector->adapter;
3204
3205         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3206          * ints/sec - an ITR value of 976 (roughly 250 usec).
3207          */
3208         if (adapter->link_speed != SPEED_1000) {
3209                 new_val = 976;
3210                 goto set_itr_val;
3211         }
3212
3213         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3214                 struct igb_ring *ring = q_vector->rx_ring;
3215                 avg_wire_size = ring->total_bytes / ring->total_packets;
3216         }
3217
3218         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3219                 struct igb_ring *ring = q_vector->tx_ring;
3220                 avg_wire_size = max_t(u32, avg_wire_size,
3221                                       (ring->total_bytes /
3222                                        ring->total_packets));
3223         }
3224
3225         /* if avg_wire_size isn't set no work was done */
3226         if (!avg_wire_size)
3227                 goto clear_counts;
3228
3229         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3230         avg_wire_size += 24;
3231
3232         /* Don't starve jumbo frames */
3233         avg_wire_size = min(avg_wire_size, 3000);
3234
3235         /* Give a little boost to mid-size frames */
3236         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3237                 new_val = avg_wire_size / 3;
3238         else
3239                 new_val = avg_wire_size / 2;
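
        /*
         * Example: for full-sized 1500 byte frames avg_wire_size works out
         * to 1500 + 24 = 1524, which is above the mid-size window, so
         * new_val = 1524 / 2 = 762 -- roughly 5,000 ints/sec given the
         * ~4 ITR counts per usec implied by the constants in igb_set_itr().
         */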
3240
3241         /* when in itr mode 3 do not exceed 20K ints/sec */
3242         if (adapter->rx_itr_setting == 3 && new_val < 196)
3243                 new_val = 196;
3244
3245 set_itr_val:
3246         if (new_val != q_vector->itr_val) {
3247                 q_vector->itr_val = new_val;
3248                 q_vector->set_itr = 1;
3249         }
3250 clear_counts:
3251         if (q_vector->rx_ring) {
3252                 q_vector->rx_ring->total_bytes = 0;
3253                 q_vector->rx_ring->total_packets = 0;
3254         }
3255         if (q_vector->tx_ring) {
3256                 q_vector->tx_ring->total_bytes = 0;
3257                 q_vector->tx_ring->total_packets = 0;
3258         }
3259 }
3260
3261 /**
3262  * igb_update_itr - update the dynamic ITR value based on statistics
3263  *      Stores a new ITR value based on packets and byte
3264  *      counts during the last interrupt.  The advantage of per interrupt
3265  *      computation is faster updates and more accurate ITR for the current
3266  *      traffic pattern.  Constants in this function were computed
3267  *      based on theoretical maximum wire speed and thresholds were set based
3268  *      on testing data as well as attempting to minimize response time
3269  *      while increasing bulk throughput.
3270  *      this functionality is controlled by the InterruptThrottleRate module
3271  *      parameter (see igb_param.c)
3272  *      NOTE:  These calculations are only valid when operating in a single-
3273  *             queue environment.
3274  * @adapter: pointer to adapter
3275  * @itr_setting: current q_vector->itr_val
3276  * @packets: the number of packets during this measurement interval
3277  * @bytes: the number of bytes during this measurement interval
3278  **/
3279 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3280                                    int packets, int bytes)
3281 {
3282         unsigned int retval = itr_setting;
3283
3284         if (packets == 0)
3285                 goto update_itr_done;
3286
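        /*
         * Example: while in low_latency, an interval with 40 packets
         * totalling 12000 bytes has bytes > 10000, an average frame of
         * only 300 bytes and packets > 35, so we move to lowest_latency
         * (the highest interrupt rate); a run of large frames with
         * bytes/packets > 8000 would instead push us to bulk_latency.
         */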
3287         switch (itr_setting) {
3288         case lowest_latency:
3289                 /* handle TSO and jumbo frames */
3290                 if (bytes/packets > 8000)
3291                         retval = bulk_latency;
3292                 else if ((packets < 5) && (bytes > 512))
3293                         retval = low_latency;
3294                 break;
3295         case low_latency:  /* 50 usec aka 20000 ints/s */
3296                 if (bytes > 10000) {
3297                         /* this if handles the TSO accounting */
3298                         if (bytes/packets > 8000) {
3299                                 retval = bulk_latency;
3300                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3301                                 retval = bulk_latency;
3302                         } else if ((packets > 35)) {
3303                                 retval = lowest_latency;
3304                         }
3305                 } else if (bytes/packets > 2000) {
3306                         retval = bulk_latency;
3307                 } else if (packets <= 2 && bytes < 512) {
3308                         retval = lowest_latency;
3309                 }
3310                 break;
3311         case bulk_latency: /* 250 usec aka 4000 ints/s */
3312                 if (bytes > 25000) {
3313                         if (packets > 35)
3314                                 retval = low_latency;
3315                 } else if (bytes < 1500) {
3316                         retval = low_latency;
3317                 }
3318                 break;
3319         }
3320
3321 update_itr_done:
3322         return retval;
3323 }
3324
3325 static void igb_set_itr(struct igb_adapter *adapter)
3326 {
3327         struct igb_q_vector *q_vector = adapter->q_vector[0];
3328         u16 current_itr;
3329         u32 new_itr = q_vector->itr_val;
3330
3331         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3332         if (adapter->link_speed != SPEED_1000) {
3333                 current_itr = 0;
3334                 new_itr = 4000;
3335                 goto set_itr_now;
3336         }
3337
3338         adapter->rx_itr = igb_update_itr(adapter,
3339                                     adapter->rx_itr,
3340                                     q_vector->rx_ring->total_packets,
3341                                     q_vector->rx_ring->total_bytes);
3342
3343         adapter->tx_itr = igb_update_itr(adapter,
3344                                     adapter->tx_itr,
3345                                     q_vector->tx_ring->total_packets,
3346                                     q_vector->tx_ring->total_bytes);
3347         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3348
3349         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3350         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3351                 current_itr = low_latency;
3352
3353         switch (current_itr) {
3354         /* counts and packets in update_itr are dependent on these numbers */
3355         case lowest_latency:
3356                 new_itr = 56;  /* aka 70,000 ints/sec */
3357                 break;
3358         case low_latency:
3359                 new_itr = 196; /* aka 20,000 ints/sec */
3360                 break;
3361         case bulk_latency:
3362                 new_itr = 980; /* aka 4,000 ints/sec */
3363                 break;
3364         default:
3365                 break;
3366         }
3367
3368 set_itr_now:
3369         q_vector->rx_ring->total_bytes = 0;
3370         q_vector->rx_ring->total_packets = 0;
3371         q_vector->tx_ring->total_bytes = 0;
3372         q_vector->tx_ring->total_packets = 0;
3373
3374         if (new_itr != q_vector->itr_val) {
3375                 /* this attempts to bias the interrupt rate towards Bulk
3376                  * by adding intermediate steps when interrupt rate is
3377                  * increasing */
3378                 new_itr = new_itr > q_vector->itr_val ?
3379                              max((new_itr * q_vector->itr_val) /
3380                                  (new_itr + (q_vector->itr_val >> 2)),
3381                                  new_itr) :
3382                              new_itr;
3383                 /* Don't write the value here; it resets the adapter's
3384                  * internal timer, and causes us to delay far longer than
3385                  * we should between interrupts.  Instead, we write the ITR
3386                  * value at the beginning of the next interrupt so the timing
3387                  * ends up being correct.
3388                  */
3389                 q_vector->itr_val = new_itr;
3390                 q_vector->set_itr = 1;
3391         }
3392
3393         return;
3394 }
3395
3396 #define IGB_TX_FLAGS_CSUM               0x00000001
3397 #define IGB_TX_FLAGS_VLAN               0x00000002
3398 #define IGB_TX_FLAGS_TSO                0x00000004
3399 #define IGB_TX_FLAGS_IPV4               0x00000008
3400 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3401 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3402 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3403
3404 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3405                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3406 {
3407         struct e1000_adv_tx_context_desc *context_desc;
3408         unsigned int i;
3409         int err;
3410         struct igb_buffer *buffer_info;
3411         u32 info = 0, tu_cmd = 0;
3412         u32 mss_l4len_idx;
3413         u8 l4len;
3414
3415         if (skb_header_cloned(skb)) {
3416                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3417                 if (err)
3418                         return err;
3419         }
3420
3421         l4len = tcp_hdrlen(skb);
3422         *hdr_len += l4len;
3423
3424         if (skb->protocol == htons(ETH_P_IP)) {
3425                 struct iphdr *iph = ip_hdr(skb);
3426                 iph->tot_len = 0;
3427                 iph->check = 0;
3428                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3429                                                          iph->daddr, 0,
3430                                                          IPPROTO_TCP,
3431                                                          0);
3432         } else if (skb_is_gso_v6(skb)) {
3433                 ipv6_hdr(skb)->payload_len = 0;
3434                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3435                                                        &ipv6_hdr(skb)->daddr,
3436                                                        0, IPPROTO_TCP, 0);
3437         }
3438
3439         i = tx_ring->next_to_use;
3440
3441         buffer_info = &tx_ring->buffer_info[i];
3442         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3443         /* VLAN MACLEN IPLEN */
3444         if (tx_flags & IGB_TX_FLAGS_VLAN)
3445                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3446         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3447         *hdr_len += skb_network_offset(skb);
3448         info |= skb_network_header_len(skb);
3449         *hdr_len += skb_network_header_len(skb);
3450         context_desc->vlan_macip_lens = cpu_to_le32(info);
3451
3452         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3453         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3454
3455         if (skb->protocol == htons(ETH_P_IP))
3456                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3457         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3458
3459         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3460
3461         /* MSS L4LEN IDX */
3462         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3463         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3464
3465         /* For 82575, context index must be unique per ring. */
3466         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3467                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3468
3469         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3470         context_desc->seqnum_seed = 0;
3471
3472         buffer_info->time_stamp = jiffies;
3473         buffer_info->next_to_watch = i;
3474         buffer_info->dma = 0;
3475         i++;
3476         if (i == tx_ring->count)
3477                 i = 0;
3478
3479         tx_ring->next_to_use = i;
3480
3481         return true;
3482 }
3483
3484 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3485                                    struct sk_buff *skb, u32 tx_flags)
3486 {
3487         struct e1000_adv_tx_context_desc *context_desc;
3488         struct pci_dev *pdev = tx_ring->pdev;
3489         struct igb_buffer *buffer_info;
3490         u32 info = 0, tu_cmd = 0;
3491         unsigned int i;
3492
3493         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3494             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3495                 i = tx_ring->next_to_use;
3496                 buffer_info = &tx_ring->buffer_info[i];
3497                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3498
3499                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3500                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3501
3502                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3503                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3504                         info |= skb_network_header_len(skb);
3505
3506                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3507
3508                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3509
3510                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3511                         __be16 protocol;
3512
3513                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3514                                 const struct vlan_ethhdr *vhdr =
3515                                           (const struct vlan_ethhdr*)skb->data;
3516
3517                                 protocol = vhdr->h_vlan_encapsulated_proto;
3518                         } else {
3519                                 protocol = skb->protocol;
3520                         }
3521
3522                         switch (protocol) {
3523                         case cpu_to_be16(ETH_P_IP):
3524                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3525                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3526                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3527                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3528                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3529                                 break;
3530                         case cpu_to_be16(ETH_P_IPV6):
3531                                 /* XXX what about other V6 headers?? */
3532                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3533                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3534                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3535                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3536                                 break;
3537                         default:
3538                                 if (unlikely(net_ratelimit()))
3539                                         dev_warn(&pdev->dev,
3540                                             "partial checksum but proto=%x!\n",
3541                                             skb->protocol);
3542                                 break;
3543                         }
3544                 }
3545
3546                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3547                 context_desc->seqnum_seed = 0;
3548                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3549                         context_desc->mss_l4len_idx =
3550                                 cpu_to_le32(tx_ring->reg_idx << 4);
3551
3552                 buffer_info->time_stamp = jiffies;
3553                 buffer_info->next_to_watch = i;
3554                 buffer_info->dma = 0;
3555
3556                 i++;
3557                 if (i == tx_ring->count)
3558                         i = 0;
3559                 tx_ring->next_to_use = i;
3560
3561                 return true;
3562         }
3563         return false;
3564 }
3565
3566 #define IGB_MAX_TXD_PWR 16
3567 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
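/*
 * Each advanced data descriptor can carry just under 64KB of payload;
 * igb_tx_map_adv() below BUG()s on any single buffer that reaches
 * IGB_MAX_DATA_PER_TXD.
 */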
3568
3569 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3570                                  unsigned int first)
3571 {
3572         struct igb_buffer *buffer_info;
3573         struct pci_dev *pdev = tx_ring->pdev;
3574         unsigned int len = skb_headlen(skb);
3575         unsigned int count = 0, i;
3576         unsigned int f;
3577
3578         i = tx_ring->next_to_use;
3579
3580         buffer_info = &tx_ring->buffer_info[i];
3581         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3582         buffer_info->length = len;
3583         /* set time_stamp *before* dma to help avoid a possible race */
3584         buffer_info->time_stamp = jiffies;
3585         buffer_info->next_to_watch = i;
3586         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3587                                           PCI_DMA_TODEVICE);
3588         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3589                 goto dma_error;
3590
3591         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3592                 struct skb_frag_struct *frag;
3593
3594                 count++;
3595                 i++;
3596                 if (i == tx_ring->count)
3597                         i = 0;
3598
3599                 frag = &skb_shinfo(skb)->frags[f];
3600                 len = frag->size;
3601
3602                 buffer_info = &tx_ring->buffer_info[i];
3603                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3604                 buffer_info->length = len;
3605                 buffer_info->time_stamp = jiffies;
3606                 buffer_info->next_to_watch = i;
3607                 buffer_info->mapped_as_page = true;
3608                 buffer_info->dma = pci_map_page(pdev,
3609                                                 frag->page,
3610                                                 frag->page_offset,
3611                                                 len,
3612                                                 PCI_DMA_TODEVICE);
3613                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3614                         goto dma_error;
3615
3616         }
3617
3618         tx_ring->buffer_info[i].skb = skb;
3619         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3620         tx_ring->buffer_info[first].next_to_watch = i;
3621
3622         return ++count;
3623
3624 dma_error:
3625         dev_err(&pdev->dev, "TX DMA map failed\n");
3626
3627         /* clear timestamp and dma mappings for failed buffer_info mapping */
3628         buffer_info->dma = 0;
3629         buffer_info->time_stamp = 0;
3630         buffer_info->length = 0;
3631         buffer_info->next_to_watch = 0;
3632         buffer_info->mapped_as_page = false;
3633
3634         /* clear timestamp and dma mappings for remaining portion of packet */
3635         while (count--) {
3636                 if (i == 0)
3637                         i = tx_ring->count;
3638                 i--;
3639                 buffer_info = &tx_ring->buffer_info[i];
3640                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3641         }
3642
3643         return 0;
3644 }
3645
3646 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3647                                     u32 tx_flags, int count, u32 paylen,
3648                                     u8 hdr_len)
3649 {
3650         union e1000_adv_tx_desc *tx_desc;
3651         struct igb_buffer *buffer_info;
3652         u32 olinfo_status = 0, cmd_type_len;
3653         unsigned int i = tx_ring->next_to_use;
3654
3655         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3656                         E1000_ADVTXD_DCMD_DEXT);
3657
3658         if (tx_flags & IGB_TX_FLAGS_VLAN)
3659                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3660
3661         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3662                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3663
3664         if (tx_flags & IGB_TX_FLAGS_TSO) {
3665                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3666
3667                 /* insert tcp checksum */
3668                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3669
3670                 /* insert ip checksum */
3671                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3672                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3673
3674         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3675                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3676         }
3677
3678         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3679             (tx_flags & (IGB_TX_FLAGS_CSUM |
3680                          IGB_TX_FLAGS_TSO |
3681                          IGB_TX_FLAGS_VLAN)))
3682                 olinfo_status |= tx_ring->reg_idx << 4;
3683
3684         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3685
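        /*
         * Every descriptor of the packet carries the same cmd_type_len and
         * olinfo_status; only the buffer address and length differ.  The
         * last descriptor additionally has IGB_ADVTXD_DCMD OR'd in below
         * (end-of-packet/report-status) so the hardware signals completion
         * once per packet.
         */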
3686         do {
3687                 buffer_info = &tx_ring->buffer_info[i];
3688                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3689                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3690                 tx_desc->read.cmd_type_len =
3691                         cpu_to_le32(cmd_type_len | buffer_info->length);
3692                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3693                 count--;
3694                 i++;
3695                 if (i == tx_ring->count)
3696                         i = 0;
3697         } while (count > 0);
3698
3699         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3700         /* Force memory writes to complete before letting h/w
3701          * know there are new descriptors to fetch.  (Only
3702          * applicable for weak-ordered memory model archs,
3703          * such as IA-64). */
3704         wmb();
3705
3706         tx_ring->next_to_use = i;
3707         writel(i, tx_ring->tail);
3708         /* we need this if more than one processor can write to our tail
3709          * at a time; it synchronizes IO on IA64/Altix systems */
3710         mmiowb();
3711 }
3712
3713 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3714 {
3715         struct net_device *netdev = tx_ring->netdev;
3716
3717         netif_stop_subqueue(netdev, tx_ring->queue_index);
3718
3719         /* Herbert's original patch had:
3720          *  smp_mb__after_netif_stop_queue();
3721          * but since that doesn't exist yet, just open code it. */
3722         smp_mb();
3723
3724         /* We need to check again in case another CPU has just
3725          * made room available. */
3726         if (igb_desc_unused(tx_ring) < size)
3727                 return -EBUSY;
3728
3729         /* A reprieve! */
3730         netif_wake_subqueue(netdev, tx_ring->queue_index);
3731         tx_ring->tx_stats.restart_queue++;
3732         return 0;
3733 }
3734
3735 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3736 {
3737         if (igb_desc_unused(tx_ring) >= size)
3738                 return 0;
3739         return __igb_maybe_stop_tx(tx_ring, size);
3740 }
3741
3742 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3743                                     struct igb_ring *tx_ring)
3744 {
3745         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3746         int tso = 0, count;
3747         u32 tx_flags = 0;
3748         u16 first;
3749         u8 hdr_len = 0;
3750         union skb_shared_tx *shtx = skb_tx(skb);
3751
3752         /* need: 1 descriptor per page,
3753          *       + 2 desc gap to keep tail from touching head,
3754          *       + 1 desc for skb->data,
3755          *       + 1 desc for context descriptor,
3756          * otherwise try next time */
3757         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3758                 /* this is a hard error */
3759                 return NETDEV_TX_BUSY;
3760         }
3761
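        /*
         * The stack sets shtx->hardware when the socket requested a hardware
         * transmit timestamp (SOF_TIMESTAMPING_TX_HARDWARE).  Flag the packet
         * with IGB_TX_FLAGS_TSTAMP so igb_tx_queue_adv() sets
         * E1000_ADVTXD_MAC_TSTAMP, and mark in_progress so the Tx completion
         * path knows a timestamp is pending for this skb.
         */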
3762         if (unlikely(shtx->hardware)) {
3763                 shtx->in_progress = 1;
3764                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3765         }
3766
3767         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3768                 tx_flags |= IGB_TX_FLAGS_VLAN;
3769                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3770         }
3771
3772         if (skb->protocol == htons(ETH_P_IP))
3773                 tx_flags |= IGB_TX_FLAGS_IPV4;
3774
3775         first = tx_ring->next_to_use;
3776         if (skb_is_gso(skb)) {
3777                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3778
3779                 if (tso < 0) {
3780                         dev_kfree_skb_any(skb);
3781                         return NETDEV_TX_OK;
3782                 }
3783         }
3784
3785         if (tso)
3786                 tx_flags |= IGB_TX_FLAGS_TSO;
3787         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3788                  (skb->ip_summed == CHECKSUM_PARTIAL))
3789                 tx_flags |= IGB_TX_FLAGS_CSUM;
3790
3791         /*
3792          * count reflects descriptors mapped; if 0 or less then a mapping
3793          * error has occurred and we need to rewind the descriptor queue
3794          */
3795         count = igb_tx_map_adv(tx_ring, skb, first);
3796         if (!count) {
3797                 dev_kfree_skb_any(skb);
3798                 tx_ring->buffer_info[first].time_stamp = 0;
3799                 tx_ring->next_to_use = first;
3800                 return NETDEV_TX_OK;
3801         }
3802
3803         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3804
3805         /* Make sure there is space in the ring for the next send. */
3806         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3807
3808         return NETDEV_TX_OK;
3809 }
3810
3811 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3812                                       struct net_device *netdev)
3813 {
3814         struct igb_adapter *adapter = netdev_priv(netdev);
3815         struct igb_ring *tx_ring;
3816         int r_idx = 0;
3817
3818         if (test_bit(__IGB_DOWN, &adapter->state)) {
3819                 dev_kfree_skb_any(skb);
3820                 return NETDEV_TX_OK;
3821         }
3822
3823         if (skb->len <= 0) {
3824                 dev_kfree_skb_any(skb);
3825                 return NETDEV_TX_OK;
3826         }
3827
3828         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3829         tx_ring = adapter->multi_tx_table[r_idx];
3830
3831         /* This goes back to the question of how to logically map a tx queue
3832          * to a flow.  Right now, performance is impacted slightly negatively
3833          * if using multiple tx queues.  If the stack breaks away from a
3834          * single qdisc implementation, we can look at this again. */
3835         return igb_xmit_frame_ring_adv(skb, tx_ring);
3836 }
3837
3838 /**
3839  * igb_tx_timeout - Respond to a Tx Hang
3840  * @netdev: network interface device structure
3841  **/
3842 static void igb_tx_timeout(struct net_device *netdev)
3843 {
3844         struct igb_adapter *adapter = netdev_priv(netdev);
3845         struct e1000_hw *hw = &adapter->hw;
3846
3847         /* Do the reset outside of interrupt context */
3848         adapter->tx_timeout_count++;
3849
3850         if (hw->mac.type == e1000_82580)
3851                 hw->dev_spec._82575.global_device_reset = true;
3852
3853         schedule_work(&adapter->reset_task);
3854         wr32(E1000_EICS,
3855              (adapter->eims_enable_mask & ~adapter->eims_other));
3856 }
3857
3858 static void igb_reset_task(struct work_struct *work)
3859 {
3860         struct igb_adapter *adapter;
3861         adapter = container_of(work, struct igb_adapter, reset_task);
3862
3863         igb_reinit_locked(adapter);
3864 }
3865
3866 /**
3867  * igb_get_stats - Get System Network Statistics
3868  * @netdev: network interface device structure
3869  *
3870  * Returns the address of the device statistics structure.
3871  * The statistics are actually updated from the timer callback.
3872  **/
3873 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3874 {
3875         /* only return the current stats */
3876         return &netdev->stats;
3877 }
3878
3879 /**
3880  * igb_change_mtu - Change the Maximum Transfer Unit
3881  * @netdev: network interface device structure
3882  * @new_mtu: new value for maximum frame size
3883  *
3884  * Returns 0 on success, negative on failure
3885  **/
3886 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3887 {
3888         struct igb_adapter *adapter = netdev_priv(netdev);
3889         struct pci_dev *pdev = adapter->pdev;
3890         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3891         u32 rx_buffer_len, i;
3892
3893         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3894                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3895                 return -EINVAL;
3896         }
3897
3898         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3899                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3900                 return -EINVAL;
3901         }
3902
3903         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3904                 msleep(1);
3905
3906         /* igb_down has a dependency on max_frame_size */
3907         adapter->max_frame_size = max_frame;
3908
3909         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3910          * means we reserve 2 more, this pushes us to allocate from the next
3911          * larger slab size.
3912          * i.e. RXBUFFER_2048 --> size-4096 slab
3913          */
3914
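        /*
         * The 82580 prepends a timestamp header of IGB_TS_HDR_LEN bytes to
         * received packets, so account for it when sizing receive buffers.
         */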
3915         if (adapter->hw.mac.type == e1000_82580)
3916                 max_frame += IGB_TS_HDR_LEN;
3917
3918         if (max_frame <= IGB_RXBUFFER_1024)
3919                 rx_buffer_len = IGB_RXBUFFER_1024;
3920         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3921                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3922         else
3923                 rx_buffer_len = IGB_RXBUFFER_128;
3924
3925         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
3926              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
3927                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
3928
3929         if ((adapter->hw.mac.type == e1000_82580) &&
3930             (rx_buffer_len == IGB_RXBUFFER_128))
3931                 rx_buffer_len += IGB_RXBUFFER_64;
3932
3933         if (netif_running(netdev))
3934                 igb_down(adapter);
3935
3936         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3937                  netdev->mtu, new_mtu);
3938         netdev->mtu = new_mtu;
3939
3940         for (i = 0; i < adapter->num_rx_queues; i++)
3941                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3942
3943         if (netif_running(netdev))
3944                 igb_up(adapter);
3945         else
3946                 igb_reset(adapter);
3947
3948         clear_bit(__IGB_RESETTING, &adapter->state);
3949
3950         return 0;
3951 }
3952
3953 /**
3954  * igb_update_stats - Update the board statistics counters
3955  * @adapter: board private structure
3956  **/
3957
3958 void igb_update_stats(struct igb_adapter *adapter)
3959 {
3960         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3961         struct e1000_hw *hw = &adapter->hw;
3962         struct pci_dev *pdev = adapter->pdev;
3963         u32 rnbc, reg;
3964         u16 phy_tmp;
3965         int i;
3966         u64 bytes, packets;
3967
3968 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3969
3970         /*
3971          * Prevent stats update while adapter is being reset, or if the pci
3972          * connection is down.
3973          */
3974         if (adapter->link_speed == 0)
3975                 return;
3976         if (pci_channel_offline(pdev))
3977                 return;
3978
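        /*
         * RQDPC counts packets dropped because a receive queue had no free
         * descriptors; accumulate it per ring and fold it into the
         * rx_fifo_errors reported to the stack.
         */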
3979         bytes = 0;
3980         packets = 0;
3981         for (i = 0; i < adapter->num_rx_queues; i++) {
3982                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3983                 struct igb_ring *ring = adapter->rx_ring[i];
3984                 ring->rx_stats.drops += rqdpc_tmp;
3985                 net_stats->rx_fifo_errors += rqdpc_tmp;
3986                 bytes += ring->rx_stats.bytes;
3987                 packets += ring->rx_stats.packets;
3988         }
3989
3990         net_stats->rx_bytes = bytes;
3991         net_stats->rx_packets = packets;
3992
3993         bytes = 0;
3994         packets = 0;
3995         for (i = 0; i < adapter->num_tx_queues; i++) {
3996                 struct igb_ring *ring = adapter->tx_ring[i];
3997                 bytes += ring->tx_stats.bytes;
3998                 packets += ring->tx_stats.packets;
3999         }
4000         net_stats->tx_bytes = bytes;
4001         net_stats->tx_packets = packets;
4002
4003         /* read stats registers */
4004         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4005         adapter->stats.gprc += rd32(E1000_GPRC);
4006         adapter->stats.gorc += rd32(E1000_GORCL);
4007         rd32(E1000_GORCH); /* clear GORCL */
4008         adapter->stats.bprc += rd32(E1000_BPRC);
4009         adapter->stats.mprc += rd32(E1000_MPRC);
4010         adapter->stats.roc += rd32(E1000_ROC);
4011
4012         adapter->stats.prc64 += rd32(E1000_PRC64);
4013         adapter->stats.prc127 += rd32(E1000_PRC127);
4014         adapter->stats.prc255 += rd32(E1000_PRC255);
4015         adapter->stats.prc511 += rd32(E1000_PRC511);
4016         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4017         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4018         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4019         adapter->stats.sec += rd32(E1000_SEC);
4020
4021         adapter->stats.mpc += rd32(E1000_MPC);
4022         adapter->stats.scc += rd32(E1000_SCC);
4023         adapter->stats.ecol += rd32(E1000_ECOL);
4024         adapter->stats.mcc += rd32(E1000_MCC);
4025         adapter->stats.latecol += rd32(E1000_LATECOL);
4026         adapter->stats.dc += rd32(E1000_DC);
4027         adapter->stats.rlec += rd32(E1000_RLEC);
4028         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4029         adapter->stats.xontxc += rd32(E1000_XONTXC);
4030         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4031         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4032         adapter->stats.fcruc += rd32(E1000_FCRUC);
4033         adapter->stats.gptc += rd32(E1000_GPTC);
4034         adapter->stats.gotc += rd32(E1000_GOTCL);
4035         rd32(E1000_GOTCH); /* clear GOTCL */
4036         rnbc = rd32(E1000_RNBC);
4037         adapter->stats.rnbc += rnbc;
4038         net_stats->rx_fifo_errors += rnbc;
4039         adapter->stats.ruc += rd32(E1000_RUC);
4040         adapter->stats.rfc += rd32(E1000_RFC);
4041         adapter->stats.rjc += rd32(E1000_RJC);
4042         adapter->stats.tor += rd32(E1000_TORH);
4043         adapter->stats.tot += rd32(E1000_TOTH);
4044         adapter->stats.tpr += rd32(E1000_TPR);
4045
4046         adapter->stats.ptc64 += rd32(E1000_PTC64);
4047         adapter->stats.ptc127 += rd32(E1000_PTC127);
4048         adapter->stats.ptc255 += rd32(E1000_PTC255);
4049         adapter->stats.ptc511 += rd32(E1000_PTC511);
4050         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4051         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4052
4053         adapter->stats.mptc += rd32(E1000_MPTC);
4054         adapter->stats.bptc += rd32(E1000_BPTC);
4055
4056         adapter->stats.tpt += rd32(E1000_TPT);
4057         adapter->stats.colc += rd32(E1000_COLC);
4058
4059         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4060         /* read internal phy specific stats */
4061         reg = rd32(E1000_CTRL_EXT);
4062         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4063                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4064                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4065         }
4066
4067         adapter->stats.tsctc += rd32(E1000_TSCTC);
4068         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4069
4070         adapter->stats.iac += rd32(E1000_IAC);
4071         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4072         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4073         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4074         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4075         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4076         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4077         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4078         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4079
4080         /* Fill out the OS statistics structure */
4081         net_stats->multicast = adapter->stats.mprc;
4082         net_stats->collisions = adapter->stats.colc;
4083
4084         /* Rx Errors */
4085
4086         /* RLEC on some newer hardware can be incorrect so build
4087          * our own version based on RUC and ROC */
4088         net_stats->rx_errors = adapter->stats.rxerrc +
4089                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4090                 adapter->stats.ruc + adapter->stats.roc +
4091                 adapter->stats.cexterr;
4092         net_stats->rx_length_errors = adapter->stats.ruc +
4093                                       adapter->stats.roc;
4094         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4095         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4096         net_stats->rx_missed_errors = adapter->stats.mpc;
4097
4098         /* Tx Errors */
4099         net_stats->tx_errors = adapter->stats.ecol +
4100                                adapter->stats.latecol;
4101         net_stats->tx_aborted_errors = adapter->stats.ecol;
4102         net_stats->tx_window_errors = adapter->stats.latecol;
4103         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4104
4105         /* Tx Dropped needs to be maintained elsewhere */
4106
4107         /* Phy Stats */
4108         if (hw->phy.media_type == e1000_media_type_copper) {
4109                 if ((adapter->link_speed == SPEED_1000) &&
4110                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4111                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4112                         adapter->phy_stats.idle_errors += phy_tmp;
4113                 }
4114         }
4115
4116         /* Management Stats */
4117         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4118         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4119         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4120 }
4121
4122 static irqreturn_t igb_msix_other(int irq, void *data)
4123 {
4124         struct igb_adapter *adapter = data;
4125         struct e1000_hw *hw = &adapter->hw;
4126         u32 icr = rd32(E1000_ICR);
4127         /* reading ICR causes bit 31 of EICR to be cleared */
4128
4129         if (icr & E1000_ICR_DRSTA)
4130                 schedule_work(&adapter->reset_task);
4131
4132         if (icr & E1000_ICR_DOUTSYNC) {
4133                 /* HW is reporting DMA is out of sync */
4134                 adapter->stats.doosync++;
4135         }
4136
4137         /* Check for a mailbox event */
4138         if (icr & E1000_ICR_VMMB)
4139                 igb_msg_task(adapter);
4140
4141         if (icr & E1000_ICR_LSC) {
4142                 hw->mac.get_link_status = 1;
4143                 /* guard against interrupt when we're going down */
4144                 if (!test_bit(__IGB_DOWN, &adapter->state))
4145                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4146         }
4147
4148         if (adapter->vfs_allocated_count)
4149                 wr32(E1000_IMS, E1000_IMS_LSC |
4150                                 E1000_IMS_VMMB |
4151                                 E1000_IMS_DOUTSYNC);
4152         else
4153                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4154         wr32(E1000_EIMS, adapter->eims_other);
4155
4156         return IRQ_HANDLED;
4157 }
4158
4159 static void igb_write_itr(struct igb_q_vector *q_vector)
4160 {
4161         struct igb_adapter *adapter = q_vector->adapter;
4162         u32 itr_val = q_vector->itr_val & 0x7FFC;
4163
4164         if (!q_vector->set_itr)
4165                 return;
4166
4167         if (!itr_val)
4168                 itr_val = 0x4;
4169
4170         if (adapter->hw.mac.type == e1000_82575)
4171                 itr_val |= itr_val << 16;
4172         else
4173                 itr_val |= 0x8000000;
4174
4175         writel(itr_val, q_vector->itr_register);
4176         q_vector->set_itr = 0;
4177 }
4178
4179 static irqreturn_t igb_msix_ring(int irq, void *data)
4180 {
4181         struct igb_q_vector *q_vector = data;
4182
4183         /* Write the ITR value calculated from the previous interrupt. */
4184         igb_write_itr(q_vector);
4185
4186         napi_schedule(&q_vector->napi);
4187
4188         return IRQ_HANDLED;
4189 }
4190
4191 #ifdef CONFIG_IGB_DCA
4192 static void igb_update_dca(struct igb_q_vector *q_vector)
4193 {
4194         struct igb_adapter *adapter = q_vector->adapter;
4195         struct e1000_hw *hw = &adapter->hw;
4196         int cpu = get_cpu();
4197
4198         if (q_vector->cpu == cpu)
4199                 goto out_no_update;
4200
4201         if (q_vector->tx_ring) {
4202                 int q = q_vector->tx_ring->reg_idx;
4203                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4204                 if (hw->mac.type == e1000_82575) {
4205                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4206                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4207                 } else {
4208                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4209                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4210                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4211                 }
4212                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4213                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4214         }
4215         if (q_vector->rx_ring) {
4216                 int q = q_vector->rx_ring->reg_idx;
4217                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4218                 if (hw->mac.type == e1000_82575) {
4219                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4220                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4221                 } else {
4222                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4223                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4224                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4225                 }
4226                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4227                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4228                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4229                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4230         }
4231         q_vector->cpu = cpu;
4232 out_no_update:
4233         put_cpu();
4234 }
4235
4236 static void igb_setup_dca(struct igb_adapter *adapter)
4237 {
4238         struct e1000_hw *hw = &adapter->hw;
4239         int i;
4240
4241         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4242                 return;
4243
4244         /* Always use CB2 mode, difference is masked in the CB driver. */
4245         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4246
4247         for (i = 0; i < adapter->num_q_vectors; i++) {
4248                 adapter->q_vector[i]->cpu = -1;
4249                 igb_update_dca(adapter->q_vector[i]);
4250         }
4251 }
4252
4253 static int __igb_notify_dca(struct device *dev, void *data)
4254 {
4255         struct net_device *netdev = dev_get_drvdata(dev);
4256         struct igb_adapter *adapter = netdev_priv(netdev);
4257         struct pci_dev *pdev = adapter->pdev;
4258         struct e1000_hw *hw = &adapter->hw;
4259         unsigned long event = *(unsigned long *)data;
4260
4261         switch (event) {
4262         case DCA_PROVIDER_ADD:
4263                 /* if already enabled, don't do it again */
4264                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4265                         break;
4266                 if (dca_add_requester(dev) == 0) {
4267                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4268                         dev_info(&pdev->dev, "DCA enabled\n");
4269                         igb_setup_dca(adapter);
4270                         break;
4271                 }
4272                 /* Fall Through since DCA is disabled. */
4273         case DCA_PROVIDER_REMOVE:
4274                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4275                         /* without this a class_device is left
4276                          * hanging around in the sysfs model */
4277                         dca_remove_requester(dev);
4278                         dev_info(&pdev->dev, "DCA disabled\n");
4279                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4280                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4281                 }
4282                 break;
4283         }
4284
4285         return 0;
4286 }
4287
4288 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4289                           void *p)
4290 {
4291         int ret_val;
4292
4293         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4294                                          __igb_notify_dca);
4295
4296         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4297 }
4298 #endif /* CONFIG_IGB_DCA */
4299
4300 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4301 {
4302         struct e1000_hw *hw = &adapter->hw;
4303         u32 ping;
4304         int i;
4305
4306         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4307                 ping = E1000_PF_CONTROL_MSG;
4308                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4309                         ping |= E1000_VT_MSGTYPE_CTS;
4310                 igb_write_mbx(hw, &ping, 1, i);
4311         }
4312 }
4313
4314 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4315 {
4316         struct e1000_hw *hw = &adapter->hw;
4317         u32 vmolr = rd32(E1000_VMOLR(vf));
4318         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4319
4320         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4321                             IGB_VF_FLAG_MULTI_PROMISC);
4322         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4323
4324         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4325                 vmolr |= E1000_VMOLR_MPME;
4326                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4327         } else {
4328                 /*
4329                  * if we have hashes and we are clearing a multicast promisc
4330                  * flag we need to write the hashes to the MTA as this step
4331                  * was previously skipped
4332                  */
4333                 if (vf_data->num_vf_mc_hashes > 30) {
4334                         vmolr |= E1000_VMOLR_MPME;
4335                 } else if (vf_data->num_vf_mc_hashes) {
4336                         int j;
4337                         vmolr |= E1000_VMOLR_ROMPE;
4338                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4339                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4340                 }
4341         }
4342
4343         wr32(E1000_VMOLR(vf), vmolr);
4344
4345         /* there are flags left unprocessed, likely not supported */
4346         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4347                 return -EINVAL;
4348
4349         return 0;
4350
4351 }
4352
4353 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4354                                   u32 *msgbuf, u32 vf)
4355 {
4356         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4357         u16 *hash_list = (u16 *)&msgbuf[1];
4358         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4359         int i;
4360
4361         /* salt away the number of multicast addresses assigned
4362          * to this VF for later use to restore when the PF multicast
4363          * list changes
4364          */
4365         vf_data->num_vf_mc_hashes = n;
4366
4367         /* only up to 30 hash values supported */
4368         if (n > 30)
4369                 n = 30;
4370
4371         /* store the hashes for later use */
4372         for (i = 0; i < n; i++)
4373                 vf_data->vf_mc_hashes[i] = hash_list[i];
4374
4375         /* Flush and reset the mta with the new values */
4376         igb_set_rx_mode(adapter->netdev);
4377
4378         return 0;
4379 }
4380
4381 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4382 {
4383         struct e1000_hw *hw = &adapter->hw;
4384         struct vf_data_storage *vf_data;
4385         int i, j;
4386
4387         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4388                 u32 vmolr = rd32(E1000_VMOLR(i));
4389                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4390
4391                 vf_data = &adapter->vf_data[i];
4392
4393                 if ((vf_data->num_vf_mc_hashes > 30) ||
4394                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4395                         vmolr |= E1000_VMOLR_MPME;
4396                 } else if (vf_data->num_vf_mc_hashes) {
4397                         vmolr |= E1000_VMOLR_ROMPE;
4398                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4399                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4400                 }
4401                 wr32(E1000_VMOLR(i), vmolr);
4402         }
4403 }
4404
4405 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4406 {
4407         struct e1000_hw *hw = &adapter->hw;
4408         u32 pool_mask, reg, vid;
4409         int i;
4410
4411         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4412
4413         /* Find the vlan filter for this id */
4414         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4415                 reg = rd32(E1000_VLVF(i));
4416
4417                 /* remove the vf from the pool */
4418                 reg &= ~pool_mask;
4419
4420                 /* if pool is empty then remove entry from vfta */
4421                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4422                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4423                         vid = reg & E1000_VLVF_VLANID_MASK;
4424                         igb_vfta_set(hw, vid, false);
4425                         reg = 0;
4426                 }
4427
4428                 wr32(E1000_VLVF(i), reg);
4429         }
4430
4431         adapter->vf_data[vf].vlans_enabled = 0;
4432 }
4433
4434 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4435 {
4436         struct e1000_hw *hw = &adapter->hw;
4437         u32 reg, i;
4438
4439         /* The vlvf table only exists on 82576 hardware and newer */
4440         if (hw->mac.type < e1000_82576)
4441                 return -1;
4442
4443         /* we only need to do this if VMDq is enabled */
4444         if (!adapter->vfs_allocated_count)
4445                 return -1;
4446
4447         /* Find the vlan filter for this id */
4448         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4449                 reg = rd32(E1000_VLVF(i));
4450                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4451                     vid == (reg & E1000_VLVF_VLANID_MASK))
4452                         break;
4453         }
4454
4455         if (add) {
4456                 if (i == E1000_VLVF_ARRAY_SIZE) {
4457                         /* Did not find a matching VLAN ID entry that was
4458                          * enabled.  Search for a free filter entry, i.e.
4459                          * one without the enable bit set
4460                          */
4461                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4462                                 reg = rd32(E1000_VLVF(i));
4463                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4464                                         break;
4465                         }
4466                 }
4467                 if (i < E1000_VLVF_ARRAY_SIZE) {
4468                         /* Found an enabled/available entry */
4469                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4470
4471                         /* if !enabled we need to set this up in vfta */
4472                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4473                                 /* add VID to filter table */
4474                                 igb_vfta_set(hw, vid, true);
4475                                 reg |= E1000_VLVF_VLANID_ENABLE;
4476                         }
4477                         reg &= ~E1000_VLVF_VLANID_MASK;
4478                         reg |= vid;
4479                         wr32(E1000_VLVF(i), reg);
4480
4481                         /* do not modify RLPML for PF devices */
4482                         if (vf >= adapter->vfs_allocated_count)
4483                                 return 0;
4484
4485                         if (!adapter->vf_data[vf].vlans_enabled) {
4486                                 u32 size;
4487                                 reg = rd32(E1000_VMOLR(vf));
4488                                 size = reg & E1000_VMOLR_RLPML_MASK;
4489                                 size += 4;
4490                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4491                                 reg |= size;
4492                                 wr32(E1000_VMOLR(vf), reg);
4493                         }
4494
4495                         adapter->vf_data[vf].vlans_enabled++;
4496                         return 0;
4497                 }
4498         } else {
4499                 if (i < E1000_VLVF_ARRAY_SIZE) {
4500                         /* remove vf from the pool */
4501                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4502                         /* if pool is empty then remove entry from vfta */
4503                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4504                                 reg = 0;
4505                                 igb_vfta_set(hw, vid, false);
4506                         }
4507                         wr32(E1000_VLVF(i), reg);
4508
4509                         /* do not modify RLPML for PF devices */
4510                         if (vf >= adapter->vfs_allocated_count)
4511                                 return 0;
4512
4513                         adapter->vf_data[vf].vlans_enabled--;
4514                         if (!adapter->vf_data[vf].vlans_enabled) {
4515                                 u32 size;
4516                                 reg = rd32(E1000_VMOLR(vf));
4517                                 size = reg & E1000_VMOLR_RLPML_MASK;
4518                                 size -= 4;
4519                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4520                                 reg |= size;
4521                                 wr32(E1000_VMOLR(vf), reg);
4522                         }
4523                 }
4524         }
4525         return 0;
4526 }
4527
4528 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4529 {
4530         struct e1000_hw *hw = &adapter->hw;
4531
4532         if (vid)
4533                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4534         else
4535                 wr32(E1000_VMVIR(vf), 0);
4536 }
4537
4538 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4539                                int vf, u16 vlan, u8 qos)
4540 {
4541         int err = 0;
4542         struct igb_adapter *adapter = netdev_priv(netdev);
4543
4544         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4545                 return -EINVAL;
4546         if (vlan || qos) {
4547                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4548                 if (err)
4549                         goto out;
4550                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4551                 igb_set_vmolr(adapter, vf, !vlan);
4552                 adapter->vf_data[vf].pf_vlan = vlan;
4553                 adapter->vf_data[vf].pf_qos = qos;
4554                 dev_info(&adapter->pdev->dev,
4555                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4556                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4557                         dev_warn(&adapter->pdev->dev,
4558                                  "The VF VLAN has been set,"
4559                                  " but the PF device is not up.\n");
4560                         dev_warn(&adapter->pdev->dev,
4561                                  "Bring the PF device up before"
4562                                  " attempting to use the VF device.\n");
4563                 }
4564         } else {
4565                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4566                                    false, vf);
4567                 igb_set_vmvir(adapter, vlan, vf);
4568                 igb_set_vmolr(adapter, vf, true);
4569                 adapter->vf_data[vf].pf_vlan = 0;
4570                 adapter->vf_data[vf].pf_qos = 0;
4571         }
4572 out:
4573         return err;
4574 }
4575
4576 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4577 {
4578         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4579         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4580
4581         return igb_vlvf_set(adapter, vid, add, vf);
4582 }
4583
4584 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4585 {
4586         /* clear flags */
4587         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4588         adapter->vf_data[vf].last_nack = jiffies;
4589
4590         /* reset offloads to defaults */
4591         igb_set_vmolr(adapter, vf, true);
4592
4593         /* reset vlans for device */
4594         igb_clear_vf_vfta(adapter, vf);
4595         if (adapter->vf_data[vf].pf_vlan)
4596                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4597                                     adapter->vf_data[vf].pf_vlan,
4598                                     adapter->vf_data[vf].pf_qos);
4599         else
4600                 igb_clear_vf_vfta(adapter, vf);
4601
4602         /* reset multicast table array for vf */
4603         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4604
4605         /* Flush and reset the mta with the new values */
4606         igb_set_rx_mode(adapter->netdev);
4607 }
4608
4609 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4610 {
4611         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4612
4613         /* generate a new mac address as we were hotplug removed/added */
4614         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4615                 random_ether_addr(vf_mac);
4616
4617         /* process remaining reset events */
4618         igb_vf_reset(adapter, vf);
4619 }
4620
4621 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4622 {
4623         struct e1000_hw *hw = &adapter->hw;
4624         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4625         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4626         u32 reg, msgbuf[3];
4627         u8 *addr = (u8 *)(&msgbuf[1]);
4628
4629         /* process all the same items cleared in a function level reset */
4630         igb_vf_reset(adapter, vf);
4631
4632         /* set vf mac address */
4633         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4634
4635         /* enable transmit and receive for vf */
4636         reg = rd32(E1000_VFTE);
4637         wr32(E1000_VFTE, reg | (1 << vf));
4638         reg = rd32(E1000_VFRE);
4639         wr32(E1000_VFRE, reg | (1 << vf));
4640
4641         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4642
4643         /* reply to reset with ack and vf mac address */
4644         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4645         memcpy(addr, vf_mac, 6);
4646         igb_write_mbx(hw, msgbuf, 3, vf);
4647 }
4648
4649 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4650 {
4651         unsigned char *addr = (unsigned char *)&msg[1];
4652         int err = -1;
4653
4654         if (is_valid_ether_addr(addr))
4655                 err = igb_set_vf_mac(adapter, vf, addr);
4656
4657         return err;
4658 }
4659
4660 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4661 {
4662         struct e1000_hw *hw = &adapter->hw;
4663         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4664         u32 msg = E1000_VT_MSGTYPE_NACK;
4665
4666         /* if device isn't clear to send it shouldn't be reading either */
4667         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4668             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4669                 igb_write_mbx(hw, &msg, 1, vf);
4670                 vf_data->last_nack = jiffies;
4671         }
4672 }
4673
4674 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4675 {
4676         struct pci_dev *pdev = adapter->pdev;
4677         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4678         struct e1000_hw *hw = &adapter->hw;
4679         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4680         s32 retval;
4681
4682         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4683
4684         if (retval) {
4685                 /* if receive failed revoke VF CTS status and restart init */
4686                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4687                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4688                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4689                         return;
4690                 goto out;
4691         }
4692
4693         /* this is a message we already processed, do nothing */
4694         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4695                 return;
4696
4697         /*
4698          * until the vf completes a reset it should not be
4699          * allowed to start any configuration.
4700          */
4701
4702         if (msgbuf[0] == E1000_VF_RESET) {
4703                 igb_vf_reset_msg(adapter, vf);
4704                 return;
4705         }
4706
4707         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4708                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4709                         return;
4710                 retval = -1;
4711                 goto out;
4712         }
4713
4714         switch ((msgbuf[0] & 0xFFFF)) {
4715         case E1000_VF_SET_MAC_ADDR:
4716                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4717                 break;
4718         case E1000_VF_SET_PROMISC:
4719                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4720                 break;
4721         case E1000_VF_SET_MULTICAST:
4722                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4723                 break;
4724         case E1000_VF_SET_LPE:
4725                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4726                 break;
4727         case E1000_VF_SET_VLAN:
4728                 if (adapter->vf_data[vf].pf_vlan)
4729                         retval = -1;
4730                 else
4731                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4732                 break;
4733         default:
4734                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4735                 retval = -1;
4736                 break;
4737         }
4738
4739         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4740 out:
4741         /* notify the VF of the results of what it sent us */
4742         if (retval)
4743                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4744         else
4745                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4746
4747         igb_write_mbx(hw, msgbuf, 1, vf);
4748 }
4749
4750 static void igb_msg_task(struct igb_adapter *adapter)
4751 {
4752         struct e1000_hw *hw = &adapter->hw;
4753         u32 vf;
4754
4755         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4756                 /* process any reset requests */
4757                 if (!igb_check_for_rst(hw, vf))
4758                         igb_vf_reset_event(adapter, vf);
4759
4760                 /* process any messages pending */
4761                 if (!igb_check_for_msg(hw, vf))
4762                         igb_rcv_msg_from_vf(adapter, vf);
4763
4764                 /* process any acks */
4765                 if (!igb_check_for_ack(hw, vf))
4766                         igb_rcv_ack_from_vf(adapter, vf);
4767         }
4768 }
4769
4770 /**
4771  *  igb_set_uta - Set unicast filter table address
4772  *  @adapter: board private structure
4773  *
4774  *  The unicast table address is a register array of 32-bit registers.
4775  *  The table is meant to be used in a way similar to how the MTA is used,
4776  *  however due to certain limitations in the hardware it is necessary to
4777  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4778  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4779  **/
4780 static void igb_set_uta(struct igb_adapter *adapter)
4781 {
4782         struct e1000_hw *hw = &adapter->hw;
4783         int i;
4784
4785         /* The UTA table only exists on 82576 hardware and newer */
4786         if (hw->mac.type < e1000_82576)
4787                 return;
4788
4789         /* we only need to do this if VMDq is enabled */
4790         if (!adapter->vfs_allocated_count)
4791                 return;
4792
4793         for (i = 0; i < hw->mac.uta_reg_count; i++)
4794                 array_wr32(E1000_UTA, i, ~0);
4795 }
4796
4797 /**
4798  * igb_intr_msi - Interrupt Handler
4799  * @irq: interrupt number
4800  * @data: pointer to a network interface device structure
4801  **/
4802 static irqreturn_t igb_intr_msi(int irq, void *data)
4803 {
4804         struct igb_adapter *adapter = data;
4805         struct igb_q_vector *q_vector = adapter->q_vector[0];
4806         struct e1000_hw *hw = &adapter->hw;
4807         /* read ICR disables interrupts using IAM */
4808         u32 icr = rd32(E1000_ICR);
4809
4810         igb_write_itr(q_vector);
4811
4812         if (icr & E1000_ICR_DRSTA)
4813                 schedule_work(&adapter->reset_task);
4814
4815         if (icr & E1000_ICR_DOUTSYNC) {
4816                 /* HW is reporting DMA is out of sync */
4817                 adapter->stats.doosync++;
4818         }
4819
4820         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4821                 hw->mac.get_link_status = 1;
4822                 if (!test_bit(__IGB_DOWN, &adapter->state))
4823                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4824         }
4825
4826         napi_schedule(&q_vector->napi);
4827
4828         return IRQ_HANDLED;
4829 }
4830
4831 /**
4832  * igb_intr - Legacy Interrupt Handler
4833  * @irq: interrupt number
4834  * @data: pointer to a network interface device structure
4835  **/
4836 static irqreturn_t igb_intr(int irq, void *data)
4837 {
4838         struct igb_adapter *adapter = data;
4839         struct igb_q_vector *q_vector = adapter->q_vector[0];
4840         struct e1000_hw *hw = &adapter->hw;
4841         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4842          * need for the IMC write */
4843         u32 icr = rd32(E1000_ICR);
4844         if (!icr)
4845                 return IRQ_NONE;  /* Not our interrupt */
4846
4847         igb_write_itr(q_vector);
4848
4849         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4850          * not set, then the adapter didn't send an interrupt */
4851         if (!(icr & E1000_ICR_INT_ASSERTED))
4852                 return IRQ_NONE;
4853
4854         if (icr & E1000_ICR_DRSTA)
4855                 schedule_work(&adapter->reset_task);
4856
4857         if (icr & E1000_ICR_DOUTSYNC) {
4858                 /* HW is reporting DMA is out of sync */
4859                 adapter->stats.doosync++;
4860         }
4861
4862         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4863                 hw->mac.get_link_status = 1;
4864                 /* guard against interrupt when we're going down */
4865                 if (!test_bit(__IGB_DOWN, &adapter->state))
4866                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4867         }
4868
4869         napi_schedule(&q_vector->napi);
4870
4871         return IRQ_HANDLED;
4872 }
4873
4874 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4875 {
4876         struct igb_adapter *adapter = q_vector->adapter;
4877         struct e1000_hw *hw = &adapter->hw;
4878
4879         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4880             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4881                 if (!adapter->msix_entries)
4882                         igb_set_itr(adapter);
4883                 else
4884                         igb_update_ring_itr(q_vector);
4885         }
4886
4887         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4888                 if (adapter->msix_entries)
4889                         wr32(E1000_EIMS, q_vector->eims_value);
4890                 else
4891                         igb_irq_enable(adapter);
4892         }
4893 }
4894
4895 /**
4896  * igb_poll - NAPI Rx polling callback
4897  * @napi: napi polling structure
4898  * @budget: count of how many packets we should handle
4899  **/
4900 static int igb_poll(struct napi_struct *napi, int budget)
4901 {
4902         struct igb_q_vector *q_vector = container_of(napi,
4903                                                      struct igb_q_vector,
4904                                                      napi);
4905         int tx_clean_complete = 1, work_done = 0;
4906
4907 #ifdef CONFIG_IGB_DCA
4908         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4909                 igb_update_dca(q_vector);
4910 #endif
4911         if (q_vector->tx_ring)
4912                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4913
4914         if (q_vector->rx_ring)
4915                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4916
4917         if (!tx_clean_complete)
4918                 work_done = budget;
4919
4920         /* If not enough Rx work done, exit the polling mode */
4921         if (work_done < budget) {
4922                 napi_complete(napi);
4923                 igb_ring_irq_enable(q_vector);
4924         }
4925
4926         return work_done;
4927 }
4928
4929 /**
4930  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4931  * @adapter: board private structure
4932  * @shhwtstamps: timestamp structure to update
4933  * @regval: unsigned 64bit system time value.
4934  *
4935  * We need to convert the system time value stored in the RX/TXSTMP registers
4936  * into a hwtstamp which can be used by the upper level timestamping functions
4937  */
4938 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4939                                    struct skb_shared_hwtstamps *shhwtstamps,
4940                                    u64 regval)
4941 {
4942         u64 ns;
4943
4944         /*
4945          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up by
4946          * 24 bits to match the clock shift we set up earlier.
4947          */
4948         if (adapter->hw.mac.type == e1000_82580)
4949                 regval <<= IGB_82580_TSYNC_SHIFT;
4950
4951         ns = timecounter_cyc2time(&adapter->clock, regval);
4952         timecompare_update(&adapter->compare, ns);
4953         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4954         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4955         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4956 }
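/*
 * Illustrative sketch (not driver code, names are placeholders): once the
 * converted times reach skb_hwtstamps()/skb_tstamp_tx(), an application
 * that enabled SO_TIMESTAMPING sees them in a SCM_TIMESTAMPING control
 * message on its socket:
 *
 *	struct scm_timestamping *ts =
 *		(struct scm_timestamping *)CMSG_DATA(cmsg);
 *	// ts->ts[0]: software timestamp (if requested)
 *	// ts->ts[1]: hardware timestamp transformed to system time
 *	//            (the syststamp filled in above)
 *	// ts->ts[2]: raw hardware timestamp (the hwtstamp filled in above)
 */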
4957
4958 /**
4959  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4960  * @q_vector: pointer to q_vector containing needed info
4961  * @skb: packet that was just sent
4962  *
4963  * If we were asked to do hardware stamping and such a time stamp is
4964  * available, then it must have been for this skb here because we
4965  * allow only one such packet into the queue.
4966  */
4967 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4968 {
4969         struct igb_adapter *adapter = q_vector->adapter;
4970         union skb_shared_tx *shtx = skb_tx(skb);
4971         struct e1000_hw *hw = &adapter->hw;
4972         struct skb_shared_hwtstamps shhwtstamps;
4973         u64 regval;
4974
4975         /* if skb does not support hw timestamp or TX stamp not valid exit */
4976         if (likely(!shtx->hardware) ||
4977             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4978                 return;
4979
4980         regval = rd32(E1000_TXSTMPL);
4981         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4982
4983         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4984         skb_tstamp_tx(skb, &shhwtstamps);
4985 }
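/*
 * Illustrative sketch (not driver code, "sock" is a placeholder): the
 * shtx->hardware flag checked above is only set by the stack when an
 * application has requested hardware transmit timestamps on its socket,
 * roughly:
 *
 *	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
 *		    SOF_TIMESTAMPING_RAW_HARDWARE;
 *	setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
 *
 * The resulting timestamp is looped back on the socket's error queue
 * together with the original packet.
 */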
4986
4987 /**
4988  * igb_clean_tx_irq - Reclaim resources after transmit completes
4989  * @q_vector: pointer to q_vector containing needed info
4990  * returns true if ring is completely cleaned
4991  **/
4992 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4993 {
4994         struct igb_adapter *adapter = q_vector->adapter;
4995         struct igb_ring *tx_ring = q_vector->tx_ring;
4996         struct net_device *netdev = tx_ring->netdev;
4997         struct e1000_hw *hw = &adapter->hw;
4998         struct igb_buffer *buffer_info;
4999         struct sk_buff *skb;
5000         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5001         unsigned int total_bytes = 0, total_packets = 0;
5002         unsigned int i, eop, count = 0;
5003         bool cleaned = false;
5004
5005         i = tx_ring->next_to_clean;
5006         eop = tx_ring->buffer_info[i].next_to_watch;
5007         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5008
5009         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5010                (count < tx_ring->count)) {
5011                 for (cleaned = false; !cleaned; count++) {
5012                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5013                         buffer_info = &tx_ring->buffer_info[i];
5014                         cleaned = (i == eop);
5015                         skb = buffer_info->skb;
5016
5017                         if (skb) {
5018                                 unsigned int segs, bytecount;
5019                                 /* gso_segs is currently only valid for tcp */
5020                                 segs = buffer_info->gso_segs;
5021                                 /* multiply data chunks by size of headers */
5022                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5023                                             skb->len;
5024                                 total_packets += segs;
5025                                 total_bytes += bytecount;
5026
5027                                 igb_tx_hwtstamp(q_vector, skb);
5028                         }
5029
5030                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5031                         tx_desc->wb.status = 0;
5032
5033                         i++;
5034                         if (i == tx_ring->count)
5035                                 i = 0;
5036                 }
5037                 eop = tx_ring->buffer_info[i].next_to_watch;
5038                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5039         }
5040
5041         tx_ring->next_to_clean = i;
5042
5043         if (unlikely(count &&
5044                      netif_carrier_ok(netdev) &&
5045                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5046                 /* Make sure that anybody stopping the queue after this
5047                  * sees the new next_to_clean.
5048                  */
5049                 smp_mb();
5050                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5051                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5052                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5053                         tx_ring->tx_stats.restart_queue++;
5054                 }
5055         }
5056
5057         if (tx_ring->detect_tx_hung) {
5058                 /* Detect a transmit hang in hardware, this serializes the
5059                  * check with the clearing of time_stamp and movement of i */
5060                 tx_ring->detect_tx_hung = false;
5061                 if (tx_ring->buffer_info[i].time_stamp &&
5062                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5063                                (adapter->tx_timeout_factor * HZ)) &&
5064                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5065
5066                         /* detected Tx unit hang */
5067                         dev_err(&tx_ring->pdev->dev,
5068                                 "Detected Tx Unit Hang\n"
5069                                 "  Tx Queue             <%d>\n"
5070                                 "  TDH                  <%x>\n"
5071                                 "  TDT                  <%x>\n"
5072                                 "  next_to_use          <%x>\n"
5073                                 "  next_to_clean        <%x>\n"
5074                                 "buffer_info[next_to_clean]\n"
5075                                 "  time_stamp           <%lx>\n"
5076                                 "  next_to_watch        <%x>\n"
5077                                 "  jiffies              <%lx>\n"
5078                                 "  desc.status          <%x>\n",
5079                                 tx_ring->queue_index,
5080                                 readl(tx_ring->head),
5081                                 readl(tx_ring->tail),
5082                                 tx_ring->next_to_use,
5083                                 tx_ring->next_to_clean,
5084                                 tx_ring->buffer_info[eop].time_stamp,
5085                                 eop,
5086                                 jiffies,
5087                                 eop_desc->wb.status);
5088                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5089                 }
5090         }
5091         tx_ring->total_bytes += total_bytes;
5092         tx_ring->total_packets += total_packets;
5093         tx_ring->tx_stats.bytes += total_bytes;
5094         tx_ring->tx_stats.packets += total_packets;
5095         return (count < tx_ring->count);
5096 }
5097
5098 /**
5099  * igb_receive_skb - helper function to handle rx indications
5100  * @q_vector: structure containing interrupt and ring information
5101  * @skb: packet to send up
5102  * @vlan_tag: vlan tag for packet
5103  **/
5104 static void igb_receive_skb(struct igb_q_vector *q_vector,
5105                             struct sk_buff *skb,
5106                             u16 vlan_tag)
5107 {
5108         struct igb_adapter *adapter = q_vector->adapter;
5109
5110         if (vlan_tag)
5111                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5112                                  vlan_tag, skb);
5113         else
5114                 napi_gro_receive(&q_vector->napi, skb);
5115 }
5116
5117 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5118                                        u32 status_err, struct sk_buff *skb)
5119 {
5120         skb->ip_summed = CHECKSUM_NONE;
5121
5122         /* skip if the Ignore Checksum bit is set or Rx checksum is disabled via ethtool */
5123         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5124              (status_err & E1000_RXD_STAT_IXSM))
5125                 return;
5126
5127         /* TCP/UDP checksum error bit is set */
5128         if (status_err &
5129             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5130                 /*
5131                  * work around errata with sctp packets where the TCPE aka
5132                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5133                  * packets, (aka let the stack check the crc32c)
5134                  */
5135                 if ((skb->len == 60) &&
5136                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5137                         ring->rx_stats.csum_err++;
5138
5139                 /* let the stack verify checksum errors */
5140                 return;
5141         }
5142         /* It must be a TCP or UDP packet with a valid checksum */
5143         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5144                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5145
5146         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5147 }
5148
5149 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5150                                    struct sk_buff *skb)
5151 {
5152         struct igb_adapter *adapter = q_vector->adapter;
5153         struct e1000_hw *hw = &adapter->hw;
5154         u64 regval;
5155
5156         /*
5157          * If the TSIP bit is set the timestamp was placed in the packet buffer
5158          * itself; otherwise the RX registers hold it and no other packet will
5159          * be time stamped until we read them, so read the registers to make
5160          * them available again.  Only one packet can be time stamped at a
5161          * time in that mode, so the register values must belong to this one
5162          * here and we don't need to compare the additional attributes stored.
5163          *
5164          * If nothing went wrong, then it should have a skb_shared_tx that we
5165          * can turn into a skb_shared_hwtstamps.
5166          */
5167         if (staterr & E1000_RXDADV_STAT_TSIP) {
5168                 u32 *stamp = (u32 *)skb->data;
5169                 regval = le32_to_cpu(*(stamp + 2));
5170                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5171                 skb_pull(skb, IGB_TS_HDR_LEN);
5172         } else {
5173                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5174                         return;
5175
5176                 regval = rd32(E1000_RXSTMPL);
5177                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5178         }
5179
5180         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5181 }
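/*
 * Buffer layout sketch for the TSIP path above, assuming IGB_TS_HDR_LEN
 * is the 16-byte timestamp header the 82580 prepends to the packet data:
 *
 *	skb->data + 0  .. 7	not consumed here
 *	skb->data + 8  .. 11	low 32 bits of the timestamp  (*(stamp + 2))
 *	skb->data + 12 .. 15	high 32 bits of the timestamp (*(stamp + 3))
 *
 * skb_pull() then strips the header so the stack never sees it.
 */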
5182 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5183                                union e1000_adv_rx_desc *rx_desc)
5184 {
5185         /* HW will not DMA in data larger than the given buffer, even if it
5186          * parses the (NFS, of course) header to be larger.  In that case, it
5187          * fills the header buffer and spills the rest into the page.
5188          */
5189         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5190                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5191         if (hlen > rx_ring->rx_buffer_len)
5192                 hlen = rx_ring->rx_buffer_len;
5193         return hlen;
5194 }
5195
5196 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5197                                  int *work_done, int budget)
5198 {
5199         struct igb_ring *rx_ring = q_vector->rx_ring;
5200         struct net_device *netdev = rx_ring->netdev;
5201         struct pci_dev *pdev = rx_ring->pdev;
5202         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5203         struct igb_buffer *buffer_info, *next_buffer;
5204         struct sk_buff *skb;
5205         bool cleaned = false;
5206         int cleaned_count = 0;
5207         int current_node = numa_node_id();
5208         unsigned int total_bytes = 0, total_packets = 0;
5209         unsigned int i;
5210         u32 staterr;
5211         u16 length;
5212         u16 vlan_tag;
5213
5214         i = rx_ring->next_to_clean;
5215         buffer_info = &rx_ring->buffer_info[i];
5216         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5217         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5218
5219         while (staterr & E1000_RXD_STAT_DD) {
5220                 if (*work_done >= budget)
5221                         break;
5222                 (*work_done)++;
5223
5224                 skb = buffer_info->skb;
5225                 prefetch(skb->data - NET_IP_ALIGN);
5226                 buffer_info->skb = NULL;
5227
5228                 i++;
5229                 if (i == rx_ring->count)
5230                         i = 0;
5231
5232                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5233                 prefetch(next_rxd);
5234                 next_buffer = &rx_ring->buffer_info[i];
5235
5236                 length = le16_to_cpu(rx_desc->wb.upper.length);
5237                 cleaned = true;
5238                 cleaned_count++;
5239
5240                 if (buffer_info->dma) {
5241                         pci_unmap_single(pdev, buffer_info->dma,
5242                                          rx_ring->rx_buffer_len,
5243                                          PCI_DMA_FROMDEVICE);
5244                         buffer_info->dma = 0;
5245                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5246                                 skb_put(skb, length);
5247                                 goto send_up;
5248                         }
5249                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5250                 }
5251
5252                 if (length) {
5253                         pci_unmap_page(pdev, buffer_info->page_dma,
5254                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5255                         buffer_info->page_dma = 0;
5256
5257                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5258                                                 buffer_info->page,
5259                                                 buffer_info->page_offset,
5260                                                 length);
5261
5262                         if ((page_count(buffer_info->page) != 1) ||
5263                             (page_to_nid(buffer_info->page) != current_node))
5264                                 buffer_info->page = NULL;
5265                         else
5266                                 get_page(buffer_info->page);
5267
5268                         skb->len += length;
5269                         skb->data_len += length;
5270                         skb->truesize += length;
5271                 }
5272
5273                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5274                         buffer_info->skb = next_buffer->skb;
5275                         buffer_info->dma = next_buffer->dma;
5276                         next_buffer->skb = skb;
5277                         next_buffer->dma = 0;
5278                         goto next_desc;
5279                 }
5280 send_up:
5281                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5282                         dev_kfree_skb_irq(skb);
5283                         goto next_desc;
5284                 }
5285
5286                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5287                         igb_rx_hwtstamp(q_vector, staterr, skb);
5288                 total_bytes += skb->len;
5289                 total_packets++;
5290
5291                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5292
5293                 skb->protocol = eth_type_trans(skb, netdev);
5294                 skb_record_rx_queue(skb, rx_ring->queue_index);
5295
5296                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5297                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5298
5299                 igb_receive_skb(q_vector, skb, vlan_tag);
5300
5301 next_desc:
5302                 rx_desc->wb.upper.status_error = 0;
5303
5304                 /* return some buffers to hardware, one at a time is too slow */
5305                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5306                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5307                         cleaned_count = 0;
5308                 }
5309
5310                 /* use prefetched values */
5311                 rx_desc = next_rxd;
5312                 buffer_info = next_buffer;
5313                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5314         }
5315
5316         rx_ring->next_to_clean = i;
5317         cleaned_count = igb_desc_unused(rx_ring);
5318
5319         if (cleaned_count)
5320                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5321
5322         rx_ring->total_packets += total_packets;
5323         rx_ring->total_bytes += total_bytes;
5324         rx_ring->rx_stats.packets += total_packets;
5325         rx_ring->rx_stats.bytes += total_bytes;
5326         return cleaned;
5327 }
5328
5329 /**
5330  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5331  * @rx_ring: rx descriptor ring to place new receive buffers on
5332  **/
5333 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5334 {
5335         struct net_device *netdev = rx_ring->netdev;
5336         union e1000_adv_rx_desc *rx_desc;
5337         struct igb_buffer *buffer_info;
5338         struct sk_buff *skb;
5339         unsigned int i;
5340         int bufsz;
5341
5342         i = rx_ring->next_to_use;
5343         buffer_info = &rx_ring->buffer_info[i];
5344
5345         bufsz = rx_ring->rx_buffer_len;
5346
5347         while (cleaned_count--) {
5348                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5349
5350                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5351                         if (!buffer_info->page) {
5352                                 buffer_info->page = netdev_alloc_page(netdev);
5353                                 if (!buffer_info->page) {
5354                                         rx_ring->rx_stats.alloc_failed++;
5355                                         goto no_buffers;
5356                                 }
5357                                 buffer_info->page_offset = 0;
5358                         } else {
5359                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5360                         }
5361                         buffer_info->page_dma =
5362                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5363                                              buffer_info->page_offset,
5364                                              PAGE_SIZE / 2,
5365                                              PCI_DMA_FROMDEVICE);
5366                         if (pci_dma_mapping_error(rx_ring->pdev,
5367                                                   buffer_info->page_dma)) {
5368                                 buffer_info->page_dma = 0;
5369                                 rx_ring->rx_stats.alloc_failed++;
5370                                 goto no_buffers;
5371                         }
5372                 }
5373
5374                 skb = buffer_info->skb;
5375                 if (!skb) {
5376                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5377                         if (!skb) {
5378                                 rx_ring->rx_stats.alloc_failed++;
5379                                 goto no_buffers;
5380                         }
5381
5382                         buffer_info->skb = skb;
5383                 }
5384                 if (!buffer_info->dma) {
5385                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5386                                                           skb->data,
5387                                                           bufsz,
5388                                                           PCI_DMA_FROMDEVICE);
5389                         if (pci_dma_mapping_error(rx_ring->pdev,
5390                                                   buffer_info->dma)) {
5391                                 buffer_info->dma = 0;
5392                                 rx_ring->rx_stats.alloc_failed++;
5393                                 goto no_buffers;
5394                         }
5395                 }
5396                 /* Refresh the desc even if buffer_addrs didn't change because
5397                  * each write-back erases this info. */
5398                 if (bufsz < IGB_RXBUFFER_1024) {
5399                         rx_desc->read.pkt_addr =
5400                              cpu_to_le64(buffer_info->page_dma);
5401                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5402                 } else {
5403                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5404                         rx_desc->read.hdr_addr = 0;
5405                 }
5406
5407                 i++;
5408                 if (i == rx_ring->count)
5409                         i = 0;
5410                 buffer_info = &rx_ring->buffer_info[i];
5411         }
5412
5413 no_buffers:
5414         if (rx_ring->next_to_use != i) {
5415                 rx_ring->next_to_use = i;
5416                 if (i == 0)
5417                         i = (rx_ring->count - 1);
5418                 else
5419                         i--;
5420
5421                 /* Force memory writes to complete before letting h/w
5422                  * know there are new descriptors to fetch.  (Only
5423                  * applicable for weak-ordered memory model archs,
5424                  * such as IA-64). */
5425                 wmb();
5426                 writel(i, rx_ring->tail);
5427         }
5428 }
5429
5430 /**
5431  * igb_mii_ioctl -
5432  * @netdev: network interface device structure
5433  * @ifr: pointer to interface request structure carrying MII data
5434  * @cmd: MII ioctl command
5435  **/
5436 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5437 {
5438         struct igb_adapter *adapter = netdev_priv(netdev);
5439         struct mii_ioctl_data *data = if_mii(ifr);
5440
5441         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5442                 return -EOPNOTSUPP;
5443
5444         switch (cmd) {
5445         case SIOCGMIIPHY:
5446                 data->phy_id = adapter->hw.phy.addr;
5447                 break;
5448         case SIOCGMIIREG:
5449                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5450                                      &data->val_out))
5451                         return -EIO;
5452                 break;
5453         case SIOCSMIIREG:
5454         default:
5455                 return -EOPNOTSUPP;
5456         }
5457         return 0;
5458 }
5459
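/*
 * Illustrative sketch (not driver code; device name and socket are
 * placeholders): the handler below is normally reached from user space
 * via the SIOCSHWTSTAMP ioctl, e.g.:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock, SIOCSHWTSTAMP, &ifr);
 *
 * The driver may rewrite cfg.rx_filter to reflect what the hardware can
 * actually do, so the caller should re-read it after the ioctl returns.
 */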
5460 /**
5461  * igb_hwtstamp_ioctl - control hardware time stamping
5462  * @netdev: network interface device structure
5463  * @ifr: interface request carrying a hwtstamp_config from user space
5464  * @cmd: ioctl command (SIOCSHWTSTAMP)
5465  *
5466  * Outgoing time stamping can be enabled and disabled. Play nice and
5467  * disable it when requested, although it shouldn't cause any overhead
5468  * when no packet needs it. At most one packet in the queue may be
5469  * marked for time stamping, otherwise it would be impossible to tell
5470  * for sure to which packet the hardware time stamp belongs.
5471  *
5472  * Incoming time stamping has to be configured via the hardware
5473  * filters. Not all combinations are supported, in particular event
5474  * type has to be specified. Matching the kind of event packet is
5475  * not supported, with the exception of "all V2 events regardless of
5476  * level 2 or 4".
5477  *
5478  **/
5479 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5480                               struct ifreq *ifr, int cmd)
5481 {
5482         struct igb_adapter *adapter = netdev_priv(netdev);
5483         struct e1000_hw *hw = &adapter->hw;
5484         struct hwtstamp_config config;
5485         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5486         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5487         u32 tsync_rx_cfg = 0;
5488         bool is_l4 = false;
5489         bool is_l2 = false;
5490         u32 regval;
5491
5492         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5493                 return -EFAULT;
5494
5495         /* reserved for future extensions */
5496         if (config.flags)
5497                 return -EINVAL;
5498
5499         switch (config.tx_type) {
5500         case HWTSTAMP_TX_OFF:
5501                 tsync_tx_ctl = 0; /* fall through */
5502         case HWTSTAMP_TX_ON:
5503                 break;
5504         default:
5505                 return -ERANGE;
5506         }
5507
5508         switch (config.rx_filter) {
5509         case HWTSTAMP_FILTER_NONE:
5510                 tsync_rx_ctl = 0;
5511                 break;
5512         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5513         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5514         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5515         case HWTSTAMP_FILTER_ALL:
5516                 /*
5517                  * register TSYNCRXCFG must be set, therefore it is not
5518                  * possible to time stamp both Sync and Delay_Req messages
5519                  * => fall back to time stamping all packets
5520                  */
5521                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5522                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5523                 break;
5524         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5525                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5526                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5527                 is_l4 = true;
5528                 break;
5529         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5530                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5531                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5532                 is_l4 = true;
5533                 break;
5534         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5535         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5536                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5537                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5538                 is_l2 = true;
5539                 is_l4 = true;
5540                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5541                 break;
5542         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5543         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5544                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5545                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5546                 is_l2 = true;
5547                 is_l4 = true;
5548                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5549                 break;
5550         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5551         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5552         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5553                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5554                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5555                 is_l2 = true;
5556                 break;
5557         default:
5558                 return -ERANGE;
5559         }
5560
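             /* the 82575 does not support hardware time stamping */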
5561         if (hw->mac.type == e1000_82575) {
5562                 if (tsync_rx_ctl | tsync_tx_ctl)
5563                         return -EINVAL;
5564                 return 0;
5565         }
5566
5567         /*
5568          * Per-packet timestamping only works if all packets are
5569          * timestamped, so enable timestamping in all packets as
5570          * long as one rx filter was configured.
5571          */
5572         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5573                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5574                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5575         }
5576
5577         /* enable/disable TX */
5578         regval = rd32(E1000_TSYNCTXCTL);
5579         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5580         regval |= tsync_tx_ctl;
5581         wr32(E1000_TSYNCTXCTL, regval);
5582
5583         /* enable/disable RX */
5584         regval = rd32(E1000_TSYNCRXCTL);
5585         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5586         regval |= tsync_rx_ctl;
5587         wr32(E1000_TSYNCRXCTL, regval);
5588
5589         /* define which PTP packets are time stamped */
5590         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5591
5592         /* define ethertype filter for timestamped packets */
5593         if (is_l2)
5594                 wr32(E1000_ETQF(3),
5595                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5596                                  E1000_ETQF_1588 | /* enable timestamping */
5597                                  ETH_P_1588));     /* 1588 eth protocol type */
5598         else
5599                 wr32(E1000_ETQF(3), 0);
5600
5601 #define PTP_PORT 319
5602         /* L4 Queue Filter[3]: filter by destination port and protocol */
5603         if (is_l4) {
5604                 u32 ftqf = (IPPROTO_UDP /* UDP */
5605                         | E1000_FTQF_VF_BP /* VF not compared */
5606                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5607                         | E1000_FTQF_MASK); /* mask all inputs */
5608                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5609
5610                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5611                 wr32(E1000_IMIREXT(3),
5612                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5613                 if (hw->mac.type == e1000_82576) {
5614                         /* enable source port check */
5615                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5616                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5617                 }
5618                 wr32(E1000_FTQF(3), ftqf);
5619         } else {
5620                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5621         }
5622         wrfl();
5623
5624         adapter->hwtstamp_config = config;
5625
5626         /* clear TX/RX time stamp registers, just to be sure */
5627         regval = rd32(E1000_TXSTMPH);
5628         regval = rd32(E1000_RXSTMPH);
5629
5630         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5631                 -EFAULT : 0;
5632 }
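     /*
      * For reference, a minimal userspace sketch of invoking the ioctl above
      * (illustrative only, not part of the driver; assumes an interface
      * named "eth0", error handling omitted):
      *
      *      #include <linux/net_tstamp.h>
      *      #include <linux/sockios.h>
      *      #include <net/if.h>
      *      #include <string.h>
      *      #include <sys/ioctl.h>
      *      #include <sys/socket.h>
      *
      *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
      *      struct hwtstamp_config cfg;
      *      struct ifreq ifr;
      *
      *      memset(&cfg, 0, sizeof(cfg));
      *      cfg.tx_type   = HWTSTAMP_TX_ON;
      *      cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
      *
      *      memset(&ifr, 0, sizeof(ifr));
      *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
      *      ifr.ifr_data = (void *)&cfg;
      *
      *      ioctl(fd, SIOCSHWTSTAMP, &ifr);
      *
      * On return cfg is copied back, so cfg.rx_filter reports the filter
      * that was actually programmed, which may be more permissive than the
      * one requested.
      */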
5633
5634 /**
5635  * igb_ioctl - handle ioctl requests for the network device
5636  * @netdev: network interface device structure
5637  * @ifr: interface request data
5638  * @cmd: ioctl command
5639  **/
5640 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5641 {
5642         switch (cmd) {
5643         case SIOCGMIIPHY:
5644         case SIOCGMIIREG:
5645         case SIOCSMIIREG:
5646                 return igb_mii_ioctl(netdev, ifr, cmd);
5647         case SIOCSHWTSTAMP:
5648                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5649         default:
5650                 return -EOPNOTSUPP;
5651         }
5652 }
5653
5654 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5655 {
5656         struct igb_adapter *adapter = hw->back;
5657         u16 cap_offset;
5658
5659         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5660         if (!cap_offset)
5661                 return -E1000_ERR_CONFIG;
5662
5663         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5664
5665         return 0;
5666 }
5667
5668 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5669 {
5670         struct igb_adapter *adapter = hw->back;
5671         u16 cap_offset;
5672
5673         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5674         if (!cap_offset)
5675                 return -E1000_ERR_CONFIG;
5676
5677         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5678
5679         return 0;
5680 }
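     /*
      * These two helpers give the shared e1000 hardware code access to the
      * PCI Express capability registers of the underlying pci_dev.  A
      * hypothetical call site might look like (register offset chosen for
      * illustration only):
      *
      *      u16 devctl;
      *
      *      igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl);
      */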
5681
5682 static void igb_vlan_rx_register(struct net_device *netdev,
5683                                  struct vlan_group *grp)
5684 {
5685         struct igb_adapter *adapter = netdev_priv(netdev);
5686         struct e1000_hw *hw = &adapter->hw;
5687         u32 ctrl, rctl;
5688
5689         igb_irq_disable(adapter);
5690         adapter->vlgrp = grp;
5691
5692         if (grp) {
5693                 /* enable VLAN tag insert/strip */
5694                 ctrl = rd32(E1000_CTRL);
5695                 ctrl |= E1000_CTRL_VME;
5696                 wr32(E1000_CTRL, ctrl);
5697
5698                 /* Disable CFI check */
5699                 rctl = rd32(E1000_RCTL);
5700                 rctl &= ~E1000_RCTL_CFIEN;
5701                 wr32(E1000_RCTL, rctl);
5702         } else {
5703                 /* disable VLAN tag insert/strip */
5704                 ctrl = rd32(E1000_CTRL);
5705                 ctrl &= ~E1000_CTRL_VME;
5706                 wr32(E1000_CTRL, ctrl);
5707         }
5708
5709         igb_rlpml_set(adapter);
5710
5711         if (!test_bit(__IGB_DOWN, &adapter->state))
5712                 igb_irq_enable(adapter);
5713 }
5714
5715 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5716 {
5717         struct igb_adapter *adapter = netdev_priv(netdev);
5718         struct e1000_hw *hw = &adapter->hw;
5719         int pf_id = adapter->vfs_allocated_count;
5720
5721         /* attempt to add filter to vlvf array */
5722         igb_vlvf_set(adapter, vid, true, pf_id);
5723
5724         /* add the filter since PF can receive vlans w/o entry in vlvf */
5725         igb_vfta_set(hw, vid, true);
5726 }
5727
5728 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5729 {
5730         struct igb_adapter *adapter = netdev_priv(netdev);
5731         struct e1000_hw *hw = &adapter->hw;
5732         int pf_id = adapter->vfs_allocated_count;
5733         s32 err;
5734
5735         igb_irq_disable(adapter);
5736         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5737
5738         if (!test_bit(__IGB_DOWN, &adapter->state))
5739                 igb_irq_enable(adapter);
5740
5741         /* remove vlan from VLVF table array */
5742         err = igb_vlvf_set(adapter, vid, false, pf_id);
5743
5744         /* if vid was not present in VLVF just remove it from table */
5745         if (err)
5746                 igb_vfta_set(hw, vid, false);
5747 }
5748
5749 static void igb_restore_vlan(struct igb_adapter *adapter)
5750 {
5751         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5752
5753         if (adapter->vlgrp) {
5754                 u16 vid;
5755                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5756                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5757                                 continue;
5758                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5759                 }
5760         }
5761 }
5762
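     /*
      * igb_set_spd_dplx - configure a fixed speed/duplex setting
      *
      * @spddplx is the sum of an ethtool SPEED_* and DUPLEX_* value, e.g.
      * SPEED_100 + DUPLEX_FULL for 100 Mb/s full duplex.  1000 Mb/s is only
      * supported at full duplex, and is set up by autonegotiating with just
      * that mode advertised.
      */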
5763 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5764 {
5765         struct pci_dev *pdev = adapter->pdev;
5766         struct e1000_mac_info *mac = &adapter->hw.mac;
5767
5768         mac->autoneg = 0;
5769
5770         switch (spddplx) {
5771         case SPEED_10 + DUPLEX_HALF:
5772                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5773                 break;
5774         case SPEED_10 + DUPLEX_FULL:
5775                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5776                 break;
5777         case SPEED_100 + DUPLEX_HALF:
5778                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5779                 break;
5780         case SPEED_100 + DUPLEX_FULL:
5781                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5782                 break;
5783         case SPEED_1000 + DUPLEX_FULL:
5784                 mac->autoneg = 1;
5785                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5786                 break;
5787         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5788         default:
5789                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5790                 return -EINVAL;
5791         }
5792         return 0;
5793 }
5794
5795 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5796 {
5797         struct net_device *netdev = pci_get_drvdata(pdev);
5798         struct igb_adapter *adapter = netdev_priv(netdev);
5799         struct e1000_hw *hw = &adapter->hw;
5800         u32 ctrl, rctl, status;
5801         u32 wufc = adapter->wol;
5802 #ifdef CONFIG_PM
5803         int retval = 0;
5804 #endif
5805
5806         netif_device_detach(netdev);
5807
5808         if (netif_running(netdev))
5809                 igb_close(netdev);
5810
5811         igb_clear_interrupt_scheme(adapter);
5812
5813 #ifdef CONFIG_PM
5814         retval = pci_save_state(pdev);
5815         if (retval)
5816                 return retval;
5817 #endif
5818
5819         status = rd32(E1000_STATUS);
5820         if (status & E1000_STATUS_LU)
5821                 wufc &= ~E1000_WUFC_LNKC;
5822
5823         if (wufc) {
5824                 igb_setup_rctl(adapter);
5825                 igb_set_rx_mode(netdev);
5826
5827                 /* turn on all-multi mode if wake on multicast is enabled */
5828                 if (wufc & E1000_WUFC_MC) {
5829                         rctl = rd32(E1000_RCTL);
5830                         rctl |= E1000_RCTL_MPE;
5831                         wr32(E1000_RCTL, rctl);
5832                 }
5833
5834                 ctrl = rd32(E1000_CTRL);
5835                 /* advertise wake from D3Cold */
5836                 #define E1000_CTRL_ADVD3WUC 0x00100000
5837                 /* phy power management enable */
5838                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5839                 ctrl |= E1000_CTRL_ADVD3WUC;
5840                 wr32(E1000_CTRL, ctrl);
5841
5842                 /* Allow time for pending master requests to run */
5843                 igb_disable_pcie_master(hw);
5844
5845                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5846                 wr32(E1000_WUFC, wufc);
5847         } else {
5848                 wr32(E1000_WUC, 0);
5849                 wr32(E1000_WUFC, 0);
5850         }
5851
5852         *enable_wake = wufc || adapter->en_mng_pt;
5853         if (!*enable_wake)
5854                 igb_power_down_link(adapter);
5855         else
5856                 igb_power_up_link(adapter);
5857
5858         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5859          * would have already happened in close and is redundant. */
5860         igb_release_hw_control(adapter);
5861
5862         pci_disable_device(pdev);
5863
5864         return 0;
5865 }
5866
5867 #ifdef CONFIG_PM
5868 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5869 {
5870         int retval;
5871         bool wake;
5872
5873         retval = __igb_shutdown(pdev, &wake);
5874         if (retval)
5875                 return retval;
5876
5877         if (wake) {
5878                 pci_prepare_to_sleep(pdev);
5879         } else {
5880                 pci_wake_from_d3(pdev, false);
5881                 pci_set_power_state(pdev, PCI_D3hot);
5882         }
5883
5884         return 0;
5885 }
5886
5887 static int igb_resume(struct pci_dev *pdev)
5888 {
5889         struct net_device *netdev = pci_get_drvdata(pdev);
5890         struct igb_adapter *adapter = netdev_priv(netdev);
5891         struct e1000_hw *hw = &adapter->hw;
5892         int err;
5893
5894         pci_set_power_state(pdev, PCI_D0);
5895         pci_restore_state(pdev);
5896         pci_save_state(pdev);
5897
5898         err = pci_enable_device_mem(pdev);
5899         if (err) {
5900                 dev_err(&pdev->dev,
5901                         "igb: Cannot enable PCI device from suspend\n");
5902                 return err;
5903         }
5904         pci_set_master(pdev);
5905
5906         pci_enable_wake(pdev, PCI_D3hot, 0);
5907         pci_enable_wake(pdev, PCI_D3cold, 0);
5908
5909         if (igb_init_interrupt_scheme(adapter)) {
5910                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5911                 return -ENOMEM;
5912         }
5913
5914         igb_reset(adapter);
5915
5916         /* let the f/w know that the h/w is now under the control of the
5917          * driver. */
5918         igb_get_hw_control(adapter);
5919
5920         wr32(E1000_WUS, ~0);
5921
5922         if (netif_running(netdev)) {
5923                 err = igb_open(netdev);
5924                 if (err)
5925                         return err;
5926         }
5927
5928         netif_device_attach(netdev);
5929
5930         return 0;
5931 }
5932 #endif
5933
5934 static void igb_shutdown(struct pci_dev *pdev)
5935 {
5936         bool wake;
5937
5938         __igb_shutdown(pdev, &wake);
5939
5940         if (system_state == SYSTEM_POWER_OFF) {
5941                 pci_wake_from_d3(pdev, wake);
5942                 pci_set_power_state(pdev, PCI_D3hot);
5943         }
5944 }
5945
5946 #ifdef CONFIG_NET_POLL_CONTROLLER
5947 /*
5948  * Polling 'interrupt' - used by things like netconsole to send skbs
5949  * without having to re-enable interrupts. It's not called while
5950  * the interrupt routine is executing.
5951  */
5952 static void igb_netpoll(struct net_device *netdev)
5953 {
5954         struct igb_adapter *adapter = netdev_priv(netdev);
5955         struct e1000_hw *hw = &adapter->hw;
5956         int i;
5957
5958         if (!adapter->msix_entries) {
5959                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5960                 igb_irq_disable(adapter);
5961                 napi_schedule(&q_vector->napi);
5962                 return;
5963         }
5964
5965         for (i = 0; i < adapter->num_q_vectors; i++) {
5966                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5967                 wr32(E1000_EIMC, q_vector->eims_value);
5968                 napi_schedule(&q_vector->napi);
5969         }
5970 }
5971 #endif /* CONFIG_NET_POLL_CONTROLLER */
5972
5973 /**
5974  * igb_io_error_detected - called when PCI error is detected
5975  * @pdev: Pointer to PCI device
5976  * @state: The current pci connection state
5977  *
5978  * This function is called after a PCI bus error affecting
5979  * this device has been detected.
5980  */
5981 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5982                                               pci_channel_state_t state)
5983 {
5984         struct net_device *netdev = pci_get_drvdata(pdev);
5985         struct igb_adapter *adapter = netdev_priv(netdev);
5986
5987         netif_device_detach(netdev);
5988
5989         if (state == pci_channel_io_perm_failure)
5990                 return PCI_ERS_RESULT_DISCONNECT;
5991
5992         if (netif_running(netdev))
5993                 igb_down(adapter);
5994         pci_disable_device(pdev);
5995
5996         /* Request a slot reset. */
5997         return PCI_ERS_RESULT_NEED_RESET;
5998 }
5999
6000 /**
6001  * igb_io_slot_reset - called after the pci bus has been reset.
6002  * @pdev: Pointer to PCI device
6003  *
6004  * Restart the card from scratch, as if from a cold-boot. Implementation
6005  * resembles the first-half of the igb_resume routine.
6006  */
6007 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6008 {
6009         struct net_device *netdev = pci_get_drvdata(pdev);
6010         struct igb_adapter *adapter = netdev_priv(netdev);
6011         struct e1000_hw *hw = &adapter->hw;
6012         pci_ers_result_t result;
6013         int err;
6014
6015         if (pci_enable_device_mem(pdev)) {
6016                 dev_err(&pdev->dev,
6017                         "Cannot re-enable PCI device after reset.\n");
6018                 result = PCI_ERS_RESULT_DISCONNECT;
6019         } else {
6020                 pci_set_master(pdev);
6021                 pci_restore_state(pdev);
6022                 pci_save_state(pdev);
6023
6024                 pci_enable_wake(pdev, PCI_D3hot, 0);
6025                 pci_enable_wake(pdev, PCI_D3cold, 0);
6026
6027                 igb_reset(adapter);
6028                 wr32(E1000_WUS, ~0);
6029                 result = PCI_ERS_RESULT_RECOVERED;
6030         }
6031
6032         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6033         if (err) {
6034                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6035                         "failed 0x%x\n", err);
6036                 /* non-fatal, continue */
6037         }
6038
6039         return result;
6040 }
6041
6042 /**
6043  * igb_io_resume - called when traffic can start flowing again.
6044  * @pdev: Pointer to PCI device
6045  *
6046  * This callback is called when the error recovery driver tells us that
6047  * it's OK to resume normal operation. Implementation resembles the
6048  * second-half of the igb_resume routine.
6049  */
6050 static void igb_io_resume(struct pci_dev *pdev)
6051 {
6052         struct net_device *netdev = pci_get_drvdata(pdev);
6053         struct igb_adapter *adapter = netdev_priv(netdev);
6054
6055         if (netif_running(netdev)) {
6056                 if (igb_up(adapter)) {
6057                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6058                         return;
6059                 }
6060         }
6061
6062         netif_device_attach(netdev);
6063
6064         /* let the f/w know that the h/w is now under the control of the
6065          * driver. */
6066         igb_get_hw_control(adapter);
6067 }
6068
6069 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6070                              u8 qsel)
6071 {
6072         u32 rar_low, rar_high;
6073         struct e1000_hw *hw = &adapter->hw;
6074
6075         /* HW expects these in little endian so we reverse the byte order
6076          * from network order (big endian) to little endian
6077          */
6078         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6079                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6080         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6081
6082         /* Indicate to hardware the Address is Valid. */
6083         rar_high |= E1000_RAH_AV;
6084
6085         if (hw->mac.type == e1000_82575)
6086                 rar_high |= E1000_RAH_POOL_1 * qsel;
6087         else
6088                 rar_high |= E1000_RAH_POOL_1 << qsel;
6089
6090         wr32(E1000_RAL(index), rar_low);
6091         wrfl();
6092         wr32(E1000_RAH(index), rar_high);
6093         wrfl();
6094 }
6095
6096 static int igb_set_vf_mac(struct igb_adapter *adapter,
6097                           int vf, unsigned char *mac_addr)
6098 {
6099         struct e1000_hw *hw = &adapter->hw;
6100         /* VF MAC addresses start at the end of the receive addresses and
6101          * move towards the first, so a collision should not be possible */
6102         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6103
6104         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6105
6106         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6107
6108         return 0;
6109 }
6110
6111 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6112 {
6113         struct igb_adapter *adapter = netdev_priv(netdev);
6114         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6115                 return -EINVAL;
6116         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6117         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6118         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6119                                       " change effective.\n");
6120         if (test_bit(__IGB_DOWN, &adapter->state)) {
6121                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6122                          " but the PF device is not up.\n");
6123                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6124                          " attempting to use the VF device.\n");
6125         }
6126         return igb_set_vf_mac(adapter, vf, mac);
6127 }
6128
6129 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6130 {
6131         return -EOPNOTSUPP;
6132 }
6133
6134 static int igb_ndo_get_vf_config(struct net_device *netdev,
6135                                  int vf, struct ifla_vf_info *ivi)
6136 {
6137         struct igb_adapter *adapter = netdev_priv(netdev);
6138         if (vf >= adapter->vfs_allocated_count)
6139                 return -EINVAL;
6140         ivi->vf = vf;
6141         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6142         ivi->tx_rate = 0;
6143         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6144         ivi->qos = adapter->vf_data[vf].pf_qos;
6145         return 0;
6146 }
6147
6148 static void igb_vmm_control(struct igb_adapter *adapter)
6149 {
6150         struct e1000_hw *hw = &adapter->hw;
6151         u32 reg;
6152
6153         switch (hw->mac.type) {
6154         case e1000_82575:
6155         default:
6156                 /* replication is not supported for 82575 */
6157                 return;
6158         case e1000_82576:
6159                 /* notify HW that the MAC is adding vlan tags */
6160                 reg = rd32(E1000_DTXCTL);
6161                 reg |= E1000_DTXCTL_VLAN_ADDED;
6162                 wr32(E1000_DTXCTL, reg);
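                     /* fall through */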
6163         case e1000_82580:
6164                 /* enable replication vlan tag stripping */
6165                 reg = rd32(E1000_RPLOLR);
6166                 reg |= E1000_RPLOLR_STRVLAN;
6167                 wr32(E1000_RPLOLR, reg);
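                     /* fall through */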
6168         case e1000_i350:
6169                 /* none of the above registers are supported by i350 */
6170                 break;
6171         }
6172
6173         if (adapter->vfs_allocated_count) {
6174                 igb_vmdq_set_loopback_pf(hw, true);
6175                 igb_vmdq_set_replication_pf(hw, true);
6176         } else {
6177                 igb_vmdq_set_loopback_pf(hw, false);
6178                 igb_vmdq_set_replication_pf(hw, false);
6179         }
6180 }
6181
6182 /* igb_main.c */