drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79         /* required last entry */
80         {0, }
81 };
82
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
137 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
138                                int vf, u16 vlan, u8 qos);
139 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
140 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
141                                  struct ifla_vf_info *ivi);
142
143 #ifdef CONFIG_PM
144 static int igb_suspend(struct pci_dev *, pm_message_t);
145 static int igb_resume(struct pci_dev *);
146 #endif
147 static void igb_shutdown(struct pci_dev *);
148 #ifdef CONFIG_IGB_DCA
149 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
150 static struct notifier_block dca_notifier = {
151         .notifier_call  = igb_notify_dca,
152         .next           = NULL,
153         .priority       = 0
154 };
155 #endif
156 #ifdef CONFIG_NET_POLL_CONTROLLER
157 /* for netdump / net console */
158 static void igb_netpoll(struct net_device *);
159 #endif
160 #ifdef CONFIG_PCI_IOV
161 static unsigned int max_vfs = 0;
162 module_param(max_vfs, uint, 0);
163 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
164                  "per physical function");
165 #endif /* CONFIG_PCI_IOV */
166
167 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
168                      pci_channel_state_t);
169 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
170 static void igb_io_resume(struct pci_dev *);
171
172 static struct pci_error_handlers igb_err_handler = {
173         .error_detected = igb_io_error_detected,
174         .slot_reset = igb_io_slot_reset,
175         .resume = igb_io_resume,
176 };
177
178
179 static struct pci_driver igb_driver = {
180         .name     = igb_driver_name,
181         .id_table = igb_pci_tbl,
182         .probe    = igb_probe,
183         .remove   = __devexit_p(igb_remove),
184 #ifdef CONFIG_PM
185         /* Power Management Hooks */
186         .suspend  = igb_suspend,
187         .resume   = igb_resume,
188 #endif
189         .shutdown = igb_shutdown,
190         .err_handler = &igb_err_handler
191 };
192
193 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
194 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
195 MODULE_LICENSE("GPL");
196 MODULE_VERSION(DRV_VERSION);
197
198 /**
199  * igb_read_clock - read raw cycle counter (to be used by time counter)
200  */
201 static cycle_t igb_read_clock(const struct cyclecounter *tc)
202 {
203         struct igb_adapter *adapter =
204                 container_of(tc, struct igb_adapter, cycles);
205         struct e1000_hw *hw = &adapter->hw;
206         u64 stamp = 0;
207         int shift = 0;
208
209         /*
210          * The timestamp latches on lowest register read. For the 82580
211          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
212          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
213          */
214         if (hw->mac.type == e1000_82580) {
215                 stamp = rd32(E1000_SYSTIMR) >> 8;
216                 shift = IGB_82580_TSYNC_SHIFT;
217         }
218
219         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
220         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
221         return stamp;
222 }
223
224 #ifdef DEBUG
225 /**
226  * igb_get_hw_dev_name - return device name string
227  * used by hardware layer to print debugging information
228  **/
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
230 {
231         struct igb_adapter *adapter = hw->back;
232         return adapter->netdev->name;
233 }
234
235 /**
236  * igb_get_time_str - format current NIC and system time as string
237  */
238 static char *igb_get_time_str(struct igb_adapter *adapter,
239                               char buffer[160])
240 {
241         cycle_t hw = adapter->cycles.read(&adapter->cycles);
242         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
243         struct timespec sys;
244         struct timespec delta;
245         getnstimeofday(&sys);
246
247         delta = timespec_sub(nic, sys);
248
249         sprintf(buffer,
250                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
251                 hw,
252                 (long)nic.tv_sec, nic.tv_nsec,
253                 (long)sys.tv_sec, sys.tv_nsec,
254                 (long)delta.tv_sec, delta.tv_nsec);
255
256         return buffer;
257 }
258 #endif
259
260 /**
261  * igb_init_module - Driver Registration Routine
262  *
263  * igb_init_module is the first routine called when the driver is
264  * loaded. All it does is register with the PCI subsystem.
265  **/
266 static int __init igb_init_module(void)
267 {
268         int ret;
269         printk(KERN_INFO "%s - version %s\n",
270                igb_driver_string, igb_driver_version);
271
272         printk(KERN_INFO "%s\n", igb_copyright);
273
274 #ifdef CONFIG_IGB_DCA
275         dca_register_notify(&dca_notifier);
276 #endif
277         ret = pci_register_driver(&igb_driver);
278         return ret;
279 }
280
281 module_init(igb_init_module);
282
283 /**
284  * igb_exit_module - Driver Exit Cleanup Routine
285  *
286  * igb_exit_module is called just before the driver is removed
287  * from memory.
288  **/
289 static void __exit igb_exit_module(void)
290 {
291 #ifdef CONFIG_IGB_DCA
292         dca_unregister_notify(&dca_notifier);
293 #endif
294         pci_unregister_driver(&igb_driver);
295 }
296
297 module_exit(igb_exit_module);
298
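/* Q_IDX_82576(i) maps queue i to its 82576 register index, ((i & 1) << 3) + (i >> 1),
 * interleaving the low and high queue halves: 0->0, 1->8, 2->1, 3->9, ... */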
299 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
300 /**
301  * igb_cache_ring_register - Descriptor ring to register mapping
302  * @adapter: board private structure to initialize
303  *
304  * Once we know the feature-set enabled for the device, we'll cache
305  * the register offset the descriptor ring is assigned to.
306  **/
307 static void igb_cache_ring_register(struct igb_adapter *adapter)
308 {
309         int i = 0, j = 0;
310         u32 rbase_offset = adapter->vfs_allocated_count;
311
312         switch (adapter->hw.mac.type) {
313         case e1000_82576:
314                 /* The queues are allocated for virtualization such that VF 0
315                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
316                  * In order to avoid collision we start at the first free queue
317                  * and continue consuming queues in the same sequence
318                  */
319                 if (adapter->vfs_allocated_count) {
320                         for (; i < adapter->rss_queues; i++)
321                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
322                                                                Q_IDX_82576(i);
323                         for (; j < adapter->rss_queues; j++)
324                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
325                                                                Q_IDX_82576(j);
326                 }
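                /* fall through: remaining queues get a flat rbase_offset + index mapping */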
327         case e1000_82575:
328         case e1000_82580:
329         default:
330                 for (; i < adapter->num_rx_queues; i++)
331                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
332                 for (; j < adapter->num_tx_queues; j++)
333                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
334                 break;
335         }
336 }
337
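/**
 * igb_free_queues - free memory for all transmit and receive rings
 * @adapter: board private structure
 **/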
338 static void igb_free_queues(struct igb_adapter *adapter)
339 {
340         int i;
341
342         for (i = 0; i < adapter->num_tx_queues; i++) {
343                 kfree(adapter->tx_ring[i]);
344                 adapter->tx_ring[i] = NULL;
345         }
346         for (i = 0; i < adapter->num_rx_queues; i++) {
347                 kfree(adapter->rx_ring[i]);
348                 adapter->rx_ring[i] = NULL;
349         }
350         adapter->num_rx_queues = 0;
351         adapter->num_tx_queues = 0;
352 }
353
354 /**
355  * igb_alloc_queues - Allocate memory for all rings
356  * @adapter: board private structure to initialize
357  *
358  * We allocate one ring per queue at run-time since we don't know the
359  * number of queues at compile-time.
360  **/
361 static int igb_alloc_queues(struct igb_adapter *adapter)
362 {
363         struct igb_ring *ring;
364         int i;
365
366         for (i = 0; i < adapter->num_tx_queues; i++) {
367                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
368                 if (!ring)
369                         goto err;
370                 ring->count = adapter->tx_ring_count;
371                 ring->queue_index = i;
372                 ring->pdev = adapter->pdev;
373                 ring->netdev = adapter->netdev;
374                 /* For 82575, context index must be unique per ring. */
375                 if (adapter->hw.mac.type == e1000_82575)
376                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
377                 adapter->tx_ring[i] = ring;
378         }
379
380         for (i = 0; i < adapter->num_rx_queues; i++) {
381                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
382                 if (!ring)
383                         goto err;
384                 ring->count = adapter->rx_ring_count;
385                 ring->queue_index = i;
386                 ring->pdev = adapter->pdev;
387                 ring->netdev = adapter->netdev;
388                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390                 /* set flag indicating ring supports SCTP checksum offload */
391                 if (adapter->hw.mac.type >= e1000_82576)
392                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393                 adapter->rx_ring[i] = ring;
394         }
395
396         igb_cache_ring_register(adapter);
397
398         return 0;
399
400 err:
401         igb_free_queues(adapter);
402
403         return -ENOMEM;
404 }
405
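/* IGB_N0_QUEUE marks a q_vector that has no Rx or Tx ring assigned */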
406 #define IGB_N0_QUEUE -1
407 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
408 {
409         u32 msixbm = 0;
410         struct igb_adapter *adapter = q_vector->adapter;
411         struct e1000_hw *hw = &adapter->hw;
412         u32 ivar, index;
413         int rx_queue = IGB_N0_QUEUE;
414         int tx_queue = IGB_N0_QUEUE;
415
416         if (q_vector->rx_ring)
417                 rx_queue = q_vector->rx_ring->reg_idx;
418         if (q_vector->tx_ring)
419                 tx_queue = q_vector->tx_ring->reg_idx;
420
421         switch (hw->mac.type) {
422         case e1000_82575:
423                 /* The 82575 assigns vectors using a bitmask, which matches the
424                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
425                    or more queues to a vector, we write the appropriate bits
426                    into the MSIXBM register for that vector. */
427                 if (rx_queue > IGB_N0_QUEUE)
428                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
429                 if (tx_queue > IGB_N0_QUEUE)
430                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
431                 if (!adapter->msix_entries && msix_vector == 0)
432                         msixbm |= E1000_EIMS_OTHER;
433                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
434                 q_vector->eims_value = msixbm;
435                 break;
436         case e1000_82576:
437                 /* 82576 uses a table-based method for assigning vectors.
438                    Each queue has a single entry in the table to which we write
439                    a vector number along with a "valid" bit.  Sadly, the layout
440                    of the table is somewhat counterintuitive. */
441                 if (rx_queue > IGB_N0_QUEUE) {
442                         index = (rx_queue & 0x7);
443                         ivar = array_rd32(E1000_IVAR0, index);
444                         if (rx_queue < 8) {
445                                 /* vector goes into low byte of register */
446                                 ivar = ivar & 0xFFFFFF00;
447                                 ivar |= msix_vector | E1000_IVAR_VALID;
448                         } else {
449                                 /* vector goes into third byte of register */
450                                 ivar = ivar & 0xFF00FFFF;
451                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
452                         }
453                         array_wr32(E1000_IVAR0, index, ivar);
454                 }
455                 if (tx_queue > IGB_N0_QUEUE) {
456                         index = (tx_queue & 0x7);
457                         ivar = array_rd32(E1000_IVAR0, index);
458                         if (tx_queue < 8) {
459                                 /* vector goes into second byte of register */
460                                 ivar = ivar & 0xFFFF00FF;
461                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
462                         } else {
463                                 /* vector goes into high byte of register */
464                                 ivar = ivar & 0x00FFFFFF;
465                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
466                         }
467                         array_wr32(E1000_IVAR0, index, ivar);
468                 }
469                 q_vector->eims_value = 1 << msix_vector;
470                 break;
471         case e1000_82580:
472                 /* 82580 uses the same table-based approach as 82576 but has fewer
473                    entries; as a result we carry over for queues greater than 4. */
474                 if (rx_queue > IGB_N0_QUEUE) {
475                         index = (rx_queue >> 1);
476                         ivar = array_rd32(E1000_IVAR0, index);
477                         if (rx_queue & 0x1) {
478                                 /* vector goes into third byte of register */
479                                 ivar = ivar & 0xFF00FFFF;
480                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
481                         } else {
482                                 /* vector goes into low byte of register */
483                                 ivar = ivar & 0xFFFFFF00;
484                                 ivar |= msix_vector | E1000_IVAR_VALID;
485                         }
486                         array_wr32(E1000_IVAR0, index, ivar);
487                 }
488                 if (tx_queue > IGB_N0_QUEUE) {
489                         index = (tx_queue >> 1);
490                         ivar = array_rd32(E1000_IVAR0, index);
491                         if (tx_queue & 0x1) {
492                                 /* vector goes into high byte of register */
493                                 ivar = ivar & 0x00FFFFFF;
494                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
495                         } else {
496                                 /* vector goes into second byte of register */
497                                 ivar = ivar & 0xFFFF00FF;
498                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
499                         }
500                         array_wr32(E1000_IVAR0, index, ivar);
501                 }
502                 q_vector->eims_value = 1 << msix_vector;
503                 break;
504         default:
505                 BUG();
506                 break;
507         }
508
509         /* add q_vector eims value to global eims_enable_mask */
510         adapter->eims_enable_mask |= q_vector->eims_value;
511
512         /* configure q_vector to set itr on first interrupt */
513         q_vector->set_itr = 1;
514 }
515
516 /**
517  * igb_configure_msix - Configure MSI-X hardware
518  *
519  * igb_configure_msix sets up the hardware to properly
520  * generate MSI-X interrupts.
521  **/
522 static void igb_configure_msix(struct igb_adapter *adapter)
523 {
524         u32 tmp;
525         int i, vector = 0;
526         struct e1000_hw *hw = &adapter->hw;
527
528         adapter->eims_enable_mask = 0;
529
530         /* set vector for other causes, i.e. link changes */
531         switch (hw->mac.type) {
532         case e1000_82575:
533                 tmp = rd32(E1000_CTRL_EXT);
534                 /* enable MSI-X PBA support*/
535                 tmp |= E1000_CTRL_EXT_PBA_CLR;
536
537                 /* Auto-Mask interrupts upon ICR read. */
538                 tmp |= E1000_CTRL_EXT_EIAME;
539                 tmp |= E1000_CTRL_EXT_IRCA;
540
541                 wr32(E1000_CTRL_EXT, tmp);
542
543                 /* enable msix_other interrupt */
544                 array_wr32(E1000_MSIXBM(0), vector++,
545                                       E1000_EIMS_OTHER);
546                 adapter->eims_other = E1000_EIMS_OTHER;
547
548                 break;
549
550         case e1000_82576:
551         case e1000_82580:
552                 /* Turn on MSI-X capability first, or our settings
553                  * won't stick.  And it will take days to debug. */
554                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
555                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
556                                 E1000_GPIE_NSICR);
557
558                 /* enable msix_other interrupt */
559                 adapter->eims_other = 1 << vector;
560                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
561
562                 wr32(E1000_IVAR_MISC, tmp);
563                 break;
564         default:
565                 /* do nothing, since nothing else supports MSI-X */
566                 break;
567         } /* switch (hw->mac.type) */
568
569         adapter->eims_enable_mask |= adapter->eims_other;
570
571         for (i = 0; i < adapter->num_q_vectors; i++)
572                 igb_assign_vector(adapter->q_vector[i], vector++);
573
574         wrfl();
575 }
576
577 /**
578  * igb_request_msix - Initialize MSI-X interrupts
579  *
580  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
581  * kernel.
582  **/
583 static int igb_request_msix(struct igb_adapter *adapter)
584 {
585         struct net_device *netdev = adapter->netdev;
586         struct e1000_hw *hw = &adapter->hw;
587         int i, err = 0, vector = 0;
588
589         err = request_irq(adapter->msix_entries[vector].vector,
590                           igb_msix_other, 0, netdev->name, adapter);
591         if (err)
592                 goto out;
593         vector++;
594
595         for (i = 0; i < adapter->num_q_vectors; i++) {
596                 struct igb_q_vector *q_vector = adapter->q_vector[i];
597
598                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
599
600                 if (q_vector->rx_ring && q_vector->tx_ring)
601                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
602                                 q_vector->rx_ring->queue_index);
603                 else if (q_vector->tx_ring)
604                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
605                                 q_vector->tx_ring->queue_index);
606                 else if (q_vector->rx_ring)
607                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
608                                 q_vector->rx_ring->queue_index);
609                 else
610                         sprintf(q_vector->name, "%s-unused", netdev->name);
611
612                 err = request_irq(adapter->msix_entries[vector].vector,
613                                   igb_msix_ring, 0, q_vector->name,
614                                   q_vector);
615                 if (err)
616                         goto out;
617                 vector++;
618         }
619
620         igb_configure_msix(adapter);
621         return 0;
622 out:
623         return err;
624 }
625
626 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
627 {
628         if (adapter->msix_entries) {
629                 pci_disable_msix(adapter->pdev);
630                 kfree(adapter->msix_entries);
631                 adapter->msix_entries = NULL;
632         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
633                 pci_disable_msi(adapter->pdev);
634         }
635 }
636
637 /**
638  * igb_free_q_vectors - Free memory allocated for interrupt vectors
639  * @adapter: board private structure to initialize
640  *
641  * This function frees the memory allocated to the q_vectors.  In addition if
642  * NAPI is enabled it will delete any references to the NAPI struct prior
643  * to freeing the q_vector.
644  **/
645 static void igb_free_q_vectors(struct igb_adapter *adapter)
646 {
647         int v_idx;
648
649         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
650                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
651                 adapter->q_vector[v_idx] = NULL;
652                 netif_napi_del(&q_vector->napi);
653                 kfree(q_vector);
654         }
655         adapter->num_q_vectors = 0;
656 }
657
658 /**
659  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
660  *
661  * This function resets the device so that it has 0 rx queues, tx queues, and
662  * MSI-X interrupts allocated.
663  */
664 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
665 {
666         igb_free_queues(adapter);
667         igb_free_q_vectors(adapter);
668         igb_reset_interrupt_capability(adapter);
669 }
670
671 /**
672  * igb_set_interrupt_capability - set MSI or MSI-X if supported
673  *
674  * Attempt to configure interrupts using the best available
675  * capabilities of the hardware and kernel.
676  **/
677 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
678 {
679         int err;
680         int numvecs, i;
681
682         /* Number of supported queues. */
683         adapter->num_rx_queues = adapter->rss_queues;
684         adapter->num_tx_queues = adapter->rss_queues;
685
686         /* start with one vector for every rx queue */
687         numvecs = adapter->num_rx_queues;
688
689         /* if tx handler is separate, add 1 for every tx queue */
690         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
691                 numvecs += adapter->num_tx_queues;
692
693         /* store the number of vectors reserved for queues */
694         adapter->num_q_vectors = numvecs;
695
696         /* add 1 vector for link status interrupts */
697         numvecs++;
698         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
699                                         GFP_KERNEL);
700         if (!adapter->msix_entries)
701                 goto msi_only;
702
703         for (i = 0; i < numvecs; i++)
704                 adapter->msix_entries[i].entry = i;
705
706         err = pci_enable_msix(adapter->pdev,
707                               adapter->msix_entries,
708                               numvecs);
709         if (err == 0)
710                 goto out;
711
712         igb_reset_interrupt_capability(adapter);
713
714         /* If we can't do MSI-X, try MSI */
715 msi_only:
716 #ifdef CONFIG_PCI_IOV
717         /* disable SR-IOV for non MSI-X configurations */
718         if (adapter->vf_data) {
719                 struct e1000_hw *hw = &adapter->hw;
720                 /* disable iov and allow time for transactions to clear */
721                 pci_disable_sriov(adapter->pdev);
722                 msleep(500);
723
724                 kfree(adapter->vf_data);
725                 adapter->vf_data = NULL;
726                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
727                 msleep(100);
728                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
729         }
730 #endif
731         adapter->vfs_allocated_count = 0;
732         adapter->rss_queues = 1;
733         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
734         adapter->num_rx_queues = 1;
735         adapter->num_tx_queues = 1;
736         adapter->num_q_vectors = 1;
737         if (!pci_enable_msi(adapter->pdev))
738                 adapter->flags |= IGB_FLAG_HAS_MSI;
739 out:
740         /* Notify the stack of the (possibly) reduced Tx Queue count. */
741         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
742         return;
743 }
744
745 /**
746  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
747  * @adapter: board private structure to initialize
748  *
749  * We allocate one q_vector per queue interrupt.  If allocation fails we
750  * return -ENOMEM.
751  **/
752 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
753 {
754         struct igb_q_vector *q_vector;
755         struct e1000_hw *hw = &adapter->hw;
756         int v_idx;
757
758         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
759                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
760                 if (!q_vector)
761                         goto err_out;
762                 q_vector->adapter = adapter;
763                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
764                 q_vector->itr_val = IGB_START_ITR;
765                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
766                 adapter->q_vector[v_idx] = q_vector;
767         }
768         return 0;
769
770 err_out:
771         while (v_idx) {
772                 v_idx--;
773                 q_vector = adapter->q_vector[v_idx];
774                 netif_napi_del(&q_vector->napi);
775                 kfree(q_vector);
776                 adapter->q_vector[v_idx] = NULL;
777         }
778         return -ENOMEM;
779 }
780
781 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
782                                       int ring_idx, int v_idx)
783 {
784         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
785
786         q_vector->rx_ring = adapter->rx_ring[ring_idx];
787         q_vector->rx_ring->q_vector = q_vector;
788         q_vector->itr_val = adapter->rx_itr_setting;
789         if (q_vector->itr_val && q_vector->itr_val <= 3)
790                 q_vector->itr_val = IGB_START_ITR;
791 }
792
793 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
794                                       int ring_idx, int v_idx)
795 {
796         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
797
798         q_vector->tx_ring = adapter->tx_ring[ring_idx];
799         q_vector->tx_ring->q_vector = q_vector;
800         q_vector->itr_val = adapter->tx_itr_setting;
801         if (q_vector->itr_val && q_vector->itr_val <= 3)
802                 q_vector->itr_val = IGB_START_ITR;
803 }
804
805 /**
806  * igb_map_ring_to_vector - maps allocated queues to vectors
807  *
808  * This function maps the recently allocated queues to vectors.
809  **/
810 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
811 {
812         int i;
813         int v_idx = 0;
814
815         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
816             (adapter->num_q_vectors < adapter->num_tx_queues))
817                 return -ENOMEM;
818
819         if (adapter->num_q_vectors >=
820             (adapter->num_rx_queues + adapter->num_tx_queues)) {
821                 for (i = 0; i < adapter->num_rx_queues; i++)
822                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
823                 for (i = 0; i < adapter->num_tx_queues; i++)
824                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
825         } else {
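                /* fewer vectors than total rings: pair a Tx ring with an Rx ring
                 * on the same vector where possible */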
826                 for (i = 0; i < adapter->num_rx_queues; i++) {
827                         if (i < adapter->num_tx_queues)
828                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
829                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
830                 }
831                 for (; i < adapter->num_tx_queues; i++)
832                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
833         }
834         return 0;
835 }
836
837 /**
838  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
839  *
840  * This function initializes the interrupts and allocates all of the queues.
841  **/
842 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
843 {
844         struct pci_dev *pdev = adapter->pdev;
845         int err;
846
847         igb_set_interrupt_capability(adapter);
848
849         err = igb_alloc_q_vectors(adapter);
850         if (err) {
851                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
852                 goto err_alloc_q_vectors;
853         }
854
855         err = igb_alloc_queues(adapter);
856         if (err) {
857                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
858                 goto err_alloc_queues;
859         }
860
861         err = igb_map_ring_to_vector(adapter);
862         if (err) {
863                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
864                 goto err_map_queues;
865         }
866
867
868         return 0;
869 err_map_queues:
870         igb_free_queues(adapter);
871 err_alloc_queues:
872         igb_free_q_vectors(adapter);
873 err_alloc_q_vectors:
874         igb_reset_interrupt_capability(adapter);
875         return err;
876 }
877
878 /**
879  * igb_request_irq - initialize interrupts
880  *
881  * Attempts to configure interrupts using the best available
882  * capabilities of the hardware and kernel.
883  **/
884 static int igb_request_irq(struct igb_adapter *adapter)
885 {
886         struct net_device *netdev = adapter->netdev;
887         struct pci_dev *pdev = adapter->pdev;
888         int err = 0;
889
890         if (adapter->msix_entries) {
891                 err = igb_request_msix(adapter);
892                 if (!err)
893                         goto request_done;
894                 /* fall back to MSI */
895                 igb_clear_interrupt_scheme(adapter);
896                 if (!pci_enable_msi(adapter->pdev))
897                         adapter->flags |= IGB_FLAG_HAS_MSI;
898                 igb_free_all_tx_resources(adapter);
899                 igb_free_all_rx_resources(adapter);
900                 adapter->num_tx_queues = 1;
901                 adapter->num_rx_queues = 1;
902                 adapter->num_q_vectors = 1;
903                 err = igb_alloc_q_vectors(adapter);
904                 if (err) {
905                         dev_err(&pdev->dev,
906                                 "Unable to allocate memory for vectors\n");
907                         goto request_done;
908                 }
909                 err = igb_alloc_queues(adapter);
910                 if (err) {
911                         dev_err(&pdev->dev,
912                                 "Unable to allocate memory for queues\n");
913                         igb_free_q_vectors(adapter);
914                         goto request_done;
915                 }
916                 igb_setup_all_tx_resources(adapter);
917                 igb_setup_all_rx_resources(adapter);
918         } else {
919                 igb_assign_vector(adapter->q_vector[0], 0);
920         }
921
922         if (adapter->flags & IGB_FLAG_HAS_MSI) {
923                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
924                                   netdev->name, adapter);
925                 if (!err)
926                         goto request_done;
927
928                 /* fall back to legacy interrupts */
929                 igb_reset_interrupt_capability(adapter);
930                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
931         }
932
933         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
934                           netdev->name, adapter);
935
936         if (err)
937                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
938                         err);
939
940 request_done:
941         return err;
942 }
943
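/**
 * igb_free_irq - release the IRQs previously requested by igb_request_irq
 * @adapter: board private structure
 **/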
944 static void igb_free_irq(struct igb_adapter *adapter)
945 {
946         if (adapter->msix_entries) {
947                 int vector = 0, i;
948
949                 free_irq(adapter->msix_entries[vector++].vector, adapter);
950
951                 for (i = 0; i < adapter->num_q_vectors; i++) {
952                         struct igb_q_vector *q_vector = adapter->q_vector[i];
953                         free_irq(adapter->msix_entries[vector++].vector,
954                                  q_vector);
955                 }
956         } else {
957                 free_irq(adapter->pdev->irq, adapter);
958         }
959 }
960
961 /**
962  * igb_irq_disable - Mask off interrupt generation on the NIC
963  * @adapter: board private structure
964  **/
965 static void igb_irq_disable(struct igb_adapter *adapter)
966 {
967         struct e1000_hw *hw = &adapter->hw;
968
969         /*
970          * we need to be careful when disabling interrupts.  The VFs are also
971          * mapped into these registers and so clearing the bits can cause
972          * issues on the VF drivers, so we only need to clear what we set
973          */
974         if (adapter->msix_entries) {
975                 u32 regval = rd32(E1000_EIAM);
976                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
977                 wr32(E1000_EIMC, adapter->eims_enable_mask);
978                 regval = rd32(E1000_EIAC);
979                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
980         }
981
982         wr32(E1000_IAM, 0);
983         wr32(E1000_IMC, ~0);
984         wrfl();
985         synchronize_irq(adapter->pdev->irq);
986 }
987
988 /**
989  * igb_irq_enable - Enable default interrupt generation settings
990  * @adapter: board private structure
991  **/
992 static void igb_irq_enable(struct igb_adapter *adapter)
993 {
994         struct e1000_hw *hw = &adapter->hw;
995
996         if (adapter->msix_entries) {
997                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
998                 u32 regval = rd32(E1000_EIAC);
999                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1000                 regval = rd32(E1000_EIAM);
1001                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1002                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1003                 if (adapter->vfs_allocated_count) {
1004                         wr32(E1000_MBVFIMR, 0xFF);
1005                         ims |= E1000_IMS_VMMB;
1006                 }
1007                 if (adapter->hw.mac.type == e1000_82580)
1008                         ims |= E1000_IMS_DRSTA;
1009
1010                 wr32(E1000_IMS, ims);
1011         } else {
1012                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1013                                 E1000_IMS_DRSTA);
1014                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1015                                 E1000_IMS_DRSTA);
1016         }
1017 }
1018
1019 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1020 {
1021         struct e1000_hw *hw = &adapter->hw;
1022         u16 vid = adapter->hw.mng_cookie.vlan_id;
1023         u16 old_vid = adapter->mng_vlan_id;
1024
1025         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1026                 /* add VID to filter table */
1027                 igb_vfta_set(hw, vid, true);
1028                 adapter->mng_vlan_id = vid;
1029         } else {
1030                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1031         }
1032
1033         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1034             (vid != old_vid) &&
1035             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1036                 /* remove VID from filter table */
1037                 igb_vfta_set(hw, old_vid, false);
1038         }
1039 }
1040
1041 /**
1042  * igb_release_hw_control - release control of the h/w to f/w
1043  * @adapter: address of board private structure
1044  *
1045  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1046  * For ASF and Pass Through versions of f/w this means that the
1047  * driver is no longer loaded.
1048  *
1049  **/
1050 static void igb_release_hw_control(struct igb_adapter *adapter)
1051 {
1052         struct e1000_hw *hw = &adapter->hw;
1053         u32 ctrl_ext;
1054
1055         /* Let firmware take over control of h/w */
1056         ctrl_ext = rd32(E1000_CTRL_EXT);
1057         wr32(E1000_CTRL_EXT,
1058                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1059 }
1060
1061 /**
1062  * igb_get_hw_control - get control of the h/w from f/w
1063  * @adapter: address of board private structure
1064  *
1065  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1066  * For ASF and Pass Through versions of f/w this means that
1067  * the driver is loaded.
1068  *
1069  **/
1070 static void igb_get_hw_control(struct igb_adapter *adapter)
1071 {
1072         struct e1000_hw *hw = &adapter->hw;
1073         u32 ctrl_ext;
1074
1075         /* Let firmware know the driver has taken over */
1076         ctrl_ext = rd32(E1000_CTRL_EXT);
1077         wr32(E1000_CTRL_EXT,
1078                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1079 }
1080
1081 /**
1082  * igb_configure - configure the hardware for RX and TX
1083  * @adapter: private board structure
1084  **/
1085 static void igb_configure(struct igb_adapter *adapter)
1086 {
1087         struct net_device *netdev = adapter->netdev;
1088         int i;
1089
1090         igb_get_hw_control(adapter);
1091         igb_set_rx_mode(netdev);
1092
1093         igb_restore_vlan(adapter);
1094
1095         igb_setup_tctl(adapter);
1096         igb_setup_mrqc(adapter);
1097         igb_setup_rctl(adapter);
1098
1099         igb_configure_tx(adapter);
1100         igb_configure_rx(adapter);
1101
1102         igb_rx_fifo_flush_82575(&adapter->hw);
1103
1104         /* call igb_desc_unused which always leaves
1105          * at least 1 descriptor unused to make sure
1106          * next_to_use != next_to_clean */
1107         for (i = 0; i < adapter->num_rx_queues; i++) {
1108                 struct igb_ring *ring = adapter->rx_ring[i];
1109                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1110         }
1111
1112
1113         adapter->tx_queue_len = netdev->tx_queue_len;
1114 }
1115
1116 /**
1117  * igb_power_up_link - Power up the phy/serdes link
1118  * @adapter: address of board private structure
1119  **/
1120 void igb_power_up_link(struct igb_adapter *adapter)
1121 {
1122         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1123                 igb_power_up_phy_copper(&adapter->hw);
1124         else
1125                 igb_power_up_serdes_link_82575(&adapter->hw);
1126 }
1127
1128 /**
1129  * igb_power_down_link - Power down the phy/serdes link
1130  * @adapter: address of board private structure
1131  */
1132 static void igb_power_down_link(struct igb_adapter *adapter)
1133 {
1134         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1135                 igb_power_down_phy_copper_82575(&adapter->hw);
1136         else
1137                 igb_shutdown_serdes_link_82575(&adapter->hw);
1138 }
1139
1140 /**
1141  * igb_up - Open the interface and prepare it to handle traffic
1142  * @adapter: board private structure
1143  **/
1144 int igb_up(struct igb_adapter *adapter)
1145 {
1146         struct e1000_hw *hw = &adapter->hw;
1147         int i;
1148
1149         /* hardware has been reset, we need to reload some things */
1150         igb_configure(adapter);
1151
1152         clear_bit(__IGB_DOWN, &adapter->state);
1153
1154         for (i = 0; i < adapter->num_q_vectors; i++) {
1155                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1156                 napi_enable(&q_vector->napi);
1157         }
1158         if (adapter->msix_entries)
1159                 igb_configure_msix(adapter);
1160         else
1161                 igb_assign_vector(adapter->q_vector[0], 0);
1162
1163         /* Clear any pending interrupts. */
1164         rd32(E1000_ICR);
1165         igb_irq_enable(adapter);
1166
1167         /* notify VFs that reset has been completed */
1168         if (adapter->vfs_allocated_count) {
1169                 u32 reg_data = rd32(E1000_CTRL_EXT);
1170                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1171                 wr32(E1000_CTRL_EXT, reg_data);
1172         }
1173
1174         netif_tx_start_all_queues(adapter->netdev);
1175
1176         /* start the watchdog. */
1177         hw->mac.get_link_status = 1;
1178         schedule_work(&adapter->watchdog_task);
1179
1180         return 0;
1181 }
1182
1183 void igb_down(struct igb_adapter *adapter)
1184 {
1185         struct net_device *netdev = adapter->netdev;
1186         struct e1000_hw *hw = &adapter->hw;
1187         u32 tctl, rctl;
1188         int i;
1189
1190         /* signal that we're down so the interrupt handler does not
1191          * reschedule our watchdog timer */
1192         set_bit(__IGB_DOWN, &adapter->state);
1193
1194         /* disable receives in the hardware */
1195         rctl = rd32(E1000_RCTL);
1196         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1197         /* flush and sleep below */
1198
1199         netif_tx_stop_all_queues(netdev);
1200
1201         /* disable transmits in the hardware */
1202         tctl = rd32(E1000_TCTL);
1203         tctl &= ~E1000_TCTL_EN;
1204         wr32(E1000_TCTL, tctl);
1205         /* flush both disables and wait for them to finish */
1206         wrfl();
1207         msleep(10);
1208
1209         for (i = 0; i < adapter->num_q_vectors; i++) {
1210                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1211                 napi_disable(&q_vector->napi);
1212         }
1213
1214         igb_irq_disable(adapter);
1215
1216         del_timer_sync(&adapter->watchdog_timer);
1217         del_timer_sync(&adapter->phy_info_timer);
1218
1219         netdev->tx_queue_len = adapter->tx_queue_len;
1220         netif_carrier_off(netdev);
1221
1222         /* record the stats before reset*/
1223         igb_update_stats(adapter);
1224
1225         adapter->link_speed = 0;
1226         adapter->link_duplex = 0;
1227
1228         if (!pci_channel_offline(adapter->pdev))
1229                 igb_reset(adapter);
1230         igb_clean_all_tx_rings(adapter);
1231         igb_clean_all_rx_rings(adapter);
1232 #ifdef CONFIG_IGB_DCA
1233
1234         /* since we reset the hardware DCA settings were cleared */
1235         igb_setup_dca(adapter);
1236 #endif
1237 }
1238
1239 void igb_reinit_locked(struct igb_adapter *adapter)
1240 {
1241         WARN_ON(in_interrupt());
1242         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1243                 msleep(1);
1244         igb_down(adapter);
1245         igb_up(adapter);
1246         clear_bit(__IGB_RESETTING, &adapter->state);
1247 }
1248
1249 void igb_reset(struct igb_adapter *adapter)
1250 {
1251         struct pci_dev *pdev = adapter->pdev;
1252         struct e1000_hw *hw = &adapter->hw;
1253         struct e1000_mac_info *mac = &hw->mac;
1254         struct e1000_fc_info *fc = &hw->fc;
1255         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1256         u16 hwm;
1257
1258         /* Repartition PBA for greater than 9k MTU.
1259          * CTRL.RST is required for this to take effect.
1260          */
1261         switch (mac->type) {
1262         case e1000_82580:
1263                 pba = rd32(E1000_RXPBS);
1264                 pba = igb_rxpbs_adjust_82580(pba);
1265                 break;
1266         case e1000_82576:
1267                 pba = rd32(E1000_RXPBS);
1268                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1269                 break;
1270         case e1000_82575:
1271         default:
1272                 pba = E1000_PBA_34K;
1273                 break;
1274         }
1275
1276         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1277             (mac->type < e1000_82576)) {
1278                 /* adjust PBA for jumbo frames */
1279                 wr32(E1000_PBA, pba);
1280
1281                 /* To maintain wire speed transmits, the Tx FIFO should be
1282                  * large enough to accommodate two full transmit packets,
1283                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1284                  * the Rx FIFO should be large enough to accommodate at least
1285                  * one full receive packet and is similarly rounded up and
1286                  * expressed in KB. */
1287                 pba = rd32(E1000_PBA);
1288                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1289                 tx_space = pba >> 16;
1290                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1291                 pba &= 0xffff;
1292                 /* the tx fifo also stores 16 bytes of information about the tx
1293                  * packet, but don't include the ethernet FCS because hardware appends it */
1294                 min_tx_space = (adapter->max_frame_size +
1295                                 sizeof(union e1000_adv_tx_desc) -
1296                                 ETH_FCS_LEN) * 2;
1297                 min_tx_space = ALIGN(min_tx_space, 1024);
1298                 min_tx_space >>= 10;
1299                 /* software strips receive CRC, so leave room for it */
1300                 min_rx_space = adapter->max_frame_size;
1301                 min_rx_space = ALIGN(min_rx_space, 1024);
1302                 min_rx_space >>= 10;
1303
1304                 /* If current Tx allocation is less than the min Tx FIFO size,
1305                  * and the min Tx FIFO size is less than the current Rx FIFO
1306                  * allocation, take space away from current Rx allocation */
1307                 if (tx_space < min_tx_space &&
1308                     ((min_tx_space - tx_space) < pba)) {
1309                         pba = pba - (min_tx_space - tx_space);
1310
1311                         /* if short on rx space, rx wins and must trump tx
1312                          * adjustment */
1313                         if (pba < min_rx_space)
1314                                 pba = min_rx_space;
1315                 }
1316                 wr32(E1000_PBA, pba);
1317         }
1318
1319         /* flow control settings */
1320         /* The high water mark must be low enough to fit one full frame
1321          * (or the size used for early receive) above it in the Rx FIFO.
1322          * Set it to the lower of:
1323          * - 90% of the Rx FIFO size, or
1324          * - the full Rx FIFO size minus one full frame */
1325         hwm = min(((pba << 10) * 9 / 10),
1326                         ((pba << 10) - 2 * adapter->max_frame_size));
1327
1328         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1329         fc->low_water = fc->high_water - 16;
1330         fc->pause_time = 0xFFFF;
1331         fc->send_xon = 1;
1332         fc->current_mode = fc->requested_mode;
1333
1334         /* disable receive for all VFs and wait one second */
1335         if (adapter->vfs_allocated_count) {
1336                 int i;
1337                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1338                         adapter->vf_data[i].flags = 0;
1339
1340                 /* ping all the active vfs to let them know we are going down */
1341                 igb_ping_all_vfs(adapter);
1342
1343                 /* disable transmits and receives */
1344                 wr32(E1000_VFRE, 0);
1345                 wr32(E1000_VFTE, 0);
1346         }
1347
1348         /* Allow time for pending master requests to run */
1349         hw->mac.ops.reset_hw(hw);
1350         wr32(E1000_WUC, 0);
1351
1352         if (hw->mac.ops.init_hw(hw))
1353                 dev_err(&pdev->dev, "Hardware Error\n");
1354
1355         if (hw->mac.type == e1000_82580) {
1356                 u32 reg = rd32(E1000_PCIEMISC);
1357                 wr32(E1000_PCIEMISC,
1358                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1359         }
1360         if (!netif_running(adapter->netdev))
1361                 igb_power_down_link(adapter);
1362
1363         igb_update_mng_vlan(adapter);
1364
1365         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1366         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1367
1368         igb_get_phy_info(hw);
1369 }
1370
1371 static const struct net_device_ops igb_netdev_ops = {
1372         .ndo_open               = igb_open,
1373         .ndo_stop               = igb_close,
1374         .ndo_start_xmit         = igb_xmit_frame_adv,
1375         .ndo_get_stats          = igb_get_stats,
1376         .ndo_set_rx_mode        = igb_set_rx_mode,
1377         .ndo_set_multicast_list = igb_set_rx_mode,
1378         .ndo_set_mac_address    = igb_set_mac,
1379         .ndo_change_mtu         = igb_change_mtu,
1380         .ndo_do_ioctl           = igb_ioctl,
1381         .ndo_tx_timeout         = igb_tx_timeout,
1382         .ndo_validate_addr      = eth_validate_addr,
1383         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1384         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1385         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1386         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1387         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1388         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1389         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1390 #ifdef CONFIG_NET_POLL_CONTROLLER
1391         .ndo_poll_controller    = igb_netpoll,
1392 #endif
1393 };
1394
1395 /**
1396  * igb_probe - Device Initialization Routine
1397  * @pdev: PCI device information struct
1398  * @ent: entry in igb_pci_tbl
1399  *
1400  * Returns 0 on success, negative on failure
1401  *
1402  * igb_probe initializes an adapter identified by a pci_dev structure.
1403  * The OS initialization, configuring of the adapter private structure,
1404  * and a hardware reset occur.
1405  **/
1406 static int __devinit igb_probe(struct pci_dev *pdev,
1407                                const struct pci_device_id *ent)
1408 {
1409         struct net_device *netdev;
1410         struct igb_adapter *adapter;
1411         struct e1000_hw *hw;
1412         u16 eeprom_data = 0;
1413         static int global_quad_port_a; /* global quad port a indication */
1414         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1415         unsigned long mmio_start, mmio_len;
1416         int err, pci_using_dac;
1417         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1418         u32 part_num;
1419
1420         err = pci_enable_device_mem(pdev);
1421         if (err)
1422                 return err;
1423
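             /* try 64-bit DMA first and fall back to a 32-bit mask;
              * pci_using_dac is checked later in probe to decide whether
              * the NETIF_F_HIGHDMA feature can be advertised */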
1424         pci_using_dac = 0;
1425         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1426         if (!err) {
1427                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1428                 if (!err)
1429                         pci_using_dac = 1;
1430         } else {
1431                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1432                 if (err) {
1433                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1434                         if (err) {
1435                                 dev_err(&pdev->dev, "No usable DMA "
1436                                         "configuration, aborting\n");
1437                                 goto err_dma;
1438                         }
1439                 }
1440         }
1441
1442         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1443                                            IORESOURCE_MEM),
1444                                            igb_driver_name);
1445         if (err)
1446                 goto err_pci_reg;
1447
1448         pci_enable_pcie_error_reporting(pdev);
1449
1450         pci_set_master(pdev);
1451         pci_save_state(pdev);
1452
1453         err = -ENOMEM;
1454         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1455                                    IGB_ABS_MAX_TX_QUEUES);
1456         if (!netdev)
1457                 goto err_alloc_etherdev;
1458
1459         SET_NETDEV_DEV(netdev, &pdev->dev);
1460
1461         pci_set_drvdata(pdev, netdev);
1462         adapter = netdev_priv(netdev);
1463         adapter->netdev = netdev;
1464         adapter->pdev = pdev;
1465         hw = &adapter->hw;
1466         hw->back = adapter;
1467         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1468
1469         mmio_start = pci_resource_start(pdev, 0);
1470         mmio_len = pci_resource_len(pdev, 0);
1471
1472         err = -EIO;
1473         hw->hw_addr = ioremap(mmio_start, mmio_len);
1474         if (!hw->hw_addr)
1475                 goto err_ioremap;
1476
1477         netdev->netdev_ops = &igb_netdev_ops;
1478         igb_set_ethtool_ops(netdev);
1479         netdev->watchdog_timeo = 5 * HZ;
1480
1481         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1482
1483         netdev->mem_start = mmio_start;
1484         netdev->mem_end = mmio_start + mmio_len;
1485
1486         /* PCI config space info */
1487         hw->vendor_id = pdev->vendor;
1488         hw->device_id = pdev->device;
1489         hw->revision_id = pdev->revision;
1490         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1491         hw->subsystem_device_id = pdev->subsystem_device;
1492
1493         /* Copy the default MAC, PHY and NVM function pointers */
1494         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1495         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1496         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1497         /* Initialize skew-specific constants */
1498         err = ei->get_invariants(hw);
1499         if (err)
1500                 goto err_sw_init;
1501
1502         /* setup the private structure */
1503         err = igb_sw_init(adapter);
1504         if (err)
1505                 goto err_sw_init;
1506
1507         igb_get_bus_info_pcie(hw);
1508
1509         hw->phy.autoneg_wait_to_complete = false;
1510
1511         /* Copper options */
1512         if (hw->phy.media_type == e1000_media_type_copper) {
1513                 hw->phy.mdix = AUTO_ALL_MODES;
1514                 hw->phy.disable_polarity_correction = false;
1515                 hw->phy.ms_type = e1000_ms_hw_default;
1516         }
1517
1518         if (igb_check_reset_block(hw))
1519                 dev_info(&pdev->dev,
1520                         "PHY reset is blocked due to SOL/IDER session.\n");
1521
1522         netdev->features = NETIF_F_SG |
1523                            NETIF_F_IP_CSUM |
1524                            NETIF_F_HW_VLAN_TX |
1525                            NETIF_F_HW_VLAN_RX |
1526                            NETIF_F_HW_VLAN_FILTER;
1527
1528         netdev->features |= NETIF_F_IPV6_CSUM;
1529         netdev->features |= NETIF_F_TSO;
1530         netdev->features |= NETIF_F_TSO6;
1531         netdev->features |= NETIF_F_GRO;
1532
1533         netdev->vlan_features |= NETIF_F_TSO;
1534         netdev->vlan_features |= NETIF_F_TSO6;
1535         netdev->vlan_features |= NETIF_F_IP_CSUM;
1536         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1537         netdev->vlan_features |= NETIF_F_SG;
1538
1539         if (pci_using_dac)
1540                 netdev->features |= NETIF_F_HIGHDMA;
1541
1542         if (hw->mac.type >= e1000_82576)
1543                 netdev->features |= NETIF_F_SCTP_CSUM;
1544
1545         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1546
1547         /* before reading the NVM, reset the controller to put the device in a
1548          * known good starting state */
1549         hw->mac.ops.reset_hw(hw);
1550
1551         /* make sure the NVM is good */
1552         if (igb_validate_nvm_checksum(hw) < 0) {
1553                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1554                 err = -EIO;
1555                 goto err_eeprom;
1556         }
1557
1558         /* copy the MAC address out of the NVM */
1559         if (hw->mac.ops.read_mac_addr(hw))
1560                 dev_err(&pdev->dev, "NVM Read Error\n");
1561
1562         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1563         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1564
1565         if (!is_valid_ether_addr(netdev->perm_addr)) {
1566                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1567                 err = -EIO;
1568                 goto err_eeprom;
1569         }
1570
1571         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1572                     (unsigned long) adapter);
1573         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1574                     (unsigned long) adapter);
1575
1576         INIT_WORK(&adapter->reset_task, igb_reset_task);
1577         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1578
1579         /* Initialize link properties that are user-changeable */
1580         adapter->fc_autoneg = true;
1581         hw->mac.autoneg = true;
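             /* 0x2f advertises 10/100 half and full duplex plus 1000 full */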
1582         hw->phy.autoneg_advertised = 0x2f;
1583
1584         hw->fc.requested_mode = e1000_fc_default;
1585         hw->fc.current_mode = e1000_fc_default;
1586
1587         igb_validate_mdi_setting(hw);
1588
1589         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1590          * enable the ACPI Magic Packet filter
1591          */
1592
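             /* each LAN port has its own Initialization Control 3 word in the
              * NVM; the 82580 derives the word offset from the function number
              * instead of using the fixed port A/B words */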
1593         if (hw->bus.func == 0)
1594                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1595         else if (hw->mac.type == e1000_82580)
1596                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1597                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1598                                  &eeprom_data);
1599         else if (hw->bus.func == 1)
1600                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1601
1602         if (eeprom_data & eeprom_apme_mask)
1603                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1604
1605         /* now that we have the eeprom settings, apply the special cases where
1606          * the eeprom may be wrong or the board simply won't support wake on
1607          * lan on a particular port */
1608         switch (pdev->device) {
1609         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1610                 adapter->eeprom_wol = 0;
1611                 break;
1612         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1613         case E1000_DEV_ID_82576_FIBER:
1614         case E1000_DEV_ID_82576_SERDES:
1615                 /* Wake events only supported on port A for dual fiber
1616                  * regardless of eeprom setting */
1617                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1618                         adapter->eeprom_wol = 0;
1619                 break;
1620         case E1000_DEV_ID_82576_QUAD_COPPER:
1621                 /* if quad port adapter, disable WoL on all but port A */
1622                 if (global_quad_port_a != 0)
1623                         adapter->eeprom_wol = 0;
1624                 else
1625                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1626                 /* Reset for multiple quad port adapters */
1627                 if (++global_quad_port_a == 4)
1628                         global_quad_port_a = 0;
1629                 break;
1630         }
1631
1632         /* initialize the wol settings based on the eeprom settings */
1633         adapter->wol = adapter->eeprom_wol;
1634         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1635
1636         /* reset the hardware with the new settings */
1637         igb_reset(adapter);
1638
1639         /* let the f/w know that the h/w is now under the control of the
1640          * driver. */
1641         igb_get_hw_control(adapter);
1642
1643         strcpy(netdev->name, "eth%d");
1644         err = register_netdev(netdev);
1645         if (err)
1646                 goto err_register;
1647
1648         /* carrier off reporting is important to ethtool even BEFORE open */
1649         netif_carrier_off(netdev);
1650
1651 #ifdef CONFIG_IGB_DCA
1652         if (dca_add_requester(&pdev->dev) == 0) {
1653                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1654                 dev_info(&pdev->dev, "DCA enabled\n");
1655                 igb_setup_dca(adapter);
1656         }
1657
1658 #endif
1659         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1660         /* print bus type/speed/width info */
1661         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1662                  netdev->name,
1663                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1664                                                             "unknown"),
1665                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1666                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1667                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1668                    "unknown"),
1669                  netdev->dev_addr);
1670
1671         igb_read_part_num(hw, &part_num);
1672         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1673                 (part_num >> 8), (part_num & 0xff));
1674
1675         dev_info(&pdev->dev,
1676                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1677                 adapter->msix_entries ? "MSI-X" :
1678                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1679                 adapter->num_rx_queues, adapter->num_tx_queues);
1680
1681         return 0;
1682
1683 err_register:
1684         igb_release_hw_control(adapter);
1685 err_eeprom:
1686         if (!igb_check_reset_block(hw))
1687                 igb_reset_phy(hw);
1688
1689         if (hw->flash_address)
1690                 iounmap(hw->flash_address);
1691 err_sw_init:
1692         igb_clear_interrupt_scheme(adapter);
1693         iounmap(hw->hw_addr);
1694 err_ioremap:
1695         free_netdev(netdev);
1696 err_alloc_etherdev:
1697         pci_release_selected_regions(pdev,
1698                                      pci_select_bars(pdev, IORESOURCE_MEM));
1699 err_pci_reg:
1700 err_dma:
1701         pci_disable_device(pdev);
1702         return err;
1703 }
1704
1705 /**
1706  * igb_remove - Device Removal Routine
1707  * @pdev: PCI device information struct
1708  *
1709  * igb_remove is called by the PCI subsystem to alert the driver
1710  * that it should release a PCI device.  This could be caused by a
1711  * Hot-Plug event, or because the driver is going to be removed from
1712  * memory.
1713  **/
1714 static void __devexit igb_remove(struct pci_dev *pdev)
1715 {
1716         struct net_device *netdev = pci_get_drvdata(pdev);
1717         struct igb_adapter *adapter = netdev_priv(netdev);
1718         struct e1000_hw *hw = &adapter->hw;
1719
1720         /* flush_scheduled_work() may reschedule our watchdog task, so
1721          * explicitly disable watchdog tasks from being rescheduled  */
1722         set_bit(__IGB_DOWN, &adapter->state);
1723         del_timer_sync(&adapter->watchdog_timer);
1724         del_timer_sync(&adapter->phy_info_timer);
1725
1726         flush_scheduled_work();
1727
1728 #ifdef CONFIG_IGB_DCA
1729         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1730                 dev_info(&pdev->dev, "DCA disabled\n");
1731                 dca_remove_requester(&pdev->dev);
1732                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1733                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1734         }
1735 #endif
1736
1737         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1738          * would have already happened in close and is redundant. */
1739         igb_release_hw_control(adapter);
1740
1741         unregister_netdev(netdev);
1742
1743         igb_clear_interrupt_scheme(adapter);
1744
1745 #ifdef CONFIG_PCI_IOV
1746         /* reclaim resources allocated to VFs */
1747         if (adapter->vf_data) {
1748                 /* disable iov and allow time for transactions to clear */
1749                 pci_disable_sriov(pdev);
1750                 msleep(500);
1751
1752                 kfree(adapter->vf_data);
1753                 adapter->vf_data = NULL;
1754                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1755                 msleep(100);
1756                 dev_info(&pdev->dev, "IOV Disabled\n");
1757         }
1758 #endif
1759
1760         iounmap(hw->hw_addr);
1761         if (hw->flash_address)
1762                 iounmap(hw->flash_address);
1763         pci_release_selected_regions(pdev,
1764                                      pci_select_bars(pdev, IORESOURCE_MEM));
1765
1766         free_netdev(netdev);
1767
1768         pci_disable_pcie_error_reporting(pdev);
1769
1770         pci_disable_device(pdev);
1771 }
1772
1773 /**
1774  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1775  * @adapter: board private structure to initialize
1776  *
1777  * This function initializes the vf specific data storage and then attempts to
1778  * allocate the VFs.  The reason for ordering it this way is because it is much
1779  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1780  * the memory for the VFs.
1781  **/
1782 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1783 {
1784 #ifdef CONFIG_PCI_IOV
1785         struct pci_dev *pdev = adapter->pdev;
1786
1787         if (adapter->vfs_allocated_count > 7)
1788                 adapter->vfs_allocated_count = 7;
1789
1790         if (adapter->vfs_allocated_count) {
1791                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1792                                            sizeof(struct vf_data_storage),
1793                                            GFP_KERNEL);
1794                 /* if allocation failed then we do not support SR-IOV */
1795                 if (!adapter->vf_data) {
1796                         adapter->vfs_allocated_count = 0;
1797                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1798                                 "Data Storage\n");
1799                 }
1800         }
1801
1802         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1803                 kfree(adapter->vf_data);
1804                 adapter->vf_data = NULL;
1805 #endif /* CONFIG_PCI_IOV */
1806                 adapter->vfs_allocated_count = 0;
1807 #ifdef CONFIG_PCI_IOV
1808         } else {
1809                 unsigned char mac_addr[ETH_ALEN];
1810                 int i;
1811                 dev_info(&pdev->dev, "%d vfs allocated\n",
1812                          adapter->vfs_allocated_count);
1813                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1814                         random_ether_addr(mac_addr);
1815                         igb_set_vf_mac(adapter, i, mac_addr);
1816                 }
1817         }
1818 #endif /* CONFIG_PCI_IOV */
1819 }
1820
1821
1822 /**
1823  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1824  * @adapter: board private structure to initialize
1825  *
1826  * igb_init_hw_timer initializes the function pointers and values for the
1827  * hardware timer.
1828  **/
1829 static void igb_init_hw_timer(struct igb_adapter *adapter)
1830 {
1831         struct e1000_hw *hw = &adapter->hw;
1832
1833         switch (hw->mac.type) {
1834         case e1000_82580:
1835                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1836                 adapter->cycles.read = igb_read_clock;
1837                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1838                 adapter->cycles.mult = 1;
1839                 /*
1840                  * The 82580 timesync updates the system timer every 8ns by 8ns
1841                  * and the value cannot be shifted.  Instead we need to shift
1842                  * the registers to generate a 64bit timer value.  As a result
1843                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1844                  * 24 in order to generate a larger value for synchronization.
1845                  */
1846                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1847                 /* disable system timer temporarily by setting bit 31 */
1848                 wr32(E1000_TSAUXC, 0x80000000);
1849                 wrfl();
1850
1851                 /* Set registers so that rollover occurs soon to test this. */
1852                 wr32(E1000_SYSTIMR, 0x00000000);
1853                 wr32(E1000_SYSTIML, 0x80000000);
1854                 wr32(E1000_SYSTIMH, 0x000000FF);
1855                 wrfl();
1856
1857                 /* enable system timer by clearing bit 31 */
1858                 wr32(E1000_TSAUXC, 0x0);
1859                 wrfl();
1860
1861                 timecounter_init(&adapter->clock,
1862                                  &adapter->cycles,
1863                                  ktime_to_ns(ktime_get_real()));
1864                 /*
1865                  * Synchronize our NIC clock against system wall clock. NIC
1866                  * time stamp reading requires ~3us per sample, each sample
1867                  * was pretty stable even under load => only require 10
1868                  * samples for each offset comparison.
1869                  */
1870                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1871                 adapter->compare.source = &adapter->clock;
1872                 adapter->compare.target = ktime_get_real;
1873                 adapter->compare.num_samples = 10;
1874                 timecompare_update(&adapter->compare, 0);
1875                 break;
1876         case e1000_82576:
1877                 /*
1878                  * Initialize hardware timer: we keep it running just in case
1879                  * that some program needs it later on.
1880                  */
1881                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1882                 adapter->cycles.read = igb_read_clock;
1883                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1884                 adapter->cycles.mult = 1;
1885                 /*
1886                  * Scale the NIC clock cycle by a large factor so that
1887                  * relatively small clock corrections can be added or
1888                  * subtracted at each clock tick. The drawbacks of a large
1889                  * factor are a) that the clock register overflows more quickly
1890                  * (not such a big deal) and b) that the increment per tick has
1891                  * to fit into 24 bits.  As a result we need to use a shift of
1892                  * 19 so we can fit a value of 16 into the TIMINCA register.
1893                  */
1894                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1895                 wr32(E1000_TIMINCA,
1896                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1897                                 (16 << IGB_82576_TSYNC_SHIFT));
1898
1899                 /* Set registers so that rollover occurs soon to test this. */
1900                 wr32(E1000_SYSTIML, 0x00000000);
1901                 wr32(E1000_SYSTIMH, 0xFF800000);
1902                 wrfl();
1903
1904                 timecounter_init(&adapter->clock,
1905                                  &adapter->cycles,
1906                                  ktime_to_ns(ktime_get_real()));
1907                 /*
1908                  * Synchronize our NIC clock against system wall clock. NIC
1909                  * time stamp reading requires ~3us per sample, each sample
1910                  * was pretty stable even under load => only require 10
1911                  * samples for each offset comparison.
1912                  */
1913                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1914                 adapter->compare.source = &adapter->clock;
1915                 adapter->compare.target = ktime_get_real;
1916                 adapter->compare.num_samples = 10;
1917                 timecompare_update(&adapter->compare, 0);
1918                 break;
1919         case e1000_82575:
1920                 /* 82575 does not support timesync */
1921         default:
1922                 break;
1923         }
1924
1925 }
1926
1927 /**
1928  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1929  * @adapter: board private structure to initialize
1930  *
1931  * igb_sw_init initializes the Adapter private data structure.
1932  * Fields are initialized based on PCI device information and
1933  * OS network device settings (MTU size).
1934  **/
1935 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1936 {
1937         struct e1000_hw *hw = &adapter->hw;
1938         struct net_device *netdev = adapter->netdev;
1939         struct pci_dev *pdev = adapter->pdev;
1940
1941         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1942
1943         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1944         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1945         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1946         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1947
1948         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1949         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1950
1951 #ifdef CONFIG_PCI_IOV
1952         if (hw->mac.type == e1000_82576)
1953                 adapter->vfs_allocated_count = max_vfs;
1954
1955 #endif /* CONFIG_PCI_IOV */
1956         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1957
1958         /*
1959          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1960          * then we should combine the queues into a queue pair in order to
1961          * conserve interrupts due to limited supply
1962          */
1963         if ((adapter->rss_queues > 4) ||
1964             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1965                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1966
1967         /* This call may decrease the number of queues */
1968         if (igb_init_interrupt_scheme(adapter)) {
1969                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1970                 return -ENOMEM;
1971         }
1972
1973         igb_init_hw_timer(adapter);
1974         igb_probe_vfs(adapter);
1975
1976         /* Explicitly disable IRQ since the NIC can be in any state. */
1977         igb_irq_disable(adapter);
1978
1979         set_bit(__IGB_DOWN, &adapter->state);
1980         return 0;
1981 }
1982
1983 /**
1984  * igb_open - Called when a network interface is made active
1985  * @netdev: network interface device structure
1986  *
1987  * Returns 0 on success, negative value on failure
1988  *
1989  * The open entry point is called when a network interface is made
1990  * active by the system (IFF_UP).  At this point all resources needed
1991  * for transmit and receive operations are allocated, the interrupt
1992  * handler is registered with the OS, the watchdog timer is started,
1993  * and the stack is notified that the interface is ready.
1994  **/
1995 static int igb_open(struct net_device *netdev)
1996 {
1997         struct igb_adapter *adapter = netdev_priv(netdev);
1998         struct e1000_hw *hw = &adapter->hw;
1999         int err;
2000         int i;
2001
2002         /* disallow open during test */
2003         if (test_bit(__IGB_TESTING, &adapter->state))
2004                 return -EBUSY;
2005
2006         netif_carrier_off(netdev);
2007
2008         /* allocate transmit descriptors */
2009         err = igb_setup_all_tx_resources(adapter);
2010         if (err)
2011                 goto err_setup_tx;
2012
2013         /* allocate receive descriptors */
2014         err = igb_setup_all_rx_resources(adapter);
2015         if (err)
2016                 goto err_setup_rx;
2017
2018         igb_power_up_link(adapter);
2019
2020         /* before we allocate an interrupt, we must be ready to handle it.
2021          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2022          * as soon as we call pci_request_irq, so we have to setup our
2023          * clean_rx handler before we do so.  */
2024         igb_configure(adapter);
2025
2026         err = igb_request_irq(adapter);
2027         if (err)
2028                 goto err_req_irq;
2029
2030         /* From here on the code is the same as igb_up() */
2031         clear_bit(__IGB_DOWN, &adapter->state);
2032
2033         for (i = 0; i < adapter->num_q_vectors; i++) {
2034                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2035                 napi_enable(&q_vector->napi);
2036         }
2037
2038         /* Clear any pending interrupts. */
2039         rd32(E1000_ICR);
2040
2041         igb_irq_enable(adapter);
2042
2043         /* notify VFs that reset has been completed */
2044         if (adapter->vfs_allocated_count) {
2045                 u32 reg_data = rd32(E1000_CTRL_EXT);
2046                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2047                 wr32(E1000_CTRL_EXT, reg_data);
2048         }
2049
2050         netif_tx_start_all_queues(netdev);
2051
2052         /* start the watchdog. */
2053         hw->mac.get_link_status = 1;
2054         schedule_work(&adapter->watchdog_task);
2055
2056         return 0;
2057
2058 err_req_irq:
2059         igb_release_hw_control(adapter);
2060         igb_power_down_link(adapter);
2061         igb_free_all_rx_resources(adapter);
2062 err_setup_rx:
2063         igb_free_all_tx_resources(adapter);
2064 err_setup_tx:
2065         igb_reset(adapter);
2066
2067         return err;
2068 }
2069
2070 /**
2071  * igb_close - Disables a network interface
2072  * @netdev: network interface device structure
2073  *
2074  * Returns 0, this is not allowed to fail
2075  *
2076  * The close entry point is called when an interface is de-activated
2077  * by the OS.  The hardware is still under the driver's control, but
2078  * needs to be disabled.  A global MAC reset is issued to stop the
2079  * hardware, and all transmit and receive resources are freed.
2080  **/
2081 static int igb_close(struct net_device *netdev)
2082 {
2083         struct igb_adapter *adapter = netdev_priv(netdev);
2084
2085         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2086         igb_down(adapter);
2087
2088         igb_free_irq(adapter);
2089
2090         igb_free_all_tx_resources(adapter);
2091         igb_free_all_rx_resources(adapter);
2092
2093         return 0;
2094 }
2095
2096 /**
2097  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2098  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2099  *
2100  * Return 0 on success, negative on failure
2101  **/
2102 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2103 {
2104         struct pci_dev *pdev = tx_ring->pdev;
2105         int size;
2106
2107         size = sizeof(struct igb_buffer) * tx_ring->count;
2108         tx_ring->buffer_info = vmalloc(size);
2109         if (!tx_ring->buffer_info)
2110                 goto err;
2111         memset(tx_ring->buffer_info, 0, size);
2112
2113         /* round up to nearest 4K */
2114         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2115         tx_ring->size = ALIGN(tx_ring->size, 4096);
2116
2117         tx_ring->desc = pci_alloc_consistent(pdev,
2118                                              tx_ring->size,
2119                                              &tx_ring->dma);
2120
2121         if (!tx_ring->desc)
2122                 goto err;
2123
2124         tx_ring->next_to_use = 0;
2125         tx_ring->next_to_clean = 0;
2126         return 0;
2127
2128 err:
2129         vfree(tx_ring->buffer_info);
2130         dev_err(&pdev->dev,
2131                 "Unable to allocate memory for the transmit descriptor ring\n");
2132         return -ENOMEM;
2133 }
2134
2135 /**
2136  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2137  *                                (Descriptors) for all queues
2138  * @adapter: board private structure
2139  *
2140  * Return 0 on success, negative on failure
2141  **/
2142 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2143 {
2144         struct pci_dev *pdev = adapter->pdev;
2145         int i, err = 0;
2146
2147         for (i = 0; i < adapter->num_tx_queues; i++) {
2148                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2149                 if (err) {
2150                         dev_err(&pdev->dev,
2151                                 "Allocation for Tx Queue %u failed\n", i);
2152                         for (i--; i >= 0; i--)
2153                                 igb_free_tx_resources(adapter->tx_ring[i]);
2154                         break;
2155                 }
2156         }
2157
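             /* map every possible tx queue index onto one of the allocated
              * rings, wrapping round-robin when there are fewer real rings */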
2158         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2159                 int r_idx = i % adapter->num_tx_queues;
2160                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2161         }
2162         return err;
2163 }
2164
2165 /**
2166  * igb_setup_tctl - configure the transmit control registers
2167  * @adapter: Board private structure
2168  **/
2169 void igb_setup_tctl(struct igb_adapter *adapter)
2170 {
2171         struct e1000_hw *hw = &adapter->hw;
2172         u32 tctl;
2173
2174         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2175         wr32(E1000_TXDCTL(0), 0);
2176
2177         /* Program the Transmit Control Register */
2178         tctl = rd32(E1000_TCTL);
2179         tctl &= ~E1000_TCTL_CT;
2180         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2181                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2182
2183         igb_config_collision_dist(hw);
2184
2185         /* Enable transmits */
2186         tctl |= E1000_TCTL_EN;
2187
2188         wr32(E1000_TCTL, tctl);
2189 }
2190
2191 /**
2192  * igb_configure_tx_ring - Configure transmit ring after Reset
2193  * @adapter: board private structure
2194  * @ring: tx ring to configure
2195  *
2196  * Configure a transmit ring after a reset.
2197  **/
2198 void igb_configure_tx_ring(struct igb_adapter *adapter,
2199                            struct igb_ring *ring)
2200 {
2201         struct e1000_hw *hw = &adapter->hw;
2202         u32 txdctl;
2203         u64 tdba = ring->dma;
2204         int reg_idx = ring->reg_idx;
2205
2206         /* disable the queue */
2207         txdctl = rd32(E1000_TXDCTL(reg_idx));
2208         wr32(E1000_TXDCTL(reg_idx),
2209                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2210         wrfl();
2211         mdelay(10);
2212
2213         wr32(E1000_TDLEN(reg_idx),
2214                         ring->count * sizeof(union e1000_adv_tx_desc));
2215         wr32(E1000_TDBAL(reg_idx),
2216                         tdba & 0x00000000ffffffffULL);
2217         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2218
2219         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2220         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2221         writel(0, ring->head);
2222         writel(0, ring->tail);
2223
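             /* program the prefetch, host and write-back thresholds into
              * TXDCTL before re-enabling the queue */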
2224         txdctl |= IGB_TX_PTHRESH;
2225         txdctl |= IGB_TX_HTHRESH << 8;
2226         txdctl |= IGB_TX_WTHRESH << 16;
2227
2228         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2229         wr32(E1000_TXDCTL(reg_idx), txdctl);
2230 }
2231
2232 /**
2233  * igb_configure_tx - Configure transmit Unit after Reset
2234  * @adapter: board private structure
2235  *
2236  * Configure the Tx unit of the MAC after a reset.
2237  **/
2238 static void igb_configure_tx(struct igb_adapter *adapter)
2239 {
2240         int i;
2241
2242         for (i = 0; i < adapter->num_tx_queues; i++)
2243                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2244 }
2245
2246 /**
2247  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2248  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2249  *
2250  * Returns 0 on success, negative on failure
2251  **/
2252 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2253 {
2254         struct pci_dev *pdev = rx_ring->pdev;
2255         int size, desc_len;
2256
2257         size = sizeof(struct igb_buffer) * rx_ring->count;
2258         rx_ring->buffer_info = vmalloc(size);
2259         if (!rx_ring->buffer_info)
2260                 goto err;
2261         memset(rx_ring->buffer_info, 0, size);
2262
2263         desc_len = sizeof(union e1000_adv_rx_desc);
2264
2265         /* Round up to nearest 4K */
2266         rx_ring->size = rx_ring->count * desc_len;
2267         rx_ring->size = ALIGN(rx_ring->size, 4096);
2268
2269         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2270                                              &rx_ring->dma);
2271
2272         if (!rx_ring->desc)
2273                 goto err;
2274
2275         rx_ring->next_to_clean = 0;
2276         rx_ring->next_to_use = 0;
2277
2278         return 0;
2279
2280 err:
2281         vfree(rx_ring->buffer_info);
2282         rx_ring->buffer_info = NULL;
2283         dev_err(&pdev->dev, "Unable to allocate memory for "
2284                 "the receive descriptor ring\n");
2285         return -ENOMEM;
2286 }
2287
2288 /**
2289  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2290  *                                (Descriptors) for all queues
2291  * @adapter: board private structure
2292  *
2293  * Return 0 on success, negative on failure
2294  **/
2295 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2296 {
2297         struct pci_dev *pdev = adapter->pdev;
2298         int i, err = 0;
2299
2300         for (i = 0; i < adapter->num_rx_queues; i++) {
2301                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2302                 if (err) {
2303                         dev_err(&pdev->dev,
2304                                 "Allocation for Rx Queue %u failed\n", i);
2305                         for (i--; i >= 0; i--)
2306                                 igb_free_rx_resources(adapter->rx_ring[i]);
2307                         break;
2308                 }
2309         }
2310
2311         return err;
2312 }
2313
2314 /**
2315  * igb_setup_mrqc - configure the multiple receive queue control registers
2316  * @adapter: Board private structure
2317  **/
2318 static void igb_setup_mrqc(struct igb_adapter *adapter)
2319 {
2320         struct e1000_hw *hw = &adapter->hw;
2321         u32 mrqc, rxcsum;
2322         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2323         union e1000_reta {
2324                 u32 dword;
2325                 u8  bytes[4];
2326         } reta;
2327         static const u8 rsshash[40] = {
2328                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2329                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2330                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2331                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2332
2333         /* Fill out hash function seeds */
2334         for (j = 0; j < 10; j++) {
2335                 u32 rsskey = rsshash[(j * 4)];
2336                 rsskey |= rsshash[(j * 4) + 1] << 8;
2337                 rsskey |= rsshash[(j * 4) + 2] << 16;
2338                 rsskey |= rsshash[(j * 4) + 3] << 24;
2339                 array_wr32(E1000_RSSRK(0), j, rsskey);
2340         }
2341
2342         num_rx_queues = adapter->rss_queues;
2343
2344         if (adapter->vfs_allocated_count) {
2345                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2346                 switch (hw->mac.type) {
2347                 case e1000_82580:
2348                         num_rx_queues = 1;
2349                         shift = 0;
2350                         break;
2351                 case e1000_82576:
2352                         shift = 3;
2353                         num_rx_queues = 2;
2354                         break;
2355                 case e1000_82575:
2356                         shift = 2;
2357                         shift2 = 6;
2358                 default:
2359                         break;
2360                 }
2361         } else {
2362                 if (hw->mac.type == e1000_82575)
2363                         shift = 6;
2364         }
2365
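             /* fill the redirection table one dword (four entries) at a time;
              * each byte selects the Rx queue for one hash bucket, positioned
              * by the shift chosen above for this MAC type */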
2366         for (j = 0; j < (32 * 4); j++) {
2367                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2368                 if (shift2)
2369                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2370                 if ((j & 3) == 3)
2371                         wr32(E1000_RETA(j >> 2), reta.dword);
2372         }
2373
2374         /*
2375          * Disable raw packet checksumming so that RSS hash is placed in
2376          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2377          * offloads as they are enabled by default
2378          */
2379         rxcsum = rd32(E1000_RXCSUM);
2380         rxcsum |= E1000_RXCSUM_PCSD;
2381
2382         if (adapter->hw.mac.type >= e1000_82576)
2383                 /* Enable Receive Checksum Offload for SCTP */
2384                 rxcsum |= E1000_RXCSUM_CRCOFL;
2385
2386         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2387         wr32(E1000_RXCSUM, rxcsum);
2388
2389         /* If VMDq is enabled then we set the appropriate mode for that, else
2390          * we default to RSS so that an RSS hash is calculated per packet even
2391          * if we are only using one queue */
2392         if (adapter->vfs_allocated_count) {
2393                 if (hw->mac.type > e1000_82575) {
2394                         /* Set the default pool for the PF's first queue */
2395                         u32 vtctl = rd32(E1000_VT_CTL);
2396                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2397                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2398                         vtctl |= adapter->vfs_allocated_count <<
2399                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2400                         wr32(E1000_VT_CTL, vtctl);
2401                 }
2402                 if (adapter->rss_queues > 1)
2403                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2404                 else
2405                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2406         } else {
2407                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2408         }
2409         igb_vmm_control(adapter);
2410
2411         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2412                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2413         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2414                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2415         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2416                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2417         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2418                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2419
2420         wr32(E1000_MRQC, mrqc);
2421 }
2422
2423 /**
2424  * igb_setup_rctl - configure the receive control registers
2425  * @adapter: Board private structure
2426  **/
2427 void igb_setup_rctl(struct igb_adapter *adapter)
2428 {
2429         struct e1000_hw *hw = &adapter->hw;
2430         u32 rctl;
2431
2432         rctl = rd32(E1000_RCTL);
2433
2434         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2435         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2436
2437         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2438                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2439
2440         /*
2441          * enable stripping of CRC. It's unlikely this will break BMC
2442          * redirection as it did with e1000. Newer features require
2443          * that the HW strips the CRC.
2444          */
2445         rctl |= E1000_RCTL_SECRC;
2446
2447         /* disable store bad packets and clear size bits. */
2448         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2449
2450         /* enable LPE to prevent packets larger than max_frame_size */
2451         rctl |= E1000_RCTL_LPE;
2452
2453         /* disable queue 0 to prevent tail write w/o re-config */
2454         wr32(E1000_RXDCTL(0), 0);
2455
2456         /* Attention!!!  For SR-IOV PF driver operations you must enable
2457          * queue drop for all VF and PF queues to prevent head of line blocking
2458          * if an un-trusted VF does not provide descriptors to hardware.
2459          */
2460         if (adapter->vfs_allocated_count) {
2461                 /* set all queue drop enable bits */
2462                 wr32(E1000_QDE, ALL_QUEUES);
2463         }
2464
2465         wr32(E1000_RCTL, rctl);
2466 }
2467
2468 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2469                                    int vfn)
2470 {
2471         struct e1000_hw *hw = &adapter->hw;
2472         u32 vmolr;
2473
2474         /* if this is a VF (not the PF), check whether the VF has VLANs
2475          * enabled and increase the size to allow for the vlan tag */
2476         if (vfn < adapter->vfs_allocated_count &&
2477             adapter->vf_data[vfn].vlans_enabled)
2478                 size += VLAN_TAG_SIZE;
2479
2480         vmolr = rd32(E1000_VMOLR(vfn));
2481         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2482         vmolr |= size | E1000_VMOLR_LPE;
2483         wr32(E1000_VMOLR(vfn), vmolr);
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * igb_rlpml_set - set maximum receive packet size
2490  * @adapter: board private structure
2491  *
2492  * Configure maximum receivable packet size.
2493  **/
2494 static void igb_rlpml_set(struct igb_adapter *adapter)
2495 {
2496         u32 max_frame_size = adapter->max_frame_size;
2497         struct e1000_hw *hw = &adapter->hw;
2498         u16 pf_id = adapter->vfs_allocated_count;
2499
2500         if (adapter->vlgrp)
2501                 max_frame_size += VLAN_TAG_SIZE;
2502
2503         /* if vfs are enabled we set RLPML to the largest possible request
2504          * size and set the VMOLR RLPML to the size we need */
2505         if (pf_id) {
2506                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2507                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2508         }
2509
2510         wr32(E1000_RLPML, max_frame_size);
2511 }
2512
2513 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2514                                  int vfn, bool aupe)
2515 {
2516         struct e1000_hw *hw = &adapter->hw;
2517         u32 vmolr;
2518
2519         /*
2520          * This register exists only on 82576 and newer, so on older
2521          * hardware we should exit and do nothing
2522          */
2523         if (hw->mac.type < e1000_82576)
2524                 return;
2525
2526         vmolr = rd32(E1000_VMOLR(vfn));
2527         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2528         if (aupe)
2529                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2530         else
2531                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2532
2533         /* clear all bits that might not be set */
2534         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2535
2536         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2537                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2538         /*
2539          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2540          * multicast packets
2541          */
2542         if (vfn <= adapter->vfs_allocated_count)
2543                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2544
2545         wr32(E1000_VMOLR(vfn), vmolr);
2546 }
2547
2548 /**
2549  * igb_configure_rx_ring - Configure a receive ring after Reset
2550  * @adapter: board private structure
2551  * @ring: receive ring to be configured
2552  *
2553  * Configure the Rx unit of the MAC after a reset.
2554  **/
2555 void igb_configure_rx_ring(struct igb_adapter *adapter,
2556                            struct igb_ring *ring)
2557 {
2558         struct e1000_hw *hw = &adapter->hw;
2559         u64 rdba = ring->dma;
2560         int reg_idx = ring->reg_idx;
2561         u32 srrctl, rxdctl;
2562
2563         /* disable the queue */
2564         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2565         wr32(E1000_RXDCTL(reg_idx),
2566                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2567
2568         /* Set DMA base address registers */
2569         wr32(E1000_RDBAL(reg_idx),
2570              rdba & 0x00000000ffffffffULL);
2571         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2572         wr32(E1000_RDLEN(reg_idx),
2573                        ring->count * sizeof(union e1000_adv_rx_desc));
2574
2575         /* initialize head and tail */
2576         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2577         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2578         writel(0, ring->head);
2579         writel(0, ring->tail);
2580
2581         /* set descriptor configuration */
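             /* buffers smaller than 1K use header split so packet data lands
              * in a half-page; larger buffers use a single one-buffer
              * advanced descriptor */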
2582         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2583                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2584                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2585 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2586                 srrctl |= IGB_RXBUFFER_16384 >>
2587                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2588 #else
2589                 srrctl |= (PAGE_SIZE / 2) >>
2590                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2591 #endif
2592                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2593         } else {
2594                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2595                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2596                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2597         }
2598         /* Only set Drop Enable if we are supporting multiple queues */
2599         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2600                 srrctl |= E1000_SRRCTL_DROP_EN;
2601
2602         wr32(E1000_SRRCTL(reg_idx), srrctl);
2603
2604         /* set filtering for VMDQ pools */
2605         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2606
2607         /* enable receive descriptor fetching */
2608         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2610         rxdctl &= 0xFFF00000;
2611         rxdctl |= IGB_RX_PTHRESH;
2612         rxdctl |= IGB_RX_HTHRESH << 8;
2613         rxdctl |= IGB_RX_WTHRESH << 16;
2614         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2615 }
2616
2617 /**
2618  * igb_configure_rx - Configure receive Unit after Reset
2619  * @adapter: board private structure
2620  *
2621  * Configure the Rx unit of the MAC after a reset.
2622  **/
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2624 {
2625         int i;
2626
2627         /* set UTA to appropriate mode */
2628         igb_set_uta(adapter);
2629
2630         /* set the correct pool for the PF default MAC address in entry 0 */
2631         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632                          adapter->vfs_allocated_count);
2633
2634         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635          * the Base and Length of the Rx Descriptor Ring */
2636         for (i = 0; i < adapter->num_rx_queues; i++)
2637                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2638 }
2639
2640 /**
2641  * igb_free_tx_resources - Free Tx Resources per Queue
2642  * @tx_ring: Tx descriptor ring for a specific queue
2643  *
2644  * Free all transmit software resources
2645  **/
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2647 {
2648         igb_clean_tx_ring(tx_ring);
2649
2650         vfree(tx_ring->buffer_info);
2651         tx_ring->buffer_info = NULL;
2652
2653         /* if not set, then don't free */
2654         if (!tx_ring->desc)
2655                 return;
2656
2657         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658                             tx_ring->desc, tx_ring->dma);
2659
2660         tx_ring->desc = NULL;
2661 }
2662
2663 /**
2664  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665  * @adapter: board private structure
2666  *
2667  * Free all transmit software resources
2668  **/
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2670 {
2671         int i;
2672
2673         for (i = 0; i < adapter->num_tx_queues; i++)
2674                 igb_free_tx_resources(adapter->tx_ring[i]);
2675 }
2676
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678                                     struct igb_buffer *buffer_info)
2679 {
2680         if (buffer_info->dma) {
2681                 if (buffer_info->mapped_as_page)
2682                         pci_unmap_page(tx_ring->pdev,
2683                                         buffer_info->dma,
2684                                         buffer_info->length,
2685                                         PCI_DMA_TODEVICE);
2686                 else
2687                         pci_unmap_single(tx_ring->pdev,
2688                                         buffer_info->dma,
2689                                         buffer_info->length,
2690                                         PCI_DMA_TODEVICE);
2691                 buffer_info->dma = 0;
2692         }
2693         if (buffer_info->skb) {
2694                 dev_kfree_skb_any(buffer_info->skb);
2695                 buffer_info->skb = NULL;
2696         }
2697         buffer_info->time_stamp = 0;
2698         buffer_info->length = 0;
2699         buffer_info->next_to_watch = 0;
2700         buffer_info->mapped_as_page = false;
2701 }
2702
2703 /**
2704  * igb_clean_tx_ring - Free Tx Buffers
2705  * @tx_ring: ring to be cleaned
2706  **/
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2708 {
2709         struct igb_buffer *buffer_info;
2710         unsigned long size;
2711         unsigned int i;
2712
2713         if (!tx_ring->buffer_info)
2714                 return;
2715         /* Free all the Tx ring sk_buffs */
2716
2717         for (i = 0; i < tx_ring->count; i++) {
2718                 buffer_info = &tx_ring->buffer_info[i];
2719                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2720         }
2721
2722         size = sizeof(struct igb_buffer) * tx_ring->count;
2723         memset(tx_ring->buffer_info, 0, size);
2724
2725         /* Zero out the descriptor ring */
2726         memset(tx_ring->desc, 0, tx_ring->size);
2727
2728         tx_ring->next_to_use = 0;
2729         tx_ring->next_to_clean = 0;
2730 }
2731
2732 /**
2733  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734  * @adapter: board private structure
2735  **/
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2737 {
2738         int i;
2739
2740         for (i = 0; i < adapter->num_tx_queues; i++)
2741                 igb_clean_tx_ring(adapter->tx_ring[i]);
2742 }
2743
2744 /**
2745  * igb_free_rx_resources - Free Rx Resources
2746  * @rx_ring: ring to clean the resources from
2747  *
2748  * Free all receive software resources
2749  **/
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2751 {
2752         igb_clean_rx_ring(rx_ring);
2753
2754         vfree(rx_ring->buffer_info);
2755         rx_ring->buffer_info = NULL;
2756
2757         /* if not set, then don't free */
2758         if (!rx_ring->desc)
2759                 return;
2760
2761         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762                             rx_ring->desc, rx_ring->dma);
2763
2764         rx_ring->desc = NULL;
2765 }
2766
2767 /**
2768  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769  * @adapter: board private structure
2770  *
2771  * Free all receive software resources
2772  **/
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2774 {
2775         int i;
2776
2777         for (i = 0; i < adapter->num_rx_queues; i++)
2778                 igb_free_rx_resources(adapter->rx_ring[i]);
2779 }
2780
2781 /**
2782  * igb_clean_rx_ring - Free Rx Buffers per Queue
2783  * @rx_ring: ring to free buffers from
2784  **/
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2786 {
2787         struct igb_buffer *buffer_info;
2788         unsigned long size;
2789         unsigned int i;
2790
2791         if (!rx_ring->buffer_info)
2792                 return;
2793
2794         /* Free all the Rx ring sk_buffs */
2795         for (i = 0; i < rx_ring->count; i++) {
2796                 buffer_info = &rx_ring->buffer_info[i];
2797                 if (buffer_info->dma) {
2798                         pci_unmap_single(rx_ring->pdev,
2799                                          buffer_info->dma,
2800                                          rx_ring->rx_buffer_len,
2801                                          PCI_DMA_FROMDEVICE);
2802                         buffer_info->dma = 0;
2803                 }
2804
2805                 if (buffer_info->skb) {
2806                         dev_kfree_skb(buffer_info->skb);
2807                         buffer_info->skb = NULL;
2808                 }
2809                 if (buffer_info->page_dma) {
2810                         pci_unmap_page(rx_ring->pdev,
2811                                        buffer_info->page_dma,
2812                                        PAGE_SIZE / 2,
2813                                        PCI_DMA_FROMDEVICE);
2814                         buffer_info->page_dma = 0;
2815                 }
2816                 if (buffer_info->page) {
2817                         put_page(buffer_info->page);
2818                         buffer_info->page = NULL;
2819                         buffer_info->page_offset = 0;
2820                 }
2821         }
2822
2823         size = sizeof(struct igb_buffer) * rx_ring->count;
2824         memset(rx_ring->buffer_info, 0, size);
2825
2826         /* Zero out the descriptor ring */
2827         memset(rx_ring->desc, 0, rx_ring->size);
2828
2829         rx_ring->next_to_clean = 0;
2830         rx_ring->next_to_use = 0;
2831 }
2832
2833 /**
2834  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835  * @adapter: board private structure
2836  **/
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2838 {
2839         int i;
2840
2841         for (i = 0; i < adapter->num_rx_queues; i++)
2842                 igb_clean_rx_ring(adapter->rx_ring[i]);
2843 }
2844
2845 /**
2846  * igb_set_mac - Change the Ethernet Address of the NIC
2847  * @netdev: network interface device structure
2848  * @p: pointer to an address structure
2849  *
2850  * Returns 0 on success, negative on failure
2851  **/
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2853 {
2854         struct igb_adapter *adapter = netdev_priv(netdev);
2855         struct e1000_hw *hw = &adapter->hw;
2856         struct sockaddr *addr = p;
2857
2858         if (!is_valid_ether_addr(addr->sa_data))
2859                 return -EADDRNOTAVAIL;
2860
2861         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2863
2864         /* set the correct pool for the new PF MAC address in entry 0 */
2865         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866                          adapter->vfs_allocated_count);
2867
2868         return 0;
2869 }
2870
2871 /**
2872  * igb_write_mc_addr_list - write multicast addresses to MTA
2873  * @netdev: network interface device structure
2874  *
2875  * Writes multicast address list to the MTA hash table.
2876  * Returns: -ENOMEM on failure
2877  *                0 on no addresses written
2878  *                X on writing X addresses to MTA
2879  **/
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2881 {
2882         struct igb_adapter *adapter = netdev_priv(netdev);
2883         struct e1000_hw *hw = &adapter->hw;
2884         struct dev_mc_list *mc_ptr = netdev->mc_list;
2885         u8  *mta_list;
2886         u32 vmolr = 0;
2887         int i;
2888
2889         if (netdev_mc_empty(netdev)) {
2890                 /* nothing to program, so clear mc list */
2891                 igb_update_mc_addr_list(hw, NULL, 0);
2892                 igb_restore_vf_multicasts(adapter);
2893                 return 0;
2894         }
2895
2896         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2897         if (!mta_list)
2898                 return -ENOMEM;
2899
2900         /* set vmolr receive overflow multicast bit */
2901         vmolr |= E1000_VMOLR_ROMPE;
2902
2903         /* The shared function expects a packed array of only addresses. */
2904         mc_ptr = netdev->mc_list;
2905
2906         for (i = 0; i < netdev_mc_count(netdev); i++) {
2907                 if (!mc_ptr)
2908                         break;
2909                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2910                 mc_ptr = mc_ptr->next;
2911         }
2912         igb_update_mc_addr_list(hw, mta_list, i);
2913         kfree(mta_list);
2914
2915         return netdev_mc_count(netdev);
2916 }
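
/*
 * Illustrative sketch (not part of the driver): the shared code expects the
 * multicast addresses above as a single packed array of ETH_ALEN-byte
 * entries, i.e. address i occupies bytes [i * ETH_ALEN, (i + 1) * ETH_ALEN).
 * A hypothetical helper copying one entry into such an array, mirroring the
 * loop in igb_write_mc_addr_list():
 */
static inline void igb_example_pack_mc_addr(u8 *mta_list, int i, const u8 *addr)
{
        /* entry i starts at byte offset i * ETH_ALEN in the packed array */
        memcpy(mta_list + (i * ETH_ALEN), addr, ETH_ALEN);
}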
2917
2918 /**
2919  * igb_write_uc_addr_list - write unicast addresses to RAR table
2920  * @netdev: network interface device structure
2921  *
2922  * Writes unicast address list to the RAR table.
2923  * Returns: -ENOMEM on failure/insufficient address space
2924  *                0 on no addresses written
2925  *                X on writing X addresses to the RAR table
2926  **/
2927 static int igb_write_uc_addr_list(struct net_device *netdev)
2928 {
2929         struct igb_adapter *adapter = netdev_priv(netdev);
2930         struct e1000_hw *hw = &adapter->hw;
2931         unsigned int vfn = adapter->vfs_allocated_count;
2932         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2933         int count = 0;
2934
2935         /* return ENOMEM indicating insufficient memory for addresses */
2936         if (netdev_uc_count(netdev) > rar_entries)
2937                 return -ENOMEM;
2938
2939         if (!netdev_uc_empty(netdev) && rar_entries) {
2940                 struct netdev_hw_addr *ha;
2941
2942                 netdev_for_each_uc_addr(ha, netdev) {
2943                         if (!rar_entries)
2944                                 break;
2945                         igb_rar_set_qsel(adapter, ha->addr,
2946                                          rar_entries--,
2947                                          vfn);
2948                         count++;
2949                 }
2950         }
2951         /* write the addresses in reverse order to avoid write combining */
2952         for (; rar_entries > 0 ; rar_entries--) {
2953                 wr32(E1000_RAH(rar_entries), 0);
2954                 wr32(E1000_RAL(rar_entries), 0);
2955         }
2956         wrfl();
2957
2958         return count;
2959 }
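
/*
 * Worked example (illustrative only): the unicast filter budget above is
 * rar_entry_count - (vfn + 1), i.e. every RAR entry except the one holding
 * the PF MAC (entry 0) and the vfn entries reserved for the virtual
 * functions.  A hypothetical helper mirroring that arithmetic:
 */
static inline unsigned int igb_example_uc_rar_budget(struct e1000_hw *hw,
                                                     unsigned int vfn)
{
        /* RAR entries left over for secondary unicast addresses */
        return hw->mac.rar_entry_count - (vfn + 1);
}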
2960
2961 /**
2962  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2963  * @netdev: network interface device structure
2964  *
2965  * The set_rx_mode entry point is called whenever the unicast or multicast
2966  * address lists or the network interface flags are updated.  This routine is
2967  * responsible for configuring the hardware for proper unicast, multicast,
2968  * promiscuous mode, and all-multi behavior.
2969  **/
2970 static void igb_set_rx_mode(struct net_device *netdev)
2971 {
2972         struct igb_adapter *adapter = netdev_priv(netdev);
2973         struct e1000_hw *hw = &adapter->hw;
2974         unsigned int vfn = adapter->vfs_allocated_count;
2975         u32 rctl, vmolr = 0;
2976         int count;
2977
2978         /* Check for Promiscuous and All Multicast modes */
2979         rctl = rd32(E1000_RCTL);
2980
2981         /* clear the affected bits */
2982         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2983
2984         if (netdev->flags & IFF_PROMISC) {
2985                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2986                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2987         } else {
2988                 if (netdev->flags & IFF_ALLMULTI) {
2989                         rctl |= E1000_RCTL_MPE;
2990                         vmolr |= E1000_VMOLR_MPME;
2991                 } else {
2992                         /*
2993                          * Write addresses to the MTA; if the attempt fails,
2994                          * then we should just turn on promiscuous mode so
2995                          * that we can at least receive multicast traffic
2996                          */
2997                         count = igb_write_mc_addr_list(netdev);
2998                         if (count < 0) {
2999                                 rctl |= E1000_RCTL_MPE;
3000                                 vmolr |= E1000_VMOLR_MPME;
3001                         } else if (count) {
3002                                 vmolr |= E1000_VMOLR_ROMPE;
3003                         }
3004                 }
3005                 /*
3006                  * Write addresses to available RAR registers; if there is not
3007                  * sufficient space to store all the addresses, then enable
3008                  * unicast promiscuous mode
3009                  */
3010                 count = igb_write_uc_addr_list(netdev);
3011                 if (count < 0) {
3012                         rctl |= E1000_RCTL_UPE;
3013                         vmolr |= E1000_VMOLR_ROPE;
3014                 }
3015                 rctl |= E1000_RCTL_VFE;
3016         }
3017         wr32(E1000_RCTL, rctl);
3018
3019         /*
3020          * In order to support SR-IOV and eventually VMDq it is necessary to set
3021          * the VMOLR to enable the appropriate modes.  Without this workaround
3022          * we will have issues with VLAN tag stripping not being done for frames
3023          * that are only arriving because we are the default pool
3024          */
3025         if (hw->mac.type < e1000_82576)
3026                 return;
3027
3028         vmolr |= rd32(E1000_VMOLR(vfn)) &
3029                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3030         wr32(E1000_VMOLR(vfn), vmolr);
3031         igb_restore_vf_multicasts(adapter);
3032 }
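
/*
 * Summary of igb_set_rx_mode() above (descriptive only):
 *   IFF_PROMISC          -> RCTL UPE | MPE set, VMOLR ROPE | MPME set
 *   IFF_ALLMULTI         -> RCTL MPE set, VMOLR MPME set
 *   MC list write fails  -> RCTL MPE, VMOLR MPME (fall back to multicast promisc)
 *   UC list write fails  -> RCTL UPE, VMOLR ROPE (fall back to unicast promisc)
 *   otherwise            -> exact MTA/RAR filtering with VLAN filtering (VFE) on
 * On 82576 and later the default pool's VMOLR is also updated so VLAN tag
 * stripping keeps working for frames received only via the default pool.
 */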
3033
3034 /* Need to wait a few seconds after link up to get diagnostic information from
3035  * the phy */
3036 static void igb_update_phy_info(unsigned long data)
3037 {
3038         struct igb_adapter *adapter = (struct igb_adapter *) data;
3039         igb_get_phy_info(&adapter->hw);
3040 }
3041
3042 /**
3043  * igb_has_link - check shared code for link and determine up/down
3044  * @adapter: pointer to driver private info
3045  **/
3046 bool igb_has_link(struct igb_adapter *adapter)
3047 {
3048         struct e1000_hw *hw = &adapter->hw;
3049         bool link_active = false;
3050         s32 ret_val = 0;
3051
3052         /* get_link_status is set on LSC (link status change) interrupt or
3053          * rx sequence error interrupt.  get_link_status stays set
3054          * until e1000_check_for_link establishes link, for
3055          * copper adapters ONLY
3056          */
3057         switch (hw->phy.media_type) {
3058         case e1000_media_type_copper:
3059                 if (hw->mac.get_link_status) {
3060                         ret_val = hw->mac.ops.check_for_link(hw);
3061                         link_active = !hw->mac.get_link_status;
3062                 } else {
3063                         link_active = true;
3064                 }
3065                 break;
3066         case e1000_media_type_internal_serdes:
3067                 ret_val = hw->mac.ops.check_for_link(hw);
3068                 link_active = hw->mac.serdes_has_link;
3069                 break;
3070         default:
3071         case e1000_media_type_unknown:
3072                 break;
3073         }
3074
3075         return link_active;
3076 }
3077
3078 /**
3079  * igb_watchdog - Timer Call-back
3080  * @data: pointer to adapter cast into an unsigned long
3081  **/
3082 static void igb_watchdog(unsigned long data)
3083 {
3084         struct igb_adapter *adapter = (struct igb_adapter *)data;
3085         /* Do the rest outside of interrupt context */
3086         schedule_work(&adapter->watchdog_task);
3087 }
3088
3089 static void igb_watchdog_task(struct work_struct *work)
3090 {
3091         struct igb_adapter *adapter = container_of(work,
3092                                                    struct igb_adapter,
3093                                                    watchdog_task);
3094         struct e1000_hw *hw = &adapter->hw;
3095         struct net_device *netdev = adapter->netdev;
3096         u32 link;
3097         int i;
3098
3099         link = igb_has_link(adapter);
3100         if (link) {
3101                 if (!netif_carrier_ok(netdev)) {
3102                         u32 ctrl;
3103                         hw->mac.ops.get_speed_and_duplex(hw,
3104                                                          &adapter->link_speed,
3105                                                          &adapter->link_duplex);
3106
3107                         ctrl = rd32(E1000_CTRL);
3108                         /* Link status message must follow this format */
3109                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3110                                  "Flow Control: %s\n",
3111                                netdev->name,
3112                                adapter->link_speed,
3113                                adapter->link_duplex == FULL_DUPLEX ?
3114                                  "Full Duplex" : "Half Duplex",
3115                                ((ctrl & E1000_CTRL_TFCE) &&
3116                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3117                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3118                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3119
3120                         /* tweak tx_queue_len according to speed/duplex and
3121                          * adjust the timeout factor */
3122                         netdev->tx_queue_len = adapter->tx_queue_len;
3123                         adapter->tx_timeout_factor = 1;
3124                         switch (adapter->link_speed) {
3125                         case SPEED_10:
3126                                 netdev->tx_queue_len = 10;
3127                                 adapter->tx_timeout_factor = 14;
3128                                 break;
3129                         case SPEED_100:
3130                                 netdev->tx_queue_len = 100;
3131                                 /* maybe add some timeout factor ? */
3132                                 break;
3133                         }
3134
3135                         netif_carrier_on(netdev);
3136
3137                         igb_ping_all_vfs(adapter);
3138
3139                         /* link state has changed, schedule phy info update */
3140                         if (!test_bit(__IGB_DOWN, &adapter->state))
3141                                 mod_timer(&adapter->phy_info_timer,
3142                                           round_jiffies(jiffies + 2 * HZ));
3143                 }
3144         } else {
3145                 if (netif_carrier_ok(netdev)) {
3146                         adapter->link_speed = 0;
3147                         adapter->link_duplex = 0;
3148                         /* Link status message must follow this format */
3149                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3150                                netdev->name);
3151                         netif_carrier_off(netdev);
3152
3153                         igb_ping_all_vfs(adapter);
3154
3155                         /* link state has changed, schedule phy info update */
3156                         if (!test_bit(__IGB_DOWN, &adapter->state))
3157                                 mod_timer(&adapter->phy_info_timer,
3158                                           round_jiffies(jiffies + 2 * HZ));
3159                 }
3160         }
3161
3162         igb_update_stats(adapter);
3163
3164         for (i = 0; i < adapter->num_tx_queues; i++) {
3165                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3166                 if (!netif_carrier_ok(netdev)) {
3167                         /* We've lost link, so the controller stops DMA,
3168                          * but we've got queued Tx work that's never going
3169                          * to get done, so reset controller to flush Tx.
3170                          * (Do the reset outside of interrupt context). */
3171                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3172                                 adapter->tx_timeout_count++;
3173                                 schedule_work(&adapter->reset_task);
3174                                 /* return immediately since reset is imminent */
3175                                 return;
3176                         }
3177                 }
3178
3179                 /* Force detection of hung controller every watchdog period */
3180                 tx_ring->detect_tx_hung = true;
3181         }
3182
3183         /* Cause software interrupt to ensure rx ring is cleaned */
3184         if (adapter->msix_entries) {
3185                 u32 eics = 0;
3186                 for (i = 0; i < adapter->num_q_vectors; i++) {
3187                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3188                         eics |= q_vector->eims_value;
3189                 }
3190                 wr32(E1000_EICS, eics);
3191         } else {
3192                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3193         }
3194
3195         /* Reset the timer */
3196         if (!test_bit(__IGB_DOWN, &adapter->state))
3197                 mod_timer(&adapter->watchdog_timer,
3198                           round_jiffies(jiffies + 2 * HZ));
3199 }
3200
3201 enum latency_range {
3202         lowest_latency = 0,
3203         low_latency = 1,
3204         bulk_latency = 2,
3205         latency_invalid = 255
3206 };
3207
3208 /**
3209  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3210  *
3211  *      Stores a new ITR value based strictly on packet size.  This
3212  *      algorithm is less sophisticated than that used in igb_update_itr,
3213  *      due to the difficulty of synchronizing statistics across multiple
3214  *      receive rings.  The divisors and thresholds used by this function
3215  *      were determined based on theoretical maximum wire speed and testing
3216  *      data, in order to minimize response time while increasing bulk
3217  *      throughput.
3218  *      This functionality is controlled by the InterruptThrottleRate module
3219  *      parameter (see igb_param.c)
3220  *      NOTE:  This function is called only when operating in a multiqueue
3221  *             receive environment.
3222  * @q_vector: pointer to q_vector
3223  **/
3224 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3225 {
3226         int new_val = q_vector->itr_val;
3227         int avg_wire_size = 0;
3228         struct igb_adapter *adapter = q_vector->adapter;
3229
3230         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3231          * ints/sec (an itr_val of 976).
3232          */
3233         if (adapter->link_speed != SPEED_1000) {
3234                 new_val = 976;
3235                 goto set_itr_val;
3236         }
3237
3238         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3239                 struct igb_ring *ring = q_vector->rx_ring;
3240                 avg_wire_size = ring->total_bytes / ring->total_packets;
3241         }
3242
3243         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3244                 struct igb_ring *ring = q_vector->tx_ring;
3245                 avg_wire_size = max_t(u32, avg_wire_size,
3246                                       (ring->total_bytes /
3247                                        ring->total_packets));
3248         }
3249
3250         /* if avg_wire_size isn't set no work was done */
3251         if (!avg_wire_size)
3252                 goto clear_counts;
3253
3254         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3255         avg_wire_size += 24;
3256
3257         /* Don't starve jumbo frames */
3258         avg_wire_size = min(avg_wire_size, 3000);
3259
3260         /* Give a little boost to mid-size frames */
3261         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3262                 new_val = avg_wire_size / 3;
3263         else
3264                 new_val = avg_wire_size / 2;
3265
3266         /* when in itr mode 3 do not exceed 20K ints/sec */
3267         if (adapter->rx_itr_setting == 3 && new_val < 196)
3268                 new_val = 196;
3269
3270 set_itr_val:
3271         if (new_val != q_vector->itr_val) {
3272                 q_vector->itr_val = new_val;
3273                 q_vector->set_itr = 1;
3274         }
3275 clear_counts:
3276         if (q_vector->rx_ring) {
3277                 q_vector->rx_ring->total_bytes = 0;
3278                 q_vector->rx_ring->total_packets = 0;
3279         }
3280         if (q_vector->tx_ring) {
3281                 q_vector->tx_ring->total_bytes = 0;
3282                 q_vector->tx_ring->total_packets = 0;
3283         }
3284 }
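
/*
 * Worked example for igb_update_ring_itr() above (illustrative only):
 * suppose a service interval moved 100 packets totalling 57600 bytes, so
 * avg_wire_size = 576.  Adding 24 bytes of CRC/preamble/gap overhead gives
 * 600, which falls in the 300..1200 "mid-size" band, so the new itr_val is
 * 600 / 3 = 200.  A 1514-byte bulk workload instead gives (1514 + 24) / 2 =
 * 769, i.e. a slower interrupt rate for larger frames.
 */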
3285
3286 /**
3287  * igb_update_itr - update the dynamic ITR value based on statistics
3288  *      Stores a new ITR value based on packets and byte
3289  *      counts during the last interrupt.  The advantage of per interrupt
3290  *      computation is faster updates and more accurate ITR for the current
3291  *      traffic pattern.  Constants in this function were computed
3292  *      based on theoretical maximum wire speed and thresholds were set based
3293  *      on testing data as well as attempting to minimize response time
3294  *      while increasing bulk throughput.
3295  *      This functionality is controlled by the InterruptThrottleRate module
3296  *      parameter (see igb_param.c)
3297  *      NOTE:  These calculations are only valid when operating in a single-
3298  *             queue environment.
3299  * @adapter: pointer to adapter
3300  * @itr_setting: current q_vector->itr_val
3301  * @packets: the number of packets during this measurement interval
3302  * @bytes: the number of bytes during this measurement interval
3303  **/
3304 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3305                                    int packets, int bytes)
3306 {
3307         unsigned int retval = itr_setting;
3308
3309         if (packets == 0)
3310                 goto update_itr_done;
3311
3312         switch (itr_setting) {
3313         case lowest_latency:
3314                 /* handle TSO and jumbo frames */
3315                 if (bytes/packets > 8000)
3316                         retval = bulk_latency;
3317                 else if ((packets < 5) && (bytes > 512))
3318                         retval = low_latency;
3319                 break;
3320         case low_latency:  /* 50 usec aka 20000 ints/s */
3321                 if (bytes > 10000) {
3322                         /* this if handles the TSO accounting */
3323                         if (bytes/packets > 8000) {
3324                                 retval = bulk_latency;
3325                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3326                                 retval = bulk_latency;
3327                         } else if ((packets > 35)) {
3328                                 retval = lowest_latency;
3329                         }
3330                 } else if (bytes/packets > 2000) {
3331                         retval = bulk_latency;
3332                 } else if (packets <= 2 && bytes < 512) {
3333                         retval = lowest_latency;
3334                 }
3335                 break;
3336         case bulk_latency: /* 250 usec aka 4000 ints/s */
3337                 if (bytes > 25000) {
3338                         if (packets > 35)
3339                                 retval = low_latency;
3340                 } else if (bytes < 1500) {
3341                         retval = low_latency;
3342                 }
3343                 break;
3344         }
3345
3346 update_itr_done:
3347         return retval;
3348 }
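
/*
 * Worked example for igb_update_itr() above (illustrative only): starting
 * from bulk_latency, an interval that moved 40 packets / 30000 bytes takes
 * the bytes > 25000 && packets > 35 branch and steps up to low_latency;
 * from low_latency, 20 packets / 24000 bytes (1200 bytes/packet) matches
 * none of the promotion or demotion tests and the class is left unchanged.
 */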
3349
3350 static void igb_set_itr(struct igb_adapter *adapter)
3351 {
3352         struct igb_q_vector *q_vector = adapter->q_vector[0];
3353         u16 current_itr;
3354         u32 new_itr = q_vector->itr_val;
3355
3356         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3357         if (adapter->link_speed != SPEED_1000) {
3358                 current_itr = 0;
3359                 new_itr = 4000;
3360                 goto set_itr_now;
3361         }
3362
3363         adapter->rx_itr = igb_update_itr(adapter,
3364                                     adapter->rx_itr,
3365                                     q_vector->rx_ring->total_packets,
3366                                     q_vector->rx_ring->total_bytes);
3367
3368         adapter->tx_itr = igb_update_itr(adapter,
3369                                     adapter->tx_itr,
3370                                     q_vector->tx_ring->total_packets,
3371                                     q_vector->tx_ring->total_bytes);
3372         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3373
3374         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3375         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3376                 current_itr = low_latency;
3377
3378         switch (current_itr) {
3379         /* counts and packets in update_itr are dependent on these numbers */
3380         case lowest_latency:
3381                 new_itr = 56;  /* aka 70,000 ints/sec */
3382                 break;
3383         case low_latency:
3384                 new_itr = 196; /* aka 20,000 ints/sec */
3385                 break;
3386         case bulk_latency:
3387                 new_itr = 980; /* aka 4,000 ints/sec */
3388                 break;
3389         default:
3390                 break;
3391         }
3392
3393 set_itr_now:
3394         q_vector->rx_ring->total_bytes = 0;
3395         q_vector->rx_ring->total_packets = 0;
3396         q_vector->tx_ring->total_bytes = 0;
3397         q_vector->tx_ring->total_packets = 0;
3398
3399         if (new_itr != q_vector->itr_val) {
3400                 /* this attempts to bias the interrupt rate towards Bulk
3401                  * by adding intermediate steps when interrupt rate is
3402                  * increasing */
3403                 new_itr = new_itr > q_vector->itr_val ?
3404                              max((new_itr * q_vector->itr_val) /
3405                                  (new_itr + (q_vector->itr_val >> 2)),
3406                                  new_itr) :
3407                              new_itr;
3408                 /* Don't write the value here; it resets the adapter's
3409                  * internal timer, and causes us to delay far longer than
3410                  * we should between interrupts.  Instead, we write the ITR
3411                  * value at the beginning of the next interrupt so the timing
3412                  * ends up being correct.
3413                  */
3414                 q_vector->itr_val = new_itr;
3415                 q_vector->set_itr = 1;
3416         }
3417
3418         return;
3419 }
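
/*
 * Worked example for igb_set_itr() above (illustrative only): if the rx path
 * classified the last interval as lowest_latency (0) and the tx path as
 * bulk_latency (2), then current_itr = max(0, 2) = bulk_latency and the
 * target becomes 980 (about 4,000 ints/sec).  With InterruptThrottleRate
 * mode 3 (conservative), a lowest_latency result is first demoted to
 * low_latency.
 */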
3420
3421 #define IGB_TX_FLAGS_CSUM               0x00000001
3422 #define IGB_TX_FLAGS_VLAN               0x00000002
3423 #define IGB_TX_FLAGS_TSO                0x00000004
3424 #define IGB_TX_FLAGS_IPV4               0x00000008
3425 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3426 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3427 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3428
3429 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3430                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3431 {
3432         struct e1000_adv_tx_context_desc *context_desc;
3433         unsigned int i;
3434         int err;
3435         struct igb_buffer *buffer_info;
3436         u32 info = 0, tu_cmd = 0;
3437         u32 mss_l4len_idx, l4len;
3438         *hdr_len = 0;
3439
3440         if (skb_header_cloned(skb)) {
3441                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3442                 if (err)
3443                         return err;
3444         }
3445
3446         l4len = tcp_hdrlen(skb);
3447         *hdr_len += l4len;
3448
3449         if (skb->protocol == htons(ETH_P_IP)) {
3450                 struct iphdr *iph = ip_hdr(skb);
3451                 iph->tot_len = 0;
3452                 iph->check = 0;
3453                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3454                                                          iph->daddr, 0,
3455                                                          IPPROTO_TCP,
3456                                                          0);
3457         } else if (skb_is_gso_v6(skb)) {
3458                 ipv6_hdr(skb)->payload_len = 0;
3459                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3460                                                        &ipv6_hdr(skb)->daddr,
3461                                                        0, IPPROTO_TCP, 0);
3462         }
3463
3464         i = tx_ring->next_to_use;
3465
3466         buffer_info = &tx_ring->buffer_info[i];
3467         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3468         /* VLAN MACLEN IPLEN */
3469         if (tx_flags & IGB_TX_FLAGS_VLAN)
3470                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3471         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3472         *hdr_len += skb_network_offset(skb);
3473         info |= skb_network_header_len(skb);
3474         *hdr_len += skb_network_header_len(skb);
3475         context_desc->vlan_macip_lens = cpu_to_le32(info);
3476
3477         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3478         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3479
3480         if (skb->protocol == htons(ETH_P_IP))
3481                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3482         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3483
3484         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3485
3486         /* MSS L4LEN IDX */
3487         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3488         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3489
3490         /* For 82575, context index must be unique per ring. */
3491         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3492                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3493
3494         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3495         context_desc->seqnum_seed = 0;
3496
3497         buffer_info->time_stamp = jiffies;
3498         buffer_info->next_to_watch = i;
3499         buffer_info->dma = 0;
3500         i++;
3501         if (i == tx_ring->count)
3502                 i = 0;
3503
3504         tx_ring->next_to_use = i;
3505
3506         return true;
3507 }
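
/*
 * Worked example for the hdr_len computed above (illustrative only): for an
 * untagged IPv4 TCP frame with no options, hdr_len is the TCP header length
 * (20) plus the network offset (14-byte Ethernet header) plus the network
 * header length (20), i.e. 54 bytes; the advanced context descriptor carries
 * the same MACLEN/IPLEN split along with gso_size and l4len.
 */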
3508
3509 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3510                                    struct sk_buff *skb, u32 tx_flags)
3511 {
3512         struct e1000_adv_tx_context_desc *context_desc;
3513         struct pci_dev *pdev = tx_ring->pdev;
3514         struct igb_buffer *buffer_info;
3515         u32 info = 0, tu_cmd = 0;
3516         unsigned int i;
3517
3518         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3519             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3520                 i = tx_ring->next_to_use;
3521                 buffer_info = &tx_ring->buffer_info[i];
3522                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3523
3524                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3525                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3526
3527                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3528                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3529                         info |= skb_network_header_len(skb);
3530
3531                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3532
3533                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3534
3535                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3536                         __be16 protocol;
3537
3538                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3539                                 const struct vlan_ethhdr *vhdr =
3540                                           (const struct vlan_ethhdr*)skb->data;
3541
3542                                 protocol = vhdr->h_vlan_encapsulated_proto;
3543                         } else {
3544                                 protocol = skb->protocol;
3545                         }
3546
3547                         switch (protocol) {
3548                         case cpu_to_be16(ETH_P_IP):
3549                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3550                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3551                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3552                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3553                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3554                                 break;
3555                         case cpu_to_be16(ETH_P_IPV6):
3556                                 /* XXX what about other V6 headers?? */
3557                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3558                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3559                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3560                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3561                                 break;
3562                         default:
3563                                 if (unlikely(net_ratelimit()))
3564                                         dev_warn(&pdev->dev,
3565                                             "partial checksum but proto=%x!\n",
3566                                             skb->protocol);
3567                                 break;
3568                         }
3569                 }
3570
3571                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3572                 context_desc->seqnum_seed = 0;
3573                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3574                         context_desc->mss_l4len_idx =
3575                                 cpu_to_le32(tx_ring->reg_idx << 4);
3576
3577                 buffer_info->time_stamp = jiffies;
3578                 buffer_info->next_to_watch = i;
3579                 buffer_info->dma = 0;
3580
3581                 i++;
3582                 if (i == tx_ring->count)
3583                         i = 0;
3584                 tx_ring->next_to_use = i;
3585
3586                 return true;
3587         }
3588         return false;
3589 }
3590
3591 #define IGB_MAX_TXD_PWR 16
3592 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3593
3594 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3595                                  unsigned int first)
3596 {
3597         struct igb_buffer *buffer_info;
3598         struct pci_dev *pdev = tx_ring->pdev;
3599         unsigned int len = skb_headlen(skb);
3600         unsigned int count = 0, i;
3601         unsigned int f;
3602
3603         i = tx_ring->next_to_use;
3604
3605         buffer_info = &tx_ring->buffer_info[i];
3606         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3607         buffer_info->length = len;
3608         /* set time_stamp *before* dma to help avoid a possible race */
3609         buffer_info->time_stamp = jiffies;
3610         buffer_info->next_to_watch = i;
3611         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3612                                           PCI_DMA_TODEVICE);
3613         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3614                 goto dma_error;
3615
3616         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3617                 struct skb_frag_struct *frag;
3618
3619                 count++;
3620                 i++;
3621                 if (i == tx_ring->count)
3622                         i = 0;
3623
3624                 frag = &skb_shinfo(skb)->frags[f];
3625                 len = frag->size;
3626
3627                 buffer_info = &tx_ring->buffer_info[i];
3628                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3629                 buffer_info->length = len;
3630                 buffer_info->time_stamp = jiffies;
3631                 buffer_info->next_to_watch = i;
3632                 buffer_info->mapped_as_page = true;
3633                 buffer_info->dma = pci_map_page(pdev,
3634                                                 frag->page,
3635                                                 frag->page_offset,
3636                                                 len,
3637                                                 PCI_DMA_TODEVICE);
3638                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3639                         goto dma_error;
3640
3641         }
3642
3643         tx_ring->buffer_info[i].skb = skb;
3644         tx_ring->buffer_info[first].next_to_watch = i;
3645
3646         return ++count;
3647
3648 dma_error:
3649         dev_err(&pdev->dev, "TX DMA map failed\n");
3650
3651         /* clear timestamp and dma mappings for failed buffer_info mapping */
3652         buffer_info->dma = 0;
3653         buffer_info->time_stamp = 0;
3654         buffer_info->length = 0;
3655         buffer_info->next_to_watch = 0;
3656         buffer_info->mapped_as_page = false;
3657         count--;
3658
3659         /* clear timestamp and dma mappings for remaining portion of packet */
3660         while (count >= 0) {
3661                 count--;
3662                 i--;
3663                 if (i < 0)
3664                         i += tx_ring->count;
3665                 buffer_info = &tx_ring->buffer_info[i];
3666                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3667         }
3668
3669         return 0;
3670 }
3671
3672 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3673                                     int tx_flags, int count, u32 paylen,
3674                                     u8 hdr_len)
3675 {
3676         union e1000_adv_tx_desc *tx_desc;
3677         struct igb_buffer *buffer_info;
3678         u32 olinfo_status = 0, cmd_type_len;
3679         unsigned int i = tx_ring->next_to_use;
3680
3681         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3682                         E1000_ADVTXD_DCMD_DEXT);
3683
3684         if (tx_flags & IGB_TX_FLAGS_VLAN)
3685                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3686
3687         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3688                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3689
3690         if (tx_flags & IGB_TX_FLAGS_TSO) {
3691                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3692
3693                 /* insert tcp checksum */
3694                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3695
3696                 /* insert ip checksum */
3697                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3698                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3699
3700         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3701                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3702         }
3703
3704         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3705             (tx_flags & (IGB_TX_FLAGS_CSUM |
3706                          IGB_TX_FLAGS_TSO |
3707                          IGB_TX_FLAGS_VLAN)))
3708                 olinfo_status |= tx_ring->reg_idx << 4;
3709
3710         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3711
3712         do {
3713                 buffer_info = &tx_ring->buffer_info[i];
3714                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3715                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3716                 tx_desc->read.cmd_type_len =
3717                         cpu_to_le32(cmd_type_len | buffer_info->length);
3718                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3719                 count--;
3720                 i++;
3721                 if (i == tx_ring->count)
3722                         i = 0;
3723         } while (count > 0);
3724
3725         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3726         /* Force memory writes to complete before letting h/w
3727          * know there are new descriptors to fetch.  (Only
3728          * applicable for weak-ordered memory model archs,
3729          * such as IA-64). */
3730         wmb();
3731
3732         tx_ring->next_to_use = i;
3733         writel(i, tx_ring->tail);
3734         /* we need this if more than one processor can write to our tail
3735          * at a time; it synchronizes IO on IA64/Altix systems */
3736         mmiowb();
3737 }
3738
3739 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3740 {
3741         struct net_device *netdev = tx_ring->netdev;
3742
3743         netif_stop_subqueue(netdev, tx_ring->queue_index);
3744
3745         /* Herbert's original patch had:
3746          *  smp_mb__after_netif_stop_queue();
3747          * but since that doesn't exist yet, just open code it. */
3748         smp_mb();
3749
3750         /* We need to check again in case another CPU has just
3751          * made room available. */
3752         if (igb_desc_unused(tx_ring) < size)
3753                 return -EBUSY;
3754
3755         /* A reprieve! */
3756         netif_wake_subqueue(netdev, tx_ring->queue_index);
3757         tx_ring->tx_stats.restart_queue++;
3758         return 0;
3759 }
3760
3761 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3762 {
3763         if (igb_desc_unused(tx_ring) >= size)
3764                 return 0;
3765         return __igb_maybe_stop_tx(tx_ring, size);
3766 }
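
/*
 * Worked example for the ring-space checks above (illustrative only): the
 * transmit path below reserves nr_frags + 4 descriptors before mapping a
 * frame -- one per page fragment, one for skb->data, one for an optional
 * context descriptor, plus a two-descriptor gap that keeps tail from
 * catching up with head.  A 3-fragment skb therefore needs
 * igb_desc_unused() >= 7 to proceed; otherwise the subqueue is stopped and
 * NETDEV_TX_BUSY is returned.
 */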
3767
3768 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3769                                     struct igb_ring *tx_ring)
3770 {
3771         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3772         unsigned int first;
3773         unsigned int tx_flags = 0;
3774         u8 hdr_len = 0;
3775         int tso = 0, count;
3776         union skb_shared_tx *shtx = skb_tx(skb);
3777
3778         /* need: 1 descriptor per page fragment,
3779          *       + 2 desc gap to keep tail from touching head,
3780          *       + 1 desc for skb->data,
3781          *       + 1 desc for context descriptor,
3782          * otherwise try next time */
3783         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3784                 /* this is a hard error */
3785                 return NETDEV_TX_BUSY;
3786         }
3787
3788         if (unlikely(shtx->hardware)) {
3789                 shtx->in_progress = 1;
3790                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3791         }
3792
3793         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3794                 tx_flags |= IGB_TX_FLAGS_VLAN;
3795                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3796         }
3797
3798         if (skb->protocol == htons(ETH_P_IP))
3799                 tx_flags |= IGB_TX_FLAGS_IPV4;
3800
3801         first = tx_ring->next_to_use;
3802         if (skb_is_gso(skb)) {
3803                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3804
3805                 if (tso < 0) {
3806                         dev_kfree_skb_any(skb);
3807                         return NETDEV_TX_OK;
3808                 }
3809         }
3810
3811         if (tso)
3812                 tx_flags |= IGB_TX_FLAGS_TSO;
3813         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3814                  (skb->ip_summed == CHECKSUM_PARTIAL))
3815                 tx_flags |= IGB_TX_FLAGS_CSUM;
3816
3817         /*
3818          * count reflects descriptors mapped; if 0 or less, then a mapping error
3819          * has occurred and we need to rewind the descriptor queue
3820          */
3821         count = igb_tx_map_adv(tx_ring, skb, first);
3822         if (!count) {
3823                 dev_kfree_skb_any(skb);
3824                 tx_ring->buffer_info[first].time_stamp = 0;
3825                 tx_ring->next_to_use = first;
3826                 return NETDEV_TX_OK;
3827         }
3828
3829         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3830
3831         /* Make sure there is space in the ring for the next send. */
3832         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3833
3834         return NETDEV_TX_OK;
3835 }
3836
3837 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3838                                       struct net_device *netdev)
3839 {
3840         struct igb_adapter *adapter = netdev_priv(netdev);
3841         struct igb_ring *tx_ring;
3842         int r_idx = 0;
3843
3844         if (test_bit(__IGB_DOWN, &adapter->state)) {
3845                 dev_kfree_skb_any(skb);
3846                 return NETDEV_TX_OK;
3847         }
3848
3849         if (skb->len <= 0) {
3850                 dev_kfree_skb_any(skb);
3851                 return NETDEV_TX_OK;
3852         }
3853
3854         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3855         tx_ring = adapter->multi_tx_table[r_idx];
3856
3857         /* This goes back to the question of how to logically map a tx queue
3858          * to a flow.  Right now, performance is impacted slightly negatively
3859          * if using multiple tx queues.  If the stack breaks away from a
3860          * single qdisc implementation, we can look at this again. */
3861         return igb_xmit_frame_ring_adv(skb, tx_ring);
3862 }
3863
3864 /**
3865  * igb_tx_timeout - Respond to a Tx Hang
3866  * @netdev: network interface device structure
3867  **/
3868 static void igb_tx_timeout(struct net_device *netdev)
3869 {
3870         struct igb_adapter *adapter = netdev_priv(netdev);
3871         struct e1000_hw *hw = &adapter->hw;
3872
3873         /* Do the reset outside of interrupt context */
3874         adapter->tx_timeout_count++;
3875
3876         if (hw->mac.type == e1000_82580)
3877                 hw->dev_spec._82575.global_device_reset = true;
3878
3879         schedule_work(&adapter->reset_task);
3880         wr32(E1000_EICS,
3881              (adapter->eims_enable_mask & ~adapter->eims_other));
3882 }
3883
3884 static void igb_reset_task(struct work_struct *work)
3885 {
3886         struct igb_adapter *adapter;
3887         adapter = container_of(work, struct igb_adapter, reset_task);
3888
3889         igb_reinit_locked(adapter);
3890 }
3891
3892 /**
3893  * igb_get_stats - Get System Network Statistics
3894  * @netdev: network interface device structure
3895  *
3896  * Returns the address of the device statistics structure.
3897  * The statistics are actually updated from the timer callback.
3898  **/
3899 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3900 {
3901         /* only return the current stats */
3902         return &netdev->stats;
3903 }
3904
3905 /**
3906  * igb_change_mtu - Change the Maximum Transfer Unit
3907  * @netdev: network interface device structure
3908  * @new_mtu: new value for maximum frame size
3909  *
3910  * Returns 0 on success, negative on failure
3911  **/
3912 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3913 {
3914         struct igb_adapter *adapter = netdev_priv(netdev);
3915         struct pci_dev *pdev = adapter->pdev;
3916         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3917         u32 rx_buffer_len, i;
3918
3919         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3920                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3921                 return -EINVAL;
3922         }
3923
3924         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3925                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3926                 return -EINVAL;
3927         }
3928
3929         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3930                 msleep(1);
3931
3932         /* igb_down has a dependency on max_frame_size */
3933         adapter->max_frame_size = max_frame;
3934
3935         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3936          * means we reserve 2 more, this pushes us to allocate from the next
3937          * larger slab size.
3938          * i.e. RXBUFFER_2048 --> size-4096 slab
3939          */
3940
3941         if (max_frame <= IGB_RXBUFFER_1024)
3942                 rx_buffer_len = IGB_RXBUFFER_1024;
3943         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3944                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3945         else
3946                 rx_buffer_len = IGB_RXBUFFER_128;
3947
3948         if (netif_running(netdev))
3949                 igb_down(adapter);
3950
3951         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3952                  netdev->mtu, new_mtu);
3953         netdev->mtu = new_mtu;
3954
3955         for (i = 0; i < adapter->num_rx_queues; i++)
3956                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3957
3958         if (netif_running(netdev))
3959                 igb_up(adapter);
3960         else
3961                 igb_reset(adapter);
3962
3963         clear_bit(__IGB_RESETTING, &adapter->state);
3964
3965         return 0;
3966 }
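
/*
 * Worked example for the sizing above (illustrative only): a standard MTU of
 * 1500 gives max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) = 1518, which
 * selects the MAXIMUM_ETHERNET_VLAN_SIZE receive buffer; an MTU small enough
 * that max_frame fits in 1024 bytes uses IGB_RXBUFFER_1024.  Larger (jumbo)
 * frames fall back to the small IGB_RXBUFFER_128 header buffer, with the
 * remaining data landing in the half-page buffers tracked via
 * buffer_info->page in the receive path.
 */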
3967
3968 /**
3969  * igb_update_stats - Update the board statistics counters
3970  * @adapter: board private structure
3971  **/
3972
3973 void igb_update_stats(struct igb_adapter *adapter)
3974 {
3975         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3976         struct e1000_hw *hw = &adapter->hw;
3977         struct pci_dev *pdev = adapter->pdev;
3978         u32 rnbc;
3979         u16 phy_tmp;
3980         int i;
3981         u64 bytes, packets;
3982
3983 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3984
3985         /*
3986          * Prevent stats update while adapter is being reset, or if the pci
3987          * connection is down.
3988          */
3989         if (adapter->link_speed == 0)
3990                 return;
3991         if (pci_channel_offline(pdev))
3992                 return;
3993
3994         bytes = 0;
3995         packets = 0;
3996         for (i = 0; i < adapter->num_rx_queues; i++) {
3997                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3998                 struct igb_ring *ring = adapter->rx_ring[i];
3999                 ring->rx_stats.drops += rqdpc_tmp;
4000                 net_stats->rx_fifo_errors += rqdpc_tmp;
4001                 bytes += ring->rx_stats.bytes;
4002                 packets += ring->rx_stats.packets;
4003         }
4004
4005         net_stats->rx_bytes = bytes;
4006         net_stats->rx_packets = packets;
4007
4008         bytes = 0;
4009         packets = 0;
4010         for (i = 0; i < adapter->num_tx_queues; i++) {
4011                 struct igb_ring *ring = adapter->tx_ring[i];
4012                 bytes += ring->tx_stats.bytes;
4013                 packets += ring->tx_stats.packets;
4014         }
4015         net_stats->tx_bytes = bytes;
4016         net_stats->tx_packets = packets;
4017
4018         /* read stats registers */
4019         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4020         adapter->stats.gprc += rd32(E1000_GPRC);
4021         adapter->stats.gorc += rd32(E1000_GORCL);
4022         rd32(E1000_GORCH); /* clear GORCL */
4023         adapter->stats.bprc += rd32(E1000_BPRC);
4024         adapter->stats.mprc += rd32(E1000_MPRC);
4025         adapter->stats.roc += rd32(E1000_ROC);
4026
4027         adapter->stats.prc64 += rd32(E1000_PRC64);
4028         adapter->stats.prc127 += rd32(E1000_PRC127);
4029         adapter->stats.prc255 += rd32(E1000_PRC255);
4030         adapter->stats.prc511 += rd32(E1000_PRC511);
4031         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4032         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4033         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4034         adapter->stats.sec += rd32(E1000_SEC);
4035
4036         adapter->stats.mpc += rd32(E1000_MPC);
4037         adapter->stats.scc += rd32(E1000_SCC);
4038         adapter->stats.ecol += rd32(E1000_ECOL);
4039         adapter->stats.mcc += rd32(E1000_MCC);
4040         adapter->stats.latecol += rd32(E1000_LATECOL);
4041         adapter->stats.dc += rd32(E1000_DC);
4042         adapter->stats.rlec += rd32(E1000_RLEC);
4043         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4044         adapter->stats.xontxc += rd32(E1000_XONTXC);
4045         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4046         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4047         adapter->stats.fcruc += rd32(E1000_FCRUC);
4048         adapter->stats.gptc += rd32(E1000_GPTC);
4049         adapter->stats.gotc += rd32(E1000_GOTCL);
4050         rd32(E1000_GOTCH); /* clear GOTCL */
4051         rnbc = rd32(E1000_RNBC);
4052         adapter->stats.rnbc += rnbc;
4053         net_stats->rx_fifo_errors += rnbc;
4054         adapter->stats.ruc += rd32(E1000_RUC);
4055         adapter->stats.rfc += rd32(E1000_RFC);
4056         adapter->stats.rjc += rd32(E1000_RJC);
4057         adapter->stats.tor += rd32(E1000_TORH);
4058         adapter->stats.tot += rd32(E1000_TOTH);
4059         adapter->stats.tpr += rd32(E1000_TPR);
4060
4061         adapter->stats.ptc64 += rd32(E1000_PTC64);
4062         adapter->stats.ptc127 += rd32(E1000_PTC127);
4063         adapter->stats.ptc255 += rd32(E1000_PTC255);
4064         adapter->stats.ptc511 += rd32(E1000_PTC511);
4065         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4066         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4067
4068         adapter->stats.mptc += rd32(E1000_MPTC);
4069         adapter->stats.bptc += rd32(E1000_BPTC);
4070
4071         adapter->stats.tpt += rd32(E1000_TPT);
4072         adapter->stats.colc += rd32(E1000_COLC);
4073
4074         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4075         adapter->stats.rxerrc += rd32(E1000_RXERRC);
4076         adapter->stats.tncrs += rd32(E1000_TNCRS);
4077         adapter->stats.tsctc += rd32(E1000_TSCTC);
4078         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4079
4080         adapter->stats.iac += rd32(E1000_IAC);
4081         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4082         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4083         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4084         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4085         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4086         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4087         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4088         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4089
4090         /* Fill out the OS statistics structure */
4091         net_stats->multicast = adapter->stats.mprc;
4092         net_stats->collisions = adapter->stats.colc;
4093
4094         /* Rx Errors */
4095
4096         /* RLEC on some newer hardware can be incorrect so build
4097          * our own version based on RUC and ROC */
4098         net_stats->rx_errors = adapter->stats.rxerrc +
4099                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4100                 adapter->stats.ruc + adapter->stats.roc +
4101                 adapter->stats.cexterr;
4102         net_stats->rx_length_errors = adapter->stats.ruc +
4103                                       adapter->stats.roc;
4104         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4105         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4106         net_stats->rx_missed_errors = adapter->stats.mpc;
4107
4108         /* Tx Errors */
4109         net_stats->tx_errors = adapter->stats.ecol +
4110                                adapter->stats.latecol;
4111         net_stats->tx_aborted_errors = adapter->stats.ecol;
4112         net_stats->tx_window_errors = adapter->stats.latecol;
4113         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4114
4115         /* Tx Dropped needs to be maintained elsewhere */
4116
4117         /* Phy Stats */
4118         if (hw->phy.media_type == e1000_media_type_copper) {
4119                 if ((adapter->link_speed == SPEED_1000) &&
4120                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4121                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4122                         adapter->phy_stats.idle_errors += phy_tmp;
4123                 }
4124         }
4125
4126         /* Management Stats */
4127         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4128         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4129         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4130 }
4131
4132 static irqreturn_t igb_msix_other(int irq, void *data)
4133 {
4134         struct igb_adapter *adapter = data;
4135         struct e1000_hw *hw = &adapter->hw;
4136         u32 icr = rd32(E1000_ICR);
4137         /* reading ICR causes bit 31 of EICR to be cleared */
4138
4139         if (icr & E1000_ICR_DRSTA)
4140                 schedule_work(&adapter->reset_task);
4141
4142         if (icr & E1000_ICR_DOUTSYNC) {
4143                 /* HW is reporting DMA is out of sync */
4144                 adapter->stats.doosync++;
4145         }
4146
4147         /* Check for a mailbox event */
4148         if (icr & E1000_ICR_VMMB)
4149                 igb_msg_task(adapter);
4150
4151         if (icr & E1000_ICR_LSC) {
4152                 hw->mac.get_link_status = 1;
4153                 /* guard against interrupt when we're going down */
4154                 if (!test_bit(__IGB_DOWN, &adapter->state))
4155                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4156         }
4157
4158         if (adapter->vfs_allocated_count)
4159                 wr32(E1000_IMS, E1000_IMS_LSC |
4160                                 E1000_IMS_VMMB |
4161                                 E1000_IMS_DOUTSYNC);
4162         else
4163                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4164         wr32(E1000_EIMS, adapter->eims_other);
4165
4166         return IRQ_HANDLED;
4167 }
4168
4169 static void igb_write_itr(struct igb_q_vector *q_vector)
4170 {
4171         struct igb_adapter *adapter = q_vector->adapter;
4172         u32 itr_val = q_vector->itr_val & 0x7FFC;
4173
4174         if (!q_vector->set_itr)
4175                 return;
4176
4177         if (!itr_val)
4178                 itr_val = 0x4;
4179
4180         if (adapter->hw.mac.type == e1000_82575)
4181                 itr_val |= itr_val << 16;
4182         else
4183                 itr_val |= 0x8000000;
4184
4185         writel(itr_val, q_vector->itr_register);
4186         q_vector->set_itr = 0;
4187 }
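
/*
 * Worked example for igb_write_itr() above (illustrative only): an itr_val
 * of 196 is masked to 0x7FFC (low two bits dropped) before being written to
 * the per-vector ITR register.  On 82575 the value is also replicated into
 * the upper 16 bits of the register; on later parts the 0x8000000 bit is
 * OR'd in instead, exactly as the code above does, and the write only
 * happens when set_itr was flagged by the ITR update routines.
 */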
4188
4189 static irqreturn_t igb_msix_ring(int irq, void *data)
4190 {
4191         struct igb_q_vector *q_vector = data;
4192
4193         /* Write the ITR value calculated from the previous interrupt. */
4194         igb_write_itr(q_vector);
4195
4196         napi_schedule(&q_vector->napi);
4197
4198         return IRQ_HANDLED;
4199 }
4200
4201 #ifdef CONFIG_IGB_DCA
4202 static void igb_update_dca(struct igb_q_vector *q_vector)
4203 {
4204         struct igb_adapter *adapter = q_vector->adapter;
4205         struct e1000_hw *hw = &adapter->hw;
4206         int cpu = get_cpu();
4207
4208         if (q_vector->cpu == cpu)
4209                 goto out_no_update;
4210
4211         if (q_vector->tx_ring) {
4212                 int q = q_vector->tx_ring->reg_idx;
4213                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4214                 if (hw->mac.type == e1000_82575) {
4215                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4216                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4217                 } else {
4218                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4219                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4220                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4221                 }
4222                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4223                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4224         }
4225         if (q_vector->rx_ring) {
4226                 int q = q_vector->rx_ring->reg_idx;
4227                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4228                 if (hw->mac.type == e1000_82575) {
4229                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4230                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4231                 } else {
4232                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4233                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4234                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4235                 }
4236                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4237                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4238                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4239                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4240         }
4241         q_vector->cpu = cpu;
4242 out_no_update:
4243         put_cpu();
4244 }
4245
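/**
 * igb_setup_dca - enable DCA and program the CPU tag for every queue vector
 * @adapter: board private structure
 **/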
4246 static void igb_setup_dca(struct igb_adapter *adapter)
4247 {
4248         struct e1000_hw *hw = &adapter->hw;
4249         int i;
4250
4251         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4252                 return;
4253
4254         /* Always use CB2 mode, difference is masked in the CB driver. */
4255         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4256
4257         for (i = 0; i < adapter->num_q_vectors; i++) {
4258                 adapter->q_vector[i]->cpu = -1;
4259                 igb_update_dca(adapter->q_vector[i]);
4260         }
4261 }
4262
4263 static int __igb_notify_dca(struct device *dev, void *data)
4264 {
4265         struct net_device *netdev = dev_get_drvdata(dev);
4266         struct igb_adapter *adapter = netdev_priv(netdev);
4267         struct pci_dev *pdev = adapter->pdev;
4268         struct e1000_hw *hw = &adapter->hw;
4269         unsigned long event = *(unsigned long *)data;
4270
4271         switch (event) {
4272         case DCA_PROVIDER_ADD:
4273                 /* if already enabled, don't do it again */
4274                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4275                         break;
4276                 if (dca_add_requester(dev) == 0) {
4277                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4278                         dev_info(&pdev->dev, "DCA enabled\n");
4279                         igb_setup_dca(adapter);
4280                         break;
4281                 }
4282                 /* Fall Through since DCA is disabled. */
4283         case DCA_PROVIDER_REMOVE:
4284                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4285                         /* without this a class_device is left
4286                          * hanging around in the sysfs model */
4287                         dca_remove_requester(dev);
4288                         dev_info(&pdev->dev, "DCA disabled\n");
4289                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4290                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4291                 }
4292                 break;
4293         }
4294
4295         return 0;
4296 }
4297
4298 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4299                           void *p)
4300 {
4301         int ret_val;
4302
4303         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4304                                          __igb_notify_dca);
4305
4306         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4307 }
4308 #endif /* CONFIG_IGB_DCA */
4309
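/**
 * igb_ping_all_vfs - send a control message to every allocated VF
 * @adapter: board private structure
 **/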
4310 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4311 {
4312         struct e1000_hw *hw = &adapter->hw;
4313         u32 ping;
4314         int i;
4315
4316         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4317                 ping = E1000_PF_CONTROL_MSG;
4318                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4319                         ping |= E1000_VT_MSGTYPE_CTS;
4320                 igb_write_mbx(hw, &ping, 1, i);
4321         }
4322 }
4323
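/**
 * igb_set_vf_promisc - handle a VF request to change its promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF index
 **/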
4324 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4325 {
4326         struct e1000_hw *hw = &adapter->hw;
4327         u32 vmolr = rd32(E1000_VMOLR(vf));
4328         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4329
4330         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4331                             IGB_VF_FLAG_MULTI_PROMISC);
4332         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4333
4334         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4335                 vmolr |= E1000_VMOLR_MPME;
4336                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4337         } else {
4338                 /*
4339                  * if we have hashes and we are clearing a multicast promisc
4340                  * flag we need to write the hashes to the MTA as this step
4341                  * was previously skipped
4342                  */
4343                 if (vf_data->num_vf_mc_hashes > 30) {
4344                         vmolr |= E1000_VMOLR_MPME;
4345                 } else if (vf_data->num_vf_mc_hashes) {
4346                         int j;
4347                         vmolr |= E1000_VMOLR_ROMPE;
4348                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4349                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4350                 }
4351         }
4352
4353         wr32(E1000_VMOLR(vf), vmolr);
4354
4355         /* there are flags left unprocessed, likely not supported */
4356         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4357                 return -EINVAL;
4358
4359         return 0;
4360
4361 }
4362
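/**
 * igb_set_vf_multicasts - store and apply the multicast hashes sent by a VF
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash list
 * @vf: VF index
 **/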
4363 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4364                                   u32 *msgbuf, u32 vf)
4365 {
4366         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4367         u16 *hash_list = (u16 *)&msgbuf[1];
4368         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4369         int i;
4370
4371         /* salt away the number of multicast addresses assigned
4372          * to this VF for later use to restore when the PF multicast
4373          * list changes
4374          */
4375         vf_data->num_vf_mc_hashes = n;
4376
4377         /* only up to 30 hash values supported */
4378         if (n > 30)
4379                 n = 30;
4380
4381         /* store the hashes for later use */
4382         for (i = 0; i < n; i++)
4383                 vf_data->vf_mc_hashes[i] = hash_list[i];
4384
4385         /* Flush and reset the mta with the new values */
4386         igb_set_rx_mode(adapter->netdev);
4387
4388         return 0;
4389 }
4390
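/**
 * igb_restore_vf_multicasts - reprogram the saved multicast filters of all VFs
 * @adapter: board private structure
 *
 * Called when the PF multicast list changes so that the MTA entries and
 * VMOLR bits stored for each VF are written back to the hardware.
 **/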
4391 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4392 {
4393         struct e1000_hw *hw = &adapter->hw;
4394         struct vf_data_storage *vf_data;
4395         int i, j;
4396
4397         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4398                 u32 vmolr = rd32(E1000_VMOLR(i));
4399                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4400
4401                 vf_data = &adapter->vf_data[i];
4402
4403                 if ((vf_data->num_vf_mc_hashes > 30) ||
4404                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4405                         vmolr |= E1000_VMOLR_MPME;
4406                 } else if (vf_data->num_vf_mc_hashes) {
4407                         vmolr |= E1000_VMOLR_ROMPE;
4408                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4409                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4410                 }
4411                 wr32(E1000_VMOLR(i), vmolr);
4412         }
4413 }
4414
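/**
 * igb_clear_vf_vfta - remove a VF from every VLAN pool filter entry
 * @adapter: board private structure
 * @vf: VF index
 **/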
4415 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4416 {
4417         struct e1000_hw *hw = &adapter->hw;
4418         u32 pool_mask, reg, vid;
4419         int i;
4420
4421         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4422
4423         /* Find the vlan filter for this id */
4424         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4425                 reg = rd32(E1000_VLVF(i));
4426
4427                 /* remove the vf from the pool */
4428                 reg &= ~pool_mask;
4429
4430                 /* if pool is empty then remove entry from vfta */
4431                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4432                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4433                         vid = reg & E1000_VLVF_VLANID_MASK;
4434                         igb_vfta_set(hw, vid, false);
4435                         reg = 0;
4436                 }
4437
4438                 wr32(E1000_VLVF(i), reg);
4439         }
4440
4441         adapter->vf_data[vf].vlans_enabled = 0;
4442 }
4443
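/**
 * igb_vlvf_set - add or remove a VLAN pool filter for a given VF
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the filter, false to remove it
 * @vf: pool/VF index the filter applies to
 **/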
4444 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4445 {
4446         struct e1000_hw *hw = &adapter->hw;
4447         u32 reg, i;
4448
4449         /* The vlvf table only exists on 82576 hardware and newer */
4450         if (hw->mac.type < e1000_82576)
4451                 return -1;
4452
4453         /* we only need to do this if VMDq is enabled */
4454         if (!adapter->vfs_allocated_count)
4455                 return -1;
4456
4457         /* Find the vlan filter for this id */
4458         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4459                 reg = rd32(E1000_VLVF(i));
4460                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4461                     vid == (reg & E1000_VLVF_VLANID_MASK))
4462                         break;
4463         }
4464
4465         if (add) {
4466                 if (i == E1000_VLVF_ARRAY_SIZE) {
4467                         /* Did not find a matching VLAN ID entry that was
4468                          * enabled.  Search for a free filter entry, i.e.
4469                          * one without the enable bit set
4470                          */
4471                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4472                                 reg = rd32(E1000_VLVF(i));
4473                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4474                                         break;
4475                         }
4476                 }
4477                 if (i < E1000_VLVF_ARRAY_SIZE) {
4478                         /* Found an enabled/available entry */
4479                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4480
4481                         /* if !enabled we need to set this up in vfta */
4482                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4483                                 /* add VID to filter table */
4484                                 igb_vfta_set(hw, vid, true);
4485                                 reg |= E1000_VLVF_VLANID_ENABLE;
4486                         }
4487                         reg &= ~E1000_VLVF_VLANID_MASK;
4488                         reg |= vid;
4489                         wr32(E1000_VLVF(i), reg);
4490
4491                         /* do not modify RLPML for PF devices */
4492                         if (vf >= adapter->vfs_allocated_count)
4493                                 return 0;
4494
4495                         if (!adapter->vf_data[vf].vlans_enabled) {
4496                                 u32 size;
4497                                 reg = rd32(E1000_VMOLR(vf));
4498                                 size = reg & E1000_VMOLR_RLPML_MASK;
4499                                 size += 4;
4500                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4501                                 reg |= size;
4502                                 wr32(E1000_VMOLR(vf), reg);
4503                         }
4504
4505                         adapter->vf_data[vf].vlans_enabled++;
4506                         return 0;
4507                 }
4508         } else {
4509                 if (i < E1000_VLVF_ARRAY_SIZE) {
4510                         /* remove vf from the pool */
4511                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4512                         /* if pool is empty then remove entry from vfta */
4513                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4514                                 reg = 0;
4515                                 igb_vfta_set(hw, vid, false);
4516                         }
4517                         wr32(E1000_VLVF(i), reg);
4518
4519                         /* do not modify RLPML for PF devices */
4520                         if (vf >= adapter->vfs_allocated_count)
4521                                 return 0;
4522
4523                         adapter->vf_data[vf].vlans_enabled--;
4524                         if (!adapter->vf_data[vf].vlans_enabled) {
4525                                 u32 size;
4526                                 reg = rd32(E1000_VMOLR(vf));
4527                                 size = reg & E1000_VMOLR_RLPML_MASK;
4528                                 size -= 4;
4529                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4530                                 reg |= size;
4531                                 wr32(E1000_VMOLR(vf), reg);
4532                         }
4533                 }
4534         }
4535         return 0;
4536 }
4537
4538 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4539 {
4540         struct e1000_hw *hw = &adapter->hw;
4541
4542         if (vid)
4543                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4544         else
4545                 wr32(E1000_VMVIR(vf), 0);
4546 }
4547
4548 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4549                                int vf, u16 vlan, u8 qos)
4550 {
4551         int err = 0;
4552         struct igb_adapter *adapter = netdev_priv(netdev);
4553
4554         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4555                 return -EINVAL;
4556         if (vlan || qos) {
4557                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4558                 if (err)
4559                         goto out;
4560                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4561                 igb_set_vmolr(adapter, vf, !vlan);
4562                 adapter->vf_data[vf].pf_vlan = vlan;
4563                 adapter->vf_data[vf].pf_qos = qos;
4564                 dev_info(&adapter->pdev->dev,
4565                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4566                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4567                         dev_warn(&adapter->pdev->dev,
4568                                  "The VF VLAN has been set,"
4569                                  " but the PF device is not up.\n");
4570                         dev_warn(&adapter->pdev->dev,
4571                                  "Bring the PF device up before"
4572                                  " attempting to use the VF device.\n");
4573                 }
4574         } else {
4575                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4576                                    false, vf);
4577                 igb_set_vmvir(adapter, vlan, vf);
4578                 igb_set_vmolr(adapter, vf, true);
4579                 adapter->vf_data[vf].pf_vlan = 0;
4580                 adapter->vf_data[vf].pf_qos = 0;
4581         }
4582 out:
4583         return err;
4584 }
4585
4586 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4587 {
4588         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4589         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4590
4591         return igb_vlvf_set(adapter, vid, add, vf);
4592 }
4593
4594 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4595 {
4596         /* clear flags */
4597         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4598         adapter->vf_data[vf].last_nack = jiffies;
4599
4600         /* reset offloads to defaults */
4601         igb_set_vmolr(adapter, vf, true);
4602
4603         /* reset vlans for device */
4604         igb_clear_vf_vfta(adapter, vf);
4605         if (adapter->vf_data[vf].pf_vlan)
4606                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4607                                     adapter->vf_data[vf].pf_vlan,
4608                                     adapter->vf_data[vf].pf_qos);
4609         else
4610                 igb_clear_vf_vfta(adapter, vf);
4611
4612         /* reset multicast table array for vf */
4613         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4614
4615         /* Flush and reset the mta with the new values */
4616         igb_set_rx_mode(adapter->netdev);
4617 }
4618
4619 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4620 {
4621         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4622
4623         /* generate a new mac address as we were hotplug removed/added */
4624         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4625                 random_ether_addr(vf_mac);
4626
4627         /* process remaining reset events */
4628         igb_vf_reset(adapter, vf);
4629 }
4630
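/**
 * igb_vf_reset_msg - handle a reset request received from a VF
 * @adapter: board private structure
 * @vf: VF index
 *
 * Resets the VF state, programs its MAC address into the RAR, enables
 * Tx/Rx for the VF and replies with an ACK carrying the MAC address.
 **/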
4631 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4632 {
4633         struct e1000_hw *hw = &adapter->hw;
4634         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4635         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4636         u32 reg, msgbuf[3];
4637         u8 *addr = (u8 *)(&msgbuf[1]);
4638
4639         /* process all the same items cleared in a function level reset */
4640         igb_vf_reset(adapter, vf);
4641
4642         /* set vf mac address */
4643         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4644
4645         /* enable transmit and receive for vf */
4646         reg = rd32(E1000_VFTE);
4647         wr32(E1000_VFTE, reg | (1 << vf));
4648         reg = rd32(E1000_VFRE);
4649         wr32(E1000_VFRE, reg | (1 << vf));
4650
4651         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4652
4653         /* reply to reset with ack and vf mac address */
4654         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4655         memcpy(addr, vf_mac, 6);
4656         igb_write_mbx(hw, msgbuf, 3, vf);
4657 }
4658
4659 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4660 {
4661         unsigned char *addr = (unsigned char *)&msg[1];
4662         int err = -1;
4663
4664         if (is_valid_ether_addr(addr))
4665                 err = igb_set_vf_mac(adapter, vf, addr);
4666
4667         return err;
4668 }
4669
4670 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4671 {
4672         struct e1000_hw *hw = &adapter->hw;
4673         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4674         u32 msg = E1000_VT_MSGTYPE_NACK;
4675
4676         /* if device isn't clear to send it shouldn't be reading either */
4677         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4678             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4679                 igb_write_mbx(hw, &msg, 1, vf);
4680                 vf_data->last_nack = jiffies;
4681         }
4682 }
4683
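/**
 * igb_rcv_msg_from_vf - read and dispatch a pending mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index
 **/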
4684 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4685 {
4686         struct pci_dev *pdev = adapter->pdev;
4687         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4688         struct e1000_hw *hw = &adapter->hw;
4689         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4690         s32 retval;
4691
4692         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4693
4694         if (retval) {
4695                 /* if receive failed revoke VF CTS stats and restart init */
4696                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4697                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4698                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4699                         return;
4700                 goto out;
4701         }
4702
4703         /* this is a message we already processed, do nothing */
4704         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4705                 return;
4706
4707         /*
4708          * until the vf completes a reset it should not be
4709          * allowed to start any configuration.
4710          */
4711
4712         if (msgbuf[0] == E1000_VF_RESET) {
4713                 igb_vf_reset_msg(adapter, vf);
4714                 return;
4715         }
4716
4717         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4718                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4719                         return;
4720                 retval = -1;
4721                 goto out;
4722         }
4723
4724         switch ((msgbuf[0] & 0xFFFF)) {
4725         case E1000_VF_SET_MAC_ADDR:
4726                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4727                 break;
4728         case E1000_VF_SET_PROMISC:
4729                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4730                 break;
4731         case E1000_VF_SET_MULTICAST:
4732                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4733                 break;
4734         case E1000_VF_SET_LPE:
4735                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4736                 break;
4737         case E1000_VF_SET_VLAN:
4738                 if (adapter->vf_data[vf].pf_vlan)
4739                         retval = -1;
4740                 else
4741                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4742                 break;
4743         default:
4744                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4745                 retval = -1;
4746                 break;
4747         }
4748
4749         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4750 out:
4751         /* notify the VF of the results of what it sent us */
4752         if (retval)
4753                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4754         else
4755                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4756
4757         igb_write_mbx(hw, msgbuf, 1, vf);
4758 }
4759
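/**
 * igb_msg_task - process pending resets, messages and acks from all VFs
 * @adapter: board private structure
 **/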
4760 static void igb_msg_task(struct igb_adapter *adapter)
4761 {
4762         struct e1000_hw *hw = &adapter->hw;
4763         u32 vf;
4764
4765         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4766                 /* process any reset requests */
4767                 if (!igb_check_for_rst(hw, vf))
4768                         igb_vf_reset_event(adapter, vf);
4769
4770                 /* process any messages pending */
4771                 if (!igb_check_for_msg(hw, vf))
4772                         igb_rcv_msg_from_vf(adapter, vf);
4773
4774                 /* process any acks */
4775                 if (!igb_check_for_ack(hw, vf))
4776                         igb_rcv_ack_from_vf(adapter, vf);
4777         }
4778 }
4779
4780 /**
4781  *  igb_set_uta - Set unicast filter table address
4782  *  @adapter: board private structure
4783  *
4784  *  The unicast table address is a register array of 32-bit registers.
4785  *  The table is meant to be used in a way similar to how the MTA is used
4786  *  however due to certain limitations in the hardware it is necessary to
4787  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4788  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4789  **/
4790 static void igb_set_uta(struct igb_adapter *adapter)
4791 {
4792         struct e1000_hw *hw = &adapter->hw;
4793         int i;
4794
4795         /* The UTA table only exists on 82576 hardware and newer */
4796         if (hw->mac.type < e1000_82576)
4797                 return;
4798
4799         /* we only need to do this if VMDq is enabled */
4800         if (!adapter->vfs_allocated_count)
4801                 return;
4802
4803         for (i = 0; i < hw->mac.uta_reg_count; i++)
4804                 array_wr32(E1000_UTA, i, ~0);
4805 }
4806
4807 /**
4808  * igb_intr_msi - Interrupt Handler
4809  * @irq: interrupt number
4810  * @data: pointer to a network interface device structure
4811  **/
4812 static irqreturn_t igb_intr_msi(int irq, void *data)
4813 {
4814         struct igb_adapter *adapter = data;
4815         struct igb_q_vector *q_vector = adapter->q_vector[0];
4816         struct e1000_hw *hw = &adapter->hw;
4817         /* read ICR disables interrupts using IAM */
4818         u32 icr = rd32(E1000_ICR);
4819
4820         igb_write_itr(q_vector);
4821
4822         if (icr & E1000_ICR_DRSTA)
4823                 schedule_work(&adapter->reset_task);
4824
4825         if (icr & E1000_ICR_DOUTSYNC) {
4826                 /* HW is reporting DMA is out of sync */
4827                 adapter->stats.doosync++;
4828         }
4829
4830         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4831                 hw->mac.get_link_status = 1;
4832                 if (!test_bit(__IGB_DOWN, &adapter->state))
4833                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4834         }
4835
4836         napi_schedule(&q_vector->napi);
4837
4838         return IRQ_HANDLED;
4839 }
4840
4841 /**
4842  * igb_intr - Legacy Interrupt Handler
4843  * @irq: interrupt number
4844  * @data: pointer to a network interface device structure
4845  **/
4846 static irqreturn_t igb_intr(int irq, void *data)
4847 {
4848         struct igb_adapter *adapter = data;
4849         struct igb_q_vector *q_vector = adapter->q_vector[0];
4850         struct e1000_hw *hw = &adapter->hw;
4851         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4852          * need for the IMC write */
4853         u32 icr = rd32(E1000_ICR);
4854         if (!icr)
4855                 return IRQ_NONE;  /* Not our interrupt */
4856
4857         igb_write_itr(q_vector);
4858
4859         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4860          * not set, then the adapter didn't send an interrupt */
4861         if (!(icr & E1000_ICR_INT_ASSERTED))
4862                 return IRQ_NONE;
4863
4864         if (icr & E1000_ICR_DRSTA)
4865                 schedule_work(&adapter->reset_task);
4866
4867         if (icr & E1000_ICR_DOUTSYNC) {
4868                 /* HW is reporting DMA is out of sync */
4869                 adapter->stats.doosync++;
4870         }
4871
4872         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4873                 hw->mac.get_link_status = 1;
4874                 /* guard against interrupt when we're going down */
4875                 if (!test_bit(__IGB_DOWN, &adapter->state))
4876                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4877         }
4878
4879         napi_schedule(&q_vector->napi);
4880
4881         return IRQ_HANDLED;
4882 }
4883
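/**
 * igb_ring_irq_enable - update ITR if needed and re-enable a vector's interrupt
 * @q_vector: vector to re-arm after NAPI polling has completed
 **/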
4884 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4885 {
4886         struct igb_adapter *adapter = q_vector->adapter;
4887         struct e1000_hw *hw = &adapter->hw;
4888
4889         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4890             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4891                 if (!adapter->msix_entries)
4892                         igb_set_itr(adapter);
4893                 else
4894                         igb_update_ring_itr(q_vector);
4895         }
4896
4897         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4898                 if (adapter->msix_entries)
4899                         wr32(E1000_EIMS, q_vector->eims_value);
4900                 else
4901                         igb_irq_enable(adapter);
4902         }
4903 }
4904
4905 /**
4906  * igb_poll - NAPI Rx polling callback
4907  * @napi: napi polling structure
4908  * @budget: count of how many packets we should handle
4909  **/
4910 static int igb_poll(struct napi_struct *napi, int budget)
4911 {
4912         struct igb_q_vector *q_vector = container_of(napi,
4913                                                      struct igb_q_vector,
4914                                                      napi);
4915         int tx_clean_complete = 1, work_done = 0;
4916
4917 #ifdef CONFIG_IGB_DCA
4918         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4919                 igb_update_dca(q_vector);
4920 #endif
4921         if (q_vector->tx_ring)
4922                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4923
4924         if (q_vector->rx_ring)
4925                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4926
4927         if (!tx_clean_complete)
4928                 work_done = budget;
4929
4930         /* If not enough Rx work done, exit the polling mode */
4931         if (work_done < budget) {
4932                 napi_complete(napi);
4933                 igb_ring_irq_enable(q_vector);
4934         }
4935
4936         return work_done;
4937 }
4938
4939 /**
4940  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4941  * @adapter: board private structure
4942  * @shhwtstamps: timestamp structure to update
4943  * @regval: unsigned 64bit system time value.
4944  *
4945  * We need to convert the system time value stored in the RX/TXSTMP registers
4946  * into a hwtstamp which can be used by the upper level timestamping functions
4947  */
4948 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4949                                    struct skb_shared_hwtstamps *shhwtstamps,
4950                                    u64 regval)
4951 {
4952         u64 ns;
4953
4954         /*
4955          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift it up
4956          * by 24 bits to match the clock shift we set up earlier.
4957          */
4958         if (adapter->hw.mac.type == e1000_82580)
4959                 regval <<= IGB_82580_TSYNC_SHIFT;
4960
4961         ns = timecounter_cyc2time(&adapter->clock, regval);
4962         timecompare_update(&adapter->compare, ns);
4963         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4964         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4965         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4966 }
4967
4968 /**
4969  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4970  * @q_vector: pointer to q_vector containing needed info
4971  * @skb: packet that was just sent
4972  *
4973  * If we were asked to do hardware stamping and such a time stamp is
4974  * available, then it must have been for this skb here because we
4975  * allow only one such packet into the queue.
4976  */
4977 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4978 {
4979         struct igb_adapter *adapter = q_vector->adapter;
4980         union skb_shared_tx *shtx = skb_tx(skb);
4981         struct e1000_hw *hw = &adapter->hw;
4982         struct skb_shared_hwtstamps shhwtstamps;
4983         u64 regval;
4984
4985         /* if skb does not support hw timestamp or TX stamp not valid exit */
4986         if (likely(!shtx->hardware) ||
4987             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4988                 return;
4989
4990         regval = rd32(E1000_TXSTMPL);
4991         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4992
4993         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4994         skb_tstamp_tx(skb, &shhwtstamps);
4995 }
4996
4997 /**
4998  * igb_clean_tx_irq - Reclaim resources after transmit completes
4999  * @q_vector: pointer to q_vector containing needed info
5000  * returns true if ring is completely cleaned
5001  **/
5002 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5003 {
5004         struct igb_adapter *adapter = q_vector->adapter;
5005         struct igb_ring *tx_ring = q_vector->tx_ring;
5006         struct net_device *netdev = tx_ring->netdev;
5007         struct e1000_hw *hw = &adapter->hw;
5008         struct igb_buffer *buffer_info;
5009         struct sk_buff *skb;
5010         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5011         unsigned int total_bytes = 0, total_packets = 0;
5012         unsigned int i, eop, count = 0;
5013         bool cleaned = false;
5014
5015         i = tx_ring->next_to_clean;
5016         eop = tx_ring->buffer_info[i].next_to_watch;
5017         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5018
5019         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5020                (count < tx_ring->count)) {
5021                 for (cleaned = false; !cleaned; count++) {
5022                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5023                         buffer_info = &tx_ring->buffer_info[i];
5024                         cleaned = (i == eop);
5025                         skb = buffer_info->skb;
5026
5027                         if (skb) {
5028                                 unsigned int segs, bytecount;
5029                                 /* gso_segs is currently only valid for tcp */
5030                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
5031                                 /* multiply data chunks by size of headers */
5032                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5033                                             skb->len;
5034                                 total_packets += segs;
5035                                 total_bytes += bytecount;
5036
5037                                 igb_tx_hwtstamp(q_vector, skb);
5038                         }
5039
5040                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5041                         tx_desc->wb.status = 0;
5042
5043                         i++;
5044                         if (i == tx_ring->count)
5045                                 i = 0;
5046                 }
5047                 eop = tx_ring->buffer_info[i].next_to_watch;
5048                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5049         }
5050
5051         tx_ring->next_to_clean = i;
5052
5053         if (unlikely(count &&
5054                      netif_carrier_ok(netdev) &&
5055                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5056                 /* Make sure that anybody stopping the queue after this
5057                  * sees the new next_to_clean.
5058                  */
5059                 smp_mb();
5060                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5061                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5062                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5063                         tx_ring->tx_stats.restart_queue++;
5064                 }
5065         }
5066
5067         if (tx_ring->detect_tx_hung) {
5068                 /* Detect a transmit hang in hardware, this serializes the
5069                  * check with the clearing of time_stamp and movement of i */
5070                 tx_ring->detect_tx_hung = false;
5071                 if (tx_ring->buffer_info[i].time_stamp &&
5072                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5073                                (adapter->tx_timeout_factor * HZ)) &&
5074                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5075
5076                         /* detected Tx unit hang */
5077                         dev_err(&tx_ring->pdev->dev,
5078                                 "Detected Tx Unit Hang\n"
5079                                 "  Tx Queue             <%d>\n"
5080                                 "  TDH                  <%x>\n"
5081                                 "  TDT                  <%x>\n"
5082                                 "  next_to_use          <%x>\n"
5083                                 "  next_to_clean        <%x>\n"
5084                                 "buffer_info[next_to_clean]\n"
5085                                 "  time_stamp           <%lx>\n"
5086                                 "  next_to_watch        <%x>\n"
5087                                 "  jiffies              <%lx>\n"
5088                                 "  desc.status          <%x>\n",
5089                                 tx_ring->queue_index,
5090                                 readl(tx_ring->head),
5091                                 readl(tx_ring->tail),
5092                                 tx_ring->next_to_use,
5093                                 tx_ring->next_to_clean,
5094                                 tx_ring->buffer_info[eop].time_stamp,
5095                                 eop,
5096                                 jiffies,
5097                                 eop_desc->wb.status);
5098                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5099                 }
5100         }
5101         tx_ring->total_bytes += total_bytes;
5102         tx_ring->total_packets += total_packets;
5103         tx_ring->tx_stats.bytes += total_bytes;
5104         tx_ring->tx_stats.packets += total_packets;
5105         return (count < tx_ring->count);
5106 }
5107
5108 /**
5109  * igb_receive_skb - helper function to handle rx indications
5110  * @q_vector: structure containing interrupt and ring information
5111  * @skb: packet to send up
5112  * @vlan_tag: vlan tag for packet
5113  **/
5114 static void igb_receive_skb(struct igb_q_vector *q_vector,
5115                             struct sk_buff *skb,
5116                             u16 vlan_tag)
5117 {
5118         struct igb_adapter *adapter = q_vector->adapter;
5119
5120         if (vlan_tag)
5121                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5122                                  vlan_tag, skb);
5123         else
5124                 napi_gro_receive(&q_vector->napi, skb);
5125 }
5126
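/**
 * igb_rx_checksum_adv - set the skb checksum status from descriptor bits
 * @ring: ring the packet was received on
 * @status_err: status/error bits from the Rx descriptor
 * @skb: packet being processed
 **/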
5127 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5128                                        u32 status_err, struct sk_buff *skb)
5129 {
5130         skb->ip_summed = CHECKSUM_NONE;
5131
5132         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5133         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5134              (status_err & E1000_RXD_STAT_IXSM))
5135                 return;
5136
5137         /* TCP/UDP checksum error bit is set */
5138         if (status_err &
5139             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5140                 /*
5141                  * work around errata with sctp packets where the TCPE aka
5142                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5143                  * packets, (aka let the stack check the crc32c)
5144                  */
5145                 if (!((skb->len == 60) &&
5146                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)))
5147                         ring->rx_stats.csum_err++;
5148
5149                 /* let the stack verify checksum errors */
5150                 return;
5151         }
5152         /* It must be a TCP or UDP packet with a valid checksum */
5153         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5154                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5155
5156         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5157 }
5158
5159 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5160                                    struct sk_buff *skb)
5161 {
5162         struct igb_adapter *adapter = q_vector->adapter;
5163         struct e1000_hw *hw = &adapter->hw;
5164         u64 regval;
5165
5166         /*
5167          * If this bit is set, then the RX registers contain the time stamp. No
5168          * other packet will be time stamped until we read these registers, so
5169          * read the registers to make them available again. Because only one
5170          * packet can be time stamped at a time, we know that the register
5171          * values must belong to this one here and therefore we don't need to
5172          * compare any of the additional attributes stored for it.
5173          *
5174          * If nothing went wrong, then it should have a skb_shared_tx that we
5175          * can turn into a skb_shared_hwtstamps.
5176          */
5177         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5178                 return;
5179         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5180                 return;
5181
5182         regval = rd32(E1000_RXSTMPL);
5183         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5184
5185         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5186 }
5187 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5188                                union e1000_adv_rx_desc *rx_desc)
5189 {
5190         /* HW will not DMA in data larger than the given buffer, even if it
5191          * parses the (NFS, of course) header to be larger.  In that case, it
5192          * fills the header buffer and spills the rest into the page.
5193          */
5194         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5195                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5196         if (hlen > rx_ring->rx_buffer_len)
5197                 hlen = rx_ring->rx_buffer_len;
5198         return hlen;
5199 }
5200
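/**
 * igb_clean_rx_irq_adv - process received packets, up to the NAPI budget
 * @q_vector: vector owning the Rx ring to clean
 * @work_done: running count of packets processed so far
 * @budget: maximum number of packets to process in this call
 **/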
5201 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5202                                  int *work_done, int budget)
5203 {
5204         struct igb_ring *rx_ring = q_vector->rx_ring;
5205         struct net_device *netdev = rx_ring->netdev;
5206         struct pci_dev *pdev = rx_ring->pdev;
5207         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5208         struct igb_buffer *buffer_info, *next_buffer;
5209         struct sk_buff *skb;
5210         bool cleaned = false;
5211         int cleaned_count = 0;
5212         int current_node = numa_node_id();
5213         unsigned int total_bytes = 0, total_packets = 0;
5214         unsigned int i;
5215         u32 staterr;
5216         u16 length;
5217         u16 vlan_tag;
5218
5219         i = rx_ring->next_to_clean;
5220         buffer_info = &rx_ring->buffer_info[i];
5221         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5222         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5223
5224         while (staterr & E1000_RXD_STAT_DD) {
5225                 if (*work_done >= budget)
5226                         break;
5227                 (*work_done)++;
5228
5229                 skb = buffer_info->skb;
5230                 prefetch(skb->data - NET_IP_ALIGN);
5231                 buffer_info->skb = NULL;
5232
5233                 i++;
5234                 if (i == rx_ring->count)
5235                         i = 0;
5236
5237                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5238                 prefetch(next_rxd);
5239                 next_buffer = &rx_ring->buffer_info[i];
5240
5241                 length = le16_to_cpu(rx_desc->wb.upper.length);
5242                 cleaned = true;
5243                 cleaned_count++;
5244
5245                 if (buffer_info->dma) {
5246                         pci_unmap_single(pdev, buffer_info->dma,
5247                                          rx_ring->rx_buffer_len,
5248                                          PCI_DMA_FROMDEVICE);
5249                         buffer_info->dma = 0;
5250                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5251                                 skb_put(skb, length);
5252                                 goto send_up;
5253                         }
5254                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5255                 }
5256
5257                 if (length) {
5258                         pci_unmap_page(pdev, buffer_info->page_dma,
5259                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5260                         buffer_info->page_dma = 0;
5261
5262                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5263                                                 buffer_info->page,
5264                                                 buffer_info->page_offset,
5265                                                 length);
5266
5267                         if ((page_count(buffer_info->page) != 1) ||
5268                             (page_to_nid(buffer_info->page) != current_node))
5269                                 buffer_info->page = NULL;
5270                         else
5271                                 get_page(buffer_info->page);
5272
5273                         skb->len += length;
5274                         skb->data_len += length;
5275                         skb->truesize += length;
5276                 }
5277
5278                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5279                         buffer_info->skb = next_buffer->skb;
5280                         buffer_info->dma = next_buffer->dma;
5281                         next_buffer->skb = skb;
5282                         next_buffer->dma = 0;
5283                         goto next_desc;
5284                 }
5285 send_up:
5286                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5287                         dev_kfree_skb_irq(skb);
5288                         goto next_desc;
5289                 }
5290
5291                 igb_rx_hwtstamp(q_vector, staterr, skb);
5292                 total_bytes += skb->len;
5293                 total_packets++;
5294
5295                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5296
5297                 skb->protocol = eth_type_trans(skb, netdev);
5298                 skb_record_rx_queue(skb, rx_ring->queue_index);
5299
5300                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5301                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5302
5303                 igb_receive_skb(q_vector, skb, vlan_tag);
5304
5305 next_desc:
5306                 rx_desc->wb.upper.status_error = 0;
5307
5308                 /* return some buffers to hardware, one at a time is too slow */
5309                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5310                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5311                         cleaned_count = 0;
5312                 }
5313
5314                 /* use prefetched values */
5315                 rx_desc = next_rxd;
5316                 buffer_info = next_buffer;
5317                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5318         }
5319
5320         rx_ring->next_to_clean = i;
5321         cleaned_count = igb_desc_unused(rx_ring);
5322
5323         if (cleaned_count)
5324                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5325
5326         rx_ring->total_packets += total_packets;
5327         rx_ring->total_bytes += total_bytes;
5328         rx_ring->rx_stats.packets += total_packets;
5329         rx_ring->rx_stats.bytes += total_bytes;
5330         return cleaned;
5331 }
5332
5333 /**
5334  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5335  * @adapter: address of board private structure
5336  **/
5337 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5338 {
5339         struct net_device *netdev = rx_ring->netdev;
5340         union e1000_adv_rx_desc *rx_desc;
5341         struct igb_buffer *buffer_info;
5342         struct sk_buff *skb;
5343         unsigned int i;
5344         int bufsz;
5345
5346         i = rx_ring->next_to_use;
5347         buffer_info = &rx_ring->buffer_info[i];
5348
5349         bufsz = rx_ring->rx_buffer_len;
5350
5351         while (cleaned_count--) {
5352                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5353
5354                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5355                         if (!buffer_info->page) {
5356                                 buffer_info->page = netdev_alloc_page(netdev);
5357                                 if (!buffer_info->page) {
5358                                         rx_ring->rx_stats.alloc_failed++;
5359                                         goto no_buffers;
5360                                 }
5361                                 buffer_info->page_offset = 0;
5362                         } else {
5363                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5364                         }
5365                         buffer_info->page_dma =
5366                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5367                                              buffer_info->page_offset,
5368                                              PAGE_SIZE / 2,
5369                                              PCI_DMA_FROMDEVICE);
5370                         if (pci_dma_mapping_error(rx_ring->pdev,
5371                                                   buffer_info->page_dma)) {
5372                                 buffer_info->page_dma = 0;
5373                                 rx_ring->rx_stats.alloc_failed++;
5374                                 goto no_buffers;
5375                         }
5376                 }
5377
5378                 skb = buffer_info->skb;
5379                 if (!skb) {
5380                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5381                         if (!skb) {
5382                                 rx_ring->rx_stats.alloc_failed++;
5383                                 goto no_buffers;
5384                         }
5385
5386                         buffer_info->skb = skb;
5387                 }
5388                 if (!buffer_info->dma) {
5389                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5390                                                           skb->data,
5391                                                           bufsz,
5392                                                           PCI_DMA_FROMDEVICE);
5393                         if (pci_dma_mapping_error(rx_ring->pdev,
5394                                                   buffer_info->dma)) {
5395                                 buffer_info->dma = 0;
5396                                 rx_ring->rx_stats.alloc_failed++;
5397                                 goto no_buffers;
5398                         }
5399                 }
5400                 /* Refresh the desc even if buffer_addrs didn't change because
5401                  * each write-back erases this info. */
5402                 if (bufsz < IGB_RXBUFFER_1024) {
5403                         rx_desc->read.pkt_addr =
5404                              cpu_to_le64(buffer_info->page_dma);
5405                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5406                 } else {
5407                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5408                         rx_desc->read.hdr_addr = 0;
5409                 }
5410
5411                 i++;
5412                 if (i == rx_ring->count)
5413                         i = 0;
5414                 buffer_info = &rx_ring->buffer_info[i];
5415         }
5416
5417 no_buffers:
5418         if (rx_ring->next_to_use != i) {
5419                 rx_ring->next_to_use = i;
5420                 if (i == 0)
5421                         i = (rx_ring->count - 1);
5422                 else
5423                         i--;
5424
5425                 /* Force memory writes to complete before letting h/w
5426                  * know there are new descriptors to fetch.  (Only
5427                  * applicable for weak-ordered memory model archs,
5428                  * such as IA-64). */
5429                 wmb();
5430                 writel(i, rx_ring->tail);
5431         }
5432 }
5433
5434 /**
5435  * igb_mii_ioctl - handle MII register ioctls
5436  * @netdev: network interface device structure
5437  * @ifr: pointer to the interface request structure
5438  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
5439  **/
5440 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5441 {
5442         struct igb_adapter *adapter = netdev_priv(netdev);
5443         struct mii_ioctl_data *data = if_mii(ifr);
5444
5445         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5446                 return -EOPNOTSUPP;
5447
5448         switch (cmd) {
5449         case SIOCGMIIPHY:
5450                 data->phy_id = adapter->hw.phy.addr;
5451                 break;
5452         case SIOCGMIIREG:
5453                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5454                                      &data->val_out))
5455                         return -EIO;
5456                 break;
5457         case SIOCSMIIREG:
5458         default:
5459                 return -EOPNOTSUPP;
5460         }
5461         return 0;
5462 }
5463
5464 /**
5465  * igb_hwtstamp_ioctl - control hardware time stamping
5466  * @netdev: network interface device structure
5467  * @ifr: pointer to the interface request structure holding the config
5468  * @cmd: ioctl command (SIOCSHWTSTAMP)
5469  *
5470  * Outgoing time stamping can be enabled and disabled. Play nice and
5471  * disable it when requested, although it shouldn't cause any overhead
5472  * when no packet needs it. At most one packet in the queue may be
5473  * marked for time stamping, otherwise it would be impossible to tell
5474  * for sure to which packet the hardware time stamp belongs.
5475  *
5476  * Incoming time stamping has to be configured via the hardware
5477  * filters. Not all combinations are supported, in particular event
5478  * type has to be specified. Matching the kind of event packet is
5479  * not supported, with the exception of "all V2 events regardless of
5480  * level 2 or 4".
5481  *
5482  **/
5483 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5484                               struct ifreq *ifr, int cmd)
5485 {
5486         struct igb_adapter *adapter = netdev_priv(netdev);
5487         struct e1000_hw *hw = &adapter->hw;
5488         struct hwtstamp_config config;
5489         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5490         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5491         u32 tsync_rx_cfg = 0;
5492         bool is_l4 = false;
5493         bool is_l2 = false;
5494         u32 regval;
5495
5496         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5497                 return -EFAULT;
5498
5499         /* reserved for future extensions */
5500         if (config.flags)
5501                 return -EINVAL;
5502
5503         switch (config.tx_type) {
5504         case HWTSTAMP_TX_OFF:
5505                 tsync_tx_ctl = 0;
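                /* fall through */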
5506         case HWTSTAMP_TX_ON:
5507                 break;
5508         default:
5509                 return -ERANGE;
5510         }
5511
5512         switch (config.rx_filter) {
5513         case HWTSTAMP_FILTER_NONE:
5514                 tsync_rx_ctl = 0;
5515                 break;
5516         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5517         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5518         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5519         case HWTSTAMP_FILTER_ALL:
5520                 /*
5521                  * register TSYNCRXCFG must be set, therefore it is not
5522                  * possible to time stamp both Sync and Delay_Req messages
5523                  * => fall back to time stamping all packets
5524                  */
5525                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5526                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5527                 break;
5528         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5529                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5530                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5531                 is_l4 = true;
5532                 break;
5533         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5534                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5535                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5536                 is_l4 = true;
5537                 break;
5538         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5539         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5540                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5541                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5542                 is_l2 = true;
5543                 is_l4 = true;
5544                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5545                 break;
5546         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5547         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5548                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5549                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5550                 is_l2 = true;
5551                 is_l4 = true;
5552                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5553                 break;
5554         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5555         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5556         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5557                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5558                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5559                 is_l2 = true;
5560                 break;
5561         default:
5562                 return -ERANGE;
5563         }
5564
5565         if (hw->mac.type == e1000_82575) {
5566                 if (tsync_rx_ctl | tsync_tx_ctl)
5567                         return -EINVAL;
5568                 return 0;
5569         }
5570
5571         /* enable/disable TX */
5572         regval = rd32(E1000_TSYNCTXCTL);
5573         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5574         regval |= tsync_tx_ctl;
5575         wr32(E1000_TSYNCTXCTL, regval);
5576
5577         /* enable/disable RX */
5578         regval = rd32(E1000_TSYNCRXCTL);
5579         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5580         regval |= tsync_rx_ctl;
5581         wr32(E1000_TSYNCRXCTL, regval);
5582
5583         /* define which PTP packets are time stamped */
5584         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5585
5586         /* define ethertype filter for timestamped packets */
5587         if (is_l2)
5588                 wr32(E1000_ETQF(3),
5589                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5590                                  E1000_ETQF_1588 | /* enable timestamping */
5591                                  ETH_P_1588));     /* 1588 eth protocol type */
5592         else
5593                 wr32(E1000_ETQF(3), 0);
5594
5595 #define PTP_PORT 319
5596         /* L4 Queue Filter[3]: filter by destination port and protocol */
5597         if (is_l4) {
5598                 u32 ftqf = (IPPROTO_UDP /* UDP */
5599                         | E1000_FTQF_VF_BP /* VF not compared */
5600                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5601                         | E1000_FTQF_MASK); /* mask all inputs */
5602                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5603
5604                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5605                 wr32(E1000_IMIREXT(3),
5606                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5607                 if (hw->mac.type == e1000_82576) {
5608                         /* enable source port check */
5609                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5610                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5611                 }
5612                 wr32(E1000_FTQF(3), ftqf);
5613         } else {
5614                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5615         }
5616         wrfl();
5617
5618         adapter->hwtstamp_config = config;
5619
5620         /* clear TX/RX time stamp registers, just to be sure */
5621         regval = rd32(E1000_TXSTMPH);
5622         regval = rd32(E1000_RXSTMPH);
5623
5624         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5625                 -EFAULT : 0;
5626 }
5627
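/*
 * Illustrative user-space sketch (not part of this driver; the socket fd
 * and device name are assumptions for the example only): igb_hwtstamp_ioctl()
 * above is reached through the standard SIOCSHWTSTAMP request, with a
 * struct hwtstamp_config from <linux/net_tstamp.h> hung off ifr_data:
 *
 *	struct hwtstamp_config cfg = {
 *		.flags     = 0,
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(sock_fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 *
 * On return cfg.rx_filter reflects what the hardware actually provides,
 * e.g. HWTSTAMP_FILTER_ALL when the driver had to fall back.
 */
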
5628 /**
5629  * igb_ioctl - handle ioctl calls for the interface
5630  * @netdev: network interface device structure
5631  * @ifr: interface request data
5632  * @cmd: ioctl command
5633  **/
5634 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5635 {
5636         switch (cmd) {
5637         case SIOCGMIIPHY:
5638         case SIOCGMIIREG:
5639         case SIOCSMIIREG:
5640                 return igb_mii_ioctl(netdev, ifr, cmd);
5641         case SIOCSHWTSTAMP:
5642                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5643         default:
5644                 return -EOPNOTSUPP;
5645         }
5646 }
5647
5648 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5649 {
5650         struct igb_adapter *adapter = hw->back;
5651         u16 cap_offset;
5652
5653         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5654         if (!cap_offset)
5655                 return -E1000_ERR_CONFIG;
5656
5657         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5658
5659         return 0;
5660 }
5661
5662 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5663 {
5664         struct igb_adapter *adapter = hw->back;
5665         u16 cap_offset;
5666
5667         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5668         if (!cap_offset)
5669                 return -E1000_ERR_CONFIG;
5670
5671         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5672
5673         return 0;
5674 }
5675
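/*
 * Usage sketch for the two helpers above (illustration only; the offset
 * and mask names are the generic ones from <linux/pci_regs.h>, not
 * necessarily what the shared MAC code passes): reading the negotiated
 * link width from the PCIe capability could look like
 *
 *	u16 link_status;
 *
 *	if (!igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &link_status))
 *		width = (link_status & PCI_EXP_LNKSTA_NLW) >> 4;
 *
 * where "width" is a caller-provided variable.
 */
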
5676 static void igb_vlan_rx_register(struct net_device *netdev,
5677                                  struct vlan_group *grp)
5678 {
5679         struct igb_adapter *adapter = netdev_priv(netdev);
5680         struct e1000_hw *hw = &adapter->hw;
5681         u32 ctrl, rctl;
5682
5683         igb_irq_disable(adapter);
5684         adapter->vlgrp = grp;
5685
5686         if (grp) {
5687                 /* enable VLAN tag insert/strip */
5688                 ctrl = rd32(E1000_CTRL);
5689                 ctrl |= E1000_CTRL_VME;
5690                 wr32(E1000_CTRL, ctrl);
5691
5692                 /* Disable CFI check */
5693                 rctl = rd32(E1000_RCTL);
5694                 rctl &= ~E1000_RCTL_CFIEN;
5695                 wr32(E1000_RCTL, rctl);
5696         } else {
5697                 /* disable VLAN tag insert/strip */
5698                 ctrl = rd32(E1000_CTRL);
5699                 ctrl &= ~E1000_CTRL_VME;
5700                 wr32(E1000_CTRL, ctrl);
5701         }
5702
5703         igb_rlpml_set(adapter);
5704
5705         if (!test_bit(__IGB_DOWN, &adapter->state))
5706                 igb_irq_enable(adapter);
5707 }
5708
5709 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5710 {
5711         struct igb_adapter *adapter = netdev_priv(netdev);
5712         struct e1000_hw *hw = &adapter->hw;
5713         int pf_id = adapter->vfs_allocated_count;
5714
5715         /* attempt to add filter to vlvf array */
5716         igb_vlvf_set(adapter, vid, true, pf_id);
5717
5718         /* add the filter since PF can receive vlans w/o entry in vlvf */
5719         igb_vfta_set(hw, vid, true);
5720 }
5721
5722 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5723 {
5724         struct igb_adapter *adapter = netdev_priv(netdev);
5725         struct e1000_hw *hw = &adapter->hw;
5726         int pf_id = adapter->vfs_allocated_count;
5727         s32 err;
5728
5729         igb_irq_disable(adapter);
5730         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5731
5732         if (!test_bit(__IGB_DOWN, &adapter->state))
5733                 igb_irq_enable(adapter);
5734
5735         /* remove vlan from VLVF table array */
5736         err = igb_vlvf_set(adapter, vid, false, pf_id);
5737
5738         /* if vid was not present in VLVF just remove it from table */
5739         if (err)
5740                 igb_vfta_set(hw, vid, false);
5741 }
5742
5743 static void igb_restore_vlan(struct igb_adapter *adapter)
5744 {
5745         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5746
5747         if (adapter->vlgrp) {
5748                 u16 vid;
5749                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5750                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5751                                 continue;
5752                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5753                 }
5754         }
5755 }
5756
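/*
 * The three VLAN handlers above are not called directly from this file's
 * data path; they are exported through the driver's net_device_ops table
 * and invoked by the 8021q layer when VLAN devices are registered, added
 * or removed.  A sketch of that wiring, for orientation only:
 *
 *	.ndo_vlan_rx_register = igb_vlan_rx_register,
 *	.ndo_vlan_rx_add_vid  = igb_vlan_rx_add_vid,
 *	.ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
 */
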
5757 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5758 {
5759         struct pci_dev *pdev = adapter->pdev;
5760         struct e1000_mac_info *mac = &adapter->hw.mac;
5761
5762         mac->autoneg = 0;
5763
5764         switch (spddplx) {
5765         case SPEED_10 + DUPLEX_HALF:
5766                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5767                 break;
5768         case SPEED_10 + DUPLEX_FULL:
5769                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5770                 break;
5771         case SPEED_100 + DUPLEX_HALF:
5772                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5773                 break;
5774         case SPEED_100 + DUPLEX_FULL:
5775                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5776                 break;
5777         case SPEED_1000 + DUPLEX_FULL:
5778                 mac->autoneg = 1;
5779                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5780                 break;
5781         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5782         default:
5783                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5784                 return -EINVAL;
5785         }
5786         return 0;
5787 }
5788
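/*
 * Caller-side sketch for igb_set_spd_dplx() above (illustration only):
 * speed and duplex are passed as a single sum, so forcing 100 Mb/s full
 * duplex, e.g. from an ethtool set_settings handler, looks like
 *
 *	err = igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL);
 *
 * after which the caller still has to reset the link for the forced
 * setting to take effect.
 */
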
5789 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5790 {
5791         struct net_device *netdev = pci_get_drvdata(pdev);
5792         struct igb_adapter *adapter = netdev_priv(netdev);
5793         struct e1000_hw *hw = &adapter->hw;
5794         u32 ctrl, rctl, status;
5795         u32 wufc = adapter->wol;
5796 #ifdef CONFIG_PM
5797         int retval = 0;
5798 #endif
5799
5800         netif_device_detach(netdev);
5801
5802         if (netif_running(netdev))
5803                 igb_close(netdev);
5804
5805         igb_clear_interrupt_scheme(adapter);
5806
5807 #ifdef CONFIG_PM
5808         retval = pci_save_state(pdev);
5809         if (retval)
5810                 return retval;
5811 #endif
5812
5813         status = rd32(E1000_STATUS);
5814         if (status & E1000_STATUS_LU)
5815                 wufc &= ~E1000_WUFC_LNKC;
5816
5817         if (wufc) {
5818                 igb_setup_rctl(adapter);
5819                 igb_set_rx_mode(netdev);
5820
5821                 /* turn on all-multi mode if wake on multicast is enabled */
5822                 if (wufc & E1000_WUFC_MC) {
5823                         rctl = rd32(E1000_RCTL);
5824                         rctl |= E1000_RCTL_MPE;
5825                         wr32(E1000_RCTL, rctl);
5826                 }
5827
5828                 ctrl = rd32(E1000_CTRL);
5829                 /* advertise wake from D3Cold */
5830                 #define E1000_CTRL_ADVD3WUC 0x00100000
5831                 /* phy power management enable */
5832                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5833                 ctrl |= E1000_CTRL_ADVD3WUC;
5834                 wr32(E1000_CTRL, ctrl);
5835
5836                 /* Allow time for pending master requests to run */
5837                 igb_disable_pcie_master(hw);
5838
5839                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5840                 wr32(E1000_WUFC, wufc);
5841         } else {
5842                 wr32(E1000_WUC, 0);
5843                 wr32(E1000_WUFC, 0);
5844         }
5845
5846         *enable_wake = wufc || adapter->en_mng_pt;
5847         if (!*enable_wake)
5848                 igb_power_down_link(adapter);
5849         else
5850                 igb_power_up_link(adapter);
5851
5852         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5853          * would have already happened in close and is redundant. */
5854         igb_release_hw_control(adapter);
5855
5856         pci_disable_device(pdev);
5857
5858         return 0;
5859 }
5860
5861 #ifdef CONFIG_PM
5862 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5863 {
5864         int retval;
5865         bool wake;
5866
5867         retval = __igb_shutdown(pdev, &wake);
5868         if (retval)
5869                 return retval;
5870
5871         if (wake) {
5872                 pci_prepare_to_sleep(pdev);
5873         } else {
5874                 pci_wake_from_d3(pdev, false);
5875                 pci_set_power_state(pdev, PCI_D3hot);
5876         }
5877
5878         return 0;
5879 }
5880
5881 static int igb_resume(struct pci_dev *pdev)
5882 {
5883         struct net_device *netdev = pci_get_drvdata(pdev);
5884         struct igb_adapter *adapter = netdev_priv(netdev);
5885         struct e1000_hw *hw = &adapter->hw;
5886         u32 err;
5887
5888         pci_set_power_state(pdev, PCI_D0);
5889         pci_restore_state(pdev);
5890         pci_save_state(pdev);
5891
5892         err = pci_enable_device_mem(pdev);
5893         if (err) {
5894                 dev_err(&pdev->dev,
5895                         "igb: Cannot enable PCI device from suspend\n");
5896                 return err;
5897         }
5898         pci_set_master(pdev);
5899
5900         pci_enable_wake(pdev, PCI_D3hot, 0);
5901         pci_enable_wake(pdev, PCI_D3cold, 0);
5902
5903         if (igb_init_interrupt_scheme(adapter)) {
5904                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5905                 return -ENOMEM;
5906         }
5907
5908         igb_reset(adapter);
5909
5910         /* let the f/w know that the h/w is now under the control of the
5911          * driver. */
5912         igb_get_hw_control(adapter);
5913
5914         wr32(E1000_WUS, ~0);
5915
5916         if (netif_running(netdev)) {
5917                 err = igb_open(netdev);
5918                 if (err)
5919                         return err;
5920         }
5921
5922         netif_device_attach(netdev);
5923
5924         return 0;
5925 }
5926 #endif
5927
5928 static void igb_shutdown(struct pci_dev *pdev)
5929 {
5930         bool wake;
5931
5932         __igb_shutdown(pdev, &wake);
5933
5934         if (system_state == SYSTEM_POWER_OFF) {
5935                 pci_wake_from_d3(pdev, wake);
5936                 pci_set_power_state(pdev, PCI_D3hot);
5937         }
5938 }
5939
5940 #ifdef CONFIG_NET_POLL_CONTROLLER
5941 /*
5942  * Polling 'interrupt' - used by things like netconsole to send skbs
5943  * without having to re-enable interrupts. It's not called while
5944  * the interrupt routine is executing.
5945  */
5946 static void igb_netpoll(struct net_device *netdev)
5947 {
5948         struct igb_adapter *adapter = netdev_priv(netdev);
5949         struct e1000_hw *hw = &adapter->hw;
5950         int i;
5951
5952         if (!adapter->msix_entries) {
5953                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5954                 igb_irq_disable(adapter);
5955                 napi_schedule(&q_vector->napi);
5956                 return;
5957         }
5958
5959         for (i = 0; i < adapter->num_q_vectors; i++) {
5960                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5961                 wr32(E1000_EIMC, q_vector->eims_value);
5962                 napi_schedule(&q_vector->napi);
5963         }
5964 }
5965 #endif /* CONFIG_NET_POLL_CONTROLLER */
5966
5967 /**
5968  * igb_io_error_detected - called when PCI error is detected
5969  * @pdev: Pointer to PCI device
5970  * @state: The current pci connection state
5971  *
5972  * This function is called after a PCI bus error affecting
5973  * this device has been detected.
5974  */
5975 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5976                                               pci_channel_state_t state)
5977 {
5978         struct net_device *netdev = pci_get_drvdata(pdev);
5979         struct igb_adapter *adapter = netdev_priv(netdev);
5980
5981         netif_device_detach(netdev);
5982
5983         if (state == pci_channel_io_perm_failure)
5984                 return PCI_ERS_RESULT_DISCONNECT;
5985
5986         if (netif_running(netdev))
5987                 igb_down(adapter);
5988         pci_disable_device(pdev);
5989
5990         /* Request a slot reset. */
5991         return PCI_ERS_RESULT_NEED_RESET;
5992 }
5993
5994 /**
5995  * igb_io_slot_reset - called after the pci bus has been reset.
5996  * @pdev: Pointer to PCI device
5997  *
5998  * Restart the card from scratch, as if from a cold-boot. Implementation
5999  * resembles the first-half of the igb_resume routine.
6000  */
6001 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6002 {
6003         struct net_device *netdev = pci_get_drvdata(pdev);
6004         struct igb_adapter *adapter = netdev_priv(netdev);
6005         struct e1000_hw *hw = &adapter->hw;
6006         pci_ers_result_t result;
6007         int err;
6008
6009         if (pci_enable_device_mem(pdev)) {
6010                 dev_err(&pdev->dev,
6011                         "Cannot re-enable PCI device after reset.\n");
6012                 result = PCI_ERS_RESULT_DISCONNECT;
6013         } else {
6014                 pci_set_master(pdev);
6015                 pci_restore_state(pdev);
6016                 pci_save_state(pdev);
6017
6018                 pci_enable_wake(pdev, PCI_D3hot, 0);
6019                 pci_enable_wake(pdev, PCI_D3cold, 0);
6020
6021                 igb_reset(adapter);
6022                 wr32(E1000_WUS, ~0);
6023                 result = PCI_ERS_RESULT_RECOVERED;
6024         }
6025
6026         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6027         if (err) {
6028                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6029                         "failed 0x%0x\n", err);
6030                 /* non-fatal, continue */
6031         }
6032
6033         return result;
6034 }
6035
6036 /**
6037  * igb_io_resume - called when traffic can start flowing again.
6038  * @pdev: Pointer to PCI device
6039  *
6040  * This callback is called when the error recovery driver tells us that
6041  * it's OK to resume normal operation. Implementation resembles the
6042  * second-half of the igb_resume routine.
6043  */
6044 static void igb_io_resume(struct pci_dev *pdev)
6045 {
6046         struct net_device *netdev = pci_get_drvdata(pdev);
6047         struct igb_adapter *adapter = netdev_priv(netdev);
6048
6049         if (netif_running(netdev)) {
6050                 if (igb_up(adapter)) {
6051                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6052                         return;
6053                 }
6054         }
6055
6056         netif_device_attach(netdev);
6057
6058         /* let the f/w know that the h/w is now under the control of the
6059          * driver. */
6060         igb_get_hw_control(adapter);
6061 }
6062
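/*
 * Sketch of how the three AER callbacks above are tied together.  The
 * variable name below is an assumption; the driver's actual table is
 * declared elsewhere in this file and hooked into its struct pci_driver
 * via .err_handler:
 *
 *	static struct pci_error_handlers igb_err_handler = {
 *		.error_detected = igb_io_error_detected,
 *		.slot_reset     = igb_io_slot_reset,
 *		.resume         = igb_io_resume,
 *	};
 */
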
6063 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6064                              u8 qsel)
6065 {
6066         u32 rar_low, rar_high;
6067         struct e1000_hw *hw = &adapter->hw;
6068
6069         /* HW expects these in little endian so we reverse the byte order
6070          * from network order (big endian) to little endian
6071          */
6072         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6073                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6074         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6075
6076         /* Indicate to hardware the Address is Valid. */
6077         rar_high |= E1000_RAH_AV;
6078
6079         if (hw->mac.type == e1000_82575)
6080                 rar_high |= E1000_RAH_POOL_1 * qsel;
6081         else
6082                 rar_high |= E1000_RAH_POOL_1 << qsel;
6083
6084         wr32(E1000_RAL(index), rar_low);
6085         wrfl();
6086         wr32(E1000_RAH(index), rar_high);
6087         wrfl();
6088 }
6089
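/*
 * Worked example of the byte packing in igb_rar_set_qsel() above, using an
 * address assumed purely for illustration: for 00:11:22:33:44:55,
 *
 *	rar_low  = 0x33221100	(first four octets, reversed into LE)
 *	rar_high = 0x00005544	(last two octets, before the AV/pool bits)
 *
 * so RAL(index) holds the low half of the address and RAH(index) the rest
 * plus the valid and pool-select bits.
 */
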
6090 static int igb_set_vf_mac(struct igb_adapter *adapter,
6091                           int vf, unsigned char *mac_addr)
6092 {
6093         struct e1000_hw *hw = &adapter->hw;
6094         /* VF MAC addresses start at the end of the receive addresses and move
6095          * towards the first; as a result a collision should not be possible */
6096         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6097
6098         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6099
6100         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6101
6102         return 0;
6103 }
6104
6105 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6106 {
6107         struct igb_adapter *adapter = netdev_priv(netdev);
6108         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6109                 return -EINVAL;
6110         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6111         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6112         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6113                                       " change effective.\n");
6114         if (test_bit(__IGB_DOWN, &adapter->state)) {
6115                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6116                          " but the PF device is not up.\n");
6117                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6118                          " attempting to use the VF device.\n");
6119         }
6120         return igb_set_vf_mac(adapter, vf, mac);
6121 }
6122
6123 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6124 {
6125         return -EOPNOTSUPP;
6126 }
6127
6128 static int igb_ndo_get_vf_config(struct net_device *netdev,
6129                                  int vf, struct ifla_vf_info *ivi)
6130 {
6131         struct igb_adapter *adapter = netdev_priv(netdev);
6132         if (vf >= adapter->vfs_allocated_count)
6133                 return -EINVAL;
6134         ivi->vf = vf;
6135         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6136         ivi->tx_rate = 0;
6137         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6138         ivi->qos = adapter->vf_data[vf].pf_qos;
6139         return 0;
6140 }
6141
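/*
 * The VF management hooks above (igb_ndo_set_vf_mac, igb_ndo_set_vf_bw and
 * igb_ndo_get_vf_config) are reached from rtnetlink via the driver's
 * net_device_ops table.  A sketch of that wiring, for orientation only:
 *
 *	.ndo_set_vf_mac     = igb_ndo_set_vf_mac,
 *	.ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
 *	.ndo_get_vf_config  = igb_ndo_get_vf_config,
 */
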
6142 static void igb_vmm_control(struct igb_adapter *adapter)
6143 {
6144         struct e1000_hw *hw = &adapter->hw;
6145         u32 reg;
6146
6147         /* replication is not supported for 82575 */
6148         if (hw->mac.type == e1000_82575)
6149                 return;
6150
6151         /* enable replication vlan tag stripping */
6152         reg = rd32(E1000_RPLOLR);
6153         reg |= E1000_RPLOLR_STRVLAN;
6154         wr32(E1000_RPLOLR, reg);
6155
6156         /* notify HW that the MAC is adding vlan tags */
6157         reg = rd32(E1000_DTXCTL);
6158         reg |= E1000_DTXCTL_VLAN_ADDED;
6159         wr32(E1000_DTXCTL, reg);
6160
6161         if (adapter->vfs_allocated_count) {
6162                 igb_vmdq_set_loopback_pf(hw, true);
6163                 igb_vmdq_set_replication_pf(hw, true);
6164         } else {
6165                 igb_vmdq_set_loopback_pf(hw, false);
6166                 igb_vmdq_set_replication_pf(hw, false);
6167         }
6168 }
6169
6170 /* igb_main.c */