igb: replace the VF clear_to_send with a flags value
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
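/**
 * igb_set_vmolr - set default receive behaviour for a VF/pool
 * @hw: pointer to the HW structure
 * @vfn: index of the pool/VF whose VMOLR register is updated
 *
 * Enables broadcast acceptance, MTA hash matching, untagged packet
 * acceptance and VLAN tag stripping for the given pool.
 **/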
132 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
133 {
134         u32 reg_data;
135
136         reg_data = rd32(E1000_VMOLR(vfn));
137         reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
138                     E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
139                     E1000_VMOLR_AUPE |   /* Accept untagged packets */
140                     E1000_VMOLR_STRVLAN; /* Strip vlan tags */
141         wr32(E1000_VMOLR(vfn), reg_data);
142 }
143
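/**
 * igb_set_vf_rlpml - set the Rx long packet maximum length for a VF/pool
 * @adapter: board private structure
 * @size: maximum frame size to program into VMOLR
 * @vfn: index of the pool/VF to configure
 *
 * If the pool belongs to a VF that has VLANs enabled, the size is grown
 * by VLAN_TAG_SIZE so that tagged frames still fit.
 **/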
144 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
145                                  int vfn)
146 {
147         struct e1000_hw *hw = &adapter->hw;
148         u32 vmolr;
149
150         /* if this is a VF (not the PF) and it has VLANs enabled,
151          * increase the size to make room for the vlan tag */
152         if (vfn < adapter->vfs_allocated_count &&
153             adapter->vf_data[vfn].vlans_enabled)
154                 size += VLAN_TAG_SIZE;
155
156         vmolr = rd32(E1000_VMOLR(vfn));
157         vmolr &= ~E1000_VMOLR_RLPML_MASK;
158         vmolr |= size | E1000_VMOLR_LPE;
159         wr32(E1000_VMOLR(vfn), vmolr);
160
161         return 0;
162 }
163
164 #ifdef CONFIG_PM
165 static int igb_suspend(struct pci_dev *, pm_message_t);
166 static int igb_resume(struct pci_dev *);
167 #endif
168 static void igb_shutdown(struct pci_dev *);
169 #ifdef CONFIG_IGB_DCA
170 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
171 static struct notifier_block dca_notifier = {
172         .notifier_call  = igb_notify_dca,
173         .next           = NULL,
174         .priority       = 0
175 };
176 #endif
177 #ifdef CONFIG_NET_POLL_CONTROLLER
178 /* for netdump / net console */
179 static void igb_netpoll(struct net_device *);
180 #endif
181 #ifdef CONFIG_PCI_IOV
182 static unsigned int max_vfs = 0;
183 module_param(max_vfs, uint, 0);
184 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
185                  "per physical function");
186 #endif /* CONFIG_PCI_IOV */
187
188 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
189                      pci_channel_state_t);
190 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
191 static void igb_io_resume(struct pci_dev *);
192
193 static struct pci_error_handlers igb_err_handler = {
194         .error_detected = igb_io_error_detected,
195         .slot_reset = igb_io_slot_reset,
196         .resume = igb_io_resume,
197 };
198
199
200 static struct pci_driver igb_driver = {
201         .name     = igb_driver_name,
202         .id_table = igb_pci_tbl,
203         .probe    = igb_probe,
204         .remove   = __devexit_p(igb_remove),
205 #ifdef CONFIG_PM
206         /* Power Management Hooks */
207         .suspend  = igb_suspend,
208         .resume   = igb_resume,
209 #endif
210         .shutdown = igb_shutdown,
211         .err_handler = &igb_err_handler
212 };
213
214 static int global_quad_port_a; /* global quad port a indication */
215
216 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218 MODULE_LICENSE("GPL");
219 MODULE_VERSION(DRV_VERSION);
220
221 /**
222  * igb_read_clock - read raw cycle counter (to be used by time counter)
223  */
224 static cycle_t igb_read_clock(const struct cyclecounter *tc)
225 {
226         struct igb_adapter *adapter =
227                 container_of(tc, struct igb_adapter, cycles);
228         struct e1000_hw *hw = &adapter->hw;
229         u64 stamp = 0;
230         int shift = 0;
231
232         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
233         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
234         return stamp;
235 }
236
237 #ifdef DEBUG
238 /**
239  * igb_get_hw_dev_name - return device name string
240  * used by hardware layer to print debugging information
241  **/
242 char *igb_get_hw_dev_name(struct e1000_hw *hw)
243 {
244         struct igb_adapter *adapter = hw->back;
245         return adapter->netdev->name;
246 }
247
248 /**
249  * igb_get_time_str - format current NIC and system time as string
250  */
251 static char *igb_get_time_str(struct igb_adapter *adapter,
252                               char buffer[160])
253 {
254         cycle_t hw = adapter->cycles.read(&adapter->cycles);
255         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
256         struct timespec sys;
257         struct timespec delta;
258         getnstimeofday(&sys);
259
260         delta = timespec_sub(nic, sys);
261
262         sprintf(buffer,
263                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
264                 hw,
265                 (long)nic.tv_sec, nic.tv_nsec,
266                 (long)sys.tv_sec, sys.tv_nsec,
267                 (long)delta.tv_sec, delta.tv_nsec);
268
269         return buffer;
270 }
271 #endif
272
273 /**
274  * igb_init_module - Driver Registration Routine
275  *
276  * igb_init_module is the first routine called when the driver is
277  * loaded. All it does is register with the PCI subsystem.
278  **/
279 static int __init igb_init_module(void)
280 {
281         int ret;
282         printk(KERN_INFO "%s - version %s\n",
283                igb_driver_string, igb_driver_version);
284
285         printk(KERN_INFO "%s\n", igb_copyright);
286
287         global_quad_port_a = 0;
288
289 #ifdef CONFIG_IGB_DCA
290         dca_register_notify(&dca_notifier);
291 #endif
292
293         ret = pci_register_driver(&igb_driver);
294         return ret;
295 }
296
297 module_init(igb_init_module);
298
299 /**
300  * igb_exit_module - Driver Exit Cleanup Routine
301  *
302  * igb_exit_module is called just before the driver is removed
303  * from memory.
304  **/
305 static void __exit igb_exit_module(void)
306 {
307 #ifdef CONFIG_IGB_DCA
308         dca_unregister_notify(&dca_notifier);
309 #endif
310         pci_unregister_driver(&igb_driver);
311 }
312
313 module_exit(igb_exit_module);
314
315 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
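/* Q_IDX_82576(i) maps ring i to a hardware queue index: even rings map to
 * i/2 and odd rings to i/2 + 8, i.e. 0->0, 1->8, 2->1, 3->9, ... */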
316 /**
317  * igb_cache_ring_register - Descriptor ring to register mapping
318  * @adapter: board private structure to initialize
319  *
320  * Once we know the feature-set enabled for the device, we'll cache
321  * the register offset the descriptor ring is assigned to.
322  **/
323 static void igb_cache_ring_register(struct igb_adapter *adapter)
324 {
325         int i;
326         u32 rbase_offset = adapter->vfs_allocated_count;
327
328         switch (adapter->hw.mac.type) {
329         case e1000_82576:
330                 /* The queues are allocated for virtualization such that VF 0
331                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
332                  * In order to avoid collision we start at the first free queue
333                  * and continue consuming queues in the same sequence
334                  */
335                 for (i = 0; i < adapter->num_rx_queues; i++)
336                         adapter->rx_ring[i].reg_idx = rbase_offset +
337                                                       Q_IDX_82576(i);
338                 for (i = 0; i < adapter->num_tx_queues; i++)
339                         adapter->tx_ring[i].reg_idx = rbase_offset +
340                                                       Q_IDX_82576(i);
341                 break;
342         case e1000_82575:
343         default:
344                 for (i = 0; i < adapter->num_rx_queues; i++)
345                         adapter->rx_ring[i].reg_idx = i;
346                 for (i = 0; i < adapter->num_tx_queues; i++)
347                         adapter->tx_ring[i].reg_idx = i;
348                 break;
349         }
350 }
351
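/**
 * igb_free_queues - free memory for all transmit and receive rings
 * @adapter: board private structure
 *
 * Frees the tx/rx ring arrays and resets the queue counts to zero.
 **/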
352 static void igb_free_queues(struct igb_adapter *adapter)
353 {
354         kfree(adapter->tx_ring);
355         kfree(adapter->rx_ring);
356
357         adapter->tx_ring = NULL;
358         adapter->rx_ring = NULL;
359
360         adapter->num_rx_queues = 0;
361         adapter->num_tx_queues = 0;
362 }
363
364 /**
365  * igb_alloc_queues - Allocate memory for all rings
366  * @adapter: board private structure to initialize
367  *
368  * We allocate one ring per queue at run-time since we don't know the
369  * number of queues at compile-time.
370  **/
371 static int igb_alloc_queues(struct igb_adapter *adapter)
372 {
373         int i;
374
375         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
376                                    sizeof(struct igb_ring), GFP_KERNEL);
377         if (!adapter->tx_ring)
378                 goto err;
379
380         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
381                                    sizeof(struct igb_ring), GFP_KERNEL);
382         if (!adapter->rx_ring)
383                 goto err;
384
385         for (i = 0; i < adapter->num_tx_queues; i++) {
386                 struct igb_ring *ring = &(adapter->tx_ring[i]);
387                 ring->count = adapter->tx_ring_count;
388                 ring->queue_index = i;
389                 ring->pdev = adapter->pdev;
390                 ring->netdev = adapter->netdev;
391                 /* For 82575, context index must be unique per ring. */
392                 if (adapter->hw.mac.type == e1000_82575)
393                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
394         }
395
396         for (i = 0; i < adapter->num_rx_queues; i++) {
397                 struct igb_ring *ring = &(adapter->rx_ring[i]);
398                 ring->count = adapter->rx_ring_count;
399                 ring->queue_index = i;
400                 ring->pdev = adapter->pdev;
401                 ring->netdev = adapter->netdev;
402                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
403                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
404                 /* set flag indicating ring supports SCTP checksum offload */
405                 if (adapter->hw.mac.type >= e1000_82576)
406                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
407         }
408
409         igb_cache_ring_register(adapter);
410
411         return 0;
412
413 err:
414         igb_free_queues(adapter);
415
416         return -ENOMEM;
417 }
418
419 #define IGB_N0_QUEUE -1
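/**
 * igb_assign_vector - map a q_vector's rings to an MSI-X vector in hardware
 * @q_vector: interrupt vector whose rx/tx rings are being mapped
 * @msix_vector: hardware MSI-X vector number to assign
 *
 * Programs either the MSIXBM bitmap (82575) or the IVAR table (82576) and
 * records the EIMS value used to enable/mask the vector.
 **/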
420 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
421 {
422         u32 msixbm = 0;
423         struct igb_adapter *adapter = q_vector->adapter;
424         struct e1000_hw *hw = &adapter->hw;
425         u32 ivar, index;
426         int rx_queue = IGB_N0_QUEUE;
427         int tx_queue = IGB_N0_QUEUE;
428
429         if (q_vector->rx_ring)
430                 rx_queue = q_vector->rx_ring->reg_idx;
431         if (q_vector->tx_ring)
432                 tx_queue = q_vector->tx_ring->reg_idx;
433
434         switch (hw->mac.type) {
435         case e1000_82575:
436                 /* The 82575 assigns vectors using a bitmask, which matches the
437                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
438                    or more queues to a vector, we write the appropriate bits
439                    into the MSIXBM register for that vector. */
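                /* e.g. rx queue 1 and tx queue 1 on one vector produce
                 * msixbm = (E1000_EICR_RX_QUEUE0 << 1) |
                 *          (E1000_EICR_TX_QUEUE0 << 1) */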
440                 if (rx_queue > IGB_N0_QUEUE)
441                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
442                 if (tx_queue > IGB_N0_QUEUE)
443                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
444                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
445                 q_vector->eims_value = msixbm;
446                 break;
447         case e1000_82576:
448                 /* 82576 uses a table-based method for assigning vectors.
449                    Each queue has a single entry in the table to which we write
450                    a vector number along with a "valid" bit.  Sadly, the layout
451                    of the table is somewhat counterintuitive. */
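                /* Each entry n of the IVAR array packs four vector fields:
                 *   byte 0 - rx queue n      byte 1 - tx queue n
                 *   byte 2 - rx queue n+8    byte 3 - tx queue n+8 */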
452                 if (rx_queue > IGB_N0_QUEUE) {
453                         index = (rx_queue & 0x7);
454                         ivar = array_rd32(E1000_IVAR0, index);
455                         if (rx_queue < 8) {
456                                 /* vector goes into low byte of register */
457                                 ivar = ivar & 0xFFFFFF00;
458                                 ivar |= msix_vector | E1000_IVAR_VALID;
459                         } else {
460                                 /* vector goes into third byte of register */
461                                 ivar = ivar & 0xFF00FFFF;
462                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
463                         }
464                         array_wr32(E1000_IVAR0, index, ivar);
465                 }
466                 if (tx_queue > IGB_N0_QUEUE) {
467                         index = (tx_queue & 0x7);
468                         ivar = array_rd32(E1000_IVAR0, index);
469                         if (tx_queue < 8) {
470                                 /* vector goes into second byte of register */
471                                 ivar = ivar & 0xFFFF00FF;
472                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
473                         } else {
474                                 /* vector goes into high byte of register */
475                                 ivar = ivar & 0x00FFFFFF;
476                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
477                         }
478                         array_wr32(E1000_IVAR0, index, ivar);
479                 }
480                 q_vector->eims_value = 1 << msix_vector;
481                 break;
482         default:
483                 BUG();
484                 break;
485         }
486 }
487
488 /**
489  * igb_configure_msix - Configure MSI-X hardware
490  *
491  * igb_configure_msix sets up the hardware to properly
492  * generate MSI-X interrupts.
493  **/
494 static void igb_configure_msix(struct igb_adapter *adapter)
495 {
496         u32 tmp;
497         int i, vector = 0;
498         struct e1000_hw *hw = &adapter->hw;
499
500         adapter->eims_enable_mask = 0;
501
502         /* set vector for other causes, i.e. link changes */
503         switch (hw->mac.type) {
504         case e1000_82575:
505                 tmp = rd32(E1000_CTRL_EXT);
506                 /* enable MSI-X PBA support */
507                 tmp |= E1000_CTRL_EXT_PBA_CLR;
508
509                 /* Auto-Mask interrupts upon ICR read. */
510                 tmp |= E1000_CTRL_EXT_EIAME;
511                 tmp |= E1000_CTRL_EXT_IRCA;
512
513                 wr32(E1000_CTRL_EXT, tmp);
514
515                 /* enable msix_other interrupt */
516                 array_wr32(E1000_MSIXBM(0), vector++,
517                                       E1000_EIMS_OTHER);
518                 adapter->eims_other = E1000_EIMS_OTHER;
519
520                 break;
521
522         case e1000_82576:
523                 /* Turn on MSI-X capability first, or our settings
524                  * won't stick.  And it will take days to debug. */
525                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
526                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
527                                 E1000_GPIE_NSICR);
528
529                 /* enable msix_other interrupt */
530                 adapter->eims_other = 1 << vector;
531                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
532
533                 wr32(E1000_IVAR_MISC, tmp);
534                 break;
535         default:
536                 /* do nothing, since nothing else supports MSI-X */
537                 break;
538         } /* switch (hw->mac.type) */
539
540         adapter->eims_enable_mask |= adapter->eims_other;
541
542         for (i = 0; i < adapter->num_q_vectors; i++) {
543                 struct igb_q_vector *q_vector = adapter->q_vector[i];
544                 igb_assign_vector(q_vector, vector++);
545                 adapter->eims_enable_mask |= q_vector->eims_value;
546         }
547
548         wrfl();
549 }
550
551 /**
552  * igb_request_msix - Initialize MSI-X interrupts
553  *
554  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
555  * kernel.
556  **/
557 static int igb_request_msix(struct igb_adapter *adapter)
558 {
559         struct net_device *netdev = adapter->netdev;
560         struct e1000_hw *hw = &adapter->hw;
561         int i, err = 0, vector = 0;
562
563         err = request_irq(adapter->msix_entries[vector].vector,
564                           &igb_msix_other, 0, netdev->name, adapter);
565         if (err)
566                 goto out;
567         vector++;
568
569         for (i = 0; i < adapter->num_q_vectors; i++) {
570                 struct igb_q_vector *q_vector = adapter->q_vector[i];
571
572                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
573
574                 if (q_vector->rx_ring && q_vector->tx_ring)
575                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
576                                 q_vector->rx_ring->queue_index);
577                 else if (q_vector->tx_ring)
578                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
579                                 q_vector->tx_ring->queue_index);
580                 else if (q_vector->rx_ring)
581                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
582                                 q_vector->rx_ring->queue_index);
583                 else
584                         sprintf(q_vector->name, "%s-unused", netdev->name);
585
586                 err = request_irq(adapter->msix_entries[vector].vector,
587                                   &igb_msix_ring, 0, q_vector->name,
588                                   q_vector);
589                 if (err)
590                         goto out;
591                 vector++;
592         }
593
594         igb_configure_msix(adapter);
595         return 0;
596 out:
597         return err;
598 }
599
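/**
 * igb_reset_interrupt_capability - back out of MSI-X or MSI
 * @adapter: board private structure
 *
 * Disables MSI-X and frees the entry table if MSI-X was in use, otherwise
 * disables MSI if it had been enabled.
 **/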
600 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
601 {
602         if (adapter->msix_entries) {
603                 pci_disable_msix(adapter->pdev);
604                 kfree(adapter->msix_entries);
605                 adapter->msix_entries = NULL;
606         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
607                 pci_disable_msi(adapter->pdev);
608         }
609 }
610
611 /**
612  * igb_free_q_vectors - Free memory allocated for interrupt vectors
613  * @adapter: board private structure to initialize
614  *
615  * This function frees the memory allocated to the q_vectors.  In addition if
616  * NAPI is enabled it will delete any references to the NAPI struct prior
617  * to freeing the q_vector.
618  **/
619 static void igb_free_q_vectors(struct igb_adapter *adapter)
620 {
621         int v_idx;
622
623         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
624                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
625                 adapter->q_vector[v_idx] = NULL;
626                 netif_napi_del(&q_vector->napi);
627                 kfree(q_vector);
628         }
629         adapter->num_q_vectors = 0;
630 }
631
632 /**
633  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
634  *
635  * This function resets the device so that it has 0 rx queues, tx queues, and
636  * MSI-X interrupts allocated.
637  */
638 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
639 {
640         igb_free_queues(adapter);
641         igb_free_q_vectors(adapter);
642         igb_reset_interrupt_capability(adapter);
643 }
644
645 /**
646  * igb_set_interrupt_capability - set MSI or MSI-X if supported
647  *
648  * Attempt to configure interrupts using the best available
649  * capabilities of the hardware and kernel.
650  **/
651 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
652 {
653         int err;
654         int numvecs, i;
655
656         /* Number of supported queues. */
657         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
658         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
659
660         /* start with one vector for every rx queue */
661         numvecs = adapter->num_rx_queues;
662
663         /* if tx handler is separate add 1 for every tx queue */
664         numvecs += adapter->num_tx_queues;
665
666         /* store the number of vectors reserved for queues */
667         adapter->num_q_vectors = numvecs;
668
669         /* add 1 vector for link status interrupts */
670         numvecs++;
671         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
672                                         GFP_KERNEL);
673         if (!adapter->msix_entries)
674                 goto msi_only;
675
676         for (i = 0; i < numvecs; i++)
677                 adapter->msix_entries[i].entry = i;
678
679         err = pci_enable_msix(adapter->pdev,
680                               adapter->msix_entries,
681                               numvecs);
682         if (err == 0)
683                 goto out;
684
685         igb_reset_interrupt_capability(adapter);
686
687         /* If we can't do MSI-X, try MSI */
688 msi_only:
689 #ifdef CONFIG_PCI_IOV
690         /* disable SR-IOV for non MSI-X configurations */
691         if (adapter->vf_data) {
692                 struct e1000_hw *hw = &adapter->hw;
693                 /* disable iov and allow time for transactions to clear */
694                 pci_disable_sriov(adapter->pdev);
695                 msleep(500);
696
697                 kfree(adapter->vf_data);
698                 adapter->vf_data = NULL;
699                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
700                 msleep(100);
701                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
702         }
703 #endif
704         adapter->vfs_allocated_count = 0;
705         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
706         adapter->num_rx_queues = 1;
707         adapter->num_tx_queues = 1;
708         adapter->num_q_vectors = 1;
709         if (!pci_enable_msi(adapter->pdev))
710                 adapter->flags |= IGB_FLAG_HAS_MSI;
711 out:
712         /* Notify the stack of the (possibly) reduced Tx Queue count. */
713         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
714         return;
715 }
716
717 /**
718  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
719  * @adapter: board private structure to initialize
720  *
721  * We allocate one q_vector per queue interrupt.  If allocation fails we
722  * return -ENOMEM.
723  **/
724 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
725 {
726         struct igb_q_vector *q_vector;
727         struct e1000_hw *hw = &adapter->hw;
728         int v_idx;
729
730         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
731                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
732                 if (!q_vector)
733                         goto err_out;
734                 q_vector->adapter = adapter;
735                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
736                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
737                 q_vector->itr_val = IGB_START_ITR;
738                 q_vector->set_itr = 1;
739                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
740                 adapter->q_vector[v_idx] = q_vector;
741         }
742         return 0;
743
744 err_out:
745         while (v_idx) {
746                 v_idx--;
747                 q_vector = adapter->q_vector[v_idx];
748                 netif_napi_del(&q_vector->napi);
749                 kfree(q_vector);
750                 adapter->q_vector[v_idx] = NULL;
751         }
752         return -ENOMEM;
753 }
754
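/**
 * igb_map_rx_ring_to_vector - associate an rx ring with a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the rx ring to map
 * @v_idx: index of the q_vector that will service the ring
 *
 * Also seeds the vector's ITR value from rx_itr_setting, substituting
 * IGB_START_ITR for the special control values 1-3.
 **/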
755 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
756                                       int ring_idx, int v_idx)
757 {
758         struct igb_q_vector *q_vector;
759
760         q_vector = adapter->q_vector[v_idx];
761         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
762         q_vector->rx_ring->q_vector = q_vector;
763         q_vector->itr_val = adapter->rx_itr_setting;
764         if (q_vector->itr_val && q_vector->itr_val <= 3)
765                 q_vector->itr_val = IGB_START_ITR;
766 }
767
768 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
769                                       int ring_idx, int v_idx)
770 {
771         struct igb_q_vector *q_vector;
772
773         q_vector = adapter->q_vector[v_idx];
774         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
775         q_vector->tx_ring->q_vector = q_vector;
776         q_vector->itr_val = adapter->tx_itr_setting;
777         if (q_vector->itr_val && q_vector->itr_val <= 3)
778                 q_vector->itr_val = IGB_START_ITR;
779 }
780
781 /**
782  * igb_map_ring_to_vector - maps allocated queues to vectors
783  *
784  * This function maps the recently allocated queues to vectors.
785  **/
786 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
787 {
788         int i;
789         int v_idx = 0;
790
791         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
792             (adapter->num_q_vectors < adapter->num_tx_queues))
793                 return -ENOMEM;
794
795         if (adapter->num_q_vectors >=
796             (adapter->num_rx_queues + adapter->num_tx_queues)) {
797                 for (i = 0; i < adapter->num_rx_queues; i++)
798                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
799                 for (i = 0; i < adapter->num_tx_queues; i++)
800                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
801         } else {
802                 for (i = 0; i < adapter->num_rx_queues; i++) {
803                         if (i < adapter->num_tx_queues)
804                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
805                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
806                 }
807                 for (; i < adapter->num_tx_queues; i++)
808                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
809         }
810         return 0;
811 }
812
813 /**
814  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
815  *
816  * This function initializes the interrupts and allocates all of the queues.
817  **/
818 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
819 {
820         struct pci_dev *pdev = adapter->pdev;
821         int err;
822
823         igb_set_interrupt_capability(adapter);
824
825         err = igb_alloc_q_vectors(adapter);
826         if (err) {
827                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
828                 goto err_alloc_q_vectors;
829         }
830
831         err = igb_alloc_queues(adapter);
832         if (err) {
833                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
834                 goto err_alloc_queues;
835         }
836
837         err = igb_map_ring_to_vector(adapter);
838         if (err) {
839                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
840                 goto err_map_queues;
841         }
842
843
844         return 0;
845 err_map_queues:
846         igb_free_queues(adapter);
847 err_alloc_queues:
848         igb_free_q_vectors(adapter);
849 err_alloc_q_vectors:
850         igb_reset_interrupt_capability(adapter);
851         return err;
852 }
853
854 /**
855  * igb_request_irq - initialize interrupts
856  *
857  * Attempts to configure interrupts using the best available
858  * capabilities of the hardware and kernel.
859  **/
860 static int igb_request_irq(struct igb_adapter *adapter)
861 {
862         struct net_device *netdev = adapter->netdev;
863         struct pci_dev *pdev = adapter->pdev;
864         struct e1000_hw *hw = &adapter->hw;
865         int err = 0;
866
867         if (adapter->msix_entries) {
868                 err = igb_request_msix(adapter);
869                 if (!err)
870                         goto request_done;
871                 /* fall back to MSI */
872                 igb_clear_interrupt_scheme(adapter);
873                 if (!pci_enable_msi(adapter->pdev))
874                         adapter->flags |= IGB_FLAG_HAS_MSI;
875                 igb_free_all_tx_resources(adapter);
876                 igb_free_all_rx_resources(adapter);
877                 adapter->num_tx_queues = 1;
878                 adapter->num_rx_queues = 1;
879                 adapter->num_q_vectors = 1;
880                 err = igb_alloc_q_vectors(adapter);
881                 if (err) {
882                         dev_err(&pdev->dev,
883                                 "Unable to allocate memory for vectors\n");
884                         goto request_done;
885                 }
886                 err = igb_alloc_queues(adapter);
887                 if (err) {
888                         dev_err(&pdev->dev,
889                                 "Unable to allocate memory for queues\n");
890                         igb_free_q_vectors(adapter);
891                         goto request_done;
892                 }
893                 igb_setup_all_tx_resources(adapter);
894                 igb_setup_all_rx_resources(adapter);
895         } else {
896                 switch (hw->mac.type) {
897                 case e1000_82575:
898                         wr32(E1000_MSIXBM(0),
899                              (E1000_EICR_RX_QUEUE0 |
900                               E1000_EICR_TX_QUEUE0 |
901                               E1000_EIMS_OTHER));
902                         break;
903                 case e1000_82576:
904                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
905                         break;
906                 default:
907                         break;
908                 }
909         }
910
911         if (adapter->flags & IGB_FLAG_HAS_MSI) {
912                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
913                                   netdev->name, adapter);
914                 if (!err)
915                         goto request_done;
916
917                 /* fall back to legacy interrupts */
918                 igb_reset_interrupt_capability(adapter);
919                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
920         }
921
922         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
923                           netdev->name, adapter);
924
925         if (err)
926                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
927                         err);
928
929 request_done:
930         return err;
931 }
932
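/**
 * igb_free_irq - free the IRQs requested by igb_request_irq
 * @adapter: board private structure
 *
 * Releases the "other" vector and one vector per q_vector when MSI-X is
 * in use, otherwise releases the single MSI/legacy interrupt.
 **/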
933 static void igb_free_irq(struct igb_adapter *adapter)
934 {
935         if (adapter->msix_entries) {
936                 int vector = 0, i;
937
938                 free_irq(adapter->msix_entries[vector++].vector, adapter);
939
940                 for (i = 0; i < adapter->num_q_vectors; i++) {
941                         struct igb_q_vector *q_vector = adapter->q_vector[i];
942                         free_irq(adapter->msix_entries[vector++].vector,
943                                  q_vector);
944                 }
945         } else {
946                 free_irq(adapter->pdev->irq, adapter);
947         }
948 }
949
950 /**
951  * igb_irq_disable - Mask off interrupt generation on the NIC
952  * @adapter: board private structure
953  **/
954 static void igb_irq_disable(struct igb_adapter *adapter)
955 {
956         struct e1000_hw *hw = &adapter->hw;
957
958         if (adapter->msix_entries) {
959                 u32 regval = rd32(E1000_EIAM);
960                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
961                 wr32(E1000_EIMC, adapter->eims_enable_mask);
962                 regval = rd32(E1000_EIAC);
963                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
964         }
965
966         wr32(E1000_IAM, 0);
967         wr32(E1000_IMC, ~0);
968         wrfl();
969         synchronize_irq(adapter->pdev->irq);
970 }
971
972 /**
973  * igb_irq_enable - Enable default interrupt generation settings
974  * @adapter: board private structure
975  **/
976 static void igb_irq_enable(struct igb_adapter *adapter)
977 {
978         struct e1000_hw *hw = &adapter->hw;
979
980         if (adapter->msix_entries) {
981                 u32 regval = rd32(E1000_EIAC);
982                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
983                 regval = rd32(E1000_EIAM);
984                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
985                 wr32(E1000_EIMS, adapter->eims_enable_mask);
986                 if (adapter->vfs_allocated_count)
987                         wr32(E1000_MBVFIMR, 0xFF);
988                 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
989                                  E1000_IMS_DOUTSYNC));
990         } else {
991                 wr32(E1000_IMS, IMS_ENABLE_MASK);
992                 wr32(E1000_IAM, IMS_ENABLE_MASK);
993         }
994 }
995
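/**
 * igb_update_mng_vlan - track the manageability VLAN id in the vlan filter
 * @adapter: board private structure
 *
 * Adds the firmware manageability VLAN id when the DHCP cookie requires it
 * and removes a previously tracked id that is no longer needed.
 **/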
996 static void igb_update_mng_vlan(struct igb_adapter *adapter)
997 {
998         struct net_device *netdev = adapter->netdev;
999         u16 vid = adapter->hw.mng_cookie.vlan_id;
1000         u16 old_vid = adapter->mng_vlan_id;
1001         if (adapter->vlgrp) {
1002                 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1003                         if (adapter->hw.mng_cookie.status &
1004                                 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1005                                 igb_vlan_rx_add_vid(netdev, vid);
1006                                 adapter->mng_vlan_id = vid;
1007                         } else
1008                                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1009
1010                         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1011                                         (vid != old_vid) &&
1012                             !vlan_group_get_device(adapter->vlgrp, old_vid))
1013                                 igb_vlan_rx_kill_vid(netdev, old_vid);
1014                 } else
1015                         adapter->mng_vlan_id = vid;
1016         }
1017 }
1018
1019 /**
1020  * igb_release_hw_control - release control of the h/w to f/w
1021  * @adapter: address of board private structure
1022  *
1023  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1024  * For ASF and Pass Through versions of f/w this means that the
1025  * driver is no longer loaded.
1026  *
1027  **/
1028 static void igb_release_hw_control(struct igb_adapter *adapter)
1029 {
1030         struct e1000_hw *hw = &adapter->hw;
1031         u32 ctrl_ext;
1032
1033         /* Let firmware take over control of h/w */
1034         ctrl_ext = rd32(E1000_CTRL_EXT);
1035         wr32(E1000_CTRL_EXT,
1036                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1037 }
1038
1039
1040 /**
1041  * igb_get_hw_control - get control of the h/w from f/w
1042  * @adapter: address of board private structure
1043  *
1044  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1045  * For ASF and Pass Through versions of f/w this means that
1046  * the driver is loaded.
1047  *
1048  **/
1049 static void igb_get_hw_control(struct igb_adapter *adapter)
1050 {
1051         struct e1000_hw *hw = &adapter->hw;
1052         u32 ctrl_ext;
1053
1054         /* Let firmware know the driver has taken over */
1055         ctrl_ext = rd32(E1000_CTRL_EXT);
1056         wr32(E1000_CTRL_EXT,
1057                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1058 }
1059
1060 /**
1061  * igb_configure - configure the hardware for RX and TX
1062  * @adapter: private board structure
1063  **/
1064 static void igb_configure(struct igb_adapter *adapter)
1065 {
1066         struct net_device *netdev = adapter->netdev;
1067         int i;
1068
1069         igb_get_hw_control(adapter);
1070         igb_set_rx_mode(netdev);
1071
1072         igb_restore_vlan(adapter);
1073
1074         igb_setup_tctl(adapter);
1075         igb_setup_mrqc(adapter);
1076         igb_setup_rctl(adapter);
1077
1078         igb_configure_tx(adapter);
1079         igb_configure_rx(adapter);
1080
1081         igb_rx_fifo_flush_82575(&adapter->hw);
1082
1083         /* call igb_desc_unused which always leaves
1084          * at least 1 descriptor unused to make sure
1085          * next_to_use != next_to_clean */
1086         for (i = 0; i < adapter->num_rx_queues; i++) {
1087                 struct igb_ring *ring = &adapter->rx_ring[i];
1088                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1089         }
1090
1091
1092         adapter->tx_queue_len = netdev->tx_queue_len;
1093 }
1094
1095
1096 /**
1097  * igb_up - Open the interface and prepare it to handle traffic
1098  * @adapter: board private structure
1099  **/
1100
1101 int igb_up(struct igb_adapter *adapter)
1102 {
1103         struct e1000_hw *hw = &adapter->hw;
1104         int i;
1105
1106         /* hardware has been reset, we need to reload some things */
1107         igb_configure(adapter);
1108
1109         clear_bit(__IGB_DOWN, &adapter->state);
1110
1111         for (i = 0; i < adapter->num_q_vectors; i++) {
1112                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1113                 napi_enable(&q_vector->napi);
1114         }
1115         if (adapter->msix_entries)
1116                 igb_configure_msix(adapter);
1117
1118         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1119
1120         /* Clear any pending interrupts. */
1121         rd32(E1000_ICR);
1122         igb_irq_enable(adapter);
1123
1124         /* notify VFs that reset has been completed */
1125         if (adapter->vfs_allocated_count) {
1126                 u32 reg_data = rd32(E1000_CTRL_EXT);
1127                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1128                 wr32(E1000_CTRL_EXT, reg_data);
1129         }
1130
1131         netif_tx_start_all_queues(adapter->netdev);
1132
1133         /* Fire a link change interrupt to start the watchdog. */
1134         wr32(E1000_ICS, E1000_ICS_LSC);
1135         return 0;
1136 }
1137
1138 void igb_down(struct igb_adapter *adapter)
1139 {
1140         struct e1000_hw *hw = &adapter->hw;
1141         struct net_device *netdev = adapter->netdev;
1142         u32 tctl, rctl;
1143         int i;
1144
1145         /* signal that we're down so the interrupt handler does not
1146          * reschedule our watchdog timer */
1147         set_bit(__IGB_DOWN, &adapter->state);
1148
1149         /* disable receives in the hardware */
1150         rctl = rd32(E1000_RCTL);
1151         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1152         /* flush and sleep below */
1153
1154         netif_tx_stop_all_queues(netdev);
1155
1156         /* disable transmits in the hardware */
1157         tctl = rd32(E1000_TCTL);
1158         tctl &= ~E1000_TCTL_EN;
1159         wr32(E1000_TCTL, tctl);
1160         /* flush both disables and wait for them to finish */
1161         wrfl();
1162         msleep(10);
1163
1164         for (i = 0; i < adapter->num_q_vectors; i++) {
1165                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1166                 napi_disable(&q_vector->napi);
1167         }
1168
1169         igb_irq_disable(adapter);
1170
1171         del_timer_sync(&adapter->watchdog_timer);
1172         del_timer_sync(&adapter->phy_info_timer);
1173
1174         netdev->tx_queue_len = adapter->tx_queue_len;
1175         netif_carrier_off(netdev);
1176
1177         /* record the stats before reset */
1178         igb_update_stats(adapter);
1179
1180         adapter->link_speed = 0;
1181         adapter->link_duplex = 0;
1182
1183         if (!pci_channel_offline(adapter->pdev))
1184                 igb_reset(adapter);
1185         igb_clean_all_tx_rings(adapter);
1186         igb_clean_all_rx_rings(adapter);
1187 #ifdef CONFIG_IGB_DCA
1188
1189         /* since we reset the hardware DCA settings were cleared */
1190         igb_setup_dca(adapter);
1191 #endif
1192 }
1193
1194 void igb_reinit_locked(struct igb_adapter *adapter)
1195 {
1196         WARN_ON(in_interrupt());
1197         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1198                 msleep(1);
1199         igb_down(adapter);
1200         igb_up(adapter);
1201         clear_bit(__IGB_RESETTING, &adapter->state);
1202 }
1203
1204 void igb_reset(struct igb_adapter *adapter)
1205 {
1206         struct e1000_hw *hw = &adapter->hw;
1207         struct e1000_mac_info *mac = &hw->mac;
1208         struct e1000_fc_info *fc = &hw->fc;
1209         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1210         u16 hwm;
1211
1212         /* Repartition Pba for greater than 9k mtu
1213          * To take effect CTRL.RST is required.
1214          */
1215         switch (mac->type) {
1216         case e1000_82576:
1217                 pba = rd32(E1000_RXPBS);
1218                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1219                 break;
1220         case e1000_82575:
1221         default:
1222                 pba = E1000_PBA_34K;
1223                 break;
1224         }
1225
1226         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1227             (mac->type < e1000_82576)) {
1228                 /* adjust PBA for jumbo frames */
1229                 wr32(E1000_PBA, pba);
1230
1231                 /* To maintain wire speed transmits, the Tx FIFO should be
1232                  * large enough to accommodate two full transmit packets,
1233                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1234                  * the Rx FIFO should be large enough to accommodate at least
1235                  * one full receive packet and is similarly rounded up and
1236                  * expressed in KB. */
1237                 pba = rd32(E1000_PBA);
1238                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1239                 tx_space = pba >> 16;
1240                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1241                 pba &= 0xffff;
1242                 /* the Tx FIFO also stores 16 bytes of information per packet,
1243                  * but don't count the Ethernet FCS because hardware appends it */
1244                 min_tx_space = (adapter->max_frame_size +
1245                                 sizeof(union e1000_adv_tx_desc) -
1246                                 ETH_FCS_LEN) * 2;
1247                 min_tx_space = ALIGN(min_tx_space, 1024);
1248                 min_tx_space >>= 10;
1249                 /* software strips receive CRC, so leave room for it */
1250                 min_rx_space = adapter->max_frame_size;
1251                 min_rx_space = ALIGN(min_rx_space, 1024);
1252                 min_rx_space >>= 10;
1253
1254                 /* If current Tx allocation is less than the min Tx FIFO size,
1255                  * and the min Tx FIFO size is less than the current Rx FIFO
1256                  * allocation, take space away from current Rx allocation */
1257                 if (tx_space < min_tx_space &&
1258                     ((min_tx_space - tx_space) < pba)) {
1259                         pba = pba - (min_tx_space - tx_space);
1260
1261                         /* if short on rx space, rx wins and must trump tx
1262                          * adjustment */
1263                         if (pba < min_rx_space)
1264                                 pba = min_rx_space;
1265                 }
1266                 wr32(E1000_PBA, pba);
1267         }
1268
1269         /* flow control settings */
1270         /* The high water mark must be low enough to fit one full frame
1271          * (or the size used for early receive) above it in the Rx FIFO.
1272          * Set it to the lower of:
1273          * - 90% of the Rx FIFO size, or
1274          * - the full Rx FIFO size minus one full frame */
1275         hwm = min(((pba << 10) * 9 / 10),
1276                         ((pba << 10) - 2 * adapter->max_frame_size));
1277
1278         if (mac->type < e1000_82576) {
1279                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1280                 fc->low_water = fc->high_water - 8;
1281         } else {
1282                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1283                 fc->low_water = fc->high_water - 16;
1284         }
1285         fc->pause_time = 0xFFFF;
1286         fc->send_xon = 1;
1287         fc->current_mode = fc->requested_mode;
1288
1289         /* clear VF state, ping the VFs and disable their transmits and receives */
1290         if (adapter->vfs_allocated_count) {
1291                 int i;
1292                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1293                         adapter->vf_data[i].flags = 0;
1294
1295                 /* ping all the active vfs to let them know we are going down */
1296                 igb_ping_all_vfs(adapter);
1297
1298                 /* disable transmits and receives */
1299                 wr32(E1000_VFRE, 0);
1300                 wr32(E1000_VFTE, 0);
1301         }
1302
1303         /* Allow time for pending master requests to run */
1304         adapter->hw.mac.ops.reset_hw(&adapter->hw);
1305         wr32(E1000_WUC, 0);
1306
1307         if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1308                 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1309
1310         igb_update_mng_vlan(adapter);
1311
1312         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1313         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1314
1315         igb_reset_adaptive(&adapter->hw);
1316         igb_get_phy_info(&adapter->hw);
1317 }
1318
1319 static const struct net_device_ops igb_netdev_ops = {
1320         .ndo_open               = igb_open,
1321         .ndo_stop               = igb_close,
1322         .ndo_start_xmit         = igb_xmit_frame_adv,
1323         .ndo_get_stats          = igb_get_stats,
1324         .ndo_set_rx_mode        = igb_set_rx_mode,
1325         .ndo_set_multicast_list = igb_set_rx_mode,
1326         .ndo_set_mac_address    = igb_set_mac,
1327         .ndo_change_mtu         = igb_change_mtu,
1328         .ndo_do_ioctl           = igb_ioctl,
1329         .ndo_tx_timeout         = igb_tx_timeout,
1330         .ndo_validate_addr      = eth_validate_addr,
1331         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1332         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1333         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1334 #ifdef CONFIG_NET_POLL_CONTROLLER
1335         .ndo_poll_controller    = igb_netpoll,
1336 #endif
1337 };
1338
1339 /**
1340  * igb_probe - Device Initialization Routine
1341  * @pdev: PCI device information struct
1342  * @ent: entry in igb_pci_tbl
1343  *
1344  * Returns 0 on success, negative on failure
1345  *
1346  * igb_probe initializes an adapter identified by a pci_dev structure.
1347  * The OS initialization, configuring of the adapter private structure,
1348  * and a hardware reset occur.
1349  **/
1350 static int __devinit igb_probe(struct pci_dev *pdev,
1351                                const struct pci_device_id *ent)
1352 {
1353         struct net_device *netdev;
1354         struct igb_adapter *adapter;
1355         struct e1000_hw *hw;
1356         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1357         unsigned long mmio_start, mmio_len;
1358         int err, pci_using_dac;
1359         u16 eeprom_data = 0;
1360         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1361         u32 part_num;
1362
1363         err = pci_enable_device_mem(pdev);
1364         if (err)
1365                 return err;
1366
1367         pci_using_dac = 0;
1368         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1369         if (!err) {
1370                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1371                 if (!err)
1372                         pci_using_dac = 1;
1373         } else {
1374                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1375                 if (err) {
1376                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1377                         if (err) {
1378                                 dev_err(&pdev->dev, "No usable DMA "
1379                                         "configuration, aborting\n");
1380                                 goto err_dma;
1381                         }
1382                 }
1383         }
1384
1385         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1386                                            IORESOURCE_MEM),
1387                                            igb_driver_name);
1388         if (err)
1389                 goto err_pci_reg;
1390
1391         pci_enable_pcie_error_reporting(pdev);
1392
1393         pci_set_master(pdev);
1394         pci_save_state(pdev);
1395
1396         err = -ENOMEM;
1397         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1398                                    IGB_ABS_MAX_TX_QUEUES);
1399         if (!netdev)
1400                 goto err_alloc_etherdev;
1401
1402         SET_NETDEV_DEV(netdev, &pdev->dev);
1403
1404         pci_set_drvdata(pdev, netdev);
1405         adapter = netdev_priv(netdev);
1406         adapter->netdev = netdev;
1407         adapter->pdev = pdev;
1408         hw = &adapter->hw;
1409         hw->back = adapter;
1410         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1411
1412         mmio_start = pci_resource_start(pdev, 0);
1413         mmio_len = pci_resource_len(pdev, 0);
1414
1415         err = -EIO;
1416         hw->hw_addr = ioremap(mmio_start, mmio_len);
1417         if (!hw->hw_addr)
1418                 goto err_ioremap;
1419
1420         netdev->netdev_ops = &igb_netdev_ops;
1421         igb_set_ethtool_ops(netdev);
1422         netdev->watchdog_timeo = 5 * HZ;
1423
1424         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1425
1426         netdev->mem_start = mmio_start;
1427         netdev->mem_end = mmio_start + mmio_len;
1428
1429         /* PCI config space info */
1430         hw->vendor_id = pdev->vendor;
1431         hw->device_id = pdev->device;
1432         hw->revision_id = pdev->revision;
1433         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1434         hw->subsystem_device_id = pdev->subsystem_device;
1435
1436         /* setup the private structure */
1437         hw->back = adapter;
1438         /* Copy the default MAC, PHY and NVM function pointers */
1439         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1440         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1441         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1442         /* Initialize skew-specific constants */
1443         err = ei->get_invariants(hw);
1444         if (err)
1445                 goto err_sw_init;
1446
1447 #ifdef CONFIG_PCI_IOV
1448         /* since iov functionality isn't critical to base device function, we
1449          * can accept failure.  If it fails, we don't allow iov to be enabled */
1450         if (hw->mac.type == e1000_82576) {
1451                 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1452                 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1453                 int i;
1454                 unsigned char mac_addr[ETH_ALEN];
1455
1456                 if (num_vfs) {
1457                         adapter->vf_data = kcalloc(num_vfs,
1458                                                 sizeof(struct vf_data_storage),
1459                                                 GFP_KERNEL);
1460                         if (!adapter->vf_data) {
1461                                 dev_err(&pdev->dev,
1462                                         "Could not allocate VF private data - "
1463                                         "IOV enable failed\n");
1464                         } else {
1465                                 err = pci_enable_sriov(pdev, num_vfs);
1466                                 if (!err) {
1467                                         adapter->vfs_allocated_count = num_vfs;
1468                                         dev_info(&pdev->dev,
1469                                                  "%d vfs allocated\n",
1470                                                  num_vfs);
1471                                         for (i = 0;
1472                                              i < adapter->vfs_allocated_count;
1473                                              i++) {
1474                                                 random_ether_addr(mac_addr);
1475                                                 igb_set_vf_mac(adapter, i,
1476                                                                mac_addr);
1477                                         }
1478                                 } else {
1479                                         kfree(adapter->vf_data);
1480                                         adapter->vf_data = NULL;
1481                                 }
1482                         }
1483                 }
1484         }
1485
1486 #endif
1487         /* setup the private structure */
1488         err = igb_sw_init(adapter);
1489         if (err)
1490                 goto err_sw_init;
1491
1492         igb_get_bus_info_pcie(hw);
1493
1494         hw->phy.autoneg_wait_to_complete = false;
1495         hw->mac.adaptive_ifs = true;
1496
1497         /* Copper options */
1498         if (hw->phy.media_type == e1000_media_type_copper) {
1499                 hw->phy.mdix = AUTO_ALL_MODES;
1500                 hw->phy.disable_polarity_correction = false;
1501                 hw->phy.ms_type = e1000_ms_hw_default;
1502         }
1503
1504         if (igb_check_reset_block(hw))
1505                 dev_info(&pdev->dev,
1506                         "PHY reset is blocked due to SOL/IDER session.\n");
1507
1508         netdev->features = NETIF_F_SG |
1509                            NETIF_F_IP_CSUM |
1510                            NETIF_F_HW_VLAN_TX |
1511                            NETIF_F_HW_VLAN_RX |
1512                            NETIF_F_HW_VLAN_FILTER;
1513
1514         netdev->features |= NETIF_F_IPV6_CSUM;
1515         netdev->features |= NETIF_F_TSO;
1516         netdev->features |= NETIF_F_TSO6;
1517
1518         netdev->features |= NETIF_F_GRO;
1519
1520         netdev->vlan_features |= NETIF_F_TSO;
1521         netdev->vlan_features |= NETIF_F_TSO6;
1522         netdev->vlan_features |= NETIF_F_IP_CSUM;
1523         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1524         netdev->vlan_features |= NETIF_F_SG;
1525
1526         if (pci_using_dac)
1527                 netdev->features |= NETIF_F_HIGHDMA;
1528
1529         if (adapter->hw.mac.type == e1000_82576)
1530                 netdev->features |= NETIF_F_SCTP_CSUM;
1531
1532         adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1533
1534         /* before reading the NVM, reset the controller to put the device in a
1535          * known good starting state */
1536         hw->mac.ops.reset_hw(hw);
1537
1538         /* make sure the NVM is good */
1539         if (igb_validate_nvm_checksum(hw) < 0) {
1540                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1541                 err = -EIO;
1542                 goto err_eeprom;
1543         }
1544
1545         /* copy the MAC address out of the NVM */
1546         if (hw->mac.ops.read_mac_addr(hw))
1547                 dev_err(&pdev->dev, "NVM Read Error\n");
1548
1549         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1550         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1551
1552         if (!is_valid_ether_addr(netdev->perm_addr)) {
1553                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1554                 err = -EIO;
1555                 goto err_eeprom;
1556         }
1557
1558         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1559                     (unsigned long) adapter);
1560         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1561                     (unsigned long) adapter);
1562
1563         INIT_WORK(&adapter->reset_task, igb_reset_task);
1564         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1565
1566         /* Initialize link properties that are user-changeable */
1567         adapter->fc_autoneg = true;
1568         hw->mac.autoneg = true;
1569         hw->phy.autoneg_advertised = 0x2f;
1570
1571         hw->fc.requested_mode = e1000_fc_default;
1572         hw->fc.current_mode = e1000_fc_default;
1573
1574         igb_validate_mdi_setting(hw);
1575
1576         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1577          * enable the ACPI Magic Packet filter
1578          */
1579
1580         if (hw->bus.func == 0)
1581                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1582         else if (hw->bus.func == 1)
1583                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1584
1585         if (eeprom_data & eeprom_apme_mask)
1586                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1587
1588         /* now that we have the eeprom settings, apply the special cases where
1589          * the eeprom may be wrong or the board simply won't support wake on
1590          * lan on a particular port */
1591         switch (pdev->device) {
1592         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1593                 adapter->eeprom_wol = 0;
1594                 break;
1595         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1596         case E1000_DEV_ID_82576_FIBER:
1597         case E1000_DEV_ID_82576_SERDES:
1598                 /* Wake events only supported on port A for dual fiber
1599                  * regardless of eeprom setting */
1600                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1601                         adapter->eeprom_wol = 0;
1602                 break;
1603         case E1000_DEV_ID_82576_QUAD_COPPER:
1604                 /* if quad port adapter, disable WoL on all but port A */
1605                 if (global_quad_port_a != 0)
1606                         adapter->eeprom_wol = 0;
1607                 else
1608                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1609                 /* Reset for multiple quad port adapters */
1610                 if (++global_quad_port_a == 4)
1611                         global_quad_port_a = 0;
1612                 break;
1613         }
1614
1615         /* initialize the wol settings based on the eeprom settings */
1616         adapter->wol = adapter->eeprom_wol;
1617         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1618
1619         /* reset the hardware with the new settings */
1620         igb_reset(adapter);
1621
1622         /* let the f/w know that the h/w is now under the control of the
1623          * driver. */
1624         igb_get_hw_control(adapter);
1625
1626         strcpy(netdev->name, "eth%d");
1627         err = register_netdev(netdev);
1628         if (err)
1629                 goto err_register;
1630
1631         /* carrier off reporting is important to ethtool even BEFORE open */
1632         netif_carrier_off(netdev);
1633
1634 #ifdef CONFIG_IGB_DCA
1635         if (dca_add_requester(&pdev->dev) == 0) {
1636                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1637                 dev_info(&pdev->dev, "DCA enabled\n");
1638                 igb_setup_dca(adapter);
1639         }
1640
1641 #endif
1642
1643         switch (hw->mac.type) {
1644         case e1000_82576:
1645                 /*
1646                  * Initialize hardware timer: we keep it running just in case
1647                  * that some program needs it later on.
1648                  * some program needs it later on.
1649                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1650                 adapter->cycles.read = igb_read_clock;
1651                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1652                 adapter->cycles.mult = 1;
1653                 /*
1654                  * Scale the NIC clock cycle by a large factor so that
1655                  * relatively small clock corrections can be added or
1656                  * subtracted at each clock tick. The drawbacks of a large
1657                  * factor are a) that the clock register overflows more quickly
1658                  * (not such a big deal) and b) that the increment per tick has
1659                  * to fit into 24 bits.  As a result we need to use a shift of
1660                  * 19 so we can fit a value of 16 into the TIMINCA register.
1661                  */
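                /* worked example (assuming IGB_82576_TSYNC_SHIFT is 19 and a
                 * 16 ns base increment): 16 << 19 = 0x800000, which still
                 * fits in TIMINCA's 24-bit increment field */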
1662                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1663                 wr32(E1000_TIMINCA,
1664                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1665                                 (16 << IGB_82576_TSYNC_SHIFT));
1666
1667                 /* Set registers so that rollover occurs soon to test this. */
1668                 wr32(E1000_SYSTIML, 0x00000000);
1669                 wr32(E1000_SYSTIMH, 0xFF800000);
1670                 wrfl();
1671
1672                 timecounter_init(&adapter->clock,
1673                                  &adapter->cycles,
1674                                  ktime_to_ns(ktime_get_real()));
1675                 /*
1676                  * Synchronize our NIC clock against system wall clock. NIC
1677                  * time stamp reading requires ~3us per sample; each sample was
1678                  * pretty stable even under load, so only 10 samples are needed
1679                  * for each offset comparison.
1680                  */
1681                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1682                 adapter->compare.source = &adapter->clock;
1683                 adapter->compare.target = ktime_get_real;
1684                 adapter->compare.num_samples = 10;
1685                 timecompare_update(&adapter->compare, 0);
1686                 break;
1687         case e1000_82575:
1688                 /* 82575 does not support timesync */
1689         default:
1690                 break;
1691         }
1692
1693         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1694         /* print bus type/speed/width info */
1695         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1696                  netdev->name,
1697                  ((hw->bus.speed == e1000_bus_speed_2500)
1698                   ? "2.5Gb/s" : "unknown"),
1699                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1700                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1701                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1702                    "unknown"),
1703                  netdev->dev_addr);
1704
1705         igb_read_part_num(hw, &part_num);
1706         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1707                 (part_num >> 8), (part_num & 0xff));
1708
1709         dev_info(&pdev->dev,
1710                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1711                 adapter->msix_entries ? "MSI-X" :
1712                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1713                 adapter->num_rx_queues, adapter->num_tx_queues);
1714
1715         return 0;
1716
1717 err_register:
1718         igb_release_hw_control(adapter);
1719 err_eeprom:
1720         if (!igb_check_reset_block(hw))
1721                 igb_reset_phy(hw);
1722
1723         if (hw->flash_address)
1724                 iounmap(hw->flash_address);
1725 err_sw_init:
1726         igb_clear_interrupt_scheme(adapter);
1727         iounmap(hw->hw_addr);
1728 err_ioremap:
1729         free_netdev(netdev);
1730 err_alloc_etherdev:
1731         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1732                                      IORESOURCE_MEM));
1733 err_pci_reg:
1734 err_dma:
1735         pci_disable_device(pdev);
1736         return err;
1737 }
1738
1739 /**
1740  * igb_remove - Device Removal Routine
1741  * @pdev: PCI device information struct
1742  *
1743  * igb_remove is called by the PCI subsystem to alert the driver
1744  * that it should release a PCI device.  This could be caused by a
1745  * Hot-Plug event, or because the driver is going to be removed from
1746  * memory.
1747  **/
1748 static void __devexit igb_remove(struct pci_dev *pdev)
1749 {
1750         struct net_device *netdev = pci_get_drvdata(pdev);
1751         struct igb_adapter *adapter = netdev_priv(netdev);
1752         struct e1000_hw *hw = &adapter->hw;
1753
1754         /* flush_scheduled_work() may reschedule our watchdog task, so
1755          * explicitly disable the watchdog task from being rescheduled  */
1756         set_bit(__IGB_DOWN, &adapter->state);
1757         del_timer_sync(&adapter->watchdog_timer);
1758         del_timer_sync(&adapter->phy_info_timer);
1759
1760         flush_scheduled_work();
1761
1762 #ifdef CONFIG_IGB_DCA
1763         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1764                 dev_info(&pdev->dev, "DCA disabled\n");
1765                 dca_remove_requester(&pdev->dev);
1766                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1767                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1768         }
1769 #endif
1770
1771         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1772          * would have already happened in close and is redundant. */
1773         igb_release_hw_control(adapter);
1774
1775         unregister_netdev(netdev);
1776
1777         if (!igb_check_reset_block(&adapter->hw))
1778                 igb_reset_phy(&adapter->hw);
1779
1780         igb_clear_interrupt_scheme(adapter);
1781
1782 #ifdef CONFIG_PCI_IOV
1783         /* reclaim resources allocated to VFs */
1784         if (adapter->vf_data) {
1785                 /* disable iov and allow time for transactions to clear */
1786                 pci_disable_sriov(pdev);
1787                 msleep(500);
1788
1789                 kfree(adapter->vf_data);
1790                 adapter->vf_data = NULL;
1791                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1792                 msleep(100);
1793                 dev_info(&pdev->dev, "IOV Disabled\n");
1794         }
1795 #endif
1796         iounmap(hw->hw_addr);
1797         if (hw->flash_address)
1798                 iounmap(hw->flash_address);
1799         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1800                                      IORESOURCE_MEM));
1801
1802         free_netdev(netdev);
1803
1804         pci_disable_pcie_error_reporting(pdev);
1805
1806         pci_disable_device(pdev);
1807 }
1808
1809 /**
1810  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1811  * @adapter: board private structure to initialize
1812  *
1813  * igb_sw_init initializes the Adapter private data structure.
1814  * Fields are initialized based on PCI device information and
1815  * OS network device settings (MTU size).
1816  **/
1817 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1818 {
1819         struct e1000_hw *hw = &adapter->hw;
1820         struct net_device *netdev = adapter->netdev;
1821         struct pci_dev *pdev = adapter->pdev;
1822
1823         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1824
1825         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1826         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1827         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1828         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1829
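        /* note: max_frame_size here does not include a VLAN tag;
         * igb_rlpml_set() adds VLAN_TAG_SIZE once a vlan group is registered */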
1830         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1831         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1832
1833         /* This call may decrease the number of queues depending on
1834          * interrupt mode. */
1835         if (igb_init_interrupt_scheme(adapter)) {
1836                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1837                 return -ENOMEM;
1838         }
1839
1840         /* Explicitly disable IRQ since the NIC can be in any state. */
1841         igb_irq_disable(adapter);
1842
1843         set_bit(__IGB_DOWN, &adapter->state);
1844         return 0;
1845 }
1846
1847 /**
1848  * igb_open - Called when a network interface is made active
1849  * @netdev: network interface device structure
1850  *
1851  * Returns 0 on success, negative value on failure
1852  *
1853  * The open entry point is called when a network interface is made
1854  * active by the system (IFF_UP).  At this point all resources needed
1855  * for transmit and receive operations are allocated, the interrupt
1856  * handler is registered with the OS, the watchdog timer is started,
1857  * and the stack is notified that the interface is ready.
1858  **/
1859 static int igb_open(struct net_device *netdev)
1860 {
1861         struct igb_adapter *adapter = netdev_priv(netdev);
1862         struct e1000_hw *hw = &adapter->hw;
1863         int err;
1864         int i;
1865
1866         /* disallow open during test */
1867         if (test_bit(__IGB_TESTING, &adapter->state))
1868                 return -EBUSY;
1869
1870         netif_carrier_off(netdev);
1871
1872         /* allocate transmit descriptors */
1873         err = igb_setup_all_tx_resources(adapter);
1874         if (err)
1875                 goto err_setup_tx;
1876
1877         /* allocate receive descriptors */
1878         err = igb_setup_all_rx_resources(adapter);
1879         if (err)
1880                 goto err_setup_rx;
1881
1882         /* e1000_power_up_phy(adapter); */
1883
1884         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1885         if ((adapter->hw.mng_cookie.status &
1886              E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1887                 igb_update_mng_vlan(adapter);
1888
1889         /* before we allocate an interrupt, we must be ready to handle it.
1890          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1891          * as soon as we call request_irq, so we have to set up our
1892          * clean_rx handler before we do so.  */
1893         igb_configure(adapter);
1894
1895         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1896
1897         err = igb_request_irq(adapter);
1898         if (err)
1899                 goto err_req_irq;
1900
1901         /* From here on the code is the same as igb_up() */
1902         clear_bit(__IGB_DOWN, &adapter->state);
1903
1904         for (i = 0; i < adapter->num_q_vectors; i++) {
1905                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1906                 napi_enable(&q_vector->napi);
1907         }
1908
1909         /* Clear any pending interrupts. */
1910         rd32(E1000_ICR);
1911
1912         igb_irq_enable(adapter);
1913
1914         /* notify VFs that reset has been completed */
1915         if (adapter->vfs_allocated_count) {
1916                 u32 reg_data = rd32(E1000_CTRL_EXT);
1917                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1918                 wr32(E1000_CTRL_EXT, reg_data);
1919         }
1920
1921         netif_tx_start_all_queues(netdev);
1922
1923         /* Fire a link status change interrupt to start the watchdog. */
1924         wr32(E1000_ICS, E1000_ICS_LSC);
1925
1926         return 0;
1927
1928 err_req_irq:
1929         igb_release_hw_control(adapter);
1930         /* e1000_power_down_phy(adapter); */
1931         igb_free_all_rx_resources(adapter);
1932 err_setup_rx:
1933         igb_free_all_tx_resources(adapter);
1934 err_setup_tx:
1935         igb_reset(adapter);
1936
1937         return err;
1938 }
1939
1940 /**
1941  * igb_close - Disables a network interface
1942  * @netdev: network interface device structure
1943  *
1944  * Returns 0, this is not allowed to fail
1945  *
1946  * The close entry point is called when an interface is de-activated
1947  * by the OS.  The hardware is still under the driver's control, but
1948  * needs to be disabled.  A global MAC reset is issued to stop the
1949  * hardware, and all transmit and receive resources are freed.
1950  **/
1951 static int igb_close(struct net_device *netdev)
1952 {
1953         struct igb_adapter *adapter = netdev_priv(netdev);
1954
1955         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1956         igb_down(adapter);
1957
1958         igb_free_irq(adapter);
1959
1960         igb_free_all_tx_resources(adapter);
1961         igb_free_all_rx_resources(adapter);
1962
1963         /* kill manageability vlan ID if supported, but not if a vlan with
1964          * the same ID is registered on the host OS (let 8021q kill it) */
1965         if ((adapter->hw.mng_cookie.status &
1966                           E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1967              !(adapter->vlgrp &&
1968                vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1969                 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
1970
1971         return 0;
1972 }
1973
1974 /**
1975  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1976  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1977  *
1978  * Return 0 on success, negative on failure
1979  **/
1980 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1981 {
1982         struct pci_dev *pdev = tx_ring->pdev;
1983         int size;
1984
1985         size = sizeof(struct igb_buffer) * tx_ring->count;
1986         tx_ring->buffer_info = vmalloc(size);
1987         if (!tx_ring->buffer_info)
1988                 goto err;
1989         memset(tx_ring->buffer_info, 0, size);
1990
1991         /* round up to nearest 4K */
1992         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1993         tx_ring->size = ALIGN(tx_ring->size, 4096);
1994
1995         tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
1996                                              &tx_ring->dma);
1997
1998         if (!tx_ring->desc)
1999                 goto err;
2000
2001         tx_ring->next_to_use = 0;
2002         tx_ring->next_to_clean = 0;
2003         return 0;
2004
2005 err:
2006         vfree(tx_ring->buffer_info);
2007         dev_err(&pdev->dev,
2008                 "Unable to allocate memory for the transmit descriptor ring\n");
2009         return -ENOMEM;
2010 }
2011
2012 /**
2013  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2014  *                                (Descriptors) for all queues
2015  * @adapter: board private structure
2016  *
2017  * Return 0 on success, negative on failure
2018  **/
2019 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2020 {
2021         int i, err = 0;
2022         int r_idx;
2023
2024         for (i = 0; i < adapter->num_tx_queues; i++) {
2025                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2026                 if (err) {
2027                         dev_err(&adapter->pdev->dev,
2028                                 "Allocation for Tx Queue %u failed\n", i);
2029                         for (i--; i >= 0; i--)
2030                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2031                         break;
2032                 }
2033         }
2034
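        /* map every possible netdev queue index onto an allocated Tx ring,
         * wrapping round-robin when there are fewer rings than
         * IGB_MAX_TX_QUEUES */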
2035         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2036                 r_idx = i % adapter->num_tx_queues;
2037                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2038         }
2039         return err;
2040 }
2041
2042 /**
2043  * igb_setup_tctl - configure the transmit control registers
2044  * @adapter: Board private structure
2045  **/
2046 void igb_setup_tctl(struct igb_adapter *adapter)
2047 {
2048         struct e1000_hw *hw = &adapter->hw;
2049         u32 tctl;
2050
2051         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2052         wr32(E1000_TXDCTL(0), 0);
2053
2054         /* Program the Transmit Control Register */
2055         tctl = rd32(E1000_TCTL);
2056         tctl &= ~E1000_TCTL_CT;
2057         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2058                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2059
2060         igb_config_collision_dist(hw);
2061
2062         /* Enable transmits */
2063         tctl |= E1000_TCTL_EN;
2064
2065         wr32(E1000_TCTL, tctl);
2066 }
2067
2068 /**
2069  * igb_configure_tx_ring - Configure transmit ring after Reset
2070  * @adapter: board private structure
2071  * @ring: tx ring to configure
2072  *
2073  * Configure a transmit ring after a reset.
2074  **/
2075 void igb_configure_tx_ring(struct igb_adapter *adapter,
2076                            struct igb_ring *ring)
2077 {
2078         struct e1000_hw *hw = &adapter->hw;
2079         u32 txdctl;
2080         u64 tdba = ring->dma;
2081         int reg_idx = ring->reg_idx;
2082
2083         /* disable the queue */
2084         txdctl = rd32(E1000_TXDCTL(reg_idx));
2085         wr32(E1000_TXDCTL(reg_idx),
2086                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2087         wrfl();
2088         mdelay(10);
2089
2090         wr32(E1000_TDLEN(reg_idx),
2091                         ring->count * sizeof(union e1000_adv_tx_desc));
2092         wr32(E1000_TDBAL(reg_idx),
2093                         tdba & 0x00000000ffffffffULL);
2094         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2095
2096         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2097         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2098         writel(0, ring->head);
2099         writel(0, ring->tail);
2100
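        /* TXDCTL threshold fields: prefetch at bit 0, host at bit 8 and
         * write-back at bit 16, filled in from the IGB_TX_*THRESH defaults */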
2101         txdctl |= IGB_TX_PTHRESH;
2102         txdctl |= IGB_TX_HTHRESH << 8;
2103         txdctl |= IGB_TX_WTHRESH << 16;
2104
2105         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2106         wr32(E1000_TXDCTL(reg_idx), txdctl);
2107 }
2108
2109 /**
2110  * igb_configure_tx - Configure transmit Unit after Reset
2111  * @adapter: board private structure
2112  *
2113  * Configure the Tx unit of the MAC after a reset.
2114  **/
2115 static void igb_configure_tx(struct igb_adapter *adapter)
2116 {
2117         int i;
2118
2119         for (i = 0; i < adapter->num_tx_queues; i++)
2120                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2121 }
2122
2123 /**
2124  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2125  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2126  *
2127  * Returns 0 on success, negative on failure
2128  **/
2129 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2130 {
2131         struct pci_dev *pdev = rx_ring->pdev;
2132         int size, desc_len;
2133
2134         size = sizeof(struct igb_buffer) * rx_ring->count;
2135         rx_ring->buffer_info = vmalloc(size);
2136         if (!rx_ring->buffer_info)
2137                 goto err;
2138         memset(rx_ring->buffer_info, 0, size);
2139
2140         desc_len = sizeof(union e1000_adv_rx_desc);
2141
2142         /* Round up to nearest 4K */
2143         rx_ring->size = rx_ring->count * desc_len;
2144         rx_ring->size = ALIGN(rx_ring->size, 4096);
2145
2146         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2147                                              &rx_ring->dma);
2148
2149         if (!rx_ring->desc)
2150                 goto err;
2151
2152         rx_ring->next_to_clean = 0;
2153         rx_ring->next_to_use = 0;
2154
2155         return 0;
2156
2157 err:
2158         vfree(rx_ring->buffer_info);
2159         dev_err(&pdev->dev, "Unable to allocate memory for "
2160                 "the receive descriptor ring\n");
2161         return -ENOMEM;
2162 }
2163
2164 /**
2165  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2166  *                                (Descriptors) for all queues
2167  * @adapter: board private structure
2168  *
2169  * Return 0 on success, negative on failure
2170  **/
2171 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2172 {
2173         int i, err = 0;
2174
2175         for (i = 0; i < adapter->num_rx_queues; i++) {
2176                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2177                 if (err) {
2178                         dev_err(&adapter->pdev->dev,
2179                                 "Allocation for Rx Queue %u failed\n", i);
2180                         for (i--; i >= 0; i--)
2181                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2182                         break;
2183                 }
2184         }
2185
2186         return err;
2187 }
2188
2189 /**
2190  * igb_setup_mrqc - configure the multiple receive queue control registers
2191  * @adapter: Board private structure
2192  **/
2193 static void igb_setup_mrqc(struct igb_adapter *adapter)
2194 {
2195         struct e1000_hw *hw = &adapter->hw;
2196         u32 mrqc, rxcsum;
2197         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2198         union e1000_reta {
2199                 u32 dword;
2200                 u8  bytes[4];
2201         } reta;
2202         static const u8 rsshash[40] = {
2203                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2204                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2205                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2206                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2207
2208         /* Fill out hash function seeds */
2209         for (j = 0; j < 10; j++) {
2210                 u32 rsskey = rsshash[(j * 4)];
2211                 rsskey |= rsshash[(j * 4) + 1] << 8;
2212                 rsskey |= rsshash[(j * 4) + 2] << 16;
2213                 rsskey |= rsshash[(j * 4) + 3] << 24;
2214                 array_wr32(E1000_RSSRK(0), j, rsskey);
2215         }
2216
2217         num_rx_queues = adapter->num_rx_queues;
2218
2219         if (adapter->vfs_allocated_count) {
2220                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2221                 switch (hw->mac.type) {
2222                 case e1000_82576:
2223                         shift = 3;
2224                         num_rx_queues = 2;
2225                         break;
2226                 case e1000_82575:
2227                         shift = 2;
2228                         shift2 = 6;
2229                 default:
2230                         break;
2231                 }
2232         } else {
2233                 if (hw->mac.type == e1000_82575)
2234                         shift = 6;
2235         }
2236
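        /* fill the 128-entry redirection table four bytes at a time; each
         * byte picks the Rx queue (shifted into the position the hardware
         * expects) for one hash bucket */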
2237         for (j = 0; j < (32 * 4); j++) {
2238                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2239                 if (shift2)
2240                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2241                 if ((j & 3) == 3)
2242                         wr32(E1000_RETA(j >> 2), reta.dword);
2243         }
2244
2245         /*
2246          * Disable raw packet checksumming so that RSS hash is placed in
2247          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2248          * offloads as they are enabled by default
2249          */
2250         rxcsum = rd32(E1000_RXCSUM);
2251         rxcsum |= E1000_RXCSUM_PCSD;
2252
2253         if (adapter->hw.mac.type >= e1000_82576)
2254                 /* Enable Receive Checksum Offload for SCTP */
2255                 rxcsum |= E1000_RXCSUM_CRCOFL;
2256
2257         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2258         wr32(E1000_RXCSUM, rxcsum);
2259
2260         /* If VMDq is enabled then we set the appropriate mode for that, else
2261          * we default to RSS so that an RSS hash is calculated per packet even
2262          * if we are only using one queue */
2263         if (adapter->vfs_allocated_count) {
2264                 if (hw->mac.type > e1000_82575) {
2265                         /* Set the default pool for the PF's first queue */
2266                         u32 vtctl = rd32(E1000_VT_CTL);
2267                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2268                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2269                         vtctl |= adapter->vfs_allocated_count <<
2270                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2271                         wr32(E1000_VT_CTL, vtctl);
2272                 }
2273                 if (adapter->num_rx_queues > 1)
2274                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2275                 else
2276                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2277         } else {
2278                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2279         }
2280         igb_vmm_control(adapter);
2281
2282         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2283                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2284         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2285                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2286         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2287                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2288         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2289                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2290
2291         wr32(E1000_MRQC, mrqc);
2292 }
2293
2294 /**
2295  * igb_setup_rctl - configure the receive control registers
2296  * @adapter: Board private structure
2297  **/
2298 void igb_setup_rctl(struct igb_adapter *adapter)
2299 {
2300         struct e1000_hw *hw = &adapter->hw;
2301         u32 rctl;
2302
2303         rctl = rd32(E1000_RCTL);
2304
2305         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2306         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2307
2308         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2309                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2310
2311         /*
2312          * enable stripping of CRC. It's unlikely this will break BMC
2313          * redirection as it did with e1000. Newer features require
2314          * that the HW strips the CRC.
2315          */
2316         rctl |= E1000_RCTL_SECRC;
2317
2318         /*
2319          * disable store bad packets and clear size bits.
2320          */
2321         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2322
2323         /* enable LPE to prevent packets larger than max_frame_size */
2324         rctl |= E1000_RCTL_LPE;
2325
2326         /* disable queue 0 to prevent tail write w/o re-config */
2327         wr32(E1000_RXDCTL(0), 0);
2328
2329         /* Attention!!!  For SR-IOV PF driver operations you must enable
2330          * queue drop for all VF and PF queues to prevent head of line blocking
2331          * if an untrusted VF does not provide descriptors to hardware.
2332          */
2333         if (adapter->vfs_allocated_count) {
2334                 u32 vmolr;
2335
2336                 /* set all queue drop enable bits */
2337                 wr32(E1000_QDE, ALL_QUEUES);
2338
2339                 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2340                 if (rctl & E1000_RCTL_LPE)
2341                         vmolr |= E1000_VMOLR_LPE;
2342                 if (adapter->num_rx_queues > 1)
2343                         vmolr |= E1000_VMOLR_RSSE;
2344                 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2345         }
2346
2347         wr32(E1000_RCTL, rctl);
2348 }
2349
2350 /**
2351  * igb_rlpml_set - set maximum receive packet size
2352  * @adapter: board private structure
2353  *
2354  * Configure maximum receivable packet size.
2355  **/
2356 static void igb_rlpml_set(struct igb_adapter *adapter)
2357 {
2358         u32 max_frame_size = adapter->max_frame_size;
2359         struct e1000_hw *hw = &adapter->hw;
2360         u16 pf_id = adapter->vfs_allocated_count;
2361
2362         if (adapter->vlgrp)
2363                 max_frame_size += VLAN_TAG_SIZE;
2364
2365         /* if vfs are enabled we set RLPML to the largest possible request
2366          * size and set the VMOLR RLPML to the size we need */
2367         if (pf_id) {
2368                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2369                 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2370         }
2371
2372         wr32(E1000_RLPML, max_frame_size);
2373 }
2374
2375 /**
2376  * igb_configure_rx_ring - Configure a receive ring after Reset
2377  * @adapter: board private structure
2378  * @ring: receive ring to be configured
2379  *
2380  * Configure the Rx unit of the MAC after a reset.
2381  **/
2382 void igb_configure_rx_ring(struct igb_adapter *adapter,
2383                            struct igb_ring *ring)
2384 {
2385         struct e1000_hw *hw = &adapter->hw;
2386         u64 rdba = ring->dma;
2387         int reg_idx = ring->reg_idx;
2388         u32 srrctl, rxdctl;
2389
2390         /* disable the queue */
2391         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2392         wr32(E1000_RXDCTL(reg_idx),
2393                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2394
2395         /* Set DMA base address registers */
2396         wr32(E1000_RDBAL(reg_idx),
2397              rdba & 0x00000000ffffffffULL);
2398         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2399         wr32(E1000_RDLEN(reg_idx),
2400                        ring->count * sizeof(union e1000_adv_rx_desc));
2401
2402         /* initialize head and tail */
2403         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2404         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2405         writel(0, ring->head);
2406         writel(0, ring->tail);
2407
2408         /* set descriptor configuration */
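        /* buffers smaller than 1K use header split: headers land in the
         * rx_buffer_len sized buffer and payload in half-page buffers,
         * otherwise a single advanced one-buffer descriptor is used */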
2409         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2410                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2411                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2412 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2413                 srrctl |= IGB_RXBUFFER_16384 >>
2414                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2415 #else
2416                 srrctl |= (PAGE_SIZE / 2) >>
2417                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2418 #endif
2419                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2420         } else {
2421                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2422                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2423                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2424         }
2425
2426         wr32(E1000_SRRCTL(reg_idx), srrctl);
2427
2428         /* enable receive descriptor fetching */
2429         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2430         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2431         rxdctl &= 0xFFF00000;
2432         rxdctl |= IGB_RX_PTHRESH;
2433         rxdctl |= IGB_RX_HTHRESH << 8;
2434         rxdctl |= IGB_RX_WTHRESH << 16;
2435         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2436 }
2437
2438 /**
2439  * igb_configure_rx - Configure receive Unit after Reset
2440  * @adapter: board private structure
2441  *
2442  * Configure the Rx unit of the MAC after a reset.
2443  **/
2444 static void igb_configure_rx(struct igb_adapter *adapter)
2445 {
2446         int i;
2447
2448         /* set UTA to appropriate mode */
2449         igb_set_uta(adapter);
2450
2451         /* set the correct pool for the PF default MAC address in entry 0 */
2452         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2453                          adapter->vfs_allocated_count);
2454
2455         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2456          * the Base and Length of the Rx Descriptor Ring */
2457         for (i = 0; i < adapter->num_rx_queues; i++)
2458                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2459 }
2460
2461 /**
2462  * igb_free_tx_resources - Free Tx Resources per Queue
2463  * @tx_ring: Tx descriptor ring for a specific queue
2464  *
2465  * Free all transmit software resources
2466  **/
2467 void igb_free_tx_resources(struct igb_ring *tx_ring)
2468 {
2469         igb_clean_tx_ring(tx_ring);
2470
2471         vfree(tx_ring->buffer_info);
2472         tx_ring->buffer_info = NULL;
2473
2474         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2475                             tx_ring->desc, tx_ring->dma);
2476
2477         tx_ring->desc = NULL;
2478 }
2479
2480 /**
2481  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2482  * @adapter: board private structure
2483  *
2484  * Free all transmit software resources
2485  **/
2486 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2487 {
2488         int i;
2489
2490         for (i = 0; i < adapter->num_tx_queues; i++)
2491                 igb_free_tx_resources(&adapter->tx_ring[i]);
2492 }
2493
2494 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2495                                     struct igb_buffer *buffer_info)
2496 {
2497         buffer_info->dma = 0;
2498         if (buffer_info->skb) {
2499                 skb_dma_unmap(&tx_ring->pdev->dev,
2500                               buffer_info->skb,
2501                               DMA_TO_DEVICE);
2502                 dev_kfree_skb_any(buffer_info->skb);
2503                 buffer_info->skb = NULL;
2504         }
2505         buffer_info->time_stamp = 0;
2506         /* buffer_info must be completely set up in the transmit path */
2507 }
2508
2509 /**
2510  * igb_clean_tx_ring - Free Tx Buffers
2511  * @tx_ring: ring to be cleaned
2512  **/
2513 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2514 {
2515         struct igb_buffer *buffer_info;
2516         unsigned long size;
2517         unsigned int i;
2518
2519         if (!tx_ring->buffer_info)
2520                 return;
2521         /* Free all the Tx ring sk_buffs */
2522
2523         for (i = 0; i < tx_ring->count; i++) {
2524                 buffer_info = &tx_ring->buffer_info[i];
2525                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2526         }
2527
2528         size = sizeof(struct igb_buffer) * tx_ring->count;
2529         memset(tx_ring->buffer_info, 0, size);
2530
2531         /* Zero out the descriptor ring */
2532
2533         memset(tx_ring->desc, 0, tx_ring->size);
2534
2535         tx_ring->next_to_use = 0;
2536         tx_ring->next_to_clean = 0;
2537
2538         writel(0, tx_ring->head);
2539         writel(0, tx_ring->tail);
2540 }
2541
2542 /**
2543  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2544  * @adapter: board private structure
2545  **/
2546 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2547 {
2548         int i;
2549
2550         for (i = 0; i < adapter->num_tx_queues; i++)
2551                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2552 }
2553
2554 /**
2555  * igb_free_rx_resources - Free Rx Resources
2556  * @rx_ring: ring to clean the resources from
2557  *
2558  * Free all receive software resources
2559  **/
2560 void igb_free_rx_resources(struct igb_ring *rx_ring)
2561 {
2562         igb_clean_rx_ring(rx_ring);
2563
2564         vfree(rx_ring->buffer_info);
2565         rx_ring->buffer_info = NULL;
2566
2567         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2568                             rx_ring->desc, rx_ring->dma);
2569
2570         rx_ring->desc = NULL;
2571 }
2572
2573 /**
2574  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2575  * @adapter: board private structure
2576  *
2577  * Free all receive software resources
2578  **/
2579 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2580 {
2581         int i;
2582
2583         for (i = 0; i < adapter->num_rx_queues; i++)
2584                 igb_free_rx_resources(&adapter->rx_ring[i]);
2585 }
2586
2587 /**
2588  * igb_clean_rx_ring - Free Rx Buffers per Queue
2589  * @rx_ring: ring to free buffers from
2590  **/
2591 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2592 {
2593         struct igb_buffer *buffer_info;
2594         unsigned long size;
2595         unsigned int i;
2596
2597         if (!rx_ring->buffer_info)
2598                 return;
2599         /* Free all the Rx ring sk_buffs */
2600         for (i = 0; i < rx_ring->count; i++) {
2601                 buffer_info = &rx_ring->buffer_info[i];
2602                 if (buffer_info->dma) {
2603                         pci_unmap_single(rx_ring->pdev,
2604                                          buffer_info->dma,
2605                                          rx_ring->rx_buffer_len,
2606                                          PCI_DMA_FROMDEVICE);
2607                         buffer_info->dma = 0;
2608                 }
2609
2610                 if (buffer_info->skb) {
2611                         dev_kfree_skb(buffer_info->skb);
2612                         buffer_info->skb = NULL;
2613                 }
2614                 if (buffer_info->page_dma) {
2615                         pci_unmap_page(rx_ring->pdev,
2616                                        buffer_info->page_dma,
2617                                        PAGE_SIZE / 2,
2618                                        PCI_DMA_FROMDEVICE);
2619                         buffer_info->page_dma = 0;
2620                 }
2621                 if (buffer_info->page) {
2622                         put_page(buffer_info->page);
2623                         buffer_info->page = NULL;
2624                         buffer_info->page_offset = 0;
2625                 }
2626         }
2627
2628         size = sizeof(struct igb_buffer) * rx_ring->count;
2629         memset(rx_ring->buffer_info, 0, size);
2630
2631         /* Zero out the descriptor ring */
2632         memset(rx_ring->desc, 0, rx_ring->size);
2633
2634         rx_ring->next_to_clean = 0;
2635         rx_ring->next_to_use = 0;
2636
2637         writel(0, rx_ring->head);
2638         writel(0, rx_ring->tail);
2639 }
2640
2641 /**
2642  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2643  * @adapter: board private structure
2644  **/
2645 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2646 {
2647         int i;
2648
2649         for (i = 0; i < adapter->num_rx_queues; i++)
2650                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2651 }
2652
2653 /**
2654  * igb_set_mac - Change the Ethernet Address of the NIC
2655  * @netdev: network interface device structure
2656  * @p: pointer to an address structure
2657  *
2658  * Returns 0 on success, negative on failure
2659  **/
2660 static int igb_set_mac(struct net_device *netdev, void *p)
2661 {
2662         struct igb_adapter *adapter = netdev_priv(netdev);
2663         struct e1000_hw *hw = &adapter->hw;
2664         struct sockaddr *addr = p;
2665
2666         if (!is_valid_ether_addr(addr->sa_data))
2667                 return -EADDRNOTAVAIL;
2668
2669         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2670         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2671
2672         /* set the correct pool for the new PF MAC address in entry 0 */
2673         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2674                          adapter->vfs_allocated_count);
2675
2676         return 0;
2677 }
2678
2679 /**
2680  * igb_write_mc_addr_list - write multicast addresses to MTA
2681  * @netdev: network interface device structure
2682  *
2683  * Writes multicast address list to the MTA hash table.
2684  * Returns: -ENOMEM on failure
2685  *                0 on no addresses written
2686  *                X on writing X addresses to MTA
2687  **/
2688 static int igb_write_mc_addr_list(struct net_device *netdev)
2689 {
2690         struct igb_adapter *adapter = netdev_priv(netdev);
2691         struct e1000_hw *hw = &adapter->hw;
2692         struct dev_mc_list *mc_ptr = netdev->mc_list;
2693         u8  *mta_list;
2694         u32 vmolr = 0;
2695         int i;
2696
2697         if (!netdev->mc_count) {
2698                 /* nothing to program, so clear mc list */
2699                 igb_update_mc_addr_list(hw, NULL, 0);
2700                 igb_restore_vf_multicasts(adapter);
2701                 return 0;
2702         }
2703
2704         mta_list = kzalloc(netdev->mc_count * ETH_ALEN, GFP_ATOMIC);
2705         if (!mta_list)
2706                 return -ENOMEM;
2707
2708         /* set vmolr receive overflow multicast bit */
2709         vmolr |= E1000_VMOLR_ROMPE;
2710
2711         /* The shared function expects a packed array of only addresses. */
2712         mc_ptr = netdev->mc_list;
2713
2714         for (i = 0; i < netdev->mc_count; i++) {
2715                 if (!mc_ptr)
2716                         break;
2717                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2718                 mc_ptr = mc_ptr->next;
2719         }
2720         igb_update_mc_addr_list(hw, mta_list, i);
2721         kfree(mta_list);
2722
2723         return netdev->mc_count;
2724 }
2725
2726 /**
2727  * igb_write_uc_addr_list - write unicast addresses to RAR table
2728  * @netdev: network interface device structure
2729  *
2730  * Writes unicast address list to the RAR table.
2731  * Returns: -ENOMEM on failure/insufficient address space
2732  *                0 on no addresses written
2733  *                X on writing X addresses to the RAR table
2734  **/
2735 static int igb_write_uc_addr_list(struct net_device *netdev)
2736 {
2737         struct igb_adapter *adapter = netdev_priv(netdev);
2738         struct e1000_hw *hw = &adapter->hw;
2739         unsigned int vfn = adapter->vfs_allocated_count;
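        /* entry 0 carries the PF MAC and the top entries are assumed to be
         * reserved for VF MAC addresses, which is why only
         * rar_entry_count - (vfn + 1) slots are usable here */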
2740         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2741         int count = 0;
2742
2743         /* return ENOMEM indicating insufficient memory for addresses */
2744         if (netdev->uc.count > rar_entries)
2745                 return -ENOMEM;
2746
2747         if (netdev->uc.count && rar_entries) {
2748                 struct netdev_hw_addr *ha;
2749                 list_for_each_entry(ha, &netdev->uc.list, list) {
2750                         if (!rar_entries)
2751                                 break;
2752                         igb_rar_set_qsel(adapter, ha->addr,
2753                                          rar_entries--,
2754                                          vfn);
2755                         count++;
2756                 }
2757         }
2758         /* write the addresses in reverse order to avoid write combining */
2759         for (; rar_entries > 0 ; rar_entries--) {
2760                 wr32(E1000_RAH(rar_entries), 0);
2761                 wr32(E1000_RAL(rar_entries), 0);
2762         }
2763         wrfl();
2764
2765         return count;
2766 }
2767
2768 /**
2769  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2770  * @netdev: network interface device structure
2771  *
2772  * The set_rx_mode entry point is called whenever the unicast or multicast
2773  * address lists or the network interface flags are updated.  This routine is
2774  * responsible for configuring the hardware for proper unicast, multicast,
2775  * promiscuous mode, and all-multi behavior.
2776  **/
2777 static void igb_set_rx_mode(struct net_device *netdev)
2778 {
2779         struct igb_adapter *adapter = netdev_priv(netdev);
2780         struct e1000_hw *hw = &adapter->hw;
2781         unsigned int vfn = adapter->vfs_allocated_count;
2782         u32 rctl, vmolr = 0;
2783         int count;
2784
2785         /* Check for Promiscuous and All Multicast modes */
2786         rctl = rd32(E1000_RCTL);
2787
2788         /* clear the affected bits */
2789         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2790
2791         if (netdev->flags & IFF_PROMISC) {
2792                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2793                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2794         } else {
2795                 if (netdev->flags & IFF_ALLMULTI) {
2796                         rctl |= E1000_RCTL_MPE;
2797                         vmolr |= E1000_VMOLR_MPME;
2798                 } else {
2799                         /*
2800                          * Write addresses to the MTA; if the attempt fails,
2801                          * then we should just turn on promiscuous mode so
2802                          * that we can at least receive multicast traffic
2803                          */
2804                         count = igb_write_mc_addr_list(netdev);
2805                         if (count < 0) {
2806                                 rctl |= E1000_RCTL_MPE;
2807                                 vmolr |= E1000_VMOLR_MPME;
2808                         } else if (count) {
2809                                 vmolr |= E1000_VMOLR_ROMPE;
2810                         }
2811                 }
2812                 /*
2813                  * Write addresses to available RAR registers; if there is not
2814                  * sufficient space to store all the addresses, then enable
2815                  * unicast promiscuous mode
2816                  */
2817                 count = igb_write_uc_addr_list(netdev);
2818                 if (count < 0) {
2819                         rctl |= E1000_RCTL_UPE;
2820                         vmolr |= E1000_VMOLR_ROPE;
2821                 }
2822                 rctl |= E1000_RCTL_VFE;
2823         }
2824         wr32(E1000_RCTL, rctl);
2825
2826         /*
2827          * In order to support SR-IOV and eventually VMDq it is necessary to set
2828          * the VMOLR to enable the appropriate modes.  Without this workaround
2829          * we will have issues with VLAN tag stripping not being done for frames
2830          * that are only arriving because we are the default pool
2831          */
2832         if (hw->mac.type < e1000_82576)
2833                 return;
2834
2835         vmolr |= rd32(E1000_VMOLR(vfn)) &
2836                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2837         wr32(E1000_VMOLR(vfn), vmolr);
2838         igb_restore_vf_multicasts(adapter);
2839 }
2840
2841 /* Need to wait a few seconds after link up to get diagnostic information from
2842  * the phy */
2843 static void igb_update_phy_info(unsigned long data)
2844 {
2845         struct igb_adapter *adapter = (struct igb_adapter *) data;
2846         igb_get_phy_info(&adapter->hw);
2847 }
2848
2849 /**
2850  * igb_has_link - check shared code for link and determine up/down
2851  * @adapter: pointer to driver private info
2852  **/
2853 static bool igb_has_link(struct igb_adapter *adapter)
2854 {
2855         struct e1000_hw *hw = &adapter->hw;
2856         bool link_active = false;
2857         s32 ret_val = 0;
2858
2859         /* get_link_status is set on LSC (link status) interrupt or
2860          * rx sequence error interrupt.  get_link_status will stay
2861          * true until the e1000_check_for_link establishes link
2862          * for copper adapters ONLY
2863          */
2864         switch (hw->phy.media_type) {
2865         case e1000_media_type_copper:
2866                 if (hw->mac.get_link_status) {
2867                         ret_val = hw->mac.ops.check_for_link(hw);
2868                         link_active = !hw->mac.get_link_status;
2869                 } else {
2870                         link_active = true;
2871                 }
2872                 break;
2873         case e1000_media_type_internal_serdes:
2874                 ret_val = hw->mac.ops.check_for_link(hw);
2875                 link_active = hw->mac.serdes_has_link;
2876                 break;
2877         default:
2878         case e1000_media_type_unknown:
2879                 break;
2880         }
2881
2882         return link_active;
2883 }
2884
2885 /**
2886  * igb_watchdog - Timer Call-back
2887  * @data: pointer to adapter cast into an unsigned long
2888  **/
2889 static void igb_watchdog(unsigned long data)
2890 {
2891         struct igb_adapter *adapter = (struct igb_adapter *)data;
2892         /* Do the rest outside of interrupt context */
2893         schedule_work(&adapter->watchdog_task);
2894 }
2895
2896 static void igb_watchdog_task(struct work_struct *work)
2897 {
2898         struct igb_adapter *adapter = container_of(work,
2899                                         struct igb_adapter, watchdog_task);
2900         struct e1000_hw *hw = &adapter->hw;
2901         struct net_device *netdev = adapter->netdev;
2902         struct igb_ring *tx_ring = adapter->tx_ring;
2903         u32 link;
2904         int i;
2905
2906         link = igb_has_link(adapter);
2907         if ((netif_carrier_ok(netdev)) && link)
2908                 goto link_up;
2909
2910         if (link) {
2911                 if (!netif_carrier_ok(netdev)) {
2912                         u32 ctrl;
2913                         hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2914                                                    &adapter->link_speed,
2915                                                    &adapter->link_duplex);
2916
2917                         ctrl = rd32(E1000_CTRL);
2918                         /* Link status message must follow this format */
2919                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2920                                  "Flow Control: %s\n",
2921                                  netdev->name,
2922                                  adapter->link_speed,
2923                                  adapter->link_duplex == FULL_DUPLEX ?
2924                                  "Full Duplex" : "Half Duplex",
2925                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2926                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2927                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2928                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2929
2930                         /* tweak tx_queue_len according to speed/duplex and
2931                          * adjust the timeout factor */
2932                         netdev->tx_queue_len = adapter->tx_queue_len;
2933                         adapter->tx_timeout_factor = 1;
2934                         switch (adapter->link_speed) {
2935                         case SPEED_10:
2936                                 netdev->tx_queue_len = 10;
2937                                 adapter->tx_timeout_factor = 14;
2938                                 break;
2939                         case SPEED_100:
2940                                 netdev->tx_queue_len = 100;
2941                                 /* maybe add some timeout factor ? */
2942                                 break;
2943                         }
2944
2945                         netif_carrier_on(netdev);
2946
2947                         igb_ping_all_vfs(adapter);
2948
2949                         /* link state has changed, schedule phy info update */
2950                         if (!test_bit(__IGB_DOWN, &adapter->state))
2951                                 mod_timer(&adapter->phy_info_timer,
2952                                           round_jiffies(jiffies + 2 * HZ));
2953                 }
2954         } else {
2955                 if (netif_carrier_ok(netdev)) {
2956                         adapter->link_speed = 0;
2957                         adapter->link_duplex = 0;
2958                         /* Link status message must follow this format */
2959                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2960                                netdev->name);
2961                         netif_carrier_off(netdev);
2962
2963                         igb_ping_all_vfs(adapter);
2964
2965                         /* link state has changed, schedule phy info update */
2966                         if (!test_bit(__IGB_DOWN, &adapter->state))
2967                                 mod_timer(&adapter->phy_info_timer,
2968                                           round_jiffies(jiffies + 2 * HZ));
2969                 }
2970         }
2971
2972 link_up:
2973         igb_update_stats(adapter);
2974
2975         hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
2976         adapter->tpt_old = adapter->stats.tpt;
2977         hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
2978         adapter->colc_old = adapter->stats.colc;
2979
2980         adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
2981         adapter->gorc_old = adapter->stats.gorc;
2982         adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
2983         adapter->gotc_old = adapter->stats.gotc;
2984
2985         igb_update_adaptive(&adapter->hw);
2986
2987         if (!netif_carrier_ok(netdev)) {
2988                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2989                         /* We've lost link, so the controller stops DMA,
2990                          * but we've got queued Tx work that's never going
2991                          * to get done, so reset controller to flush Tx.
2992                          * (Do the reset outside of interrupt context). */
2993                         adapter->tx_timeout_count++;
2994                         schedule_work(&adapter->reset_task);
2995                         /* return immediately since reset is imminent */
2996                         return;
2997                 }
2998         }
2999
3000         /* Cause software interrupt to ensure rx ring is cleaned */
3001         if (adapter->msix_entries) {
3002                 u32 eics = 0;
3003                 for (i = 0; i < adapter->num_q_vectors; i++) {
3004                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3005                         eics |= q_vector->eims_value;
3006                 }
3007                 wr32(E1000_EICS, eics);
3008         } else {
3009                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3010         }
3011
3012         /* Force detection of hung controller every watchdog period */
3013         tx_ring->detect_tx_hung = true;
3014
3015         /* Reset the timer */
3016         if (!test_bit(__IGB_DOWN, &adapter->state))
3017                 mod_timer(&adapter->watchdog_timer,
3018                           round_jiffies(jiffies + 2 * HZ));
3019 }
3020
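/* The latency ranges below map to target interrupt rates of roughly
 * 70,000 (lowest), 20,000 (low) and 4,000 (bulk) interrupts per second;
 * see the ITR values chosen in igb_set_itr().
 */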
3021 enum latency_range {
3022         lowest_latency = 0,
3023         low_latency = 1,
3024         bulk_latency = 2,
3025         latency_invalid = 255
3026 };
3027
3028 /**
3029  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3030  *
3031  *      Stores a new ITR value based strictly on packet size.  This
3032  *      algorithm is less sophisticated than that used in igb_update_itr,
3033  *      due to the difficulty of synchronizing statistics across multiple
3034  *      receive rings.  The divisors and thresholds used by this function
3035  *      were determined based on theoretical maximum wire speed and testing
3036  *      data, in order to minimize response time while increasing bulk
3037  *      throughput.
3038  *      This functionality is controlled by the InterruptThrottleRate module
3039  *      parameter (see igb_param.c)
3040  *      NOTE:  This function is called only when operating in a multiqueue
3041  *             receive environment.
3042  * @q_vector: pointer to q_vector
3043  **/
3044 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3045 {
3046         int new_val = q_vector->itr_val;
3047         int avg_wire_size = 0;
3048         struct igb_adapter *adapter = q_vector->adapter;
3049
3050         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3051          * ints/sec - an ITR value of 976.
3052          */
3053         if (adapter->link_speed != SPEED_1000) {
3054                 new_val = 976;
3055                 goto set_itr_val;
3056         }
3057
3058         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3059                 struct igb_ring *ring = q_vector->rx_ring;
3060                 avg_wire_size = ring->total_bytes / ring->total_packets;
3061         }
3062
3063         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3064                 struct igb_ring *ring = q_vector->tx_ring;
3065                 avg_wire_size = max_t(u32, avg_wire_size,
3066                                       (ring->total_bytes /
3067                                        ring->total_packets));
3068         }
3069
3070         /* if avg_wire_size isn't set no work was done */
3071         if (!avg_wire_size)
3072                 goto clear_counts;
3073
3074         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3075         avg_wire_size += 24;
3076
3077         /* Don't starve jumbo frames */
3078         avg_wire_size = min(avg_wire_size, 3000);
3079
3080         /* Give a little boost to mid-size frames */
3081         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3082                 new_val = avg_wire_size / 3;
3083         else
3084                 new_val = avg_wire_size / 2;
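        /* Rough effect (assuming the same ITR units as the 976 ~= 4000
         * ints/sec value above): ~1500 byte frames yield new_val ~= 762,
         * i.e. about 5,000 ints/sec, while ~100 byte frames yield
         * new_val ~= 62, i.e. a much higher interrupt rate. */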
3085
3086 set_itr_val:
3087         if (new_val != q_vector->itr_val) {
3088                 q_vector->itr_val = new_val;
3089                 q_vector->set_itr = 1;
3090         }
3091 clear_counts:
3092         if (q_vector->rx_ring) {
3093                 q_vector->rx_ring->total_bytes = 0;
3094                 q_vector->rx_ring->total_packets = 0;
3095         }
3096         if (q_vector->tx_ring) {
3097                 q_vector->tx_ring->total_bytes = 0;
3098                 q_vector->tx_ring->total_packets = 0;
3099         }
3100 }
3101
3102 /**
3103  * igb_update_itr - update the dynamic ITR value based on statistics
3104  *      Stores a new ITR value based on packets and byte
3105  *      counts during the last interrupt.  The advantage of per interrupt
3106  *      computation is faster updates and more accurate ITR for the current
3107  *      traffic pattern.  Constants in this function were computed
3108  *      based on theoretical maximum wire speed and thresholds were set based
3109  *      on testing data as well as attempting to minimize response time
3110  *      while increasing bulk throughput.
3111  *      This functionality is controlled by the InterruptThrottleRate module
3112  *      parameter (see igb_param.c)
3113  *      NOTE:  These calculations are only valid when operating in a single-
3114  *             queue environment.
3115  * @adapter: pointer to adapter
3116  * @itr_setting: current q_vector->itr_val
3117  * @packets: the number of packets during this measurement interval
3118  * @bytes: the number of bytes during this measurement interval
3119  **/
3120 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3121                                    int packets, int bytes)
3122 {
3123         unsigned int retval = itr_setting;
3124
3125         if (packets == 0)
3126                 goto update_itr_done;
3127
3128         switch (itr_setting) {
3129         case lowest_latency:
3130                 /* handle TSO and jumbo frames */
3131                 if (bytes/packets > 8000)
3132                         retval = bulk_latency;
3133                 else if ((packets < 5) && (bytes > 512))
3134                         retval = low_latency;
3135                 break;
3136         case low_latency:  /* 50 usec aka 20000 ints/s */
3137                 if (bytes > 10000) {
3138                         /* this if handles the TSO accounting */
3139                         if (bytes/packets > 8000) {
3140                                 retval = bulk_latency;
3141                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3142                                 retval = bulk_latency;
3143                         } else if ((packets > 35)) {
3144                                 retval = lowest_latency;
3145                         }
3146                 } else if (bytes/packets > 2000) {
3147                         retval = bulk_latency;
3148                 } else if (packets <= 2 && bytes < 512) {
3149                         retval = lowest_latency;
3150                 }
3151                 break;
3152         case bulk_latency: /* 250 usec aka 4000 ints/s */
3153                 if (bytes > 25000) {
3154                         if (packets > 35)
3155                                 retval = low_latency;
3156                 } else if (bytes < 1500) {
3157                         retval = low_latency;
3158                 }
3159                 break;
3160         }
3161
3162 update_itr_done:
3163         return retval;
3164 }
3165
3166 static void igb_set_itr(struct igb_adapter *adapter)
3167 {
3168         struct igb_q_vector *q_vector = adapter->q_vector[0];
3169         u16 current_itr;
3170         u32 new_itr = q_vector->itr_val;
3171
3172         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3173         if (adapter->link_speed != SPEED_1000) {
3174                 current_itr = 0;
3175                 new_itr = 4000;
3176                 goto set_itr_now;
3177         }
3178
3179         adapter->rx_itr = igb_update_itr(adapter,
3180                                     adapter->rx_itr,
3181                                     adapter->rx_ring->total_packets,
3182                                     adapter->rx_ring->total_bytes);
3183
3184         adapter->tx_itr = igb_update_itr(adapter,
3185                                     adapter->tx_itr,
3186                                     adapter->tx_ring->total_packets,
3187                                     adapter->tx_ring->total_bytes);
3188         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3189
3190         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3191         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3192                 current_itr = low_latency;
3193
3194         switch (current_itr) {
3195         /* counts and packets in update_itr are dependent on these numbers */
3196         case lowest_latency:
3197                 new_itr = 56;  /* aka 70,000 ints/sec */
3198                 break;
3199         case low_latency:
3200                 new_itr = 196; /* aka 20,000 ints/sec */
3201                 break;
3202         case bulk_latency:
3203                 new_itr = 980; /* aka 4,000 ints/sec */
3204                 break;
3205         default:
3206                 break;
3207         }
3208
3209 set_itr_now:
3210         adapter->rx_ring->total_bytes = 0;
3211         adapter->rx_ring->total_packets = 0;
3212         adapter->tx_ring->total_bytes = 0;
3213         adapter->tx_ring->total_packets = 0;
3214
3215         if (new_itr != q_vector->itr_val) {
3216                 /* this attempts to bias the interrupt rate towards Bulk
3217                  * by adding intermediate steps when the ITR value is
3218                  * increasing */
3219                 new_itr = new_itr > q_vector->itr_val ?
3220                              max((new_itr * q_vector->itr_val) /
3221                                  (new_itr + (q_vector->itr_val >> 2)),
3222                                  new_itr) :
3223                              new_itr;
3224                 /* Don't write the value here; it resets the adapter's
3225                  * internal timer, and causes us to delay far longer than
3226                  * we should between interrupts.  Instead, we write the ITR
3227                  * value at the beginning of the next interrupt so the timing
3228                  * ends up being correct.
3229                  */
3230                 q_vector->itr_val = new_itr;
3231                 q_vector->set_itr = 1;
3232         }
3233
3234         return;
3235 }
3236
3237 #define IGB_TX_FLAGS_CSUM               0x00000001
3238 #define IGB_TX_FLAGS_VLAN               0x00000002
3239 #define IGB_TX_FLAGS_TSO                0x00000004
3240 #define IGB_TX_FLAGS_IPV4               0x00000008
3241 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3242 #define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
3243 #define IGB_TX_FLAGS_VLAN_SHIFT 16
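/* The low bits of tx_flags carry the feature flags above; the upper 16 bits
 * carry the 802.1Q VLAN tag (see IGB_TX_FLAGS_VLAN_MASK/_SHIFT and their use
 * in igb_xmit_frame_ring_adv()).
 */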
3244
3245 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3246                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3247 {
3248         struct e1000_adv_tx_context_desc *context_desc;
3249         unsigned int i;
3250         int err;
3251         struct igb_buffer *buffer_info;
3252         u32 info = 0, tu_cmd = 0;
3253         u32 mss_l4len_idx, l4len;
3254         *hdr_len = 0;
3255
3256         if (skb_header_cloned(skb)) {
3257                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3258                 if (err)
3259                         return err;
3260         }
3261
3262         l4len = tcp_hdrlen(skb);
3263         *hdr_len += l4len;
3264
3265         if (skb->protocol == htons(ETH_P_IP)) {
3266                 struct iphdr *iph = ip_hdr(skb);
3267                 iph->tot_len = 0;
3268                 iph->check = 0;
3269                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3270                                                          iph->daddr, 0,
3271                                                          IPPROTO_TCP,
3272                                                          0);
3273         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3274                 ipv6_hdr(skb)->payload_len = 0;
3275                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3276                                                        &ipv6_hdr(skb)->daddr,
3277                                                        0, IPPROTO_TCP, 0);
3278         }
3279
3280         i = tx_ring->next_to_use;
3281
3282         buffer_info = &tx_ring->buffer_info[i];
3283         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3284         /* VLAN MACLEN IPLEN */
3285         if (tx_flags & IGB_TX_FLAGS_VLAN)
3286                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3287         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3288         *hdr_len += skb_network_offset(skb);
3289         info |= skb_network_header_len(skb);
3290         *hdr_len += skb_network_header_len(skb);
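        /* hdr_len now covers the L2 + L3 + L4 headers; igb_tx_queue_adv()
         * reports skb->len - hdr_len as the TSO payload to the hardware. */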
3291         context_desc->vlan_macip_lens = cpu_to_le32(info);
3292
3293         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3294         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3295
3296         if (skb->protocol == htons(ETH_P_IP))
3297                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3298         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3299
3300         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3301
3302         /* MSS L4LEN IDX */
3303         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3304         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3305
3306         /* For 82575, context index must be unique per ring. */
3307         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3308                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3309
3310         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3311         context_desc->seqnum_seed = 0;
3312
3313         buffer_info->time_stamp = jiffies;
3314         buffer_info->next_to_watch = i;
3315         buffer_info->dma = 0;
3316         i++;
3317         if (i == tx_ring->count)
3318                 i = 0;
3319
3320         tx_ring->next_to_use = i;
3321
3322         return true;
3323 }
3324
3325 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3326                                    struct sk_buff *skb, u32 tx_flags)
3327 {
3328         struct e1000_adv_tx_context_desc *context_desc;
3329         struct pci_dev *pdev = tx_ring->pdev;
3330         struct igb_buffer *buffer_info;
3331         u32 info = 0, tu_cmd = 0;
3332         unsigned int i;
3333
3334         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3335             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3336                 i = tx_ring->next_to_use;
3337                 buffer_info = &tx_ring->buffer_info[i];
3338                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3339
3340                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3341                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3342                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3343                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3344                         info |= skb_network_header_len(skb);
3345
3346                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3347
3348                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3349
3350                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3351                         __be16 protocol;
3352
3353                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3354                                 const struct vlan_ethhdr *vhdr =
3355                                           (const struct vlan_ethhdr*)skb->data;
3356
3357                                 protocol = vhdr->h_vlan_encapsulated_proto;
3358                         } else {
3359                                 protocol = skb->protocol;
3360                         }
3361
3362                         switch (protocol) {
3363                         case cpu_to_be16(ETH_P_IP):
3364                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3365                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3366                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3367                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3368                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3369                                 break;
3370                         case cpu_to_be16(ETH_P_IPV6):
3371                                 /* XXX what about other V6 headers?? */
3372                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3373                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3374                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3375                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3376                                 break;
3377                         default:
3378                                 if (unlikely(net_ratelimit()))
3379                                         dev_warn(&pdev->dev,
3380                                             "partial checksum but proto=%x!\n",
3381                                             skb->protocol);
3382                                 break;
3383                         }
3384                 }
3385
3386                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3387                 context_desc->seqnum_seed = 0;
3388                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3389                         context_desc->mss_l4len_idx =
3390                                 cpu_to_le32(tx_ring->reg_idx << 4);
3391
3392                 buffer_info->time_stamp = jiffies;
3393                 buffer_info->next_to_watch = i;
3394                 buffer_info->dma = 0;
3395
3396                 i++;
3397                 if (i == tx_ring->count)
3398                         i = 0;
3399                 tx_ring->next_to_use = i;
3400
3401                 return true;
3402         }
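        /* no checksum offload requested and no VLAN tag: no context
         * descriptor is needed for this packet */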
3403         return false;
3404 }
3405
3406 #define IGB_MAX_TXD_PWR 16
3407 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3408
3409 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3410                                  unsigned int first)
3411 {
3412         struct igb_buffer *buffer_info;
3413         struct pci_dev *pdev = tx_ring->pdev;
3414         unsigned int len = skb_headlen(skb);
3415         unsigned int count = 0, i;
3416         unsigned int f;
3417         dma_addr_t *map;
3418
3419         i = tx_ring->next_to_use;
3420
3421         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3422                 dev_err(&pdev->dev, "TX DMA map failed\n");
3423                 return 0;
3424         }
3425
3426         map = skb_shinfo(skb)->dma_maps;
3427
3428         buffer_info = &tx_ring->buffer_info[i];
3429         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3430         buffer_info->length = len;
3431         /* set time_stamp *before* dma to help avoid a possible race */
3432         buffer_info->time_stamp = jiffies;
3433         buffer_info->next_to_watch = i;
3434         buffer_info->dma = skb_shinfo(skb)->dma_head;
3435
3436         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3437                 struct skb_frag_struct *frag;
3438
3439                 i++;
3440                 if (i == tx_ring->count)
3441                         i = 0;
3442
3443                 frag = &skb_shinfo(skb)->frags[f];
3444                 len = frag->size;
3445
3446                 buffer_info = &tx_ring->buffer_info[i];
3447                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3448                 buffer_info->length = len;
3449                 buffer_info->time_stamp = jiffies;
3450                 buffer_info->next_to_watch = i;
3451                 buffer_info->dma = map[count];
3452                 count++;
3453         }
3454
3455         tx_ring->buffer_info[i].skb = skb;
3456         tx_ring->buffer_info[first].next_to_watch = i;
3457
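        /* one descriptor was used for skb->data plus one per page fragment;
         * a return of 0 (see above) tells the caller the DMA mapping failed */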
3458         return count + 1;
3459 }
3460
3461 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3462                                     int tx_flags, int count, u32 paylen,
3463                                     u8 hdr_len)
3464 {
3465         union e1000_adv_tx_desc *tx_desc = NULL;
3466         struct igb_buffer *buffer_info;
3467         u32 olinfo_status = 0, cmd_type_len;
3468         unsigned int i;
3469
3470         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3471                         E1000_ADVTXD_DCMD_DEXT);
3472
3473         if (tx_flags & IGB_TX_FLAGS_VLAN)
3474                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3475
3476         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3477                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3478
3479         if (tx_flags & IGB_TX_FLAGS_TSO) {
3480                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3481
3482                 /* insert tcp checksum */
3483                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3484
3485                 /* insert ip checksum */
3486                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3487                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3488
3489         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3490                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3491         }
3492
3493         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3494             (tx_flags & (IGB_TX_FLAGS_CSUM |
3495                          IGB_TX_FLAGS_TSO |
3496                          IGB_TX_FLAGS_VLAN)))
3497                 olinfo_status |= tx_ring->reg_idx << 4;
3498
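        /* PAYLEN is the frame length minus the header length computed by
         * igb_tso_adv(); for non-TSO frames hdr_len is 0, so this is simply
         * skb->len */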
3499         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3500
3501         i = tx_ring->next_to_use;
3502         while (count--) {
3503                 buffer_info = &tx_ring->buffer_info[i];
3504                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3505                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3506                 tx_desc->read.cmd_type_len =
3507                         cpu_to_le32(cmd_type_len | buffer_info->length);
3508                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3509                 i++;
3510                 if (i == tx_ring->count)
3511                         i = 0;
3512         }
3513
3514         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3515         /* Force memory writes to complete before letting h/w
3516          * know there are new descriptors to fetch.  (Only
3517          * applicable for weak-ordered memory model archs,
3518          * such as IA-64). */
3519         wmb();
3520
3521         tx_ring->next_to_use = i;
3522         writel(i, tx_ring->tail);
3523         /* we need this if more than one processor can write to our tail
3524          * at a time; it synchronizes IO on IA64/Altix systems */
3525         mmiowb();
3526 }
3527
3528 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3529 {
3530         struct net_device *netdev = tx_ring->netdev;
3531
3532         netif_stop_subqueue(netdev, tx_ring->queue_index);
3533
3534         /* Herbert's original patch had:
3535          *  smp_mb__after_netif_stop_queue();
3536          * but since that doesn't exist yet, just open code it. */
3537         smp_mb();
3538
3539         /* We need to check again in case another CPU has just
3540          * made room available. */
3541         if (igb_desc_unused(tx_ring) < size)
3542                 return -EBUSY;
3543
3544         /* A reprieve! */
3545         netif_wake_subqueue(netdev, tx_ring->queue_index);
3546         tx_ring->tx_stats.restart_queue++;
3547         return 0;
3548 }
3549
3550 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3551 {
3552         if (igb_desc_unused(tx_ring) >= size)
3553                 return 0;
3554         return __igb_maybe_stop_tx(tx_ring, size);
3555 }
3556
3557 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3558                                     struct igb_ring *tx_ring)
3559 {
3560         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3561         unsigned int first;
3562         unsigned int tx_flags = 0;
3563         u8 hdr_len = 0;
3564         int count = 0;
3565         int tso = 0;
3566         union skb_shared_tx *shtx = skb_tx(skb);
3567
3568         /* need: 1 descriptor per page,
3569          *       + 2 desc gap to keep tail from touching head,
3570          *       + 1 desc for skb->data,
3571          *       + 1 desc for context descriptor,
3572          * otherwise try next time */
3573         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3574                 /* this is a hard error */
3575                 return NETDEV_TX_BUSY;
3576         }
3577
3578         if (unlikely(shtx->hardware)) {
3579                 shtx->in_progress = 1;
3580                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3581         }
3582
3583         if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3584                 tx_flags |= IGB_TX_FLAGS_VLAN;
3585                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3586         }
3587
3588         if (skb->protocol == htons(ETH_P_IP))
3589                 tx_flags |= IGB_TX_FLAGS_IPV4;
3590
3591         first = tx_ring->next_to_use;
3592         if (skb_is_gso(skb)) {
3593                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3594                 if (tso < 0) {
3595                         dev_kfree_skb_any(skb);
3596                         return NETDEV_TX_OK;
3597                 }
3598         }
3599
3600         if (tso)
3601                 tx_flags |= IGB_TX_FLAGS_TSO;
3602         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3603                  (skb->ip_summed == CHECKSUM_PARTIAL))
3604                 tx_flags |= IGB_TX_FLAGS_CSUM;
3605
3606         /*
3607          * count reflects descriptors mapped; if 0 then a mapping error
3608          * has occurred and we need to rewind the descriptor queue
3609          */
3610         count = igb_tx_map_adv(tx_ring, skb, first);
3611
3612         if (!count) {
3613                 dev_kfree_skb_any(skb);
3614                 tx_ring->buffer_info[first].time_stamp = 0;
3615                 tx_ring->next_to_use = first;
3616                 return NETDEV_TX_OK;
3617         }
3618
3619         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3620
3621         /* Make sure there is space in the ring for the next send. */
3622         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3623
3624         return NETDEV_TX_OK;
3625 }
3626
3627 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3628                                       struct net_device *netdev)
3629 {
3630         struct igb_adapter *adapter = netdev_priv(netdev);
3631         struct igb_ring *tx_ring;
3632         int r_idx = 0;
3633
3634         if (test_bit(__IGB_DOWN, &adapter->state)) {
3635                 dev_kfree_skb_any(skb);
3636                 return NETDEV_TX_OK;
3637         }
3638
3639         if (skb->len <= 0) {
3640                 dev_kfree_skb_any(skb);
3641                 return NETDEV_TX_OK;
3642         }
3643
3644         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3645         tx_ring = adapter->multi_tx_table[r_idx];
3646
3647         /* This goes back to the question of how to logically map a tx queue
3648          * to a flow.  Right now, performance is impacted slightly negatively
3649          * if using multiple tx queues.  If the stack breaks away from a
3650          * single qdisc implementation, we can look at this again. */
3651         return igb_xmit_frame_ring_adv(skb, tx_ring);
3652 }
3653
3654 /**
3655  * igb_tx_timeout - Respond to a Tx Hang
3656  * @netdev: network interface device structure
3657  **/
3658 static void igb_tx_timeout(struct net_device *netdev)
3659 {
3660         struct igb_adapter *adapter = netdev_priv(netdev);
3661         struct e1000_hw *hw = &adapter->hw;
3662
3663         /* Do the reset outside of interrupt context */
3664         adapter->tx_timeout_count++;
3665         schedule_work(&adapter->reset_task);
3666         wr32(E1000_EICS,
3667              (adapter->eims_enable_mask & ~adapter->eims_other));
3668 }
3669
3670 static void igb_reset_task(struct work_struct *work)
3671 {
3672         struct igb_adapter *adapter;
3673         adapter = container_of(work, struct igb_adapter, reset_task);
3674
3675         igb_reinit_locked(adapter);
3676 }
3677
3678 /**
3679  * igb_get_stats - Get System Network Statistics
3680  * @netdev: network interface device structure
3681  *
3682  * Returns the address of the device statistics structure.
3683  * The statistics are actually updated from the timer callback.
3684  **/
3685 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3686 {
3687         /* only return the current stats */
3688         return &netdev->stats;
3689 }
3690
3691 /**
3692  * igb_change_mtu - Change the Maximum Transfer Unit
3693  * @netdev: network interface device structure
3694  * @new_mtu: new value for maximum frame size
3695  *
3696  * Returns 0 on success, negative on failure
3697  **/
3698 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3699 {
3700         struct igb_adapter *adapter = netdev_priv(netdev);
3701         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3702         u32 rx_buffer_len, i;
3703
3704         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3705             (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3706                 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3707                 return -EINVAL;
3708         }
3709
3710         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3711                 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3712                 return -EINVAL;
3713         }
3714
3715         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3716                 msleep(1);
3717
3718         /* igb_down has a dependency on max_frame_size */
3719         adapter->max_frame_size = max_frame;
3720         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3721          * means we reserve 2 more, this pushes us to allocate from the next
3722          * larger slab size.
3723          * i.e. RXBUFFER_2048 --> size-4096 slab
3724          */
3725
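        /* For frames larger than a VLAN-tagged standard frame the receive
         * path splits the packet, placing only the headers in the skb and
         * the data in page buffers, so a small IGB_RXBUFFER_128 buffer is
         * sufficient. */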
3726         if (max_frame <= IGB_RXBUFFER_1024)
3727                 rx_buffer_len = IGB_RXBUFFER_1024;
3728         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3729                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3730         else
3731                 rx_buffer_len = IGB_RXBUFFER_128;
3732
3733         if (netif_running(netdev))
3734                 igb_down(adapter);
3735
3736         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3737                  netdev->mtu, new_mtu);
3738         netdev->mtu = new_mtu;
3739
3740         for (i = 0; i < adapter->num_rx_queues; i++)
3741                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3742
3743         if (netif_running(netdev))
3744                 igb_up(adapter);
3745         else
3746                 igb_reset(adapter);
3747
3748         clear_bit(__IGB_RESETTING, &adapter->state);
3749
3750         return 0;
3751 }
3752
3753 /**
3754  * igb_update_stats - Update the board statistics counters
3755  * @adapter: board private structure
3756  **/
3757
3758 void igb_update_stats(struct igb_adapter *adapter)
3759 {
3760         struct net_device *netdev = adapter->netdev;
3761         struct e1000_hw *hw = &adapter->hw;
3762         struct pci_dev *pdev = adapter->pdev;
3763         u16 phy_tmp;
3764
3765 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3766
3767         /*
3768          * Prevent stats update while adapter is being reset, or if the pci
3769          * connection is down.
3770          */
3771         if (adapter->link_speed == 0)
3772                 return;
3773         if (pci_channel_offline(pdev))
3774                 return;
3775
3776         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3777         adapter->stats.gprc += rd32(E1000_GPRC);
3778         adapter->stats.gorc += rd32(E1000_GORCL);
3779         rd32(E1000_GORCH); /* clear GORCL */
3780         adapter->stats.bprc += rd32(E1000_BPRC);
3781         adapter->stats.mprc += rd32(E1000_MPRC);
3782         adapter->stats.roc += rd32(E1000_ROC);
3783
3784         adapter->stats.prc64 += rd32(E1000_PRC64);
3785         adapter->stats.prc127 += rd32(E1000_PRC127);
3786         adapter->stats.prc255 += rd32(E1000_PRC255);
3787         adapter->stats.prc511 += rd32(E1000_PRC511);
3788         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3789         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3790         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3791         adapter->stats.sec += rd32(E1000_SEC);
3792
3793         adapter->stats.mpc += rd32(E1000_MPC);
3794         adapter->stats.scc += rd32(E1000_SCC);
3795         adapter->stats.ecol += rd32(E1000_ECOL);
3796         adapter->stats.mcc += rd32(E1000_MCC);
3797         adapter->stats.latecol += rd32(E1000_LATECOL);
3798         adapter->stats.dc += rd32(E1000_DC);
3799         adapter->stats.rlec += rd32(E1000_RLEC);
3800         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3801         adapter->stats.xontxc += rd32(E1000_XONTXC);
3802         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3803         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3804         adapter->stats.fcruc += rd32(E1000_FCRUC);
3805         adapter->stats.gptc += rd32(E1000_GPTC);
3806         adapter->stats.gotc += rd32(E1000_GOTCL);
3807         rd32(E1000_GOTCH); /* clear GOTCL */
3808         adapter->stats.rnbc += rd32(E1000_RNBC);
3809         adapter->stats.ruc += rd32(E1000_RUC);
3810         adapter->stats.rfc += rd32(E1000_RFC);
3811         adapter->stats.rjc += rd32(E1000_RJC);
3812         adapter->stats.tor += rd32(E1000_TORH);
3813         adapter->stats.tot += rd32(E1000_TOTH);
3814         adapter->stats.tpr += rd32(E1000_TPR);
3815
3816         adapter->stats.ptc64 += rd32(E1000_PTC64);
3817         adapter->stats.ptc127 += rd32(E1000_PTC127);
3818         adapter->stats.ptc255 += rd32(E1000_PTC255);
3819         adapter->stats.ptc511 += rd32(E1000_PTC511);
3820         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3821         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3822
3823         adapter->stats.mptc += rd32(E1000_MPTC);
3824         adapter->stats.bptc += rd32(E1000_BPTC);
3825
3826         /* used for adaptive IFS */
3827
3828         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3829         adapter->stats.tpt += hw->mac.tx_packet_delta;
3830         hw->mac.collision_delta = rd32(E1000_COLC);
3831         adapter->stats.colc += hw->mac.collision_delta;
3832
3833         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3834         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3835         adapter->stats.tncrs += rd32(E1000_TNCRS);
3836         adapter->stats.tsctc += rd32(E1000_TSCTC);
3837         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3838
3839         adapter->stats.iac += rd32(E1000_IAC);
3840         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3841         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3842         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3843         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3844         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3845         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3846         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3847         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3848
3849         /* Fill out the OS statistics structure */
3850         netdev->stats.multicast = adapter->stats.mprc;
3851         netdev->stats.collisions = adapter->stats.colc;
3852
3853         /* Rx Errors */
3854
3855         if (hw->mac.type != e1000_82575) {
3856                 u32 rqdpc_tmp;
3857                 u64 rqdpc_total = 0;
3858                 int i;
3859                 /* Read out drop stats per RX queue.  Note that RQDPC (Receive
3860                  * Queue Drop Packet Count) is only incremented if the
3861                  * DROP_EN bit is set (in the SRRCTL register for that
3862                  * queue).  If the DROP_EN bit is NOT set, then a somewhat
3863                  * equivalent count is stored in RNBC (not on a per queue
3864                  * basis).  Also note the drop count is due to lack of
3865                  * available descriptors.
3866                  */
3867                 for (i = 0; i < adapter->num_rx_queues; i++) {
3868                         rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3869                         adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3870                         rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3871                 }
3872                 netdev->stats.rx_fifo_errors = rqdpc_total;
3873         }
3874
3875         /* Note RNBC (Receive No Buffers Count) is not an exact
3876          * drop count as the hardware FIFO might save the day.  That's
3877          * one of the reasons for adding it to rx_fifo_errors, as it is
3878          * potentially not a true drop.
3879          */
3880         netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3881
3882         /* RLEC on some newer hardware can be incorrect so build
3883          * our own version based on RUC and ROC */
3884         netdev->stats.rx_errors = adapter->stats.rxerrc +
3885                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3886                 adapter->stats.ruc + adapter->stats.roc +
3887                 adapter->stats.cexterr;
3888         netdev->stats.rx_length_errors = adapter->stats.ruc +
3889                                               adapter->stats.roc;
3890         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3891         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3892         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3893
3894         /* Tx Errors */
3895         netdev->stats.tx_errors = adapter->stats.ecol +
3896                                        adapter->stats.latecol;
3897         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3898         netdev->stats.tx_window_errors = adapter->stats.latecol;
3899         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3900
3901         /* Tx Dropped needs to be maintained elsewhere */
3902
3903         /* Phy Stats */
3904         if (hw->phy.media_type == e1000_media_type_copper) {
3905                 if ((adapter->link_speed == SPEED_1000) &&
3906                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3907                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3908                         adapter->phy_stats.idle_errors += phy_tmp;
3909                 }
3910         }
3911
3912         /* Management Stats */
3913         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3914         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3915         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3916 }
3917
3918 static irqreturn_t igb_msix_other(int irq, void *data)
3919 {
3920         struct igb_adapter *adapter = data;
3921         struct e1000_hw *hw = &adapter->hw;
3922         u32 icr = rd32(E1000_ICR);
3923         /* reading ICR causes bit 31 of EICR to be cleared */
3924
3925         if (icr & E1000_ICR_DOUTSYNC) {
3926                 /* HW is reporting DMA is out of sync */
3927                 adapter->stats.doosync++;
3928         }
3929
3930         /* Check for a mailbox event */
3931         if (icr & E1000_ICR_VMMB)
3932                 igb_msg_task(adapter);
3933
3934         if (icr & E1000_ICR_LSC) {
3935                 hw->mac.get_link_status = 1;
3936                 /* guard against interrupt when we're going down */
3937                 if (!test_bit(__IGB_DOWN, &adapter->state))
3938                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3939         }
3940
3941         wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3942         wr32(E1000_EIMS, adapter->eims_other);
3943
3944         return IRQ_HANDLED;
3945 }
3946
3947 static void igb_write_itr(struct igb_q_vector *q_vector)
3948 {
3949         u32 itr_val = q_vector->itr_val & 0x7FFC;
3950
3951         if (!q_vector->set_itr)
3952                 return;
3953
3954         if (!itr_val)
3955                 itr_val = 0x4;
3956
3957         if (q_vector->itr_shift)
3958                 itr_val |= itr_val << q_vector->itr_shift;
3959         else
3960                 itr_val |= 0x8000000;
3961
3962         writel(itr_val, q_vector->itr_register);
3963         q_vector->set_itr = 0;
3964 }
3965
3966 static irqreturn_t igb_msix_ring(int irq, void *data)
3967 {
3968         struct igb_q_vector *q_vector = data;
3969
3970         /* Write the ITR value calculated from the previous interrupt. */
3971         igb_write_itr(q_vector);
3972
3973         napi_schedule(&q_vector->napi);
3974
3975         return IRQ_HANDLED;
3976 }
3977
3978 #ifdef CONFIG_IGB_DCA
3979 static void igb_update_dca(struct igb_q_vector *q_vector)
3980 {
3981         struct igb_adapter *adapter = q_vector->adapter;
3982         struct e1000_hw *hw = &adapter->hw;
3983         int cpu = get_cpu();
3984
3985         if (q_vector->cpu == cpu)
3986                 goto out_no_update;
3987
3988         if (q_vector->tx_ring) {
3989                 int q = q_vector->tx_ring->reg_idx;
3990                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
3991                 if (hw->mac.type == e1000_82575) {
3992                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
3993                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
3994                 } else {
3995                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
3996                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
3997                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
3998                 }
3999                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4000                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4001         }
4002         if (q_vector->rx_ring) {
4003                 int q = q_vector->rx_ring->reg_idx;
4004                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4005                 if (hw->mac.type == e1000_82575) {
4006                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4007                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4008                 } else {
4009                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4010                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4011                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4012                 }
4013                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4014                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4015                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4016                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4017         }
4018         q_vector->cpu = cpu;
4019 out_no_update:
4020         put_cpu();
4021 }
4022
4023 static void igb_setup_dca(struct igb_adapter *adapter)
4024 {
4025         struct e1000_hw *hw = &adapter->hw;
4026         int i;
4027
4028         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4029                 return;
4030
4031         /* Always use CB2 mode, difference is masked in the CB driver. */
4032         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4033
4034         for (i = 0; i < adapter->num_q_vectors; i++) {
4035                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4036                 q_vector->cpu = -1;
4037                 igb_update_dca(q_vector);
4038         }
4039 }
4040
4041 static int __igb_notify_dca(struct device *dev, void *data)
4042 {
4043         struct net_device *netdev = dev_get_drvdata(dev);
4044         struct igb_adapter *adapter = netdev_priv(netdev);
4045         struct e1000_hw *hw = &adapter->hw;
4046         unsigned long event = *(unsigned long *)data;
4047
4048         switch (event) {
4049         case DCA_PROVIDER_ADD:
4050                 /* if already enabled, don't do it again */
4051                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4052                         break;
4053                 /* Always use CB2 mode, difference is masked
4054                  * in the CB driver. */
4055                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4056                 if (dca_add_requester(dev) == 0) {
4057                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4058                         dev_info(&adapter->pdev->dev, "DCA enabled\n");
4059                         igb_setup_dca(adapter);
4060                         break;
4061                 }
4062                 /* Fall Through since DCA is disabled. */
4063         case DCA_PROVIDER_REMOVE:
4064                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4065                         /* without this a class_device is left
4066                          * hanging around in the sysfs model */
4067                         dca_remove_requester(dev);
4068                         dev_info(&adapter->pdev->dev, "DCA disabled\n");
4069                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4070                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4071                 }
4072                 break;
4073         }
4074
4075         return 0;
4076 }
4077
4078 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4079                           void *p)
4080 {
4081         int ret_val;
4082
4083         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4084                                          __igb_notify_dca);
4085
4086         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4087 }
4088 #endif /* CONFIG_IGB_DCA */
4089
4090 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4091 {
4092         struct e1000_hw *hw = &adapter->hw;
4093         u32 ping;
4094         int i;
4095
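        /* ping every VF; the CTS (clear to send) message type bit is only
         * set for VFs that have IGB_VF_FLAG_CTS set in their flags */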
4096         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4097                 ping = E1000_PF_CONTROL_MSG;
4098                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4099                         ping |= E1000_VT_MSGTYPE_CTS;
4100                 igb_write_mbx(hw, &ping, 1, i);
4101         }
4102 }
4103
4104 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4105                                   u32 *msgbuf, u32 vf)
4106 {
4107         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4108         u16 *hash_list = (u16 *)&msgbuf[1];
4109         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4110         int i;
4111
4112         /* only up to 30 hash values supported */
4113         if (n > 30)
4114                 n = 30;
4115
4116         /* salt away the number of multicast addresses assigned
4117          * to this VF for later use, to restore them when the PF
4118          * multicast list changes
4119          */
4120         vf_data->num_vf_mc_hashes = n;
4121
4122         /* VFs are limited to using the MTA hash table for their multicast
4123          * addresses */
4124         for (i = 0; i < n; i++)
4125                 vf_data->vf_mc_hashes[i] = hash_list[i];
4126
4127         /* Flush and reset the mta with the new values */
4128         igb_set_rx_mode(adapter->netdev);
4129
4130         return 0;
4131 }
4132
4133 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4134 {
4135         struct e1000_hw *hw = &adapter->hw;
4136         struct vf_data_storage *vf_data;
4137         int i, j;
4138
4139         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4140                 vf_data = &adapter->vf_data[i];
4141                 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4142                         igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4143         }
4144 }
4145
4146 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4147 {
4148         struct e1000_hw *hw = &adapter->hw;
4149         u32 pool_mask, reg, vid;
4150         int i;
4151
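        /* each VLVF entry carries a pool-select bitmap; this VF's bit sits
         * at E1000_VLVF_POOLSEL_SHIFT + vf */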
4152         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4153
4154         /* Find the vlan filter for this id */
4155         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4156                 reg = rd32(E1000_VLVF(i));
4157
4158                 /* remove the vf from the pool */
4159                 reg &= ~pool_mask;
4160
4161                 /* if pool is empty then remove entry from vfta */
4162                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4163                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4164                         vid = reg & E1000_VLVF_VLANID_MASK;
4165                         reg = 0;
4166                         igb_vfta_set(hw, vid, false);
4167                 }
4168
4169                 wr32(E1000_VLVF(i), reg);
4170         }
4171
4172         adapter->vf_data[vf].vlans_enabled = 0;
4173 }
4174
4175 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4176 {
4177         struct e1000_hw *hw = &adapter->hw;
4178         u32 reg, i;
4179
4180         /* It is an error to call this function when VFs are not enabled */
4181         if (!adapter->vfs_allocated_count)
4182                 return -1;
4183
4184         /* Find the vlan filter for this id */
4185         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4186                 reg = rd32(E1000_VLVF(i));
4187                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4188                     vid == (reg & E1000_VLVF_VLANID_MASK))
4189                         break;
4190         }
4191
4192         if (add) {
4193                 if (i == E1000_VLVF_ARRAY_SIZE) {
4194                         /* Did not find a matching VLAN ID entry that was
4195                          * enabled.  Search for a free filter entry, i.e.
4196                          * one without the enable bit set
4197                          */
4198                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4199                                 reg = rd32(E1000_VLVF(i));
4200                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4201                                         break;
4202                         }
4203                 }
4204                 if (i < E1000_VLVF_ARRAY_SIZE) {
4205                         /* Found an enabled/available entry */
4206                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4207
4208                         /* if !enabled we need to set this up in vfta */
4209                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4210                                 /* add VID to filter table; if the bit was already
4211                                  * set the PF must have added it outside the table */
4212                                 if (igb_vfta_set(hw, vid, true))
4213                                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4214                                                 adapter->vfs_allocated_count);
4215                                 reg |= E1000_VLVF_VLANID_ENABLE;
4216                         }
4217                         reg &= ~E1000_VLVF_VLANID_MASK;
4218                         reg |= vid;
4219
4220                         wr32(E1000_VLVF(i), reg);
4221
4222                         /* do not modify RLPML for PF devices */
4223                         if (vf >= adapter->vfs_allocated_count)
4224                                 return 0;
4225
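                             /* first VLAN for this VF: grow RLPML by 4 bytes for the VLAN tag */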
4226                         if (!adapter->vf_data[vf].vlans_enabled) {
4227                                 u32 size;
4228                                 reg = rd32(E1000_VMOLR(vf));
4229                                 size = reg & E1000_VMOLR_RLPML_MASK;
4230                                 size += 4;
4231                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4232                                 reg |= size;
4233                                 wr32(E1000_VMOLR(vf), reg);
4234                         }
4235                         adapter->vf_data[vf].vlans_enabled++;
4236
4237                         return 0;
4238                 }
4239         } else {
4240                 if (i < E1000_VLVF_ARRAY_SIZE) {
4241                         /* remove vf from the pool */
4242                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4243                         /* if pool is empty then remove entry from vfta */
4244                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4245                                 reg = 0;
4246                                 igb_vfta_set(hw, vid, false);
4247                         }
4248                         wr32(E1000_VLVF(i), reg);
4249
4250                         /* do not modify RLPML for PF devices */
4251                         if (vf >= adapter->vfs_allocated_count)
4252                                 return 0;
4253
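                             /* drop the VLAN count; at zero, shrink RLPML back by the 4-byte VLAN tag */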
4254                         adapter->vf_data[vf].vlans_enabled--;
4255                         if (!adapter->vf_data[vf].vlans_enabled) {
4256                                 u32 size;
4257                                 reg = rd32(E1000_VMOLR(vf));
4258                                 size = reg & E1000_VMOLR_RLPML_MASK;
4259                                 size -= 4;
4260                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4261                                 reg |= size;
4262                                 wr32(E1000_VMOLR(vf), reg);
4263                         }
4264                         return 0;
4265                 }
4266         }
4267         return -1;
4268 }
4269
4270 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4271 {
4272         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4273         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4274
4275         return igb_vlvf_set(adapter, vid, add, vf);
4276 }
4277
4278 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4279 {
4280         /* clear all flags */
4281         adapter->vf_data[vf].flags = 0;
4282         adapter->vf_data[vf].last_nack = jiffies;
4283
4284         /* reset offloads to defaults */
4285         igb_set_vmolr(&adapter->hw, vf);
4286
4287         /* reset vlans for device */
4288         igb_clear_vf_vfta(adapter, vf);
4289
4290         /* reset multicast table array for vf */
4291         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4292
4293         /* Flush and reset the mta with the new values */
4294         igb_set_rx_mode(adapter->netdev);
4295 }
4296
4297 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4298 {
4299         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4300
4301         /* generate a new mac address as we were hotplug removed/added */
4302         random_ether_addr(vf_mac);
4303
4304         /* process remaining reset events */
4305         igb_vf_reset(adapter, vf);
4306 }
4307
4308 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4309 {
4310         struct e1000_hw *hw = &adapter->hw;
4311         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4312         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4313         u32 reg, msgbuf[3];
4314         u8 *addr = (u8 *)(&msgbuf[1]);
4315
4316         /* process all the same items cleared in a function level reset */
4317         igb_vf_reset(adapter, vf);
4318
4319         /* set vf mac address */
4320         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4321
4322         /* enable transmit and receive for vf */
4323         reg = rd32(E1000_VFTE);
4324         wr32(E1000_VFTE, reg | (1 << vf));
4325         reg = rd32(E1000_VFRE);
4326         wr32(E1000_VFRE, reg | (1 << vf));
4327
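             /* the reset handshake is complete, mark the VF as clear to send */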
4328         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4329
4330         /* reply to reset with ack and vf mac address */
4331         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4332         memcpy(addr, vf_mac, 6);
4333         igb_write_mbx(hw, msgbuf, 3, vf);
4334 }
4335
4336 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4337 {
4338         unsigned char *addr = (unsigned char *)&msg[1];
4339         int err = -1;
4340
4341         if (is_valid_ether_addr(addr))
4342                 err = igb_set_vf_mac(adapter, vf, addr);
4343
4344         return err;
4345 }
4346
4347 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4348 {
4349         struct e1000_hw *hw = &adapter->hw;
4350         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4351         u32 msg = E1000_VT_MSGTYPE_NACK;
4352
4353         /* if device isn't clear to send it shouldn't be reading either */
4354         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4355             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4356                 igb_write_mbx(hw, &msg, 1, vf);
4357                 vf_data->last_nack = jiffies;
4358         }
4359 }
4360
4361 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4362 {
4363         struct pci_dev *pdev = adapter->pdev;
4364         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4365         struct e1000_hw *hw = &adapter->hw;
4366         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4367         s32 retval;
4368
4369         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4370
4371         if (retval)
4372                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4373
4374         /* this is a message we already processed, do nothing */
4375         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4376                 return;
4377
4378         /*
4379          * until the vf completes a reset it should not be
4380          * allowed to start any configuration.
4381          */
4382
4383         if (msgbuf[0] == E1000_VF_RESET) {
4384                 igb_vf_reset_msg(adapter, vf);
4385                 return;
4386         }
4387
4388         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4389                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4390                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4391                         igb_write_mbx(hw, msgbuf, 1, vf);
4392                         vf_data->last_nack = jiffies;
4393                 }
4394                 return;
4395         }
4396
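             /* the low 16 bits of the mailbox message carry the command id */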
4397         switch ((msgbuf[0] & 0xFFFF)) {
4398         case E1000_VF_SET_MAC_ADDR:
4399                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4400                 break;
4401         case E1000_VF_SET_MULTICAST:
4402                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4403                 break;
4404         case E1000_VF_SET_LPE:
4405                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4406                 break;
4407         case E1000_VF_SET_VLAN:
4408                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4409                 break;
4410         default:
4411                 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4412                 retval = -1;
4413                 break;
4414         }
4415
4416         /* notify the VF of the results of what it sent us */
4417         if (retval)
4418                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4419         else
4420                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4421
4422         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4423
4424         igb_write_mbx(hw, msgbuf, 1, vf);
4425 }
4426
4427 static void igb_msg_task(struct igb_adapter *adapter)
4428 {
4429         struct e1000_hw *hw = &adapter->hw;
4430         u32 vf;
4431
4432         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4433                 /* process any reset requests */
4434                 if (!igb_check_for_rst(hw, vf))
4435                         igb_vf_reset_event(adapter, vf);
4436
4437                 /* process any messages pending */
4438                 if (!igb_check_for_msg(hw, vf))
4439                         igb_rcv_msg_from_vf(adapter, vf);
4440
4441                 /* process any acks */
4442                 if (!igb_check_for_ack(hw, vf))
4443                         igb_rcv_ack_from_vf(adapter, vf);
4444         }
4445 }
4446
4447 /**
4448  *  igb_set_uta - Set unicast filter table address
4449  *  @adapter: board private structure
4450  *
4451  *  The unicast table address is a register array of 32-bit registers.
4452  *  The table is meant to be used in a way similar to how the MTA is used,
4453  *  however, due to certain limitations in the hardware it is necessary to
4454  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4455  *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
4456  **/
4457 static void igb_set_uta(struct igb_adapter *adapter)
4458 {
4459         struct e1000_hw *hw = &adapter->hw;
4460         int i;
4461
4462         /* The UTA table only exists on 82576 hardware and newer */
4463         if (hw->mac.type < e1000_82576)
4464                 return;
4465
4466         /* we only need to do this if VMDq is enabled */
4467         if (!adapter->vfs_allocated_count)
4468                 return;
4469
4470         for (i = 0; i < hw->mac.uta_reg_count; i++)
4471                 array_wr32(E1000_UTA, i, ~0);
4472 }
4473
4474 /**
4475  * igb_intr_msi - Interrupt Handler
4476  * @irq: interrupt number
4477  * @data: pointer to a network interface device structure
4478  **/
4479 static irqreturn_t igb_intr_msi(int irq, void *data)
4480 {
4481         struct igb_adapter *adapter = data;
4482         struct igb_q_vector *q_vector = adapter->q_vector[0];
4483         struct e1000_hw *hw = &adapter->hw;
4484         /* read ICR disables interrupts using IAM */
4485         u32 icr = rd32(E1000_ICR);
4486
4487         igb_write_itr(q_vector);
4488
4489         if (icr & E1000_ICR_DOUTSYNC) {
4490                 /* HW is reporting DMA is out of sync */
4491                 adapter->stats.doosync++;
4492         }
4493
4494         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4495                 hw->mac.get_link_status = 1;
4496                 if (!test_bit(__IGB_DOWN, &adapter->state))
4497                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4498         }
4499
4500         napi_schedule(&q_vector->napi);
4501
4502         return IRQ_HANDLED;
4503 }
4504
4505 /**
4506  * igb_intr - Legacy Interrupt Handler
4507  * @irq: interrupt number
4508  * @data: pointer to a network interface device structure
4509  **/
4510 static irqreturn_t igb_intr(int irq, void *data)
4511 {
4512         struct igb_adapter *adapter = data;
4513         struct igb_q_vector *q_vector = adapter->q_vector[0];
4514         struct e1000_hw *hw = &adapter->hw;
4515         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4516          * need for the IMC write */
4517         u32 icr = rd32(E1000_ICR);
4518         if (!icr)
4519                 return IRQ_NONE;  /* Not our interrupt */
4520
4521         igb_write_itr(q_vector);
4522
4523         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4524          * not set, then the adapter didn't send an interrupt */
4525         if (!(icr & E1000_ICR_INT_ASSERTED))
4526                 return IRQ_NONE;
4527
4528         if (icr & E1000_ICR_DOUTSYNC) {
4529                 /* HW is reporting DMA is out of sync */
4530                 adapter->stats.doosync++;
4531         }
4532
4533         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4534                 hw->mac.get_link_status = 1;
4535                 /* guard against interrupt when we're going down */
4536                 if (!test_bit(__IGB_DOWN, &adapter->state))
4537                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4538         }
4539
4540         napi_schedule(&q_vector->napi);
4541
4542         return IRQ_HANDLED;
4543 }
4544
4545 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4546 {
4547         struct igb_adapter *adapter = q_vector->adapter;
4548         struct e1000_hw *hw = &adapter->hw;
4549
4550         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4551             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4552                 if (!adapter->msix_entries)
4553                         igb_set_itr(adapter);
4554                 else
4555                         igb_update_ring_itr(q_vector);
4556         }
4557
4558         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4559                 if (adapter->msix_entries)
4560                         wr32(E1000_EIMS, q_vector->eims_value);
4561                 else
4562                         igb_irq_enable(adapter);
4563         }
4564 }
4565
4566 /**
4567  * igb_poll - NAPI Rx polling callback
4568  * @napi: napi polling structure
4569  * @budget: count of how many packets we should handle
4570  **/
4571 static int igb_poll(struct napi_struct *napi, int budget)
4572 {
4573         struct igb_q_vector *q_vector = container_of(napi,
4574                                                      struct igb_q_vector,
4575                                                      napi);
4576         int tx_clean_complete = 1, work_done = 0;
4577
4578 #ifdef CONFIG_IGB_DCA
4579         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4580                 igb_update_dca(q_vector);
4581 #endif
4582         if (q_vector->tx_ring)
4583                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4584
4585         if (q_vector->rx_ring)
4586                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4587
4588         if (!tx_clean_complete)
4589                 work_done = budget;
4590
4591         /* If not enough Rx work done, exit the polling mode */
4592         if (work_done < budget) {
4593                 napi_complete(napi);
4594                 igb_ring_irq_enable(q_vector);
4595         }
4596
4597         return work_done;
4598 }
4599
4600 /**
4601  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4602  * @adapter: board private structure
4603  * @shhwtstamps: timestamp structure to update
4604  * @regval: unsigned 64bit system time value.
4605  *
4606  * We need to convert the system time value stored in the RX/TXSTMP registers
4607  * into a hwtstamp which can be used by the upper level timestamping functions
4608  */
4609 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4610                                    struct skb_shared_hwtstamps *shhwtstamps,
4611                                    u64 regval)
4612 {
4613         u64 ns;
4614
4615         ns = timecounter_cyc2time(&adapter->clock, regval);
4616         timecompare_update(&adapter->compare, ns);
4617         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4618         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4619         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4620 }
4621
4622 /**
4623  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4624  * @q_vector: pointer to q_vector containing needed info
4625  * @skb: packet that was just sent
4626  *
4627  * If we were asked to do hardware stamping and such a time stamp is
4628  * available, then it must have been for this skb here because we allow
4629  * only one such packet into the queue.
4630  */
4631 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4632 {
4633         struct igb_adapter *adapter = q_vector->adapter;
4634         union skb_shared_tx *shtx = skb_tx(skb);
4635         struct e1000_hw *hw = &adapter->hw;
4636         struct skb_shared_hwtstamps shhwtstamps;
4637         u64 regval;
4638
4639         /* if skb does not support hw timestamp or TX stamp not valid exit */
4640         if (likely(!shtx->hardware) ||
4641             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4642                 return;
4643
4644         regval = rd32(E1000_TXSTMPL);
4645         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4646
4647         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4648         skb_tstamp_tx(skb, &shhwtstamps);
4649 }
4650
4651 /**
4652  * igb_clean_tx_irq - Reclaim resources after transmit completes
4653  * @q_vector: pointer to q_vector containing needed info
4654  * returns true if ring is completely cleaned
4655  **/
4656 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4657 {
4658         struct igb_adapter *adapter = q_vector->adapter;
4659         struct igb_ring *tx_ring = q_vector->tx_ring;
4660         struct net_device *netdev = tx_ring->netdev;
4661         struct e1000_hw *hw = &adapter->hw;
4662         struct igb_buffer *buffer_info;
4663         struct sk_buff *skb;
4664         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4665         unsigned int total_bytes = 0, total_packets = 0;
4666         unsigned int i, eop, count = 0;
4667         bool cleaned = false;
4668
4669         i = tx_ring->next_to_clean;
4670         eop = tx_ring->buffer_info[i].next_to_watch;
4671         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4672
4673         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4674                (count < tx_ring->count)) {
4675                 for (cleaned = false; !cleaned; count++) {
4676                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4677                         buffer_info = &tx_ring->buffer_info[i];
4678                         cleaned = (i == eop);
4679                         skb = buffer_info->skb;
4680
4681                         if (skb) {
4682                                 unsigned int segs, bytecount;
4683                                 /* gso_segs is currently only valid for tcp */
4684                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4685                                 /* multiply data chunks by size of headers */
4686                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4687                                             skb->len;
4688                                 total_packets += segs;
4689                                 total_bytes += bytecount;
4690
4691                                 igb_tx_hwtstamp(q_vector, skb);
4692                         }
4693
4694                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4695                         tx_desc->wb.status = 0;
4696
4697                         i++;
4698                         if (i == tx_ring->count)
4699                                 i = 0;
4700                 }
4701                 eop = tx_ring->buffer_info[i].next_to_watch;
4702                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4703         }
4704
4705         tx_ring->next_to_clean = i;
4706
4707         if (unlikely(count &&
4708                      netif_carrier_ok(netdev) &&
4709                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4710                 /* Make sure that anybody stopping the queue after this
4711                  * sees the new next_to_clean.
4712                  */
4713                 smp_mb();
4714                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4715                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4716                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4717                         tx_ring->tx_stats.restart_queue++;
4718                 }
4719         }
4720
4721         if (tx_ring->detect_tx_hung) {
4722                 /* Detect a transmit hang in hardware, this serializes the
4723                  * check with the clearing of time_stamp and movement of i */
4724                 tx_ring->detect_tx_hung = false;
4725                 if (tx_ring->buffer_info[i].time_stamp &&
4726                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4727                                (adapter->tx_timeout_factor * HZ))
4728                     && !(rd32(E1000_STATUS) &
4729                          E1000_STATUS_TXOFF)) {
4730
4731                         /* detected Tx unit hang */
4732                         dev_err(&tx_ring->pdev->dev,
4733                                 "Detected Tx Unit Hang\n"
4734                                 "  Tx Queue             <%d>\n"
4735                                 "  TDH                  <%x>\n"
4736                                 "  TDT                  <%x>\n"
4737                                 "  next_to_use          <%x>\n"
4738                                 "  next_to_clean        <%x>\n"
4739                                 "buffer_info[next_to_clean]\n"
4740                                 "  time_stamp           <%lx>\n"
4741                                 "  next_to_watch        <%x>\n"
4742                                 "  jiffies              <%lx>\n"
4743                                 "  desc.status          <%x>\n",
4744                                 tx_ring->queue_index,
4745                                 readl(tx_ring->head),
4746                                 readl(tx_ring->tail),
4747                                 tx_ring->next_to_use,
4748                                 tx_ring->next_to_clean,
4749                                 tx_ring->buffer_info[i].time_stamp,
4750                                 eop,
4751                                 jiffies,
4752                                 eop_desc->wb.status);
4753                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4754                 }
4755         }
4756         tx_ring->total_bytes += total_bytes;
4757         tx_ring->total_packets += total_packets;
4758         tx_ring->tx_stats.bytes += total_bytes;
4759         tx_ring->tx_stats.packets += total_packets;
4760         netdev->stats.tx_bytes += total_bytes;
4761         netdev->stats.tx_packets += total_packets;
4762         return (count < tx_ring->count);
4763 }
4764
4765 /**
4766  * igb_receive_skb - helper function to handle rx indications
4767  * @q_vector: structure containing interrupt and ring information
4768  * @skb: packet to send up
4769  * @vlan_tag: vlan tag for packet
4770  **/
4771 static void igb_receive_skb(struct igb_q_vector *q_vector,
4772                             struct sk_buff *skb,
4773                             u16 vlan_tag)
4774 {
4775         struct igb_adapter *adapter = q_vector->adapter;
4776
4777         if (vlan_tag)
4778                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4779                                  vlan_tag, skb);
4780         else
4781                 napi_gro_receive(&q_vector->napi, skb);
4782 }
4783
4784 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4785                                        u32 status_err, struct sk_buff *skb)
4786 {
4787         skb->ip_summed = CHECKSUM_NONE;
4788
4789         /* skip if the Ignore Checksum bit is set or Rx checksumming is disabled via ethtool */
4790         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4791              (status_err & E1000_RXD_STAT_IXSM))
4792                 return;
4793
4794         /* TCP/UDP checksum error bit is set */
4795         if (status_err &
4796             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4797                 /*
4798                  * work around errata with sctp packets where the TCPE aka
4799                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4800                  * packets, (aka let the stack check the crc32c)
4801                  */
4802                 if ((skb->len == 60) &&
4803                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4804                         ring->rx_stats.csum_err++;
4805
4806                 /* let the stack verify checksum errors */
4807                 return;
4808         }
4809         /* It must be a TCP or UDP packet with a valid checksum */
4810         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4811                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4812
4813         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4814 }
4815
4816 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4817                                    struct sk_buff *skb)
4818 {
4819         struct igb_adapter *adapter = q_vector->adapter;
4820         struct e1000_hw *hw = &adapter->hw;
4821         u64 regval;
4822
4823         /*
4824          * If this bit is set, then the RX registers contain the time stamp. No
4825          * other packet will be time stamped until we read these registers, so
4826          * read the registers to make them available again. Because only one
4827          * packet can be time stamped at a time, we know that the register
4828          * values must belong to this one here and therefore we don't need to
4829          * compare any of the additional attributes stored for it.
4830          *
4831          * If nothing went wrong, then it should have a skb_shared_tx that we
4832          * can turn into a skb_shared_hwtstamps.
4833          */
4834         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4835                 return;
4836         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4837                 return;
4838
4839         regval = rd32(E1000_RXSTMPL);
4840         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4841
4842         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4843 }

4844 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4845                                union e1000_adv_rx_desc *rx_desc)
4846 {
4847         /* HW will not DMA in data larger than the given buffer, even if it
4848          * parses the (NFS, of course) header to be larger.  In that case, it
4849          * fills the header buffer and spills the rest into the page.
4850          */
4851         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4852                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4853         if (hlen > rx_ring->rx_buffer_len)
4854                 hlen = rx_ring->rx_buffer_len;
4855         return hlen;
4856 }
4857
4858 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4859                                  int *work_done, int budget)
4860 {
4861         struct igb_ring *rx_ring = q_vector->rx_ring;
4862         struct net_device *netdev = rx_ring->netdev;
4863         struct pci_dev *pdev = rx_ring->pdev;
4864         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4865         struct igb_buffer *buffer_info, *next_buffer;
4866         struct sk_buff *skb;
4867         bool cleaned = false;
4868         int cleaned_count = 0;
4869         unsigned int total_bytes = 0, total_packets = 0;
4870         unsigned int i;
4871         u32 staterr;
4872         u16 length;
4873         u16 vlan_tag;
4874
4875         i = rx_ring->next_to_clean;
4876         buffer_info = &rx_ring->buffer_info[i];
4877         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4878         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4879
4880         while (staterr & E1000_RXD_STAT_DD) {
4881                 if (*work_done >= budget)
4882                         break;
4883                 (*work_done)++;
4884
4885                 skb = buffer_info->skb;
4886                 prefetch(skb->data - NET_IP_ALIGN);
4887                 buffer_info->skb = NULL;
4888
4889                 i++;
4890                 if (i == rx_ring->count)
4891                         i = 0;
4892                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4893                 prefetch(next_rxd);
4894                 next_buffer = &rx_ring->buffer_info[i];
4895
4896                 length = le16_to_cpu(rx_desc->wb.upper.length);
4897                 cleaned = true;
4898                 cleaned_count++;
4899
4900                 if (buffer_info->dma) {
4901                         pci_unmap_single(pdev, buffer_info->dma,
4902                                          rx_ring->rx_buffer_len,
4903                                          PCI_DMA_FROMDEVICE);
4904                         buffer_info->dma = 0;
4905                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4906                                 skb_put(skb, length);
4907                                 goto send_up;
4908                         }
4909                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4910                 }
4911
4912                 if (length) {
4913                         pci_unmap_page(pdev, buffer_info->page_dma,
4914                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4915                         buffer_info->page_dma = 0;
4916
4917                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4918                                                 buffer_info->page,
4919                                                 buffer_info->page_offset,
4920                                                 length);
4921
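                             /* only recycle the page if nobody else still holds a reference to it */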
4922                         if (page_count(buffer_info->page) != 1)
4923                                 buffer_info->page = NULL;
4924                         else
4925                                 get_page(buffer_info->page);
4926
4927                         skb->len += length;
4928                         skb->data_len += length;
4929
4930                         skb->truesize += length;
4931                 }
4932
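                     /* packet spans descriptors: carry the partial skb over to the next buffer */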
4933                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4934                         buffer_info->skb = next_buffer->skb;
4935                         buffer_info->dma = next_buffer->dma;
4936                         next_buffer->skb = skb;
4937                         next_buffer->dma = 0;
4938                         goto next_desc;
4939                 }
4940 send_up:
4941                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4942                         dev_kfree_skb_irq(skb);
4943                         goto next_desc;
4944                 }
4945
4946                 igb_rx_hwtstamp(q_vector, staterr, skb);
4947                 total_bytes += skb->len;
4948                 total_packets++;
4949
4950                 igb_rx_checksum_adv(rx_ring, staterr, skb);
4951
4952                 skb->protocol = eth_type_trans(skb, netdev);
4953                 skb_record_rx_queue(skb, rx_ring->queue_index);
4954
4955                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4956                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4957
4958                 igb_receive_skb(q_vector, skb, vlan_tag);
4959
4960 next_desc:
4961                 rx_desc->wb.upper.status_error = 0;
4962
4963                 /* return some buffers to hardware, one at a time is too slow */
4964                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4965                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4966                         cleaned_count = 0;
4967                 }
4968
4969                 /* use prefetched values */
4970                 rx_desc = next_rxd;
4971                 buffer_info = next_buffer;
4972                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4973         }
4974
4975         rx_ring->next_to_clean = i;
4976         cleaned_count = igb_desc_unused(rx_ring);
4977
4978         if (cleaned_count)
4979                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4980
4981         rx_ring->total_packets += total_packets;
4982         rx_ring->total_bytes += total_bytes;
4983         rx_ring->rx_stats.packets += total_packets;
4984         rx_ring->rx_stats.bytes += total_bytes;
4985         netdev->stats.rx_bytes += total_bytes;
4986         netdev->stats.rx_packets += total_packets;
4987         return cleaned;
4988 }
4989
4990 /**
4991  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
4992  * @rx_ring: address of the ring structure to repopulate
4993  **/
4994 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
4995 {
4996         struct net_device *netdev = rx_ring->netdev;
4997         union e1000_adv_rx_desc *rx_desc;
4998         struct igb_buffer *buffer_info;
4999         struct sk_buff *skb;
5000         unsigned int i;
5001         int bufsz;
5002
5003         i = rx_ring->next_to_use;
5004         buffer_info = &rx_ring->buffer_info[i];
5005
5006         bufsz = rx_ring->rx_buffer_len;
5007
5008         while (cleaned_count--) {
5009                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5010
5011                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5012                         if (!buffer_info->page) {
5013                                 buffer_info->page = alloc_page(GFP_ATOMIC);
5014                                 if (!buffer_info->page) {
5015                                         rx_ring->rx_stats.alloc_failed++;
5016                                         goto no_buffers;
5017                                 }
5018                                 buffer_info->page_offset = 0;
5019                         } else {
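                                     /* flip to the unused half of the existing page */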
5020                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5021                         }
5022                         buffer_info->page_dma =
5023                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5024                                              buffer_info->page_offset,
5025                                              PAGE_SIZE / 2,
5026                                              PCI_DMA_FROMDEVICE);
5027                 }
5028
5029                 if (!buffer_info->skb) {
5030                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5031                         if (!skb) {
5032                                 rx_ring->rx_stats.alloc_failed++;
5033                                 goto no_buffers;
5034                         }
5035
5036                         buffer_info->skb = skb;
5037                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5038                                                           skb->data,
5039                                                           bufsz,
5040                                                           PCI_DMA_FROMDEVICE);
5041                 }
5042                 /* Refresh the desc even if buffer_addrs didn't change because
5043                  * each write-back erases this info. */
5044                 if (bufsz < IGB_RXBUFFER_1024) {
5045                         rx_desc->read.pkt_addr =
5046                              cpu_to_le64(buffer_info->page_dma);
5047                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5048                 } else {
5049                         rx_desc->read.pkt_addr =
5050                              cpu_to_le64(buffer_info->dma);
5051                         rx_desc->read.hdr_addr = 0;
5052                 }
5053
5054                 i++;
5055                 if (i == rx_ring->count)
5056                         i = 0;
5057                 buffer_info = &rx_ring->buffer_info[i];
5058         }
5059
5060 no_buffers:
5061         if (rx_ring->next_to_use != i) {
5062                 rx_ring->next_to_use = i;
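                     /* leave tail pointing at the last descriptor we initialized */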
5063                 if (i == 0)
5064                         i = (rx_ring->count - 1);
5065                 else
5066                         i--;
5067
5068                 /* Force memory writes to complete before letting h/w
5069                  * know there are new descriptors to fetch.  (Only
5070                  * applicable for weak-ordered memory model archs,
5071                  * such as IA-64). */
5072                 wmb();
5073                 writel(i, rx_ring->tail);
5074         }
5075 }
5076
5077 /**
5078  * igb_mii_ioctl -
5079  * igb_mii_ioctl - handle the SIOCxMIIxxx ioctls
5080  * @netdev: network interface device structure
5081  * @ifr: interface request containing the MII register data
5082  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5083 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5084 {
5085         struct igb_adapter *adapter = netdev_priv(netdev);
5086         struct mii_ioctl_data *data = if_mii(ifr);
5087
5088         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5089                 return -EOPNOTSUPP;
5090
5091         switch (cmd) {
5092         case SIOCGMIIPHY:
5093                 data->phy_id = adapter->hw.phy.addr;
5094                 break;
5095         case SIOCGMIIREG:
5096                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5097                                      &data->val_out))
5098                         return -EIO;
5099                 break;
5100         case SIOCSMIIREG:
5101         default:
5102                 return -EOPNOTSUPP;
5103         }
5104         return 0;
5105 }
5106
5107 /**
5108  * igb_hwtstamp_ioctl - control hardware time stamping
5109  * @netdev: network interface device structure
5110  * @ifr: interface request containing the hwtstamp_config to apply
5111  * @cmd: ioctl command
5112  *
5113  * Outgoing time stamping can be enabled and disabled. Play nice and
5114  * disable it when requested, although it shouldn't cause any overhead
5115  * when no packet needs it. At most one packet in the queue may be
5116  * marked for time stamping, otherwise it would be impossible to tell
5117  * for sure to which packet the hardware time stamp belongs.
5118  *
5119  * Incoming time stamping has to be configured via the hardware
5120  * filters. Not all combinations are supported, in particular event
5121  * type has to be specified. Matching the kind of event packet is
5122  * not supported, with the exception of "all V2 events regardless of
5123  * level 2 or 4".
5124  *
5125  **/
5126 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5127                               struct ifreq *ifr, int cmd)
5128 {
5129         struct igb_adapter *adapter = netdev_priv(netdev);
5130         struct e1000_hw *hw = &adapter->hw;
5131         struct hwtstamp_config config;
5132         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5133         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5134         u32 tsync_rx_cfg = 0;
5135         bool is_l4 = false;
5136         bool is_l2 = false;
5137         u32 regval;
5138
5139         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5140                 return -EFAULT;
5141
5142         /* reserved for future extensions */
5143         if (config.flags)
5144                 return -EINVAL;
5145
5146         switch (config.tx_type) {
5147         case HWTSTAMP_TX_OFF:
5148                 tsync_tx_ctl = 0;
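                     /* fall through */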
5149         case HWTSTAMP_TX_ON:
5150                 break;
5151         default:
5152                 return -ERANGE;
5153         }
5154
5155         switch (config.rx_filter) {
5156         case HWTSTAMP_FILTER_NONE:
5157                 tsync_rx_ctl = 0;
5158                 break;
5159         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5160         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5161         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5162         case HWTSTAMP_FILTER_ALL:
5163                 /*
5164                  * register TSYNCRXCFG must be set, therefore it is not
5165                  * possible to time stamp both Sync and Delay_Req messages
5166                  * => fall back to time stamping all packets
5167                  */
5168                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5169                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5170                 break;
5171         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5172                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5173                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5174                 is_l4 = true;
5175                 break;
5176         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5177                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5178                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5179                 is_l4 = true;
5180                 break;
5181         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5182         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5183                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5184                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5185                 is_l2 = true;
5186                 is_l4 = true;
5187                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5188                 break;
5189         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5190         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5191                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5192                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5193                 is_l2 = true;
5194                 is_l4 = true;
5195                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5196                 break;
5197         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5198         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5199         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5200                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5201                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5202                 is_l2 = true;
5203                 break;
5204         default:
5205                 return -ERANGE;
5206         }
5207
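             /* the 82575 has no support for per-packet time stamping */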
5208         if (hw->mac.type == e1000_82575) {
5209                 if (tsync_rx_ctl | tsync_tx_ctl)
5210                         return -EINVAL;
5211                 return 0;
5212         }
5213
5214         /* enable/disable TX */
5215         regval = rd32(E1000_TSYNCTXCTL);
5216         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5217         regval |= tsync_tx_ctl;
5218         wr32(E1000_TSYNCTXCTL, regval);
5219
5220         /* enable/disable RX */
5221         regval = rd32(E1000_TSYNCRXCTL);
5222         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5223         regval |= tsync_rx_ctl;
5224         wr32(E1000_TSYNCRXCTL, regval);
5225
5226         /* define which PTP packets are time stamped */
5227         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5228
5229         /* define ethertype filter for timestamped packets */
5230         if (is_l2)
5231                 wr32(E1000_ETQF(3),
5232                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5233                                  E1000_ETQF_1588 | /* enable timestamping */
5234                                  ETH_P_1588));     /* 1588 eth protocol type */
5235         else
5236                 wr32(E1000_ETQF(3), 0);
5237
5238 #define PTP_PORT 319
5239         /* L4 Queue Filter[3]: filter by destination port and protocol */
5240         if (is_l4) {
5241                 u32 ftqf = (IPPROTO_UDP /* UDP */
5242                         | E1000_FTQF_VF_BP /* VF not compared */
5243                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5244                         | E1000_FTQF_MASK); /* mask all inputs */
5245                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5246
5247                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5248                 wr32(E1000_IMIREXT(3),
5249                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5250                 if (hw->mac.type == e1000_82576) {
5251                         /* enable source port check */
5252                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5253                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5254                 }
5255                 wr32(E1000_FTQF(3), ftqf);
5256         } else {
5257                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5258         }
5259         wrfl();
5260
5261         adapter->hwtstamp_config = config;
5262
5263         /* clear TX/RX time stamp registers, just to be sure */
5264         regval = rd32(E1000_TXSTMPH);
5265         regval = rd32(E1000_RXSTMPH);
5266
5267         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5268                 -EFAULT : 0;
5269 }
5270
5271 /**
5272  * igb_ioctl - dispatch device-specific ioctls
5273  * @netdev: network interface device structure
5274  * @ifr: interface request data
5275  * @cmd: ioctl command
5276  **/
5277 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5278 {
5279         switch (cmd) {
5280         case SIOCGMIIPHY:
5281         case SIOCGMIIREG:
5282         case SIOCSMIIREG:
5283                 return igb_mii_ioctl(netdev, ifr, cmd);
5284         case SIOCSHWTSTAMP:
5285                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5286         default:
5287                 return -EOPNOTSUPP;
5288         }
5289 }
5290
5291 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5292 {
5293         struct igb_adapter *adapter = hw->back;
5294         u16 cap_offset;
5295
5296         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5297         if (!cap_offset)
5298                 return -E1000_ERR_CONFIG;
5299
5300         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5301
5302         return 0;
5303 }
5304
5305 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5306 {
5307         struct igb_adapter *adapter = hw->back;
5308         u16 cap_offset;
5309
5310         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5311         if (!cap_offset)
5312                 return -E1000_ERR_CONFIG;
5313
5314         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5315
5316         return 0;
5317 }
5318
5319 static void igb_vlan_rx_register(struct net_device *netdev,
5320                                  struct vlan_group *grp)
5321 {
5322         struct igb_adapter *adapter = netdev_priv(netdev);
5323         struct e1000_hw *hw = &adapter->hw;
5324         u32 ctrl, rctl;
5325
5326         igb_irq_disable(adapter);
5327         adapter->vlgrp = grp;
5328
5329         if (grp) {
5330                 /* enable VLAN tag insert/strip */
5331                 ctrl = rd32(E1000_CTRL);
5332                 ctrl |= E1000_CTRL_VME;
5333                 wr32(E1000_CTRL, ctrl);
5334
5335                 /* enable VLAN receive filtering */
5336                 rctl = rd32(E1000_RCTL);
5337                 rctl &= ~E1000_RCTL_CFIEN;
5338                 wr32(E1000_RCTL, rctl);
5339                 igb_update_mng_vlan(adapter);
5340         } else {
5341                 /* disable VLAN tag insert/strip */
5342                 ctrl = rd32(E1000_CTRL);
5343                 ctrl &= ~E1000_CTRL_VME;
5344                 wr32(E1000_CTRL, ctrl);
5345
5346                 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5347                         igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5348                         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5349                 }
5350         }
5351
5352         igb_rlpml_set(adapter);
5353
5354         if (!test_bit(__IGB_DOWN, &adapter->state))
5355                 igb_irq_enable(adapter);
5356 }
5357
5358 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5359 {
5360         struct igb_adapter *adapter = netdev_priv(netdev);
5361         struct e1000_hw *hw = &adapter->hw;
5362         int pf_id = adapter->vfs_allocated_count;
5363
5364         if ((hw->mng_cookie.status &
5365              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5366             (vid == adapter->mng_vlan_id))
5367                 return;
5368
5369         /* add vid to vlvf if sr-iov is enabled,
5370          * if that fails add directly to filter table */
5371         if (igb_vlvf_set(adapter, vid, true, pf_id))
5372                 igb_vfta_set(hw, vid, true);
5373
5374 }
5375
5376 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5377 {
5378         struct igb_adapter *adapter = netdev_priv(netdev);
5379         struct e1000_hw *hw = &adapter->hw;
5380         int pf_id = adapter->vfs_allocated_count;
5381
5382         igb_irq_disable(adapter);
5383         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5384
5385         if (!test_bit(__IGB_DOWN, &adapter->state))
5386                 igb_irq_enable(adapter);
5387
5388         if ((adapter->hw.mng_cookie.status &
5389              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5390             (vid == adapter->mng_vlan_id)) {
5391                 /* release control to f/w */
5392                 igb_release_hw_control(adapter);
5393                 return;
5394         }
5395
5396         /* remove vid from vlvf if sr-iov is enabled,
5397          * if not in vlvf remove from vfta */
5398         if (igb_vlvf_set(adapter, vid, false, pf_id))
5399                 igb_vfta_set(hw, vid, false);
5400 }
5401
5402 static void igb_restore_vlan(struct igb_adapter *adapter)
5403 {
5404         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5405
5406         if (adapter->vlgrp) {
5407                 u16 vid;
5408                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5409                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5410                                 continue;
5411                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5412                 }
5413         }
5414 }
5415
5416 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5417 {
5418         struct e1000_mac_info *mac = &adapter->hw.mac;
5419
5420         mac->autoneg = 0;
5421
5422         switch (spddplx) {
5423         case SPEED_10 + DUPLEX_HALF:
5424                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5425                 break;
5426         case SPEED_10 + DUPLEX_FULL:
5427                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5428                 break;
5429         case SPEED_100 + DUPLEX_HALF:
5430                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5431                 break;
5432         case SPEED_100 + DUPLEX_FULL:
5433                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5434                 break;
5435         case SPEED_1000 + DUPLEX_FULL:
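                     /* 1000 Mb/s full duplex is only reachable via autonegotiation */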
5436                 mac->autoneg = 1;
5437                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5438                 break;
5439         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5440         default:
5441                 dev_err(&adapter->pdev->dev,
5442                         "Unsupported Speed/Duplex configuration\n");
5443                 return -EINVAL;
5444         }
5445         return 0;
5446 }
5447
5448 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5449 {
5450         struct net_device *netdev = pci_get_drvdata(pdev);
5451         struct igb_adapter *adapter = netdev_priv(netdev);
5452         struct e1000_hw *hw = &adapter->hw;
5453         u32 ctrl, rctl, status;
5454         u32 wufc = adapter->wol;
5455 #ifdef CONFIG_PM
5456         int retval = 0;
5457 #endif
5458
5459         netif_device_detach(netdev);
5460
5461         if (netif_running(netdev))
5462                 igb_close(netdev);
5463
5464         igb_clear_interrupt_scheme(adapter);
5465
5466 #ifdef CONFIG_PM
5467         retval = pci_save_state(pdev);
5468         if (retval)
5469                 return retval;
5470 #endif
5471
5472         status = rd32(E1000_STATUS);
5473         if (status & E1000_STATUS_LU)
5474                 wufc &= ~E1000_WUFC_LNKC;
5475
5476         if (wufc) {
5477                 igb_setup_rctl(adapter);
5478                 igb_set_rx_mode(netdev);
5479
5480                 /* turn on all-multi mode if wake on multicast is enabled */
5481                 if (wufc & E1000_WUFC_MC) {
5482                         rctl = rd32(E1000_RCTL);
5483                         rctl |= E1000_RCTL_MPE;
5484                         wr32(E1000_RCTL, rctl);
5485                 }
5486
5487                 ctrl = rd32(E1000_CTRL);
5488                 /* advertise wake from D3Cold */
5489                 #define E1000_CTRL_ADVD3WUC 0x00100000
5490                 /* phy power management enable */
5491                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5492                 ctrl |= E1000_CTRL_ADVD3WUC;
5493                 wr32(E1000_CTRL, ctrl);
5494
5495                 /* Allow time for pending master requests to run */
5496                 igb_disable_pcie_master(&adapter->hw);
5497
5498                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5499                 wr32(E1000_WUFC, wufc);
5500         } else {
5501                 wr32(E1000_WUC, 0);
5502                 wr32(E1000_WUFC, 0);
5503         }
5504
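             /* power down the PHY/SerDes link only if neither wake-up nor manageability needs it */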
5505         *enable_wake = wufc || adapter->en_mng_pt;
5506         if (!*enable_wake)
5507                 igb_shutdown_serdes_link_82575(hw);
5508
5509         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5510          * would have already happened in close and is redundant. */
5511         igb_release_hw_control(adapter);
5512
5513         pci_disable_device(pdev);
5514
5515         return 0;
5516 }
5517
5518 #ifdef CONFIG_PM
5519 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5520 {
5521         int retval;
5522         bool wake;
5523
5524         retval = __igb_shutdown(pdev, &wake);
5525         if (retval)
5526                 return retval;
5527
5528         if (wake) {
5529                 pci_prepare_to_sleep(pdev);
5530         } else {
5531                 pci_wake_from_d3(pdev, false);
5532                 pci_set_power_state(pdev, PCI_D3hot);
5533         }
5534
5535         return 0;
5536 }
5537
5538 static int igb_resume(struct pci_dev *pdev)
5539 {
5540         struct net_device *netdev = pci_get_drvdata(pdev);
5541         struct igb_adapter *adapter = netdev_priv(netdev);
5542         struct e1000_hw *hw = &adapter->hw;
5543         u32 err;
5544
5545         pci_set_power_state(pdev, PCI_D0);
5546         pci_restore_state(pdev);
5547
5548         err = pci_enable_device_mem(pdev);
5549         if (err) {
5550                 dev_err(&pdev->dev,
5551                         "igb: Cannot enable PCI device from suspend\n");
5552                 return err;
5553         }
5554         pci_set_master(pdev);
5555
5556         pci_enable_wake(pdev, PCI_D3hot, 0);
5557         pci_enable_wake(pdev, PCI_D3cold, 0);
5558
5559         if (igb_init_interrupt_scheme(adapter)) {
5560                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5561                 return -ENOMEM;
5562         }
5563
5564         /* e1000_power_up_phy(adapter); */
5565
5566         igb_reset(adapter);
5567
5568         /* let the f/w know that the h/w is now under the control of the
5569          * driver. */
5570         igb_get_hw_control(adapter);
5571
5572         wr32(E1000_WUS, ~0);
5573
5574         if (netif_running(netdev)) {
5575                 err = igb_open(netdev);
5576                 if (err)
5577                         return err;
5578         }
5579
5580         netif_device_attach(netdev);
5581
5582         return 0;
5583 }
5584 #endif
5585
5586 static void igb_shutdown(struct pci_dev *pdev)
5587 {
5588         bool wake;
5589
5590         __igb_shutdown(pdev, &wake);
5591
5592         if (system_state == SYSTEM_POWER_OFF) {
5593                 pci_wake_from_d3(pdev, wake);
5594                 pci_set_power_state(pdev, PCI_D3hot);
5595         }
5596 }
5597
5598 #ifdef CONFIG_NET_POLL_CONTROLLER
5599 /*
5600  * Polling 'interrupt' - used by things like netconsole to send skbs
5601  * without having to re-enable interrupts. It's not called while
5602  * the interrupt routine is executing.
5603  */
5604 static void igb_netpoll(struct net_device *netdev)
5605 {
5606         struct igb_adapter *adapter = netdev_priv(netdev);
5607         struct e1000_hw *hw = &adapter->hw;
5608         int i;
5609
5610         if (!adapter->msix_entries) {
5611                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5612                 igb_irq_disable(adapter);
5613                 napi_schedule(&q_vector->napi);
5614                 return;
5615         }
5616
5617         for (i = 0; i < adapter->num_q_vectors; i++) {
5618                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5619                 wr32(E1000_EIMC, q_vector->eims_value);
5620                 napi_schedule(&q_vector->napi);
5621         }
5622 }
5623 #endif /* CONFIG_NET_POLL_CONTROLLER */
5624
5625 /**
5626  * igb_io_error_detected - called when PCI error is detected
5627  * @pdev: Pointer to PCI device
5628  * @state: The current pci connection state
5629  *
5630  * This function is called after a PCI bus error affecting
5631  * this device has been detected.
5632  */
5633 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5634                                               pci_channel_state_t state)
5635 {
5636         struct net_device *netdev = pci_get_drvdata(pdev);
5637         struct igb_adapter *adapter = netdev_priv(netdev);
5638
5639         netif_device_detach(netdev);
5640
5641         if (state == pci_channel_io_perm_failure)
5642                 return PCI_ERS_RESULT_DISCONNECT;
5643
5644         if (netif_running(netdev))
5645                 igb_down(adapter);
5646         pci_disable_device(pdev);
5647
5648         /* Request a slot reset. */
5649         return PCI_ERS_RESULT_NEED_RESET;
5650 }
5651
5652 /**
5653  * igb_io_slot_reset - called after the pci bus has been reset.
5654  * @pdev: Pointer to PCI device
5655  *
5656  * Restart the card from scratch, as if from a cold-boot. Implementation
5657  * resembles the first-half of the igb_resume routine.
5658  */
5659 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5660 {
5661         struct net_device *netdev = pci_get_drvdata(pdev);
5662         struct igb_adapter *adapter = netdev_priv(netdev);
5663         struct e1000_hw *hw = &adapter->hw;
5664         pci_ers_result_t result;
5665         int err;
5666
5667         if (pci_enable_device_mem(pdev)) {
5668                 dev_err(&pdev->dev,
5669                         "Cannot re-enable PCI device after reset.\n");
5670                 result = PCI_ERS_RESULT_DISCONNECT;
5671         } else {
5672                 pci_set_master(pdev);
5673                 pci_restore_state(pdev);
5674
5675                 pci_enable_wake(pdev, PCI_D3hot, 0);
5676                 pci_enable_wake(pdev, PCI_D3cold, 0);
5677
5678                 igb_reset(adapter);
5679                 wr32(E1000_WUS, ~0);
5680                 result = PCI_ERS_RESULT_RECOVERED;
5681         }
5682
5683         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5684         if (err) {
5685                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5686                         "failed 0x%x\n", err);
5687                 /* non-fatal, continue */
5688         }
5689
5690         return result;
5691 }
5692
5693 /**
5694  * igb_io_resume - called when traffic can start flowing again.
5695  * @pdev: Pointer to PCI device
5696  *
5697  * This callback is called when the error recovery driver tells us that
5698  * it's OK to resume normal operation. Implementation resembles the
5699  * second-half of the igb_resume routine.
5700  */
5701 static void igb_io_resume(struct pci_dev *pdev)
5702 {
5703         struct net_device *netdev = pci_get_drvdata(pdev);
5704         struct igb_adapter *adapter = netdev_priv(netdev);
5705
5706         if (netif_running(netdev)) {
5707                 if (igb_up(adapter)) {
5708                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5709                         return;
5710                 }
5711         }
5712
5713         netif_device_attach(netdev);
5714
5715         /* let the f/w know that the h/w is now under the control of the
5716          * driver. */
5717         igb_get_hw_control(adapter);
5718 }
5719
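/**
 * igb_rar_set_qsel - write a MAC address and pool select into a RAR entry
 * @adapter: board private structure
 * @addr: MAC address, in network byte order
 * @index: receive address register (RAR) index to program
 * @qsel: pool/queue select value to associate with this address
 */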
5720 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5721                              u8 qsel)
5722 {
5723         u32 rar_low, rar_high;
5724         struct e1000_hw *hw = &adapter->hw;
5725
5726         /* HW expects these in little endian so we reverse the byte order
5727          * from network order (big endian) to little endian
5728          */
5729         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5730                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5731         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5732
5733         /* Indicate to hardware the Address is Valid. */
5734         rar_high |= E1000_RAH_AV;
5735
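        /* 82575 encodes the pool select as a value in the RAH pool field;
         * later parts use one bit per pool, so shift a single pool bit */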
5736         if (hw->mac.type == e1000_82575)
5737                 rar_high |= E1000_RAH_POOL_1 * qsel;
5738         else
5739                 rar_high |= E1000_RAH_POOL_1 << qsel;
5740
5741         wr32(E1000_RAL(index), rar_low);
5742         wrfl();
5743         wr32(E1000_RAH(index), rar_high);
5744         wrfl();
5745 }
5746
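/**
 * igb_set_vf_mac - program the MAC address for a given VF
 * @adapter: board private structure
 * @vf: VF number
 * @mac_addr: MAC address to assign to the VF
 */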
5747 static int igb_set_vf_mac(struct igb_adapter *adapter,
5748                           int vf, unsigned char *mac_addr)
5749 {
5750         struct e1000_hw *hw = &adapter->hw;
5751         /* VF MAC addresses start at the end of the receive addresses and
5752          * move towards the first, so a collision should not be possible */
5753         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5754
5755         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5756
5757         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5758
5759         return 0;
5760 }
5761
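/**
 * igb_vmm_control - configure loopback and replication for virtualization
 * @adapter: board private structure
 *
 * Enables VLAN tag stripping for replicated traffic, tells the hardware
 * that the MAC is inserting VLAN tags, and turns VMDq loopback and
 * replication on or off depending on whether any VFs have been allocated.
 */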
5762 static void igb_vmm_control(struct igb_adapter *adapter)
5763 {
5764         struct e1000_hw *hw = &adapter->hw;
5765         u32 reg;
5766
5767         /* replication is not supported for 82575 */
5768         if (hw->mac.type == e1000_82575)
5769                 return;
5770
5771         /* enable replication vlan tag stripping */
5772         reg = rd32(E1000_RPLOLR);
5773         reg |= E1000_RPLOLR_STRVLAN;
5774         wr32(E1000_RPLOLR, reg);
5775
5776         /* notify HW that the MAC is adding vlan tags */
5777         reg = rd32(E1000_DTXCTL);
5778         reg |= E1000_DTXCTL_VLAN_ADDED;
5779         wr32(E1000_DTXCTL, reg);
5780
5781         if (adapter->vfs_allocated_count) {
5782                 igb_vmdq_set_loopback_pf(hw, true);
5783                 igb_vmdq_set_replication_pf(hw, true);
5784         } else {
5785                 igb_vmdq_set_loopback_pf(hw, false);
5786                 igb_vmdq_set_replication_pf(hw, false);
5787         }
5788 }
5789
5790 /* igb_main.c */