igb: cleanup some of the code related to hw timestamping
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
129 static void igb_vmm_control(struct igb_adapter *);
130 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
131 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
132
133 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
134 {
135         u32 reg_data;
136
137         reg_data = rd32(E1000_VMOLR(vfn));
138         reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
139                     E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
140                     E1000_VMOLR_AUPE |   /* Accept untagged packets */
141                     E1000_VMOLR_STRVLAN; /* Strip vlan tags */
142         wr32(E1000_VMOLR(vfn), reg_data);
143 }
144
145 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
146                                  int vfn)
147 {
148         struct e1000_hw *hw = &adapter->hw;
149         u32 vmolr;
150
151         /* if it isn't the PF, check to see if VFs are enabled and
152          * increase the size to support vlan tags */
153         if (vfn < adapter->vfs_allocated_count &&
154             adapter->vf_data[vfn].vlans_enabled)
155                 size += VLAN_TAG_SIZE;
156
157         vmolr = rd32(E1000_VMOLR(vfn));
158         vmolr &= ~E1000_VMOLR_RLPML_MASK;
159         vmolr |= size | E1000_VMOLR_LPE;
160         wr32(E1000_VMOLR(vfn), vmolr);
161
162         return 0;
163 }
164
165 #ifdef CONFIG_PM
166 static int igb_suspend(struct pci_dev *, pm_message_t);
167 static int igb_resume(struct pci_dev *);
168 #endif
169 static void igb_shutdown(struct pci_dev *);
170 #ifdef CONFIG_IGB_DCA
171 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
172 static struct notifier_block dca_notifier = {
173         .notifier_call  = igb_notify_dca,
174         .next           = NULL,
175         .priority       = 0
176 };
177 #endif
178 #ifdef CONFIG_NET_POLL_CONTROLLER
179 /* for netdump / net console */
180 static void igb_netpoll(struct net_device *);
181 #endif
182 #ifdef CONFIG_PCI_IOV
183 static unsigned int max_vfs = 0;
184 module_param(max_vfs, uint, 0);
185 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
186                  "per physical function");
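/* Usage example: "modprobe igb max_vfs=7" requests seven VFs per port;
 * igb_probe() below caps any larger value at 7, the most the 82576
 * supports in addition to the PF. */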
187 #endif /* CONFIG_PCI_IOV */
188
189 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
190                      pci_channel_state_t);
191 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
192 static void igb_io_resume(struct pci_dev *);
193
194 static struct pci_error_handlers igb_err_handler = {
195         .error_detected = igb_io_error_detected,
196         .slot_reset = igb_io_slot_reset,
197         .resume = igb_io_resume,
198 };
199
200
201 static struct pci_driver igb_driver = {
202         .name     = igb_driver_name,
203         .id_table = igb_pci_tbl,
204         .probe    = igb_probe,
205         .remove   = __devexit_p(igb_remove),
206 #ifdef CONFIG_PM
207         /* Power Management Hooks */
208         .suspend  = igb_suspend,
209         .resume   = igb_resume,
210 #endif
211         .shutdown = igb_shutdown,
212         .err_handler = &igb_err_handler
213 };
214
215 static int global_quad_port_a; /* global quad port a indication */
216
217 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
218 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
219 MODULE_LICENSE("GPL");
220 MODULE_VERSION(DRV_VERSION);
221
222 /**
223  * igb_read_clock - read raw cycle counter (to be used by time counter)
224  */
225 static cycle_t igb_read_clock(const struct cyclecounter *tc)
226 {
227         struct igb_adapter *adapter =
228                 container_of(tc, struct igb_adapter, cycles);
229         struct e1000_hw *hw = &adapter->hw;
230         u64 stamp = 0;
231         int shift = 0;
232
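        /* shift is fixed at 0 here, so SYSTIML fills bits 31:0 and SYSTIMH
         * bits 63:32 of the raw 64-bit SYSTIM cycle count returned to the
         * timecounter */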
233         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
234         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
235         return stamp;
236 }
237
238 #ifdef DEBUG
239 /**
240  * igb_get_hw_dev_name - return device name string
241  * used by hardware layer to print debugging information
242  **/
243 char *igb_get_hw_dev_name(struct e1000_hw *hw)
244 {
245         struct igb_adapter *adapter = hw->back;
246         return adapter->netdev->name;
247 }
248
249 /**
250  * igb_get_time_str - format current NIC and system time as string
251  */
252 static char *igb_get_time_str(struct igb_adapter *adapter,
253                               char buffer[160])
254 {
255         cycle_t hw = adapter->cycles.read(&adapter->cycles);
256         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
257         struct timespec sys;
258         struct timespec delta;
259         getnstimeofday(&sys);
260
261         delta = timespec_sub(nic, sys);
262
263         sprintf(buffer,
264                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
265                 hw,
266                 (long)nic.tv_sec, nic.tv_nsec,
267                 (long)sys.tv_sec, sys.tv_nsec,
268                 (long)delta.tv_sec, delta.tv_nsec);
269
270         return buffer;
271 }
272 #endif
273
274 /**
275  * igb_init_module - Driver Registration Routine
276  *
277  * igb_init_module is the first routine called when the driver is
278  * loaded. All it does is register with the PCI subsystem.
279  **/
280 static int __init igb_init_module(void)
281 {
282         int ret;
283         printk(KERN_INFO "%s - version %s\n",
284                igb_driver_string, igb_driver_version);
285
286         printk(KERN_INFO "%s\n", igb_copyright);
287
288         global_quad_port_a = 0;
289
290 #ifdef CONFIG_IGB_DCA
291         dca_register_notify(&dca_notifier);
292 #endif
293
294         ret = pci_register_driver(&igb_driver);
295         return ret;
296 }
297
298 module_init(igb_init_module);
299
300 /**
301  * igb_exit_module - Driver Exit Cleanup Routine
302  *
303  * igb_exit_module is called just before the driver is removed
304  * from memory.
305  **/
306 static void __exit igb_exit_module(void)
307 {
308 #ifdef CONFIG_IGB_DCA
309         dca_unregister_notify(&dca_notifier);
310 #endif
311         pci_unregister_driver(&igb_driver);
312 }
313
314 module_exit(igb_exit_module);
315
316 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
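/* Q_IDX_82576(i) interleaves ring i across the two register banks:
 * even i maps to index i/2 (0..7), odd i to 8 + i/2 (8..15),
 * i.e. ring 0 -> 0, ring 1 -> 8, ring 2 -> 1, ring 3 -> 9, and so on. */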
317 /**
318  * igb_cache_ring_register - Descriptor ring to register mapping
319  * @adapter: board private structure to initialize
320  *
321  * Once we know the feature-set enabled for the device, we'll cache
322  * the register offset the descriptor ring is assigned to.
323  **/
324 static void igb_cache_ring_register(struct igb_adapter *adapter)
325 {
326         int i;
327         u32 rbase_offset = adapter->vfs_allocated_count;
328
329         switch (adapter->hw.mac.type) {
330         case e1000_82576:
331                 /* The queues are allocated for virtualization such that VF 0
332                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
333                  * In order to avoid collision we start at the first free queue
334                  * and continue consuming queues in the same sequence
335                  */
336                 for (i = 0; i < adapter->num_rx_queues; i++)
337                         adapter->rx_ring[i].reg_idx = rbase_offset +
338                                                       Q_IDX_82576(i);
339                 for (i = 0; i < adapter->num_tx_queues; i++)
340                         adapter->tx_ring[i].reg_idx = rbase_offset +
341                                                       Q_IDX_82576(i);
342                 break;
343         case e1000_82575:
344         default:
345                 for (i = 0; i < adapter->num_rx_queues; i++)
346                         adapter->rx_ring[i].reg_idx = i;
347                 for (i = 0; i < adapter->num_tx_queues; i++)
348                         adapter->tx_ring[i].reg_idx = i;
349                 break;
350         }
351 }
352
353 static void igb_free_queues(struct igb_adapter *adapter)
354 {
355         kfree(adapter->tx_ring);
356         kfree(adapter->rx_ring);
357
358         adapter->tx_ring = NULL;
359         adapter->rx_ring = NULL;
360
361         adapter->num_rx_queues = 0;
362         adapter->num_tx_queues = 0;
363 }
364
365 /**
366  * igb_alloc_queues - Allocate memory for all rings
367  * @adapter: board private structure to initialize
368  *
369  * We allocate one ring per queue at run-time since we don't know the
370  * number of queues at compile-time.
371  **/
372 static int igb_alloc_queues(struct igb_adapter *adapter)
373 {
374         int i;
375
376         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
377                                    sizeof(struct igb_ring), GFP_KERNEL);
378         if (!adapter->tx_ring)
379                 goto err;
380
381         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
382                                    sizeof(struct igb_ring), GFP_KERNEL);
383         if (!adapter->rx_ring)
384                 goto err;
385
386         for (i = 0; i < adapter->num_tx_queues; i++) {
387                 struct igb_ring *ring = &(adapter->tx_ring[i]);
388                 ring->count = adapter->tx_ring_count;
389                 ring->queue_index = i;
390                 ring->pdev = adapter->pdev;
391                 ring->netdev = adapter->netdev;
392                 /* For 82575, context index must be unique per ring. */
393                 if (adapter->hw.mac.type == e1000_82575)
394                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
395         }
396
397         for (i = 0; i < adapter->num_rx_queues; i++) {
398                 struct igb_ring *ring = &(adapter->rx_ring[i]);
399                 ring->count = adapter->rx_ring_count;
400                 ring->queue_index = i;
401                 ring->pdev = adapter->pdev;
402                 ring->netdev = adapter->netdev;
403                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
404                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
405                 /* set flag indicating ring supports SCTP checksum offload */
406                 if (adapter->hw.mac.type >= e1000_82576)
407                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
408         }
409
410         igb_cache_ring_register(adapter);
411
412         return 0;
413
414 err:
415         igb_free_queues(adapter);
416
417         return -ENOMEM;
418 }
419
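/* IGB_N0_QUEUE marks a vector with no rx or tx queue of that type attached;
 * igb_assign_vector() only programs MSIXBM/IVAR entries for queue indices
 * greater than this sentinel. */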
420 #define IGB_N0_QUEUE -1
421 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
422 {
423         u32 msixbm = 0;
424         struct igb_adapter *adapter = q_vector->adapter;
425         struct e1000_hw *hw = &adapter->hw;
426         u32 ivar, index;
427         int rx_queue = IGB_N0_QUEUE;
428         int tx_queue = IGB_N0_QUEUE;
429
430         if (q_vector->rx_ring)
431                 rx_queue = q_vector->rx_ring->reg_idx;
432         if (q_vector->tx_ring)
433                 tx_queue = q_vector->tx_ring->reg_idx;
434
435         switch (hw->mac.type) {
436         case e1000_82575:
437                 /* The 82575 assigns vectors using a bitmask, which matches the
438                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
439                    or more queues to a vector, we write the appropriate bits
440                    into the MSIXBM register for that vector. */
441                 if (rx_queue > IGB_N0_QUEUE)
442                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
443                 if (tx_queue > IGB_N0_QUEUE)
444                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
445                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
446                 q_vector->eims_value = msixbm;
447                 break;
448         case e1000_82576:
449                 /* 82576 uses a table-based method for assigning vectors.
450                    Each queue has a single entry in the table to which we write
451                    a vector number along with a "valid" bit.  Sadly, the layout
452                    of the table is somewhat counterintuitive. */
453                 if (rx_queue > IGB_N0_QUEUE) {
454                         index = (rx_queue & 0x7);
455                         ivar = array_rd32(E1000_IVAR0, index);
456                         if (rx_queue < 8) {
457                                 /* vector goes into low byte of register */
458                                 ivar = ivar & 0xFFFFFF00;
459                                 ivar |= msix_vector | E1000_IVAR_VALID;
460                         } else {
461                                 /* vector goes into third byte of register */
462                                 ivar = ivar & 0xFF00FFFF;
463                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
464                         }
465                         array_wr32(E1000_IVAR0, index, ivar);
466                 }
467                 if (tx_queue > IGB_N0_QUEUE) {
468                         index = (tx_queue & 0x7);
469                         ivar = array_rd32(E1000_IVAR0, index);
470                         if (tx_queue < 8) {
471                                 /* vector goes into second byte of register */
472                                 ivar = ivar & 0xFFFF00FF;
473                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
474                         } else {
475                                 /* vector goes into high byte of register */
476                                 ivar = ivar & 0x00FFFFFF;
477                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
478                         }
479                         array_wr32(E1000_IVAR0, index, ivar);
480                 }
481                 q_vector->eims_value = 1 << msix_vector;
482                 break;
483         default:
484                 BUG();
485                 break;
486         }
487 }
488
489 /**
490  * igb_configure_msix - Configure MSI-X hardware
491  *
492  * igb_configure_msix sets up the hardware to properly
493  * generate MSI-X interrupts.
494  **/
495 static void igb_configure_msix(struct igb_adapter *adapter)
496 {
497         u32 tmp;
498         int i, vector = 0;
499         struct e1000_hw *hw = &adapter->hw;
500
501         adapter->eims_enable_mask = 0;
502
503         /* set vector for other causes, i.e. link changes */
504         switch (hw->mac.type) {
505         case e1000_82575:
506                 tmp = rd32(E1000_CTRL_EXT);
507                 /* enable MSI-X PBA support */
508                 tmp |= E1000_CTRL_EXT_PBA_CLR;
509
510                 /* Auto-Mask interrupts upon ICR read. */
511                 tmp |= E1000_CTRL_EXT_EIAME;
512                 tmp |= E1000_CTRL_EXT_IRCA;
513
514                 wr32(E1000_CTRL_EXT, tmp);
515
516                 /* enable msix_other interrupt */
517                 array_wr32(E1000_MSIXBM(0), vector++,
518                                       E1000_EIMS_OTHER);
519                 adapter->eims_other = E1000_EIMS_OTHER;
520
521                 break;
522
523         case e1000_82576:
524                 /* Turn on MSI-X capability first, or our settings
525                  * won't stick.  And it will take days to debug. */
526                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
527                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
528                                 E1000_GPIE_NSICR);
529
530                 /* enable msix_other interrupt */
531                 adapter->eims_other = 1 << vector;
532                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
533
534                 wr32(E1000_IVAR_MISC, tmp);
535                 break;
536         default:
537                 /* do nothing, since nothing else supports MSI-X */
538                 break;
539         } /* switch (hw->mac.type) */
540
541         adapter->eims_enable_mask |= adapter->eims_other;
542
543         for (i = 0; i < adapter->num_q_vectors; i++) {
544                 struct igb_q_vector *q_vector = adapter->q_vector[i];
545                 igb_assign_vector(q_vector, vector++);
546                 adapter->eims_enable_mask |= q_vector->eims_value;
547         }
548
549         wrfl();
550 }
551
552 /**
553  * igb_request_msix - Initialize MSI-X interrupts
554  *
555  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
556  * kernel.
557  **/
558 static int igb_request_msix(struct igb_adapter *adapter)
559 {
560         struct net_device *netdev = adapter->netdev;
561         struct e1000_hw *hw = &adapter->hw;
562         int i, err = 0, vector = 0;
563
564         err = request_irq(adapter->msix_entries[vector].vector,
565                           &igb_msix_other, 0, netdev->name, adapter);
566         if (err)
567                 goto out;
568         vector++;
569
570         for (i = 0; i < adapter->num_q_vectors; i++) {
571                 struct igb_q_vector *q_vector = adapter->q_vector[i];
572
573                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
574
575                 if (q_vector->rx_ring && q_vector->tx_ring)
576                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
577                                 q_vector->rx_ring->queue_index);
578                 else if (q_vector->tx_ring)
579                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
580                                 q_vector->tx_ring->queue_index);
581                 else if (q_vector->rx_ring)
582                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
583                                 q_vector->rx_ring->queue_index);
584                 else
585                         sprintf(q_vector->name, "%s-unused", netdev->name);
586
587                 err = request_irq(adapter->msix_entries[vector].vector,
588                                   &igb_msix_ring, 0, q_vector->name,
589                                   q_vector);
590                 if (err)
591                         goto out;
592                 vector++;
593         }
594
595         igb_configure_msix(adapter);
596         return 0;
597 out:
598         return err;
599 }
600
601 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
602 {
603         if (adapter->msix_entries) {
604                 pci_disable_msix(adapter->pdev);
605                 kfree(adapter->msix_entries);
606                 adapter->msix_entries = NULL;
607         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
608                 pci_disable_msi(adapter->pdev);
609         }
610 }
611
612 /**
613  * igb_free_q_vectors - Free memory allocated for interrupt vectors
614  * @adapter: board private structure to initialize
615  *
616  * This function frees the memory allocated to the q_vectors.  In addition if
617  * NAPI is enabled it will delete any references to the NAPI struct prior
618  * to freeing the q_vector.
619  **/
620 static void igb_free_q_vectors(struct igb_adapter *adapter)
621 {
622         int v_idx;
623
624         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
625                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
626                 adapter->q_vector[v_idx] = NULL;
627                 netif_napi_del(&q_vector->napi);
628                 kfree(q_vector);
629         }
630         adapter->num_q_vectors = 0;
631 }
632
633 /**
634  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
635  *
636  * This function resets the device so that it has 0 rx queues, tx queues, and
637  * MSI-X interrupts allocated.
638  */
639 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
640 {
641         igb_free_queues(adapter);
642         igb_free_q_vectors(adapter);
643         igb_reset_interrupt_capability(adapter);
644 }
645
646 /**
647  * igb_set_interrupt_capability - set MSI or MSI-X if supported
648  *
649  * Attempt to configure interrupts using the best available
650  * capabilities of the hardware and kernel.
651  **/
652 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
653 {
654         int err;
655         int numvecs, i;
656
657         /* Number of supported queues. */
658         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
659         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
660
661         /* start with one vector for every rx queue */
662         numvecs = adapter->num_rx_queues;
663
664         /* if tx handler is separate, add 1 for every tx queue */
665         numvecs += adapter->num_tx_queues;
666
667         /* store the number of vectors reserved for queues */
668         adapter->num_q_vectors = numvecs;
669
670         /* add 1 vector for link status interrupts */
671         numvecs++;
672         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
673                                         GFP_KERNEL);
674         if (!adapter->msix_entries)
675                 goto msi_only;
676
677         for (i = 0; i < numvecs; i++)
678                 adapter->msix_entries[i].entry = i;
679
680         err = pci_enable_msix(adapter->pdev,
681                               adapter->msix_entries,
682                               numvecs);
683         if (err == 0)
684                 goto out;
685
686         igb_reset_interrupt_capability(adapter);
687
688         /* If we can't do MSI-X, try MSI */
689 msi_only:
690 #ifdef CONFIG_PCI_IOV
691         /* disable SR-IOV for non MSI-X configurations */
692         if (adapter->vf_data) {
693                 struct e1000_hw *hw = &adapter->hw;
694                 /* disable iov and allow time for transactions to clear */
695                 pci_disable_sriov(adapter->pdev);
696                 msleep(500);
697
698                 kfree(adapter->vf_data);
699                 adapter->vf_data = NULL;
700                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
701                 msleep(100);
702                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
703         }
704 #endif
705         adapter->vfs_allocated_count = 0;
706         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
707         adapter->num_rx_queues = 1;
708         adapter->num_tx_queues = 1;
709         adapter->num_q_vectors = 1;
710         if (!pci_enable_msi(adapter->pdev))
711                 adapter->flags |= IGB_FLAG_HAS_MSI;
712 out:
713         /* Notify the stack of the (possibly) reduced Tx Queue count. */
714         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
715         return;
716 }
717
718 /**
719  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
720  * @adapter: board private structure to initialize
721  *
722  * We allocate one q_vector per queue interrupt.  If allocation fails we
723  * return -ENOMEM.
724  **/
725 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
726 {
727         struct igb_q_vector *q_vector;
728         struct e1000_hw *hw = &adapter->hw;
729         int v_idx;
730
731         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
732                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
733                 if (!q_vector)
734                         goto err_out;
735                 q_vector->adapter = adapter;
736                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
737                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
738                 q_vector->itr_val = IGB_START_ITR;
739                 q_vector->set_itr = 1;
740                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
741                 adapter->q_vector[v_idx] = q_vector;
742         }
743         return 0;
744
745 err_out:
746         while (v_idx) {
747                 v_idx--;
748                 q_vector = adapter->q_vector[v_idx];
749                 netif_napi_del(&q_vector->napi);
750                 kfree(q_vector);
751                 adapter->q_vector[v_idx] = NULL;
752         }
753         return -ENOMEM;
754 }
755
756 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
757                                       int ring_idx, int v_idx)
758 {
759         struct igb_q_vector *q_vector;
760
761         q_vector = adapter->q_vector[v_idx];
762         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
763         q_vector->rx_ring->q_vector = q_vector;
764         q_vector->itr_val = adapter->rx_itr_setting;
765         if (q_vector->itr_val && q_vector->itr_val <= 3)
766                 q_vector->itr_val = IGB_START_ITR;
767 }
768
769 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
770                                       int ring_idx, int v_idx)
771 {
772         struct igb_q_vector *q_vector;
773
774         q_vector = adapter->q_vector[v_idx];
775         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
776         q_vector->tx_ring->q_vector = q_vector;
777         q_vector->itr_val = adapter->tx_itr_setting;
778         if (q_vector->itr_val && q_vector->itr_val <= 3)
779                 q_vector->itr_val = IGB_START_ITR;
780 }
781
782 /**
783  * igb_map_ring_to_vector - maps allocated queues to vectors
784  *
785  * This function maps the recently allocated queues to vectors.
786  **/
787 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
788 {
789         int i;
790         int v_idx = 0;
791
792         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
793             (adapter->num_q_vectors < adapter->num_tx_queues))
794                 return -ENOMEM;
795
796         if (adapter->num_q_vectors >=
797             (adapter->num_rx_queues + adapter->num_tx_queues)) {
798                 for (i = 0; i < adapter->num_rx_queues; i++)
799                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
800                 for (i = 0; i < adapter->num_tx_queues; i++)
801                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
802         } else {
803                 for (i = 0; i < adapter->num_rx_queues; i++) {
804                         if (i < adapter->num_tx_queues)
805                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
806                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
807                 }
808                 for (; i < adapter->num_tx_queues; i++)
809                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
810         }
811         return 0;
812 }
813
814 /**
815  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
816  *
817  * This function initializes the interrupts and allocates all of the queues.
818  **/
819 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
820 {
821         struct pci_dev *pdev = adapter->pdev;
822         int err;
823
824         igb_set_interrupt_capability(adapter);
825
826         err = igb_alloc_q_vectors(adapter);
827         if (err) {
828                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
829                 goto err_alloc_q_vectors;
830         }
831
832         err = igb_alloc_queues(adapter);
833         if (err) {
834                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
835                 goto err_alloc_queues;
836         }
837
838         err = igb_map_ring_to_vector(adapter);
839         if (err) {
840                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
841                 goto err_map_queues;
842         }
843
844
845         return 0;
846 err_map_queues:
847         igb_free_queues(adapter);
848 err_alloc_queues:
849         igb_free_q_vectors(adapter);
850 err_alloc_q_vectors:
851         igb_reset_interrupt_capability(adapter);
852         return err;
853 }
854
855 /**
856  * igb_request_irq - initialize interrupts
857  *
858  * Attempts to configure interrupts using the best available
859  * capabilities of the hardware and kernel.
860  **/
861 static int igb_request_irq(struct igb_adapter *adapter)
862 {
863         struct net_device *netdev = adapter->netdev;
864         struct pci_dev *pdev = adapter->pdev;
865         struct e1000_hw *hw = &adapter->hw;
866         int err = 0;
867
868         if (adapter->msix_entries) {
869                 err = igb_request_msix(adapter);
870                 if (!err)
871                         goto request_done;
872                 /* fall back to MSI */
873                 igb_clear_interrupt_scheme(adapter);
874                 if (!pci_enable_msi(adapter->pdev))
875                         adapter->flags |= IGB_FLAG_HAS_MSI;
876                 igb_free_all_tx_resources(adapter);
877                 igb_free_all_rx_resources(adapter);
878                 adapter->num_tx_queues = 1;
879                 adapter->num_rx_queues = 1;
880                 adapter->num_q_vectors = 1;
881                 err = igb_alloc_q_vectors(adapter);
882                 if (err) {
883                         dev_err(&pdev->dev,
884                                 "Unable to allocate memory for vectors\n");
885                         goto request_done;
886                 }
887                 err = igb_alloc_queues(adapter);
888                 if (err) {
889                         dev_err(&pdev->dev,
890                                 "Unable to allocate memory for queues\n");
891                         igb_free_q_vectors(adapter);
892                         goto request_done;
893                 }
894                 igb_setup_all_tx_resources(adapter);
895                 igb_setup_all_rx_resources(adapter);
896         } else {
897                 switch (hw->mac.type) {
898                 case e1000_82575:
899                         wr32(E1000_MSIXBM(0),
900                              (E1000_EICR_RX_QUEUE0 |
901                               E1000_EICR_TX_QUEUE0 |
902                               E1000_EIMS_OTHER));
903                         break;
904                 case e1000_82576:
905                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
906                         break;
907                 default:
908                         break;
909                 }
910         }
911
912         if (adapter->flags & IGB_FLAG_HAS_MSI) {
913                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
914                                   netdev->name, adapter);
915                 if (!err)
916                         goto request_done;
917
918                 /* fall back to legacy interrupts */
919                 igb_reset_interrupt_capability(adapter);
920                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
921         }
922
923         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
924                           netdev->name, adapter);
925
926         if (err)
927                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
928                         err);
929
930 request_done:
931         return err;
932 }
933
934 static void igb_free_irq(struct igb_adapter *adapter)
935 {
936         if (adapter->msix_entries) {
937                 int vector = 0, i;
938
939                 free_irq(adapter->msix_entries[vector++].vector, adapter);
940
941                 for (i = 0; i < adapter->num_q_vectors; i++) {
942                         struct igb_q_vector *q_vector = adapter->q_vector[i];
943                         free_irq(adapter->msix_entries[vector++].vector,
944                                  q_vector);
945                 }
946         } else {
947                 free_irq(adapter->pdev->irq, adapter);
948         }
949 }
950
951 /**
952  * igb_irq_disable - Mask off interrupt generation on the NIC
953  * @adapter: board private structure
954  **/
955 static void igb_irq_disable(struct igb_adapter *adapter)
956 {
957         struct e1000_hw *hw = &adapter->hw;
958
959         if (adapter->msix_entries) {
960                 u32 regval = rd32(E1000_EIAM);
961                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
962                 wr32(E1000_EIMC, adapter->eims_enable_mask);
963                 regval = rd32(E1000_EIAC);
964                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
965         }
966
967         wr32(E1000_IAM, 0);
968         wr32(E1000_IMC, ~0);
969         wrfl();
970         synchronize_irq(adapter->pdev->irq);
971 }
972
973 /**
974  * igb_irq_enable - Enable default interrupt generation settings
975  * @adapter: board private structure
976  **/
977 static void igb_irq_enable(struct igb_adapter *adapter)
978 {
979         struct e1000_hw *hw = &adapter->hw;
980
981         if (adapter->msix_entries) {
982                 u32 regval = rd32(E1000_EIAC);
983                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
984                 regval = rd32(E1000_EIAM);
985                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
986                 wr32(E1000_EIMS, adapter->eims_enable_mask);
987                 if (adapter->vfs_allocated_count)
988                         wr32(E1000_MBVFIMR, 0xFF);
989                 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
990                                  E1000_IMS_DOUTSYNC));
991         } else {
992                 wr32(E1000_IMS, IMS_ENABLE_MASK);
993                 wr32(E1000_IAM, IMS_ENABLE_MASK);
994         }
995 }
996
997 static void igb_update_mng_vlan(struct igb_adapter *adapter)
998 {
999         struct net_device *netdev = adapter->netdev;
1000         u16 vid = adapter->hw.mng_cookie.vlan_id;
1001         u16 old_vid = adapter->mng_vlan_id;
1002         if (adapter->vlgrp) {
1003                 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1004                         if (adapter->hw.mng_cookie.status &
1005                                 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1006                                 igb_vlan_rx_add_vid(netdev, vid);
1007                                 adapter->mng_vlan_id = vid;
1008                         } else
1009                                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1010
1011                         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1012                                         (vid != old_vid) &&
1013                             !vlan_group_get_device(adapter->vlgrp, old_vid))
1014                                 igb_vlan_rx_kill_vid(netdev, old_vid);
1015                 } else
1016                         adapter->mng_vlan_id = vid;
1017         }
1018 }
1019
1020 /**
1021  * igb_release_hw_control - release control of the h/w to f/w
1022  * @adapter: address of board private structure
1023  *
1024  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1025  * For ASF and Pass Through versions of f/w this means that the
1026  * driver is no longer loaded.
1027  *
1028  **/
1029 static void igb_release_hw_control(struct igb_adapter *adapter)
1030 {
1031         struct e1000_hw *hw = &adapter->hw;
1032         u32 ctrl_ext;
1033
1034         /* Let firmware take over control of h/w */
1035         ctrl_ext = rd32(E1000_CTRL_EXT);
1036         wr32(E1000_CTRL_EXT,
1037                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1038 }
1039
1040
1041 /**
1042  * igb_get_hw_control - get control of the h/w from f/w
1043  * @adapter: address of board private structure
1044  *
1045  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1046  * For ASF and Pass Through versions of f/w this means that
1047  * the driver is loaded.
1048  *
1049  **/
1050 static void igb_get_hw_control(struct igb_adapter *adapter)
1051 {
1052         struct e1000_hw *hw = &adapter->hw;
1053         u32 ctrl_ext;
1054
1055         /* Let firmware know the driver has taken over */
1056         ctrl_ext = rd32(E1000_CTRL_EXT);
1057         wr32(E1000_CTRL_EXT,
1058                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1059 }
1060
1061 /**
1062  * igb_configure - configure the hardware for RX and TX
1063  * @adapter: private board structure
1064  **/
1065 static void igb_configure(struct igb_adapter *adapter)
1066 {
1067         struct net_device *netdev = adapter->netdev;
1068         int i;
1069
1070         igb_get_hw_control(adapter);
1071         igb_set_rx_mode(netdev);
1072
1073         igb_restore_vlan(adapter);
1074
1075         igb_setup_tctl(adapter);
1076         igb_setup_mrqc(adapter);
1077         igb_setup_rctl(adapter);
1078
1079         igb_configure_tx(adapter);
1080         igb_configure_rx(adapter);
1081
1082         igb_rx_fifo_flush_82575(&adapter->hw);
1083
1084         /* call igb_desc_unused which always leaves
1085          * at least 1 descriptor unused to make sure
1086          * next_to_use != next_to_clean */
1087         for (i = 0; i < adapter->num_rx_queues; i++) {
1088                 struct igb_ring *ring = &adapter->rx_ring[i];
1089                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1090         }
1091
1092
1093         adapter->tx_queue_len = netdev->tx_queue_len;
1094 }
1095
1096
1097 /**
1098  * igb_up - Open the interface and prepare it to handle traffic
1099  * @adapter: board private structure
1100  **/
1101
1102 int igb_up(struct igb_adapter *adapter)
1103 {
1104         struct e1000_hw *hw = &adapter->hw;
1105         int i;
1106
1107         /* hardware has been reset, we need to reload some things */
1108         igb_configure(adapter);
1109
1110         clear_bit(__IGB_DOWN, &adapter->state);
1111
1112         for (i = 0; i < adapter->num_q_vectors; i++) {
1113                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1114                 napi_enable(&q_vector->napi);
1115         }
1116         if (adapter->msix_entries)
1117                 igb_configure_msix(adapter);
1118
1119         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1120
1121         /* Clear any pending interrupts. */
1122         rd32(E1000_ICR);
1123         igb_irq_enable(adapter);
1124
1125         /* notify VFs that reset has been completed */
1126         if (adapter->vfs_allocated_count) {
1127                 u32 reg_data = rd32(E1000_CTRL_EXT);
1128                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1129                 wr32(E1000_CTRL_EXT, reg_data);
1130         }
1131
1132         netif_tx_start_all_queues(adapter->netdev);
1133
1134         /* Fire a link change interrupt to start the watchdog. */
1135         wr32(E1000_ICS, E1000_ICS_LSC);
1136         return 0;
1137 }
1138
1139 void igb_down(struct igb_adapter *adapter)
1140 {
1141         struct e1000_hw *hw = &adapter->hw;
1142         struct net_device *netdev = adapter->netdev;
1143         u32 tctl, rctl;
1144         int i;
1145
1146         /* signal that we're down so the interrupt handler does not
1147          * reschedule our watchdog timer */
1148         set_bit(__IGB_DOWN, &adapter->state);
1149
1150         /* disable receives in the hardware */
1151         rctl = rd32(E1000_RCTL);
1152         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1153         /* flush and sleep below */
1154
1155         netif_tx_stop_all_queues(netdev);
1156
1157         /* disable transmits in the hardware */
1158         tctl = rd32(E1000_TCTL);
1159         tctl &= ~E1000_TCTL_EN;
1160         wr32(E1000_TCTL, tctl);
1161         /* flush both disables and wait for them to finish */
1162         wrfl();
1163         msleep(10);
1164
1165         for (i = 0; i < adapter->num_q_vectors; i++) {
1166                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1167                 napi_disable(&q_vector->napi);
1168         }
1169
1170         igb_irq_disable(adapter);
1171
1172         del_timer_sync(&adapter->watchdog_timer);
1173         del_timer_sync(&adapter->phy_info_timer);
1174
1175         netdev->tx_queue_len = adapter->tx_queue_len;
1176         netif_carrier_off(netdev);
1177
1178         /* record the stats before reset */
1179         igb_update_stats(adapter);
1180
1181         adapter->link_speed = 0;
1182         adapter->link_duplex = 0;
1183
1184         if (!pci_channel_offline(adapter->pdev))
1185                 igb_reset(adapter);
1186         igb_clean_all_tx_rings(adapter);
1187         igb_clean_all_rx_rings(adapter);
1188 #ifdef CONFIG_IGB_DCA
1189
1190         /* since we reset the hardware, DCA settings were cleared */
1191         igb_setup_dca(adapter);
1192 #endif
1193 }
1194
1195 void igb_reinit_locked(struct igb_adapter *adapter)
1196 {
1197         WARN_ON(in_interrupt());
1198         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1199                 msleep(1);
1200         igb_down(adapter);
1201         igb_up(adapter);
1202         clear_bit(__IGB_RESETTING, &adapter->state);
1203 }
1204
1205 void igb_reset(struct igb_adapter *adapter)
1206 {
1207         struct e1000_hw *hw = &adapter->hw;
1208         struct e1000_mac_info *mac = &hw->mac;
1209         struct e1000_fc_info *fc = &hw->fc;
1210         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1211         u16 hwm;
1212
1213         /* Repartition PBA for a greater than 9k MTU.
1214          * CTRL.RST is required for the change to take effect.
1215          */
1216         switch (mac->type) {
1217         case e1000_82576:
1218                 pba = E1000_PBA_64K;
1219                 break;
1220         case e1000_82575:
1221         default:
1222                 pba = E1000_PBA_34K;
1223                 break;
1224         }
1225
1226         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1227             (mac->type < e1000_82576)) {
1228                 /* adjust PBA for jumbo frames */
1229                 wr32(E1000_PBA, pba);
1230
1231                 /* To maintain wire speed transmits, the Tx FIFO should be
1232                  * large enough to accommodate two full transmit packets,
1233                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1234                  * the Rx FIFO should be large enough to accommodate at least
1235                  * one full receive packet and is similarly rounded up and
1236                  * expressed in KB. */
1237                 pba = rd32(E1000_PBA);
1238                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1239                 tx_space = pba >> 16;
1240                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1241                 pba &= 0xffff;
1242                 /* the tx fifo also stores 16 bytes of information about the tx
1243                  * packet, but don't include the ethernet FCS because hardware appends it */
1244                 min_tx_space = (adapter->max_frame_size +
1245                                 sizeof(union e1000_adv_tx_desc) -
1246                                 ETH_FCS_LEN) * 2;
1247                 min_tx_space = ALIGN(min_tx_space, 1024);
1248                 min_tx_space >>= 10;
1249                 /* software strips receive CRC, so leave room for it */
1250                 min_rx_space = adapter->max_frame_size;
1251                 min_rx_space = ALIGN(min_rx_space, 1024);
1252                 min_rx_space >>= 10;
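                /* e.g. with a 9000-byte MTU (9018-byte max frame) and the
                 * 16-byte advanced Tx descriptor: min_tx_space =
                 * (9018 + 16 - 4) * 2 = 18060 -> 18432 -> 18 KB, and
                 * min_rx_space = 9018 -> 9216 -> 9 KB */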
1253
1254                 /* If current Tx allocation is less than the min Tx FIFO size,
1255                  * and the min Tx FIFO size is less than the current Rx FIFO
1256                  * allocation, take space away from current Rx allocation */
1257                 if (tx_space < min_tx_space &&
1258                     ((min_tx_space - tx_space) < pba)) {
1259                         pba = pba - (min_tx_space - tx_space);
1260
1261                         /* if short on rx space, rx wins and must trump tx
1262                          * adjustment */
1263                         if (pba < min_rx_space)
1264                                 pba = min_rx_space;
1265                 }
1266                 wr32(E1000_PBA, pba);
1267         }
1268
1269         /* flow control settings */
1270         /* The high water mark must be low enough to fit one full frame
1271          * (or the size used for early receive) above it in the Rx FIFO.
1272          * Set it to the lower of:
1273          * - 90% of the Rx FIFO size, or
1274          * - the full Rx FIFO size minus one full frame */
1275         hwm = min(((pba << 10) * 9 / 10),
1276                         ((pba << 10) - 2 * adapter->max_frame_size));
1277
1278         if (mac->type < e1000_82576) {
1279                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1280                 fc->low_water = fc->high_water - 8;
1281         } else {
1282                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1283                 fc->low_water = fc->high_water - 16;
1284         }
1285         fc->pause_time = 0xFFFF;
1286         fc->send_xon = 1;
1287         fc->current_mode = fc->requested_mode;
1288
1289         /* disable receive for all VFs and wait one second */
1290         if (adapter->vfs_allocated_count) {
1291                 int i;
1292                 for (i = 0; i < adapter->vfs_allocated_count; i++)
1293                         adapter->vf_data[i].clear_to_send = false;
1294
1295                 /* ping all the active vfs to let them know we are going down */
1296                 igb_ping_all_vfs(adapter);
1297
1298                 /* disable transmits and receives */
1299                 wr32(E1000_VFRE, 0);
1300                 wr32(E1000_VFTE, 0);
1301         }
1302
1303         /* Allow time for pending master requests to run */
1304         adapter->hw.mac.ops.reset_hw(&adapter->hw);
1305         wr32(E1000_WUC, 0);
1306
1307         if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1308                 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1309
1310         igb_update_mng_vlan(adapter);
1311
1312         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1313         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1314
1315         igb_reset_adaptive(&adapter->hw);
1316         igb_get_phy_info(&adapter->hw);
1317 }
1318
1319 static const struct net_device_ops igb_netdev_ops = {
1320         .ndo_open               = igb_open,
1321         .ndo_stop               = igb_close,
1322         .ndo_start_xmit         = igb_xmit_frame_adv,
1323         .ndo_get_stats          = igb_get_stats,
1324         .ndo_set_rx_mode        = igb_set_rx_mode,
1325         .ndo_set_multicast_list = igb_set_rx_mode,
1326         .ndo_set_mac_address    = igb_set_mac,
1327         .ndo_change_mtu         = igb_change_mtu,
1328         .ndo_do_ioctl           = igb_ioctl,
1329         .ndo_tx_timeout         = igb_tx_timeout,
1330         .ndo_validate_addr      = eth_validate_addr,
1331         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1332         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1333         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1334 #ifdef CONFIG_NET_POLL_CONTROLLER
1335         .ndo_poll_controller    = igb_netpoll,
1336 #endif
1337 };
1338
1339 /**
1340  * igb_probe - Device Initialization Routine
1341  * @pdev: PCI device information struct
1342  * @ent: entry in igb_pci_tbl
1343  *
1344  * Returns 0 on success, negative on failure
1345  *
1346  * igb_probe initializes an adapter identified by a pci_dev structure.
1347  * The OS initialization, configuring of the adapter private structure,
1348  * and a hardware reset occur.
1349  **/
1350 static int __devinit igb_probe(struct pci_dev *pdev,
1351                                const struct pci_device_id *ent)
1352 {
1353         struct net_device *netdev;
1354         struct igb_adapter *adapter;
1355         struct e1000_hw *hw;
1356         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1357         unsigned long mmio_start, mmio_len;
1358         int err, pci_using_dac;
1359         u16 eeprom_data = 0;
1360         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1361         u32 part_num;
1362
1363         err = pci_enable_device_mem(pdev);
1364         if (err)
1365                 return err;
1366
1367         pci_using_dac = 0;
1368         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1369         if (!err) {
1370                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1371                 if (!err)
1372                         pci_using_dac = 1;
1373         } else {
1374                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1375                 if (err) {
1376                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1377                         if (err) {
1378                                 dev_err(&pdev->dev, "No usable DMA "
1379                                         "configuration, aborting\n");
1380                                 goto err_dma;
1381                         }
1382                 }
1383         }
1384
1385         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1386                                            IORESOURCE_MEM),
1387                                            igb_driver_name);
1388         if (err)
1389                 goto err_pci_reg;
1390
1391         pci_enable_pcie_error_reporting(pdev);
1392
1393         pci_set_master(pdev);
1394         pci_save_state(pdev);
1395
1396         err = -ENOMEM;
1397         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1398                                    IGB_ABS_MAX_TX_QUEUES);
1399         if (!netdev)
1400                 goto err_alloc_etherdev;
1401
1402         SET_NETDEV_DEV(netdev, &pdev->dev);
1403
1404         pci_set_drvdata(pdev, netdev);
1405         adapter = netdev_priv(netdev);
1406         adapter->netdev = netdev;
1407         adapter->pdev = pdev;
1408         hw = &adapter->hw;
1409         hw->back = adapter;
1410         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1411
1412         mmio_start = pci_resource_start(pdev, 0);
1413         mmio_len = pci_resource_len(pdev, 0);
1414
1415         err = -EIO;
1416         hw->hw_addr = ioremap(mmio_start, mmio_len);
1417         if (!hw->hw_addr)
1418                 goto err_ioremap;
1419
1420         netdev->netdev_ops = &igb_netdev_ops;
1421         igb_set_ethtool_ops(netdev);
1422         netdev->watchdog_timeo = 5 * HZ;
1423
1424         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1425
1426         netdev->mem_start = mmio_start;
1427         netdev->mem_end = mmio_start + mmio_len;
1428
1429         /* PCI config space info */
1430         hw->vendor_id = pdev->vendor;
1431         hw->device_id = pdev->device;
1432         hw->revision_id = pdev->revision;
1433         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1434         hw->subsystem_device_id = pdev->subsystem_device;
1435
1436         /* setup the private structure */
1437         hw->back = adapter;
1438         /* Copy the default MAC, PHY and NVM function pointers */
1439         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1440         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1441         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1442         /* Initialize skew-specific constants */
1443         err = ei->get_invariants(hw);
1444         if (err)
1445                 goto err_sw_init;
1446
1447 #ifdef CONFIG_PCI_IOV
1448         /* since SR-IOV functionality isn't critical to base device function
1449          * we can accept failure.  If it fails we simply leave IOV disabled */
1450         if (hw->mac.type == e1000_82576) {
1451                 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1452                 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1453                 int i;
1454                 unsigned char mac_addr[ETH_ALEN];
1455
1456                 if (num_vfs) {
1457                         adapter->vf_data = kcalloc(num_vfs,
1458                                                 sizeof(struct vf_data_storage),
1459                                                 GFP_KERNEL);
1460                         if (!adapter->vf_data) {
1461                                 dev_err(&pdev->dev,
1462                                         "Could not allocate VF private data - "
1463                                         "IOV enable failed\n");
1464                         } else {
1465                                 err = pci_enable_sriov(pdev, num_vfs);
1466                                 if (!err) {
1467                                         adapter->vfs_allocated_count = num_vfs;
1468                                         dev_info(&pdev->dev,
1469                                                  "%d vfs allocated\n",
1470                                                  num_vfs);
1471                                         for (i = 0;
1472                                              i < adapter->vfs_allocated_count;
1473                                              i++) {
1474                                                 random_ether_addr(mac_addr);
1475                                                 igb_set_vf_mac(adapter, i,
1476                                                                mac_addr);
1477                                         }
1478                                 } else {
1479                                         kfree(adapter->vf_data);
1480                                         adapter->vf_data = NULL;
1481                                 }
1482                         }
1483                 }
1484         }
1485
1486 #endif
1487         /* setup the private structure */
1488         err = igb_sw_init(adapter);
1489         if (err)
1490                 goto err_sw_init;
1491
1492         igb_get_bus_info_pcie(hw);
1493
1494         hw->phy.autoneg_wait_to_complete = false;
1495         hw->mac.adaptive_ifs = true;
1496
1497         /* Copper options */
1498         if (hw->phy.media_type == e1000_media_type_copper) {
1499                 hw->phy.mdix = AUTO_ALL_MODES;
1500                 hw->phy.disable_polarity_correction = false;
1501                 hw->phy.ms_type = e1000_ms_hw_default;
1502         }
1503
1504         if (igb_check_reset_block(hw))
1505                 dev_info(&pdev->dev,
1506                         "PHY reset is blocked due to SOL/IDER session.\n");
1507
1508         netdev->features = NETIF_F_SG |
1509                            NETIF_F_IP_CSUM |
1510                            NETIF_F_HW_VLAN_TX |
1511                            NETIF_F_HW_VLAN_RX |
1512                            NETIF_F_HW_VLAN_FILTER;
1513
1514         netdev->features |= NETIF_F_IPV6_CSUM;
1515         netdev->features |= NETIF_F_TSO;
1516         netdev->features |= NETIF_F_TSO6;
1517
1518         netdev->features |= NETIF_F_GRO;
1519
1520         netdev->vlan_features |= NETIF_F_TSO;
1521         netdev->vlan_features |= NETIF_F_TSO6;
1522         netdev->vlan_features |= NETIF_F_IP_CSUM;
1523         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1524         netdev->vlan_features |= NETIF_F_SG;
1525
1526         if (pci_using_dac)
1527                 netdev->features |= NETIF_F_HIGHDMA;
1528
1529         if (adapter->hw.mac.type == e1000_82576)
1530                 netdev->features |= NETIF_F_SCTP_CSUM;
1531
1532         adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1533
1534         /* before reading the NVM, reset the controller to put the device in a
1535          * known good starting state */
1536         hw->mac.ops.reset_hw(hw);
1537
1538         /* make sure the NVM is good */
1539         if (igb_validate_nvm_checksum(hw) < 0) {
1540                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1541                 err = -EIO;
1542                 goto err_eeprom;
1543         }
1544
1545         /* copy the MAC address out of the NVM */
1546         if (hw->mac.ops.read_mac_addr(hw))
1547                 dev_err(&pdev->dev, "NVM Read Error\n");
1548
1549         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1550         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1551
1552         if (!is_valid_ether_addr(netdev->perm_addr)) {
1553                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1554                 err = -EIO;
1555                 goto err_eeprom;
1556         }
1557
1558         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1559                     (unsigned long) adapter);
1560         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1561                     (unsigned long) adapter);
1562
1563         INIT_WORK(&adapter->reset_task, igb_reset_task);
1564         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1565
1566         /* Initialize link properties that are user-changeable */
1567         adapter->fc_autoneg = true;
1568         hw->mac.autoneg = true;
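        /* 0x2f advertises 10/100 half and full duplex plus 1000 Mbps full duplex */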
1569         hw->phy.autoneg_advertised = 0x2f;
1570
1571         hw->fc.requested_mode = e1000_fc_default;
1572         hw->fc.current_mode = e1000_fc_default;
1573
1574         igb_validate_mdi_setting(hw);
1575
1576         /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1577          * enable the ACPI Magic Packet filter
1578          */
1579
1580         if (hw->bus.func == 0)
1581                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1582         else if (hw->bus.func == 1)
1583                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1584
1585         if (eeprom_data & eeprom_apme_mask)
1586                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1587
1588         /* now that we have the eeprom settings, apply the special cases where
1589          * the eeprom may be wrong or the board simply won't support wake on
1590          * lan on a particular port */
1591         switch (pdev->device) {
1592         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1593                 adapter->eeprom_wol = 0;
1594                 break;
1595         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1596         case E1000_DEV_ID_82576_FIBER:
1597         case E1000_DEV_ID_82576_SERDES:
1598                 /* Wake events only supported on port A for dual fiber
1599                  * regardless of eeprom setting */
1600                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1601                         adapter->eeprom_wol = 0;
1602                 break;
1603         case E1000_DEV_ID_82576_QUAD_COPPER:
1604                 /* if quad port adapter, disable WoL on all but port A */
1605                 if (global_quad_port_a != 0)
1606                         adapter->eeprom_wol = 0;
1607                 else
1608                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1609                 /* Reset for multiple quad port adapters */
1610                 if (++global_quad_port_a == 4)
1611                         global_quad_port_a = 0;
1612                 break;
1613         }
1614
1615         /* initialize the wol settings based on the eeprom settings */
1616         adapter->wol = adapter->eeprom_wol;
1617         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1618
1619         /* reset the hardware with the new settings */
1620         igb_reset(adapter);
1621
1622         /* let the f/w know that the h/w is now under the control of the
1623          * driver. */
1624         igb_get_hw_control(adapter);
1625
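        /* use "eth%d" so register_netdev() assigns the next available interface name */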
1626         strcpy(netdev->name, "eth%d");
1627         err = register_netdev(netdev);
1628         if (err)
1629                 goto err_register;
1630
1631         /* carrier off reporting is important to ethtool even BEFORE open */
1632         netif_carrier_off(netdev);
1633
1634 #ifdef CONFIG_IGB_DCA
1635         if (dca_add_requester(&pdev->dev) == 0) {
1636                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1637                 dev_info(&pdev->dev, "DCA enabled\n");
1638                 igb_setup_dca(adapter);
1639         }
1640
1641 #endif
1642
1643         switch (hw->mac.type) {
1644         case e1000_82576:
1645                 /*
1646                  * Initialize hardware timer: we keep it running just in case
1647                  * some program needs it later on.
1648                  */
1649                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1650                 adapter->cycles.read = igb_read_clock;
1651                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1652                 adapter->cycles.mult = 1;
1653                 /*
1654                  * Scale the NIC clock cycle by a large factor so that
1655                  * relatively small clock corrections can be added or
1656                  * subtracted at each clock tick. The drawbacks of a large
1657                  * factor are a) that the clock register overflows more quickly
1658                  * (not such a big deal) and b) that the increment per tick has
1659                  * to fit into 24 bits.  As a result we need to use a shift of
1660                  * 19 so we can fit a value of 16 into the TIMINCA register.
1661                  */
1662                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1663                 wr32(E1000_TIMINCA,
1664                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1665                                 (16 << IGB_82576_TSYNC_SHIFT));
1666
1667                 /* Set registers so that rollover occurs soon to test this. */
1668                 wr32(E1000_SYSTIML, 0x00000000);
1669                 wr32(E1000_SYSTIMH, 0xFF800000);
1670                 wrfl();
1671
1672                 timecounter_init(&adapter->clock,
1673                                  &adapter->cycles,
1674                                  ktime_to_ns(ktime_get_real()));
1675                 /*
1676                  * Synchronize our NIC clock against system wall clock. NIC
1677                  * time stamp reading requires ~3us per sample and each sample
1678                  * was pretty stable even under load, so only 10 samples are
1679                  * required for each offset comparison.
1680                  */
1681                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1682                 adapter->compare.source = &adapter->clock;
1683                 adapter->compare.target = ktime_get_real;
1684                 adapter->compare.num_samples = 10;
1685                 timecompare_update(&adapter->compare, 0);
1686                 break;
1687         case e1000_82575:
1688                 /* 82575 does not support timesync */
1689         default:
1690                 break;
1691         }
1692
1693         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1694         /* print bus type/speed/width info */
1695         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1696                  netdev->name,
1697                  ((hw->bus.speed == e1000_bus_speed_2500)
1698                   ? "2.5Gb/s" : "unknown"),
1699                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1700                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1701                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1702                    "unknown"),
1703                  netdev->dev_addr);
1704
1705         igb_read_part_num(hw, &part_num);
1706         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1707                 (part_num >> 8), (part_num & 0xff));
1708
1709         dev_info(&pdev->dev,
1710                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1711                 adapter->msix_entries ? "MSI-X" :
1712                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1713                 adapter->num_rx_queues, adapter->num_tx_queues);
1714
1715         return 0;
1716
1717 err_register:
1718         igb_release_hw_control(adapter);
1719 err_eeprom:
1720         if (!igb_check_reset_block(hw))
1721                 igb_reset_phy(hw);
1722
1723         if (hw->flash_address)
1724                 iounmap(hw->flash_address);
1725 err_sw_init:
1726         igb_clear_interrupt_scheme(adapter);
1727         iounmap(hw->hw_addr);
1728 err_ioremap:
1729         free_netdev(netdev);
1730 err_alloc_etherdev:
1731         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1732                                      IORESOURCE_MEM));
1733 err_pci_reg:
1734 err_dma:
1735         pci_disable_device(pdev);
1736         return err;
1737 }
1738
1739 /**
1740  * igb_remove - Device Removal Routine
1741  * @pdev: PCI device information struct
1742  *
1743  * igb_remove is called by the PCI subsystem to alert the driver
1744  * that it should release a PCI device.  This could be caused by a
1745  * Hot-Plug event, or because the driver is going to be removed from
1746  * memory.
1747  **/
1748 static void __devexit igb_remove(struct pci_dev *pdev)
1749 {
1750         struct net_device *netdev = pci_get_drvdata(pdev);
1751         struct igb_adapter *adapter = netdev_priv(netdev);
1752         struct e1000_hw *hw = &adapter->hw;
1753
1754         /* flush_scheduled_work() may reschedule our watchdog task, so
1755          * explicitly disable watchdog tasks from being rescheduled  */
1756         set_bit(__IGB_DOWN, &adapter->state);
1757         del_timer_sync(&adapter->watchdog_timer);
1758         del_timer_sync(&adapter->phy_info_timer);
1759
1760         flush_scheduled_work();
1761
1762 #ifdef CONFIG_IGB_DCA
1763         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1764                 dev_info(&pdev->dev, "DCA disabled\n");
1765                 dca_remove_requester(&pdev->dev);
1766                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1767                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1768         }
1769 #endif
1770
1771         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1772          * would have already happened in close and is redundant. */
1773         igb_release_hw_control(adapter);
1774
1775         unregister_netdev(netdev);
1776
1777         if (!igb_check_reset_block(&adapter->hw))
1778                 igb_reset_phy(&adapter->hw);
1779
1780         igb_clear_interrupt_scheme(adapter);
1781
1782 #ifdef CONFIG_PCI_IOV
1783         /* reclaim resources allocated to VFs */
1784         if (adapter->vf_data) {
1785                 /* disable iov and allow time for transactions to clear */
1786                 pci_disable_sriov(pdev);
1787                 msleep(500);
1788
1789                 kfree(adapter->vf_data);
1790                 adapter->vf_data = NULL;
1791                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1792                 msleep(100);
1793                 dev_info(&pdev->dev, "IOV Disabled\n");
1794         }
1795 #endif
1796         iounmap(hw->hw_addr);
1797         if (hw->flash_address)
1798                 iounmap(hw->flash_address);
1799         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1800                                      IORESOURCE_MEM));
1801
1802         free_netdev(netdev);
1803
1804         pci_disable_pcie_error_reporting(pdev);
1805
1806         pci_disable_device(pdev);
1807 }
1808
1809 /**
1810  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1811  * @adapter: board private structure to initialize
1812  *
1813  * igb_sw_init initializes the Adapter private data structure.
1814  * Fields are initialized based on PCI device information and
1815  * OS network device settings (MTU size).
1816  **/
1817 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1818 {
1819         struct e1000_hw *hw = &adapter->hw;
1820         struct net_device *netdev = adapter->netdev;
1821         struct pci_dev *pdev = adapter->pdev;
1822
1823         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1824
1825         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1826         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1827         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1828         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1829
1830         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1831         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1832
1833         /* This call may decrease the number of queues depending on
1834          * interrupt mode. */
1835         if (igb_init_interrupt_scheme(adapter)) {
1836                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1837                 return -ENOMEM;
1838         }
1839
1840         /* Explicitly disable IRQ since the NIC can be in any state. */
1841         igb_irq_disable(adapter);
1842
1843         set_bit(__IGB_DOWN, &adapter->state);
1844         return 0;
1845 }
1846
1847 /**
1848  * igb_open - Called when a network interface is made active
1849  * @netdev: network interface device structure
1850  *
1851  * Returns 0 on success, negative value on failure
1852  *
1853  * The open entry point is called when a network interface is made
1854  * active by the system (IFF_UP).  At this point all resources needed
1855  * for transmit and receive operations are allocated, the interrupt
1856  * handler is registered with the OS, the watchdog timer is started,
1857  * and the stack is notified that the interface is ready.
1858  **/
1859 static int igb_open(struct net_device *netdev)
1860 {
1861         struct igb_adapter *adapter = netdev_priv(netdev);
1862         struct e1000_hw *hw = &adapter->hw;
1863         int err;
1864         int i;
1865
1866         /* disallow open during test */
1867         if (test_bit(__IGB_TESTING, &adapter->state))
1868                 return -EBUSY;
1869
1870         netif_carrier_off(netdev);
1871
1872         /* allocate transmit descriptors */
1873         err = igb_setup_all_tx_resources(adapter);
1874         if (err)
1875                 goto err_setup_tx;
1876
1877         /* allocate receive descriptors */
1878         err = igb_setup_all_rx_resources(adapter);
1879         if (err)
1880                 goto err_setup_rx;
1881
1882         /* e1000_power_up_phy(adapter); */
1883
1884         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1885         if ((adapter->hw.mng_cookie.status &
1886              E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1887                 igb_update_mng_vlan(adapter);
1888
1889         /* before we allocate an interrupt, we must be ready to handle it.
1890          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1891          * as soon as we call pci_request_irq, so we have to set up our
1892          * clean_rx handler before we do so.  */
1893         igb_configure(adapter);
1894
1895         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1896
1897         err = igb_request_irq(adapter);
1898         if (err)
1899                 goto err_req_irq;
1900
1901         /* From here on the code is the same as igb_up() */
1902         clear_bit(__IGB_DOWN, &adapter->state);
1903
1904         for (i = 0; i < adapter->num_q_vectors; i++) {
1905                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1906                 napi_enable(&q_vector->napi);
1907         }
1908
1909         /* Clear any pending interrupts. */
1910         rd32(E1000_ICR);
1911
1912         igb_irq_enable(adapter);
1913
1914         /* notify VFs that reset has been completed */
1915         if (adapter->vfs_allocated_count) {
1916                 u32 reg_data = rd32(E1000_CTRL_EXT);
1917                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1918                 wr32(E1000_CTRL_EXT, reg_data);
1919         }
1920
1921         netif_tx_start_all_queues(netdev);
1922
1923         /* Fire a link status change interrupt to start the watchdog. */
1924         wr32(E1000_ICS, E1000_ICS_LSC);
1925
1926         return 0;
1927
1928 err_req_irq:
1929         igb_release_hw_control(adapter);
1930         /* e1000_power_down_phy(adapter); */
1931         igb_free_all_rx_resources(adapter);
1932 err_setup_rx:
1933         igb_free_all_tx_resources(adapter);
1934 err_setup_tx:
1935         igb_reset(adapter);
1936
1937         return err;
1938 }
1939
1940 /**
1941  * igb_close - Disables a network interface
1942  * @netdev: network interface device structure
1943  *
1944  * Returns 0, this is not allowed to fail
1945  *
1946  * The close entry point is called when an interface is de-activated
1947  * by the OS.  The hardware is still under the driver's control, but
1948  * needs to be disabled.  A global MAC reset is issued to stop the
1949  * hardware, and all transmit and receive resources are freed.
1950  **/
1951 static int igb_close(struct net_device *netdev)
1952 {
1953         struct igb_adapter *adapter = netdev_priv(netdev);
1954
1955         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1956         igb_down(adapter);
1957
1958         igb_free_irq(adapter);
1959
1960         igb_free_all_tx_resources(adapter);
1961         igb_free_all_rx_resources(adapter);
1962
1963         /* kill manageability vlan ID if supported, but not if a vlan with
1964          * the same ID is registered on the host OS (let 8021q kill it) */
1965         if ((adapter->hw.mng_cookie.status &
1966                           E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1967              !(adapter->vlgrp &&
1968                vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1969                 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
1970
1971         return 0;
1972 }
1973
1974 /**
1975  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1976  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1977  *
1978  * Return 0 on success, negative on failure
1979  **/
1980 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1981 {
1982         struct pci_dev *pdev = tx_ring->pdev;
1983         int size;
1984
1985         size = sizeof(struct igb_buffer) * tx_ring->count;
1986         tx_ring->buffer_info = vmalloc(size);
1987         if (!tx_ring->buffer_info)
1988                 goto err;
1989         memset(tx_ring->buffer_info, 0, size);
1990
1991         /* round up to nearest 4K */
1992         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1993         tx_ring->size = ALIGN(tx_ring->size, 4096);
1994
1995         tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
1996                                              &tx_ring->dma);
1997
1998         if (!tx_ring->desc)
1999                 goto err;
2000
2001         tx_ring->next_to_use = 0;
2002         tx_ring->next_to_clean = 0;
2003         return 0;
2004
2005 err:
2006         vfree(tx_ring->buffer_info);
2007         dev_err(&pdev->dev,
2008                 "Unable to allocate memory for the transmit descriptor ring\n");
2009         return -ENOMEM;
2010 }
2011
2012 /**
2013  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2014  *                                (Descriptors) for all queues
2015  * @adapter: board private structure
2016  *
2017  * Return 0 on success, negative on failure
2018  **/
2019 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2020 {
2021         int i, err = 0;
2022         int r_idx;
2023
2024         for (i = 0; i < adapter->num_tx_queues; i++) {
2025                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2026                 if (err) {
2027                         dev_err(&adapter->pdev->dev,
2028                                 "Allocation for Tx Queue %u failed\n", i);
2029                         for (i--; i >= 0; i--)
2030                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2031                         break;
2032                 }
2033         }
2034
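        /* map every possible Tx queue index onto an allocated ring, wrapping round-robin */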
2035         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2036                 r_idx = i % adapter->num_tx_queues;
2037                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2038         }
2039         return err;
2040 }
2041
2042 /**
2043  * igb_setup_tctl - configure the transmit control registers
2044  * @adapter: Board private structure
2045  **/
2046 void igb_setup_tctl(struct igb_adapter *adapter)
2047 {
2048         struct e1000_hw *hw = &adapter->hw;
2049         u32 tctl;
2050
2051         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2052         wr32(E1000_TXDCTL(0), 0);
2053
2054         /* Program the Transmit Control Register */
2055         tctl = rd32(E1000_TCTL);
2056         tctl &= ~E1000_TCTL_CT;
2057         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2058                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2059
2060         igb_config_collision_dist(hw);
2061
2062         /* Enable transmits */
2063         tctl |= E1000_TCTL_EN;
2064
2065         wr32(E1000_TCTL, tctl);
2066 }
2067
2068 /**
2069  * igb_configure_tx_ring - Configure transmit ring after Reset
2070  * @adapter: board private structure
2071  * @ring: tx ring to configure
2072  *
2073  * Configure a transmit ring after a reset.
2074  **/
2075 void igb_configure_tx_ring(struct igb_adapter *adapter,
2076                            struct igb_ring *ring)
2077 {
2078         struct e1000_hw *hw = &adapter->hw;
2079         u32 txdctl;
2080         u64 tdba = ring->dma;
2081         int reg_idx = ring->reg_idx;
2082
2083         /* disable the queue */
2084         txdctl = rd32(E1000_TXDCTL(reg_idx));
2085         wr32(E1000_TXDCTL(reg_idx),
2086                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2087         wrfl();
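        /* give the hardware time to finish any in-flight descriptor fetches */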
2088         mdelay(10);
2089
2090         wr32(E1000_TDLEN(reg_idx),
2091                         ring->count * sizeof(union e1000_adv_tx_desc));
2092         wr32(E1000_TDBAL(reg_idx),
2093                         tdba & 0x00000000ffffffffULL);
2094         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2095
2096         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2097         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2098         writel(0, ring->head);
2099         writel(0, ring->tail);
2100
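        /* set Tx descriptor prefetch, host, and write-back thresholds */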
2101         txdctl |= IGB_TX_PTHRESH;
2102         txdctl |= IGB_TX_HTHRESH << 8;
2103         txdctl |= IGB_TX_WTHRESH << 16;
2104
2105         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2106         wr32(E1000_TXDCTL(reg_idx), txdctl);
2107 }
2108
2109 /**
2110  * igb_configure_tx - Configure transmit Unit after Reset
2111  * @adapter: board private structure
2112  *
2113  * Configure the Tx unit of the MAC after a reset.
2114  **/
2115 static void igb_configure_tx(struct igb_adapter *adapter)
2116 {
2117         int i;
2118
2119         for (i = 0; i < adapter->num_tx_queues; i++)
2120                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2121 }
2122
2123 /**
2124  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2125  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2126  *
2127  * Returns 0 on success, negative on failure
2128  **/
2129 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2130 {
2131         struct pci_dev *pdev = rx_ring->pdev;
2132         int size, desc_len;
2133
2134         size = sizeof(struct igb_buffer) * rx_ring->count;
2135         rx_ring->buffer_info = vmalloc(size);
2136         if (!rx_ring->buffer_info)
2137                 goto err;
2138         memset(rx_ring->buffer_info, 0, size);
2139
2140         desc_len = sizeof(union e1000_adv_rx_desc);
2141
2142         /* Round up to nearest 4K */
2143         rx_ring->size = rx_ring->count * desc_len;
2144         rx_ring->size = ALIGN(rx_ring->size, 4096);
2145
2146         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2147                                              &rx_ring->dma);
2148
2149         if (!rx_ring->desc)
2150                 goto err;
2151
2152         rx_ring->next_to_clean = 0;
2153         rx_ring->next_to_use = 0;
2154
2155         return 0;
2156
2157 err:
2158         vfree(rx_ring->buffer_info);
2159         dev_err(&pdev->dev, "Unable to allocate memory for "
2160                 "the receive descriptor ring\n");
2161         return -ENOMEM;
2162 }
2163
2164 /**
2165  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2166  *                                (Descriptors) for all queues
2167  * @adapter: board private structure
2168  *
2169  * Return 0 on success, negative on failure
2170  **/
2171 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2172 {
2173         int i, err = 0;
2174
2175         for (i = 0; i < adapter->num_rx_queues; i++) {
2176                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2177                 if (err) {
2178                         dev_err(&adapter->pdev->dev,
2179                                 "Allocation for Rx Queue %u failed\n", i);
2180                         for (i--; i >= 0; i--)
2181                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2182                         break;
2183                 }
2184         }
2185
2186         return err;
2187 }
2188
2189 /**
2190  * igb_setup_mrqc - configure the multiple receive queue control registers
2191  * @adapter: Board private structure
2192  **/
2193 static void igb_setup_mrqc(struct igb_adapter *adapter)
2194 {
2195         struct e1000_hw *hw = &adapter->hw;
2196         u32 mrqc, rxcsum;
2197         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2198         union e1000_reta {
2199                 u32 dword;
2200                 u8  bytes[4];
2201         } reta;
2202         static const u8 rsshash[40] = {
2203                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2204                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2205                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2206                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2207
2208         /* Fill out hash function seeds */
2209         for (j = 0; j < 10; j++) {
2210                 u32 rsskey = rsshash[(j * 4)];
2211                 rsskey |= rsshash[(j * 4) + 1] << 8;
2212                 rsskey |= rsshash[(j * 4) + 2] << 16;
2213                 rsskey |= rsshash[(j * 4) + 3] << 24;
2214                 array_wr32(E1000_RSSRK(0), j, rsskey);
2215         }
2216
2217         num_rx_queues = adapter->num_rx_queues;
2218
2219         if (adapter->vfs_allocated_count) {
2220                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2221                 switch (hw->mac.type) {
2222                 case e1000_82576:
2223                         shift = 3;
2224                         num_rx_queues = 2;
2225                         break;
2226                 case e1000_82575:
2227                         shift = 2;
2228                         shift2 = 6;
2229                 default:
2230                         break;
2231                 }
2232         } else {
2233                 if (hw->mac.type == e1000_82575)
2234                         shift = 6;
2235         }
2236
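        /* populate the 128-entry RSS redirection table, one 32-bit register
         * (four table entries) at a time */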
2237         for (j = 0; j < (32 * 4); j++) {
2238                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2239                 if (shift2)
2240                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2241                 if ((j & 3) == 3)
2242                         wr32(E1000_RETA(j >> 2), reta.dword);
2243         }
2244
2245         /*
2246          * Disable raw packet checksumming so that RSS hash is placed in
2247          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2248          * offloads as they are enabled by default
2249          */
2250         rxcsum = rd32(E1000_RXCSUM);
2251         rxcsum |= E1000_RXCSUM_PCSD;
2252
2253         if (adapter->hw.mac.type >= e1000_82576)
2254                 /* Enable Receive Checksum Offload for SCTP */
2255                 rxcsum |= E1000_RXCSUM_CRCOFL;
2256
2257         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2258         wr32(E1000_RXCSUM, rxcsum);
2259
2260         /* If VMDq is enabled then we set the appropriate mode for that, else
2261          * we default to RSS so that an RSS hash is calculated per packet even
2262          * if we are only using one queue */
2263         if (adapter->vfs_allocated_count) {
2264                 if (hw->mac.type > e1000_82575) {
2265                         /* Set the default pool for the PF's first queue */
2266                         u32 vtctl = rd32(E1000_VT_CTL);
2267                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2268                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2269                         vtctl |= adapter->vfs_allocated_count <<
2270                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2271                         wr32(E1000_VT_CTL, vtctl);
2272                 }
2273                 if (adapter->num_rx_queues > 1)
2274                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2275                 else
2276                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2277         } else {
2278                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2279         }
2280         igb_vmm_control(adapter);
2281
2282         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2283                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2284         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2285                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2286         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2287                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2288         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2289                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2290
2291         wr32(E1000_MRQC, mrqc);
2292 }
2293
2294 /**
2295  * igb_setup_rctl - configure the receive control registers
2296  * @adapter: Board private structure
2297  **/
2298 void igb_setup_rctl(struct igb_adapter *adapter)
2299 {
2300         struct e1000_hw *hw = &adapter->hw;
2301         u32 rctl;
2302
2303         rctl = rd32(E1000_RCTL);
2304
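        /* disable loopback and clear the multicast offset bits (the offset is set below) */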
2305         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2306         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2307
2308         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2309                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2310
2311         /*
2312          * enable stripping of CRC. It's unlikely this will break BMC
2313          * redirection as it did with e1000. Newer features require
2314          * that the HW strips the CRC.
2315          */
2316         rctl |= E1000_RCTL_SECRC;
2317
2318         /*
2319          * disable store bad packets and clear size bits.
2320          */
2321         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2322
2323         /* enable LPE to prevent packets larger than max_frame_size */
2324         rctl |= E1000_RCTL_LPE;
2325
2326         /* disable queue 0 to prevent tail write w/o re-config */
2327         wr32(E1000_RXDCTL(0), 0);
2328
2329         /* Attention!!!  For SR-IOV PF driver operations you must enable
2330          * queue drop for all VF and PF queues to prevent head-of-line blocking
2331          * if an untrusted VF does not provide descriptors to hardware.
2332          */
2333         if (adapter->vfs_allocated_count) {
2334                 u32 vmolr;
2335
2336                 /* set all queue drop enable bits */
2337                 wr32(E1000_QDE, ALL_QUEUES);
2338
2339                 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2340                 if (rctl & E1000_RCTL_LPE)
2341                         vmolr |= E1000_VMOLR_LPE;
2342                 if (adapter->num_rx_queues > 1)
2343                         vmolr |= E1000_VMOLR_RSSE;
2344                 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2345         }
2346
2347         wr32(E1000_RCTL, rctl);
2348 }
2349
2350 /**
2351  * igb_rlpml_set - set maximum receive packet size
2352  * @adapter: board private structure
2353  *
2354  * Configure maximum receivable packet size.
2355  **/
2356 static void igb_rlpml_set(struct igb_adapter *adapter)
2357 {
2358         u32 max_frame_size = adapter->max_frame_size;
2359         struct e1000_hw *hw = &adapter->hw;
2360         u16 pf_id = adapter->vfs_allocated_count;
2361
2362         if (adapter->vlgrp)
2363                 max_frame_size += VLAN_TAG_SIZE;
2364
2365         /* if VFs are enabled we set RLPML to the largest possible request
2366          * size and set the VMOLR RLPML to the size we need */
2367         if (pf_id) {
2368                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2369                 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2370         }
2371
2372         wr32(E1000_RLPML, max_frame_size);
2373 }
2374
2375 /**
2376  * igb_configure_rx_ring - Configure a receive ring after Reset
2377  * @adapter: board private structure
2378  * @ring: receive ring to be configured
2379  *
2380  * Configure the Rx unit of the MAC after a reset.
2381  **/
2382 void igb_configure_rx_ring(struct igb_adapter *adapter,
2383                            struct igb_ring *ring)
2384 {
2385         struct e1000_hw *hw = &adapter->hw;
2386         u64 rdba = ring->dma;
2387         int reg_idx = ring->reg_idx;
2388         u32 srrctl, rxdctl;
2389
2390         /* disable the queue */
2391         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2392         wr32(E1000_RXDCTL(reg_idx),
2393                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2394
2395         /* Set DMA base address registers */
2396         wr32(E1000_RDBAL(reg_idx),
2397              rdba & 0x00000000ffffffffULL);
2398         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2399         wr32(E1000_RDLEN(reg_idx),
2400                        ring->count * sizeof(union e1000_adv_rx_desc));
2401
2402         /* initialize head and tail */
2403         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2404         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2405         writel(0, ring->head);
2406         writel(0, ring->tail);
2407
2408         /* set descriptor configuration */
2409         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2410                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2411                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2412 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2413                 srrctl |= IGB_RXBUFFER_16384 >>
2414                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2415 #else
2416                 srrctl |= (PAGE_SIZE / 2) >>
2417                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2418 #endif
2419                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2420         } else {
2421                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2422                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2423                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2424         }
2425
2426         wr32(E1000_SRRCTL(reg_idx), srrctl);
2427
2428         /* enable receive descriptor fetching */
2429         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2430         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2431         rxdctl &= 0xFFF00000;
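        /* set Rx descriptor prefetch, host, and write-back thresholds */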
2432         rxdctl |= IGB_RX_PTHRESH;
2433         rxdctl |= IGB_RX_HTHRESH << 8;
2434         rxdctl |= IGB_RX_WTHRESH << 16;
2435         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2436 }
2437
2438 /**
2439  * igb_configure_rx - Configure receive Unit after Reset
2440  * @adapter: board private structure
2441  *
2442  * Configure the Rx unit of the MAC after a reset.
2443  **/
2444 static void igb_configure_rx(struct igb_adapter *adapter)
2445 {
2446         int i;
2447
2448         /* set UTA to appropriate mode */
2449         igb_set_uta(adapter);
2450
2451         /* set the correct pool for the PF default MAC address in entry 0 */
2452         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2453                          adapter->vfs_allocated_count);
2454
2455         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2456          * the Base and Length of the Rx Descriptor Ring */
2457         for (i = 0; i < adapter->num_rx_queues; i++)
2458                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2459 }
2460
2461 /**
2462  * igb_free_tx_resources - Free Tx Resources per Queue
2463  * @tx_ring: Tx descriptor ring for a specific queue
2464  *
2465  * Free all transmit software resources
2466  **/
2467 void igb_free_tx_resources(struct igb_ring *tx_ring)
2468 {
2469         igb_clean_tx_ring(tx_ring);
2470
2471         vfree(tx_ring->buffer_info);
2472         tx_ring->buffer_info = NULL;
2473
2474         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2475                             tx_ring->desc, tx_ring->dma);
2476
2477         tx_ring->desc = NULL;
2478 }
2479
2480 /**
2481  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2482  * @adapter: board private structure
2483  *
2484  * Free all transmit software resources
2485  **/
2486 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2487 {
2488         int i;
2489
2490         for (i = 0; i < adapter->num_tx_queues; i++)
2491                 igb_free_tx_resources(&adapter->tx_ring[i]);
2492 }
2493
2494 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2495                                     struct igb_buffer *buffer_info)
2496 {
2497         buffer_info->dma = 0;
2498         if (buffer_info->skb) {
2499                 skb_dma_unmap(&tx_ring->pdev->dev,
2500                               buffer_info->skb,
2501                               DMA_TO_DEVICE);
2502                 dev_kfree_skb_any(buffer_info->skb);
2503                 buffer_info->skb = NULL;
2504         }
2505         buffer_info->time_stamp = 0;
2506         /* buffer_info must be completely set up in the transmit path */
2507 }
2508
2509 /**
2510  * igb_clean_tx_ring - Free Tx Buffers
2511  * @tx_ring: ring to be cleaned
2512  **/
2513 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2514 {
2515         struct igb_buffer *buffer_info;
2516         unsigned long size;
2517         unsigned int i;
2518
2519         if (!tx_ring->buffer_info)
2520                 return;
2521         /* Free all the Tx ring sk_buffs */
2522
2523         for (i = 0; i < tx_ring->count; i++) {
2524                 buffer_info = &tx_ring->buffer_info[i];
2525                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2526         }
2527
2528         size = sizeof(struct igb_buffer) * tx_ring->count;
2529         memset(tx_ring->buffer_info, 0, size);
2530
2531         /* Zero out the descriptor ring */
2532
2533         memset(tx_ring->desc, 0, tx_ring->size);
2534
2535         tx_ring->next_to_use = 0;
2536         tx_ring->next_to_clean = 0;
2537
2538         writel(0, tx_ring->head);
2539         writel(0, tx_ring->tail);
2540 }
2541
2542 /**
2543  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2544  * @adapter: board private structure
2545  **/
2546 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2547 {
2548         int i;
2549
2550         for (i = 0; i < adapter->num_tx_queues; i++)
2551                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2552 }
2553
2554 /**
2555  * igb_free_rx_resources - Free Rx Resources
2556  * @rx_ring: ring to clean the resources from
2557  *
2558  * Free all receive software resources
2559  **/
2560 void igb_free_rx_resources(struct igb_ring *rx_ring)
2561 {
2562         igb_clean_rx_ring(rx_ring);
2563
2564         vfree(rx_ring->buffer_info);
2565         rx_ring->buffer_info = NULL;
2566
2567         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2568                             rx_ring->desc, rx_ring->dma);
2569
2570         rx_ring->desc = NULL;
2571 }
2572
2573 /**
2574  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2575  * @adapter: board private structure
2576  *
2577  * Free all receive software resources
2578  **/
2579 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2580 {
2581         int i;
2582
2583         for (i = 0; i < adapter->num_rx_queues; i++)
2584                 igb_free_rx_resources(&adapter->rx_ring[i]);
2585 }
2586
2587 /**
2588  * igb_clean_rx_ring - Free Rx Buffers per Queue
2589  * @rx_ring: ring to free buffers from
2590  **/
2591 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2592 {
2593         struct igb_buffer *buffer_info;
2594         unsigned long size;
2595         unsigned int i;
2596
2597         if (!rx_ring->buffer_info)
2598                 return;
2599         /* Free all the Rx ring sk_buffs */
2600         for (i = 0; i < rx_ring->count; i++) {
2601                 buffer_info = &rx_ring->buffer_info[i];
2602                 if (buffer_info->dma) {
2603                         pci_unmap_single(rx_ring->pdev,
2604                                          buffer_info->dma,
2605                                          rx_ring->rx_buffer_len,
2606                                          PCI_DMA_FROMDEVICE);
2607                         buffer_info->dma = 0;
2608                 }
2609
2610                 if (buffer_info->skb) {
2611                         dev_kfree_skb(buffer_info->skb);
2612                         buffer_info->skb = NULL;
2613                 }
2614                 if (buffer_info->page_dma) {
2615                         pci_unmap_page(rx_ring->pdev,
2616                                        buffer_info->page_dma,
2617                                        PAGE_SIZE / 2,
2618                                        PCI_DMA_FROMDEVICE);
2619                         buffer_info->page_dma = 0;
2620                 }
2621                 if (buffer_info->page) {
2622                         put_page(buffer_info->page);
2623                         buffer_info->page = NULL;
2624                         buffer_info->page_offset = 0;
2625                 }
2626         }
2627
2628         size = sizeof(struct igb_buffer) * rx_ring->count;
2629         memset(rx_ring->buffer_info, 0, size);
2630
2631         /* Zero out the descriptor ring */
2632         memset(rx_ring->desc, 0, rx_ring->size);
2633
2634         rx_ring->next_to_clean = 0;
2635         rx_ring->next_to_use = 0;
2636
2637         writel(0, rx_ring->head);
2638         writel(0, rx_ring->tail);
2639 }
2640
2641 /**
2642  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2643  * @adapter: board private structure
2644  **/
2645 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2646 {
2647         int i;
2648
2649         for (i = 0; i < adapter->num_rx_queues; i++)
2650                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2651 }
2652
2653 /**
2654  * igb_set_mac - Change the Ethernet Address of the NIC
2655  * @netdev: network interface device structure
2656  * @p: pointer to an address structure
2657  *
2658  * Returns 0 on success, negative on failure
2659  **/
2660 static int igb_set_mac(struct net_device *netdev, void *p)
2661 {
2662         struct igb_adapter *adapter = netdev_priv(netdev);
2663         struct e1000_hw *hw = &adapter->hw;
2664         struct sockaddr *addr = p;
2665
2666         if (!is_valid_ether_addr(addr->sa_data))
2667                 return -EADDRNOTAVAIL;
2668
2669         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2670         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2671
2672         /* set the correct pool for the new PF MAC address in entry 0 */
2673         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2674                          adapter->vfs_allocated_count);
2675
2676         return 0;
2677 }
2678
2679 /**
2680  * igb_write_mc_addr_list - write multicast addresses to MTA
2681  * @netdev: network interface device structure
2682  *
2683  * Writes multicast address list to the MTA hash table.
2684  * Returns: -ENOMEM on failure
2685  *                0 on no addresses written
2686  *                X on writing X addresses to MTA
2687  **/
2688 static int igb_write_mc_addr_list(struct net_device *netdev)
2689 {
2690         struct igb_adapter *adapter = netdev_priv(netdev);
2691         struct e1000_hw *hw = &adapter->hw;
2692         struct dev_mc_list *mc_ptr = netdev->mc_list;
2693         u8  *mta_list;
2694         u32 vmolr = 0;
2695         int i;
2696
2697         if (!netdev->mc_count) {
2698                 /* nothing to program, so clear mc list */
2699                 igb_update_mc_addr_list(hw, NULL, 0);
2700                 igb_restore_vf_multicasts(adapter);
2701                 return 0;
2702         }
2703
2704         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2705         if (!mta_list)
2706                 return -ENOMEM;
2707
2708         /* set vmolr receive overflow multicast bit */
2709         vmolr |= E1000_VMOLR_ROMPE;
2710
2711         /* The shared function expects a packed array of only addresses. */
2712         mc_ptr = netdev->mc_list;
2713
2714         for (i = 0; i < netdev->mc_count; i++) {
2715                 if (!mc_ptr)
2716                         break;
2717                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2718                 mc_ptr = mc_ptr->next;
2719         }
2720         igb_update_mc_addr_list(hw, mta_list, i);
2721         kfree(mta_list);
2722
2723         return netdev->mc_count;
2724 }
2725
2726 /**
2727  * igb_write_uc_addr_list - write unicast addresses to RAR table
2728  * @netdev: network interface device structure
2729  *
2730  * Writes unicast address list to the RAR table.
2731  * Returns: -ENOMEM on failure/insufficient address space
2732  *                0 on no addresses written
2733  *                X on writing X addresses to the RAR table
2734  **/
2735 static int igb_write_uc_addr_list(struct net_device *netdev)
2736 {
2737         struct igb_adapter *adapter = netdev_priv(netdev);
2738         struct e1000_hw *hw = &adapter->hw;
2739         unsigned int vfn = adapter->vfs_allocated_count;
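        /* reserve RAR entries for the PF default MAC address plus one entry per VF */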
2740         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2741         int count = 0;
2742
2743         /* return -ENOMEM indicating insufficient memory for addresses */
2744         if (netdev->uc.count > rar_entries)
2745                 return -ENOMEM;
2746
2747         if (netdev->uc.count && rar_entries) {
2748                 struct netdev_hw_addr *ha;
2749                 list_for_each_entry(ha, &netdev->uc.list, list) {
2750                         if (!rar_entries)
2751                                 break;
2752                         igb_rar_set_qsel(adapter, ha->addr,
2753                                          rar_entries--,
2754                                          vfn);
2755                         count++;
2756                 }
2757         }
2758         /* write the addresses in reverse order to avoid write combining */
2759         for (; rar_entries > 0 ; rar_entries--) {
2760                 wr32(E1000_RAH(rar_entries), 0);
2761                 wr32(E1000_RAL(rar_entries), 0);
2762         }
2763         wrfl();
2764
2765         return count;
2766 }
2767
2768 /**
2769  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2770  * @netdev: network interface device structure
2771  *
2772  * The set_rx_mode entry point is called whenever the unicast or multicast
2773  * address lists or the network interface flags are updated.  This routine is
2774  * responsible for configuring the hardware for proper unicast, multicast,
2775  * promiscuous mode, and all-multi behavior.
2776  **/
2777 static void igb_set_rx_mode(struct net_device *netdev)
2778 {
2779         struct igb_adapter *adapter = netdev_priv(netdev);
2780         struct e1000_hw *hw = &adapter->hw;
2781         unsigned int vfn = adapter->vfs_allocated_count;
2782         u32 rctl, vmolr = 0;
2783         int count;
2784
2785         /* Check for Promiscuous and All Multicast modes */
2786         rctl = rd32(E1000_RCTL);
2787
2788         /* clear the affected bits */
2789         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2790
2791         if (netdev->flags & IFF_PROMISC) {
2792                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2793                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2794         } else {
2795                 if (netdev->flags & IFF_ALLMULTI) {
2796                         rctl |= E1000_RCTL_MPE;
2797                         vmolr |= E1000_VMOLR_MPME;
2798                 } else {
2799                         /*
2800                          * Write addresses to the MTA, if the attempt fails
2801                          * Write addresses to the MTA; if the attempt fails
2802                          * then we should just turn on promiscuous mode so
2803                          */
2804                         count = igb_write_mc_addr_list(netdev);
2805                         if (count < 0) {
2806                                 rctl |= E1000_RCTL_MPE;
2807                                 vmolr |= E1000_VMOLR_MPME;
2808                         } else if (count) {
2809                                 vmolr |= E1000_VMOLR_ROMPE;
2810                         }
2811                 }
2812                 /*
2813                  * Write addresses to available RAR registers, if there is not
2814                  * Write addresses to available RAR registers; if there is not
2815                  * sufficient space to store all the addresses then enable
2816                  * unicast promiscuous mode
2817                 count = igb_write_uc_addr_list(netdev);
2818                 if (count < 0) {
2819                         rctl |= E1000_RCTL_UPE;
2820                         vmolr |= E1000_VMOLR_ROPE;
2821                 }
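                /* VLAN filtering stays enabled whenever we are not in promiscuous mode */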
2822                 rctl |= E1000_RCTL_VFE;
2823         }
2824         wr32(E1000_RCTL, rctl);
2825
2826         /*
2827          * In order to support SR-IOV and eventually VMDq it is necessary to set
2828          * the VMOLR to enable the appropriate modes.  Without this workaround
2829          * we will have issues with VLAN tag stripping not being done for frames
2830          * that are only arriving because we are the default pool
2831          */
2832         if (hw->mac.type < e1000_82576)
2833                 return;
2834
2835         vmolr |= rd32(E1000_VMOLR(vfn)) &
2836                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2837         wr32(E1000_VMOLR(vfn), vmolr);
2838         igb_restore_vf_multicasts(adapter);
2839 }
2840
2841 /* Need to wait a few seconds after link up to get diagnostic information from
2842  * the phy */
2843 static void igb_update_phy_info(unsigned long data)
2844 {
2845         struct igb_adapter *adapter = (struct igb_adapter *) data;
2846         igb_get_phy_info(&adapter->hw);
2847 }
2848
2849 /**
2850  * igb_has_link - check shared code for link and determine up/down
2851  * @adapter: pointer to driver private info
2852  **/
2853 static bool igb_has_link(struct igb_adapter *adapter)
2854 {
2855         struct e1000_hw *hw = &adapter->hw;
2856         bool link_active = false;
2857         s32 ret_val = 0;
2858
2859         /* get_link_status is set on LSC (link status) interrupt or
2860          * rx sequence error interrupt.  get_link_status will stay
2861          * false until the e1000_check_for_link establishes link
2862          * true until the e1000_check_for_link establishes link
2863          */
2864         switch (hw->phy.media_type) {
2865         case e1000_media_type_copper:
2866                 if (hw->mac.get_link_status) {
2867                         ret_val = hw->mac.ops.check_for_link(hw);
2868                         link_active = !hw->mac.get_link_status;
2869                 } else {
2870                         link_active = true;
2871                 }
2872                 break;
2873         case e1000_media_type_internal_serdes:
2874                 ret_val = hw->mac.ops.check_for_link(hw);
2875                 link_active = hw->mac.serdes_has_link;
2876                 break;
2877         default:
2878         case e1000_media_type_unknown:
2879                 break;
2880         }
2881
2882         return link_active;
2883 }
2884
2885 /**
2886  * igb_watchdog - Timer Call-back
2887  * @data: pointer to adapter cast into an unsigned long
2888  **/
2889 static void igb_watchdog(unsigned long data)
2890 {
2891         struct igb_adapter *adapter = (struct igb_adapter *)data;
2892         /* Do the rest outside of interrupt context */
2893         schedule_work(&adapter->watchdog_task);
2894 }
2895
2896 static void igb_watchdog_task(struct work_struct *work)
2897 {
2898         struct igb_adapter *adapter = container_of(work,
2899                                         struct igb_adapter, watchdog_task);
2900         struct e1000_hw *hw = &adapter->hw;
2901         struct net_device *netdev = adapter->netdev;
2902         struct igb_ring *tx_ring = adapter->tx_ring;
2903         u32 link;
2904         int i;
2905
2906         link = igb_has_link(adapter);
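             /* link is unchanged (carrier already up and still up); skip the
              * link transition handling and go straight to stats maintenance */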
2907         if ((netif_carrier_ok(netdev)) && link)
2908                 goto link_up;
2909
2910         if (link) {
2911                 if (!netif_carrier_ok(netdev)) {
2912                         u32 ctrl;
2913                         hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2914                                                    &adapter->link_speed,
2915                                                    &adapter->link_duplex);
2916
2917                         ctrl = rd32(E1000_CTRL);
2918                         /* Link status message must follow this format */
2919                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2920                                  "Flow Control: %s\n",
2921                                  netdev->name,
2922                                  adapter->link_speed,
2923                                  adapter->link_duplex == FULL_DUPLEX ?
2924                                  "Full Duplex" : "Half Duplex",
2925                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2926                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2927                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2928                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2929
2930                         /* tweak tx_queue_len according to speed/duplex and
2931                          * adjust the timeout factor */
2932                         netdev->tx_queue_len = adapter->tx_queue_len;
2933                         adapter->tx_timeout_factor = 1;
2934                         switch (adapter->link_speed) {
2935                         case SPEED_10:
2936                                 netdev->tx_queue_len = 10;
2937                                 adapter->tx_timeout_factor = 14;
2938                                 break;
2939                         case SPEED_100:
2940                                 netdev->tx_queue_len = 100;
2941                                 /* maybe add some timeout factor ? */
2942                                 break;
2943                         }
2944
2945                         netif_carrier_on(netdev);
2946
2947                         igb_ping_all_vfs(adapter);
2948
2949                         /* link state has changed, schedule phy info update */
2950                         if (!test_bit(__IGB_DOWN, &adapter->state))
2951                                 mod_timer(&adapter->phy_info_timer,
2952                                           round_jiffies(jiffies + 2 * HZ));
2953                 }
2954         } else {
2955                 if (netif_carrier_ok(netdev)) {
2956                         adapter->link_speed = 0;
2957                         adapter->link_duplex = 0;
2958                         /* Link status message must follow this format */
2959                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2960                                netdev->name);
2961                         netif_carrier_off(netdev);
2962
2963                         igb_ping_all_vfs(adapter);
2964
2965                         /* link state has changed, schedule phy info update */
2966                         if (!test_bit(__IGB_DOWN, &adapter->state))
2967                                 mod_timer(&adapter->phy_info_timer,
2968                                           round_jiffies(jiffies + 2 * HZ));
2969                 }
2970         }
2971
2972 link_up:
2973         igb_update_stats(adapter);
2974
2975         hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
2976         adapter->tpt_old = adapter->stats.tpt;
2977         hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
2978         adapter->colc_old = adapter->stats.colc;
2979
2980         adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
2981         adapter->gorc_old = adapter->stats.gorc;
2982         adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
2983         adapter->gotc_old = adapter->stats.gotc;
2984
2985         igb_update_adaptive(&adapter->hw);
2986
2987         if (!netif_carrier_ok(netdev)) {
2988                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2989                         /* We've lost link, so the controller stops DMA,
2990                          * but we've got queued Tx work that's never going
2991                          * to get done, so reset controller to flush Tx.
2992                          * (Do the reset outside of interrupt context). */
2993                         adapter->tx_timeout_count++;
2994                         schedule_work(&adapter->reset_task);
2995                         /* return immediately since reset is imminent */
2996                         return;
2997                 }
2998         }
2999
3000         /* Cause software interrupt to ensure rx ring is cleaned */
3001         if (adapter->msix_entries) {
3002                 u32 eics = 0;
3003                 for (i = 0; i < adapter->num_q_vectors; i++) {
3004                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3005                         eics |= q_vector->eims_value;
3006                 }
3007                 wr32(E1000_EICS, eics);
3008         } else {
3009                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3010         }
3011
3012         /* Force detection of hung controller every watchdog period */
3013         tx_ring->detect_tx_hung = true;
3014
3015         /* Reset the timer */
3016         if (!test_bit(__IGB_DOWN, &adapter->state))
3017                 mod_timer(&adapter->watchdog_timer,
3018                           round_jiffies(jiffies + 2 * HZ));
3019 }
3020
3021 enum latency_range {
3022         lowest_latency = 0,
3023         low_latency = 1,
3024         bulk_latency = 2,
3025         latency_invalid = 255
3026 };
3027
3028 /**
3029  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3030  *
3031  *      Stores a new ITR value based strictly on packet size.  This
3032  *      algorithm is less sophisticated than that used in igb_update_itr,
3033  *      due to the difficulty of synchronizing statistics across multiple
3034  *      were determined based on theoretical maximum wire speed and testing
3035  *      were determined based on theoretical maximum wire speed and testing
3036  *      data, in order to minimize response time while increasing bulk
3037  *      throughput.
3038  *      This functionality is controlled by the InterruptThrottleRate module
3039  *      parameter (see igb_param.c)
3040  *      NOTE:  This function is called only when operating in a multiqueue
3041  *             receive environment.
3042  * @q_vector: pointer to q_vector
3043  **/
3044 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3045 {
3046         int new_val = q_vector->itr_val;
3047         int avg_wire_size = 0;
3048         struct igb_adapter *adapter = q_vector->adapter;
3049
3050         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3051          * ints/sec - an ITR value of 976 (~250 usec).
3052          */
3053         if (adapter->link_speed != SPEED_1000) {
3054                 new_val = 976;
3055                 goto set_itr_val;
3056         }
3057
3058         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3059                 struct igb_ring *ring = q_vector->rx_ring;
3060                 avg_wire_size = ring->total_bytes / ring->total_packets;
3061         }
3062
3063         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3064                 struct igb_ring *ring = q_vector->tx_ring;
3065                 avg_wire_size = max_t(u32, avg_wire_size,
3066                                       (ring->total_bytes /
3067                                        ring->total_packets));
3068         }
3069
3070         /* if avg_wire_size isn't set no work was done */
3071         if (!avg_wire_size)
3072                 goto clear_counts;
3073
3074         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3075         avg_wire_size += 24;
3076
3077         /* Don't starve jumbo frames */
3078         avg_wire_size = min(avg_wire_size, 3000);
3079
3080         /* Give a little boost to mid-size frames */
3081         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3082                 new_val = avg_wire_size / 3;
3083         else
3084                 new_val = avg_wire_size / 2;
3085
3086 set_itr_val:
3087         if (new_val != q_vector->itr_val) {
3088                 q_vector->itr_val = new_val;
3089                 q_vector->set_itr = 1;
3090         }
3091 clear_counts:
3092         if (q_vector->rx_ring) {
3093                 q_vector->rx_ring->total_bytes = 0;
3094                 q_vector->rx_ring->total_packets = 0;
3095         }
3096         if (q_vector->tx_ring) {
3097                 q_vector->tx_ring->total_bytes = 0;
3098                 q_vector->tx_ring->total_packets = 0;
3099         }
3100 }
3101
3102 /**
3103  * igb_update_itr - update the dynamic ITR value based on statistics
3104  *      Stores a new ITR value based on packets and byte
3105  *      counts during the last interrupt.  The advantage of per interrupt
3106  *      computation is faster updates and more accurate ITR for the current
3107  *      traffic pattern.  Constants in this function were computed
3108  *      based on theoretical maximum wire speed and thresholds were set based
3109  *      on testing data as well as attempting to minimize response time
3110  *      while increasing bulk throughput.
3111  *      This functionality is controlled by the InterruptThrottleRate module
3112  *      parameter (see igb_param.c)
3113  *      NOTE:  These calculations are only valid when operating in a single-
3114  *             queue environment.
3115  * @adapter: pointer to adapter
3116  * @itr_setting: current q_vector->itr_val
3117  * @packets: the number of packets during this measurement interval
3118  * @bytes: the number of bytes during this measurement interval
3119  **/
3120 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3121                                    int packets, int bytes)
3122 {
3123         unsigned int retval = itr_setting;
3124
3125         if (packets == 0)
3126                 goto update_itr_done;
3127
3128         switch (itr_setting) {
3129         case lowest_latency:
3130                 /* handle TSO and jumbo frames */
3131                 if (bytes/packets > 8000)
3132                         retval = bulk_latency;
3133                 else if ((packets < 5) && (bytes > 512))
3134                         retval = low_latency;
3135                 break;
3136         case low_latency:  /* 50 usec aka 20000 ints/s */
3137                 if (bytes > 10000) {
3138                         /* this if handles the TSO accounting */
3139                         if (bytes/packets > 8000) {
3140                                 retval = bulk_latency;
3141                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3142                                 retval = bulk_latency;
3143                         } else if ((packets > 35)) {
3144                                 retval = lowest_latency;
3145                         }
3146                 } else if (bytes/packets > 2000) {
3147                         retval = bulk_latency;
3148                 } else if (packets <= 2 && bytes < 512) {
3149                         retval = lowest_latency;
3150                 }
3151                 break;
3152         case bulk_latency: /* 250 usec aka 4000 ints/s */
3153                 if (bytes > 25000) {
3154                         if (packets > 35)
3155                                 retval = low_latency;
3156                 } else if (bytes < 1500) {
3157                         retval = low_latency;
3158                 }
3159                 break;
3160         }
3161
3162 update_itr_done:
3163         return retval;
3164 }
3165
3166 static void igb_set_itr(struct igb_adapter *adapter)
3167 {
3168         struct igb_q_vector *q_vector = adapter->q_vector[0];
3169         u16 current_itr;
3170         u32 new_itr = q_vector->itr_val;
3171
3172         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3173         if (adapter->link_speed != SPEED_1000) {
3174                 current_itr = 0;
3175                 new_itr = 4000;
3176                 goto set_itr_now;
3177         }
3178
3179         adapter->rx_itr = igb_update_itr(adapter,
3180                                     adapter->rx_itr,
3181                                     adapter->rx_ring->total_packets,
3182                                     adapter->rx_ring->total_bytes);
3183
3184         adapter->tx_itr = igb_update_itr(adapter,
3185                                     adapter->tx_itr,
3186                                     adapter->tx_ring->total_packets,
3187                                     adapter->tx_ring->total_bytes);
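             /* take the larger (more bulk-oriented) of the Rx and Tx decisions */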
3188         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3189
3190         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3191         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3192                 current_itr = low_latency;
3193
3194         switch (current_itr) {
3195         /* counts and packets in update_itr are dependent on these numbers */
3196         case lowest_latency:
3197                 new_itr = 56;  /* aka 70,000 ints/sec */
3198                 break;
3199         case low_latency:
3200                 new_itr = 196; /* aka 20,000 ints/sec */
3201                 break;
3202         case bulk_latency:
3203                 new_itr = 980; /* aka 4,000 ints/sec */
3204                 break;
3205         default:
3206                 break;
3207         }
3208
3209 set_itr_now:
3210         adapter->rx_ring->total_bytes = 0;
3211         adapter->rx_ring->total_packets = 0;
3212         adapter->tx_ring->total_bytes = 0;
3213         adapter->tx_ring->total_packets = 0;
3214
3215         if (new_itr != q_vector->itr_val) {
3216                 /* this attempts to bias the interrupt rate towards Bulk
3217                  * by adding intermediate steps when interrupt rate is
3218                  * increasing */
3219                 new_itr = new_itr > q_vector->itr_val ?
3220                              max((new_itr * q_vector->itr_val) /
3221                                  (new_itr + (q_vector->itr_val >> 2)),
3222                                  new_itr) :
3223                              new_itr;
3224                 /* Don't write the value here; it resets the adapter's
3225                  * internal timer, and causes us to delay far longer than
3226                  * we should between interrupts.  Instead, we write the ITR
3227                  * value at the beginning of the next interrupt so the timing
3228                  * ends up being correct.
3229                  */
3230                 q_vector->itr_val = new_itr;
3231                 q_vector->set_itr = 1;
3232         }
3233
3234         return;
3235 }
3236
3237 #define IGB_TX_FLAGS_CSUM               0x00000001
3238 #define IGB_TX_FLAGS_VLAN               0x00000002
3239 #define IGB_TX_FLAGS_TSO                0x00000004
3240 #define IGB_TX_FLAGS_IPV4               0x00000008
3241 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3242 #define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
3243 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3244
3245 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3246                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3247 {
3248         struct e1000_adv_tx_context_desc *context_desc;
3249         unsigned int i;
3250         int err;
3251         struct igb_buffer *buffer_info;
3252         u32 info = 0, tu_cmd = 0;
3253         u32 mss_l4len_idx, l4len;
3254         *hdr_len = 0;
3255
3256         if (skb_header_cloned(skb)) {
3257                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3258                 if (err)
3259                         return err;
3260         }
3261
3262         l4len = tcp_hdrlen(skb);
3263         *hdr_len += l4len;
3264
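             /* zero out the IP/IPv6 length fields and seed the TCP checksum with
              * the pseudo-header checksum so the hardware can finish it per segment */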
3265         if (skb->protocol == htons(ETH_P_IP)) {
3266                 struct iphdr *iph = ip_hdr(skb);
3267                 iph->tot_len = 0;
3268                 iph->check = 0;
3269                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3270                                                          iph->daddr, 0,
3271                                                          IPPROTO_TCP,
3272                                                          0);
3273         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3274                 ipv6_hdr(skb)->payload_len = 0;
3275                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3276                                                        &ipv6_hdr(skb)->daddr,
3277                                                        0, IPPROTO_TCP, 0);
3278         }
3279
3280         i = tx_ring->next_to_use;
3281
3282         buffer_info = &tx_ring->buffer_info[i];
3283         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3284         /* VLAN MACLEN IPLEN */
3285         if (tx_flags & IGB_TX_FLAGS_VLAN)
3286                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3287         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3288         *hdr_len += skb_network_offset(skb);
3289         info |= skb_network_header_len(skb);
3290         *hdr_len += skb_network_header_len(skb);
3291         context_desc->vlan_macip_lens = cpu_to_le32(info);
3292
3293         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3294         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3295
3296         if (skb->protocol == htons(ETH_P_IP))
3297                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3298         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3299
3300         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3301
3302         /* MSS L4LEN IDX */
3303         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3304         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3305
3306         /* For 82575, context index must be unique per ring. */
3307         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3308                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3309
3310         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3311         context_desc->seqnum_seed = 0;
3312
3313         buffer_info->time_stamp = jiffies;
3314         buffer_info->next_to_watch = i;
3315         buffer_info->dma = 0;
3316         i++;
3317         if (i == tx_ring->count)
3318                 i = 0;
3319
3320         tx_ring->next_to_use = i;
3321
3322         return true;
3323 }
3324
3325 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3326                                    struct sk_buff *skb, u32 tx_flags)
3327 {
3328         struct e1000_adv_tx_context_desc *context_desc;
3329         struct pci_dev *pdev = tx_ring->pdev;
3330         struct igb_buffer *buffer_info;
3331         u32 info = 0, tu_cmd = 0;
3332         unsigned int i;
3333
3334         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3335             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3336                 i = tx_ring->next_to_use;
3337                 buffer_info = &tx_ring->buffer_info[i];
3338                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3339
3340                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3341                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3342                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3343                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3344                         info |= skb_network_header_len(skb);
3345
3346                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3347
3348                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3349
3350                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3351                         __be16 protocol;
3352
3353                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3354                                 const struct vlan_ethhdr *vhdr =
3355                                           (const struct vlan_ethhdr*)skb->data;
3356
3357                                 protocol = vhdr->h_vlan_encapsulated_proto;
3358                         } else {
3359                                 protocol = skb->protocol;
3360                         }
3361
3362                         switch (protocol) {
3363                         case cpu_to_be16(ETH_P_IP):
3364                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3365                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3366                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3367                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3368                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3369                                 break;
3370                         case cpu_to_be16(ETH_P_IPV6):
3371                                 /* XXX what about other V6 headers?? */
3372                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3373                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3374                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3375                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3376                                 break;
3377                         default:
3378                                 if (unlikely(net_ratelimit()))
3379                                         dev_warn(&pdev->dev,
3380                                             "partial checksum but proto=%x!\n",
3381                                             skb->protocol);
3382                                 break;
3383                         }
3384                 }
3385
3386                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3387                 context_desc->seqnum_seed = 0;
3388                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3389                         context_desc->mss_l4len_idx =
3390                                 cpu_to_le32(tx_ring->reg_idx << 4);
3391
3392                 buffer_info->time_stamp = jiffies;
3393                 buffer_info->next_to_watch = i;
3394                 buffer_info->dma = 0;
3395
3396                 i++;
3397                 if (i == tx_ring->count)
3398                         i = 0;
3399                 tx_ring->next_to_use = i;
3400
3401                 return true;
3402         }
3403         return false;
3404 }
3405
3406 #define IGB_MAX_TXD_PWR 16
3407 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3408
3409 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3410                                  unsigned int first)
3411 {
3412         struct igb_buffer *buffer_info;
3413         struct pci_dev *pdev = tx_ring->pdev;
3414         unsigned int len = skb_headlen(skb);
3415         unsigned int count = 0, i;
3416         unsigned int f;
3417         dma_addr_t *map;
3418
3419         i = tx_ring->next_to_use;
3420
3421         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3422                 dev_err(&pdev->dev, "TX DMA map failed\n");
3423                 return 0;
3424         }
3425
3426         map = skb_shinfo(skb)->dma_maps;
3427
3428         buffer_info = &tx_ring->buffer_info[i];
3429         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3430         buffer_info->length = len;
3431         /* set time_stamp *before* dma to help avoid a possible race */
3432         buffer_info->time_stamp = jiffies;
3433         buffer_info->next_to_watch = i;
3434         buffer_info->dma = skb_shinfo(skb)->dma_head;
3435
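             /* skb_dma_map() above mapped the head and all frags; record one
              * buffer_info entry (with its DMA address) per fragment below */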
3436         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3437                 struct skb_frag_struct *frag;
3438
3439                 i++;
3440                 if (i == tx_ring->count)
3441                         i = 0;
3442
3443                 frag = &skb_shinfo(skb)->frags[f];
3444                 len = frag->size;
3445
3446                 buffer_info = &tx_ring->buffer_info[i];
3447                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3448                 buffer_info->length = len;
3449                 buffer_info->time_stamp = jiffies;
3450                 buffer_info->next_to_watch = i;
3451                 buffer_info->dma = map[count];
3452                 count++;
3453         }
3454
3455         tx_ring->buffer_info[i].skb = skb;
3456         tx_ring->buffer_info[first].next_to_watch = i;
3457
3458         return count + 1;
3459 }
3460
3461 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3462                                     int tx_flags, int count, u32 paylen,
3463                                     u8 hdr_len)
3464 {
3465         union e1000_adv_tx_desc *tx_desc = NULL;
3466         struct igb_buffer *buffer_info;
3467         u32 olinfo_status = 0, cmd_type_len;
3468         unsigned int i;
3469
3470         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3471                         E1000_ADVTXD_DCMD_DEXT);
3472
3473         if (tx_flags & IGB_TX_FLAGS_VLAN)
3474                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3475
3476         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3477                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3478
3479         if (tx_flags & IGB_TX_FLAGS_TSO) {
3480                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3481
3482                 /* insert tcp checksum */
3483                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3484
3485                 /* insert ip checksum */
3486                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3487                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3488
3489         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3490                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3491         }
3492
3493         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3494             (tx_flags & (IGB_TX_FLAGS_CSUM |
3495                          IGB_TX_FLAGS_TSO |
3496                          IGB_TX_FLAGS_VLAN)))
3497                 olinfo_status |= tx_ring->reg_idx << 4;
3498
3499         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3500
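             /* write one advanced data descriptor for each mapped buffer */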
3501         i = tx_ring->next_to_use;
3502         while (count--) {
3503                 buffer_info = &tx_ring->buffer_info[i];
3504                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3505                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3506                 tx_desc->read.cmd_type_len =
3507                         cpu_to_le32(cmd_type_len | buffer_info->length);
3508                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3509                 i++;
3510                 if (i == tx_ring->count)
3511                         i = 0;
3512         }
3513
3514         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3515         /* Force memory writes to complete before letting h/w
3516          * know there are new descriptors to fetch.  (Only
3517          * applicable for weak-ordered memory model archs,
3518          * such as IA-64). */
3519         wmb();
3520
3521         tx_ring->next_to_use = i;
3522         writel(i, tx_ring->tail);
3523         /* we need this if more than one processor can write to our tail
3524          * at a time; it synchronizes IO on IA64/Altix systems */
3525         mmiowb();
3526 }
3527
3528 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3529 {
3530         struct net_device *netdev = tx_ring->netdev;
3531
3532         netif_stop_subqueue(netdev, tx_ring->queue_index);
3533
3534         /* Herbert's original patch had:
3535          *  smp_mb__after_netif_stop_queue();
3536          * but since that doesn't exist yet, just open code it. */
3537         smp_mb();
3538
3539         /* We need to check again in case another CPU has just
3540          * made room available. */
3541         if (igb_desc_unused(tx_ring) < size)
3542                 return -EBUSY;
3543
3544         /* A reprieve! */
3545         netif_wake_subqueue(netdev, tx_ring->queue_index);
3546         tx_ring->tx_stats.restart_queue++;
3547         return 0;
3548 }
3549
3550 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3551 {
3552         if (igb_desc_unused(tx_ring) >= size)
3553                 return 0;
3554         return __igb_maybe_stop_tx(tx_ring, size);
3555 }
3556
3557 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3558                                     struct igb_ring *tx_ring)
3559 {
3560         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3561         unsigned int first;
3562         unsigned int tx_flags = 0;
3563         u8 hdr_len = 0;
3564         int count = 0;
3565         int tso = 0;
3566         union skb_shared_tx *shtx = skb_tx(skb);
3567
3568         /* need: 1 descriptor per page,
3569          *       + 2 desc gap to keep tail from touching head,
3570          *       + 1 desc for skb->data,
3571          *       + 1 desc for context descriptor,
3572          * otherwise try next time */
3573         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3574                 /* this is a hard error */
3575                 return NETDEV_TX_BUSY;
3576         }
3577
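             /* the stack requested a hardware Tx timestamp for this skb; flag it
              * so igb_tx_queue_adv() sets the MAC timestamp bit in the descriptor */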
3578         if (unlikely(shtx->hardware)) {
3579                 shtx->in_progress = 1;
3580                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3581         }
3582
3583         if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3584                 tx_flags |= IGB_TX_FLAGS_VLAN;
3585                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3586         }
3587
3588         if (skb->protocol == htons(ETH_P_IP))
3589                 tx_flags |= IGB_TX_FLAGS_IPV4;
3590
3591         first = tx_ring->next_to_use;
3592         if (skb_is_gso(skb)) {
3593                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3594                 if (tso < 0) {
3595                         dev_kfree_skb_any(skb);
3596                         return NETDEV_TX_OK;
3597                 }
3598         }
3599
3600         if (tso)
3601                 tx_flags |= IGB_TX_FLAGS_TSO;
3602         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3603                  (skb->ip_summed == CHECKSUM_PARTIAL))
3604                 tx_flags |= IGB_TX_FLAGS_CSUM;
3605
3606         /*
3607          * count reflects descriptors mapped; if 0, a mapping error
3608          * has occurred and we need to rewind the descriptor queue
3609          */
3610         count = igb_tx_map_adv(tx_ring, skb, first);
3611
3612         if (!count) {
3613                 dev_kfree_skb_any(skb);
3614                 tx_ring->buffer_info[first].time_stamp = 0;
3615                 tx_ring->next_to_use = first;
3616                 return NETDEV_TX_OK;
3617         }
3618
3619         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3620
3621         /* Make sure there is space in the ring for the next send. */
3622         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3623
3624         return NETDEV_TX_OK;
3625 }
3626
3627 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3628                                       struct net_device *netdev)
3629 {
3630         struct igb_adapter *adapter = netdev_priv(netdev);
3631         struct igb_ring *tx_ring;
3632         int r_idx = 0;
3633
3634         if (test_bit(__IGB_DOWN, &adapter->state)) {
3635                 dev_kfree_skb_any(skb);
3636                 return NETDEV_TX_OK;
3637         }
3638
3639         if (skb->len <= 0) {
3640                 dev_kfree_skb_any(skb);
3641                 return NETDEV_TX_OK;
3642         }
3643
3644         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3645         tx_ring = adapter->multi_tx_table[r_idx];
3646
3647         /* This goes back to the question of how to logically map a tx queue
3648          * to a flow.  Right now, performance is impacted slightly negatively
3649          * if using multiple tx queues.  If the stack breaks away from a
3650          * single qdisc implementation, we can look at this again. */
3651         return igb_xmit_frame_ring_adv(skb, tx_ring);
3652 }
3653
3654 /**
3655  * igb_tx_timeout - Respond to a Tx Hang
3656  * @netdev: network interface device structure
3657  **/
3658 static void igb_tx_timeout(struct net_device *netdev)
3659 {
3660         struct igb_adapter *adapter = netdev_priv(netdev);
3661         struct e1000_hw *hw = &adapter->hw;
3662
3663         /* Do the reset outside of interrupt context */
3664         adapter->tx_timeout_count++;
3665         schedule_work(&adapter->reset_task);
3666         wr32(E1000_EICS,
3667              (adapter->eims_enable_mask & ~adapter->eims_other));
3668 }
3669
3670 static void igb_reset_task(struct work_struct *work)
3671 {
3672         struct igb_adapter *adapter;
3673         adapter = container_of(work, struct igb_adapter, reset_task);
3674
3675         igb_reinit_locked(adapter);
3676 }
3677
3678 /**
3679  * igb_get_stats - Get System Network Statistics
3680  * @netdev: network interface device structure
3681  *
3682  * Returns the address of the device statistics structure.
3683  * The statistics are actually updated from the timer callback.
3684  **/
3685 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3686 {
3687         /* only return the current stats */
3688         return &netdev->stats;
3689 }
3690
3691 /**
3692  * igb_change_mtu - Change the Maximum Transfer Unit
3693  * @netdev: network interface device structure
3694  * @new_mtu: new value for maximum frame size
3695  *
3696  * Returns 0 on success, negative on failure
3697  **/
3698 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3699 {
3700         struct igb_adapter *adapter = netdev_priv(netdev);
3701         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3702         u32 rx_buffer_len, i;
3703
3704         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3705             (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3706                 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3707                 return -EINVAL;
3708         }
3709
3710         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3711                 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3712                 return -EINVAL;
3713         }
3714
3715         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3716                 msleep(1);
3717
3718         /* igb_down has a dependency on max_frame_size */
3719         adapter->max_frame_size = max_frame;
3720         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3721          * means we reserve 2 more; this pushes us to allocate from the next
3722          * larger slab size.
3723          * i.e. RXBUFFER_2048 --> size-4096 slab
3724          */
3725
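             /* pick the smallest Rx buffer size that holds a full frame; frames
              * larger than the standard VLAN size fall back to the 128-byte buffer
              * (headers only), with the payload received into page buffers */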
3726         if (max_frame <= IGB_RXBUFFER_1024)
3727                 rx_buffer_len = IGB_RXBUFFER_1024;
3728         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3729                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3730         else
3731                 rx_buffer_len = IGB_RXBUFFER_128;
3732
3733         if (netif_running(netdev))
3734                 igb_down(adapter);
3735
3736         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3737                  netdev->mtu, new_mtu);
3738         netdev->mtu = new_mtu;
3739
3740         for (i = 0; i < adapter->num_rx_queues; i++)
3741                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3742
3743         if (netif_running(netdev))
3744                 igb_up(adapter);
3745         else
3746                 igb_reset(adapter);
3747
3748         clear_bit(__IGB_RESETTING, &adapter->state);
3749
3750         return 0;
3751 }
3752
3753 /**
3754  * igb_update_stats - Update the board statistics counters
3755  * @adapter: board private structure
3756  **/
3757
3758 void igb_update_stats(struct igb_adapter *adapter)
3759 {
3760         struct net_device *netdev = adapter->netdev;
3761         struct e1000_hw *hw = &adapter->hw;
3762         struct pci_dev *pdev = adapter->pdev;
3763         u16 phy_tmp;
3764
3765 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3766
3767         /*
3768          * Prevent stats update while adapter is being reset, or if the pci
3769          * connection is down.
3770          */
3771         if (adapter->link_speed == 0)
3772                 return;
3773         if (pci_channel_offline(pdev))
3774                 return;
3775
3776         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3777         adapter->stats.gprc += rd32(E1000_GPRC);
3778         adapter->stats.gorc += rd32(E1000_GORCL);
3779         rd32(E1000_GORCH); /* clear GORCL */
3780         adapter->stats.bprc += rd32(E1000_BPRC);
3781         adapter->stats.mprc += rd32(E1000_MPRC);
3782         adapter->stats.roc += rd32(E1000_ROC);
3783
3784         adapter->stats.prc64 += rd32(E1000_PRC64);
3785         adapter->stats.prc127 += rd32(E1000_PRC127);
3786         adapter->stats.prc255 += rd32(E1000_PRC255);
3787         adapter->stats.prc511 += rd32(E1000_PRC511);
3788         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3789         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3790         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3791         adapter->stats.sec += rd32(E1000_SEC);
3792
3793         adapter->stats.mpc += rd32(E1000_MPC);
3794         adapter->stats.scc += rd32(E1000_SCC);
3795         adapter->stats.ecol += rd32(E1000_ECOL);
3796         adapter->stats.mcc += rd32(E1000_MCC);
3797         adapter->stats.latecol += rd32(E1000_LATECOL);
3798         adapter->stats.dc += rd32(E1000_DC);
3799         adapter->stats.rlec += rd32(E1000_RLEC);
3800         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3801         adapter->stats.xontxc += rd32(E1000_XONTXC);
3802         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3803         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3804         adapter->stats.fcruc += rd32(E1000_FCRUC);
3805         adapter->stats.gptc += rd32(E1000_GPTC);
3806         adapter->stats.gotc += rd32(E1000_GOTCL);
3807         rd32(E1000_GOTCH); /* clear GOTCL */
3808         adapter->stats.rnbc += rd32(E1000_RNBC);
3809         adapter->stats.ruc += rd32(E1000_RUC);
3810         adapter->stats.rfc += rd32(E1000_RFC);
3811         adapter->stats.rjc += rd32(E1000_RJC);
3812         adapter->stats.tor += rd32(E1000_TORH);
3813         adapter->stats.tot += rd32(E1000_TOTH);
3814         adapter->stats.tpr += rd32(E1000_TPR);
3815
3816         adapter->stats.ptc64 += rd32(E1000_PTC64);
3817         adapter->stats.ptc127 += rd32(E1000_PTC127);
3818         adapter->stats.ptc255 += rd32(E1000_PTC255);
3819         adapter->stats.ptc511 += rd32(E1000_PTC511);
3820         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3821         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3822
3823         adapter->stats.mptc += rd32(E1000_MPTC);
3824         adapter->stats.bptc += rd32(E1000_BPTC);
3825
3826         /* used for adaptive IFS */
3827
3828         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3829         adapter->stats.tpt += hw->mac.tx_packet_delta;
3830         hw->mac.collision_delta = rd32(E1000_COLC);
3831         adapter->stats.colc += hw->mac.collision_delta;
3832
3833         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3834         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3835         adapter->stats.tncrs += rd32(E1000_TNCRS);
3836         adapter->stats.tsctc += rd32(E1000_TSCTC);
3837         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3838
3839         adapter->stats.iac += rd32(E1000_IAC);
3840         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3841         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3842         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3843         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3844         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3845         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3846         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3847         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3848
3849         /* Fill out the OS statistics structure */
3850         netdev->stats.multicast = adapter->stats.mprc;
3851         netdev->stats.collisions = adapter->stats.colc;
3852
3853         /* Rx Errors */
3854
3855         if (hw->mac.type != e1000_82575) {
3856                 u32 rqdpc_tmp;
3857                 u64 rqdpc_total = 0;
3858                 int i;
3859                 /* Read out per-RX-queue drop stats.  Note that RQDPC (Receive
3860                  * Queue Drop Packet Count) only gets incremented if the
3861                  * DROP_EN bit is set (in the SRRCTL register for that
3862                  * queue).  If the DROP_EN bit is NOT set, then a somewhat
3863                  * equivalent count is stored in RNBC (not on a per-queue basis).
3864                  * Also note the drop count is due to lack of available
3865                  * descriptors.
3866                  */
3867                 for (i = 0; i < adapter->num_rx_queues; i++) {
3868                         rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3869                         adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3870                         rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3871                 }
3872                 netdev->stats.rx_fifo_errors = rqdpc_total;
3873         }
3874
3875         /* Note RNBC (Receive No Buffers Count) is not an exact
3876          * drop count as the hardware FIFO might save the day.  That's
3877          * one of the reasons for adding it to rx_fifo_errors, as it is
3878          * potentially not a true drop.
3879          */
3880         netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3881
3882         /* RLEC on some newer hardware can be incorrect so build
3883          * our own version based on RUC and ROC */
3884         netdev->stats.rx_errors = adapter->stats.rxerrc +
3885                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3886                 adapter->stats.ruc + adapter->stats.roc +
3887                 adapter->stats.cexterr;
3888         netdev->stats.rx_length_errors = adapter->stats.ruc +
3889                                               adapter->stats.roc;
3890         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3891         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3892         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3893
3894         /* Tx Errors */
3895         netdev->stats.tx_errors = adapter->stats.ecol +
3896                                        adapter->stats.latecol;
3897         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3898         netdev->stats.tx_window_errors = adapter->stats.latecol;
3899         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3900
3901         /* Tx Dropped needs to be maintained elsewhere */
3902
3903         /* Phy Stats */
3904         if (hw->phy.media_type == e1000_media_type_copper) {
3905                 if ((adapter->link_speed == SPEED_1000) &&
3906                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3907                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3908                         adapter->phy_stats.idle_errors += phy_tmp;
3909                 }
3910         }
3911
3912         /* Management Stats */
3913         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3914         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3915         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3916 }
3917
3918 static irqreturn_t igb_msix_other(int irq, void *data)
3919 {
3920         struct igb_adapter *adapter = data;
3921         struct e1000_hw *hw = &adapter->hw;
3922         u32 icr = rd32(E1000_ICR);
3923         /* reading ICR causes bit 31 of EICR to be cleared */
3924
3925         if (icr & E1000_ICR_DOUTSYNC) {
3926                 /* HW is reporting DMA is out of sync */
3927                 adapter->stats.doosync++;
3928         }
3929
3930         /* Check for a mailbox event */
3931         if (icr & E1000_ICR_VMMB)
3932                 igb_msg_task(adapter);
3933
3934         if (icr & E1000_ICR_LSC) {
3935                 hw->mac.get_link_status = 1;
3936                 /* guard against interrupt when we're going down */
3937                 if (!test_bit(__IGB_DOWN, &adapter->state))
3938                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3939         }
3940
3941         wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3942         wr32(E1000_EIMS, adapter->eims_other);
3943
3944         return IRQ_HANDLED;
3945 }
3946
3947 static void igb_write_itr(struct igb_q_vector *q_vector)
3948 {
3949         u32 itr_val = q_vector->itr_val & 0x7FFC;
3950
3951         if (!q_vector->set_itr)
3952                 return;
3953
3954         if (!itr_val)
3955                 itr_val = 0x4;
3956
3957         if (q_vector->itr_shift)
3958                 itr_val |= itr_val << q_vector->itr_shift;
3959         else
3960                 itr_val |= 0x8000000;
3961
3962         writel(itr_val, q_vector->itr_register);
3963         q_vector->set_itr = 0;
3964 }
3965
3966 static irqreturn_t igb_msix_ring(int irq, void *data)
3967 {
3968         struct igb_q_vector *q_vector = data;
3969
3970         /* Write the ITR value calculated from the previous interrupt. */
3971         igb_write_itr(q_vector);
3972
3973         napi_schedule(&q_vector->napi);
3974
3975         return IRQ_HANDLED;
3976 }
3977
3978 #ifdef CONFIG_IGB_DCA
3979 static void igb_update_dca(struct igb_q_vector *q_vector)
3980 {
3981         struct igb_adapter *adapter = q_vector->adapter;
3982         struct e1000_hw *hw = &adapter->hw;
3983         int cpu = get_cpu();
3984
3985         if (q_vector->cpu == cpu)
3986                 goto out_no_update;
3987
3988         if (q_vector->tx_ring) {
3989                 int q = q_vector->tx_ring->reg_idx;
3990                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
3991                 if (hw->mac.type == e1000_82575) {
3992                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
3993                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
3994                 } else {
3995                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
3996                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
3997                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
3998                 }
3999                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4000                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4001         }
4002         if (q_vector->rx_ring) {
4003                 int q = q_vector->rx_ring->reg_idx;
4004                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4005                 if (hw->mac.type == e1000_82575) {
4006                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4007                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4008                 } else {
4009                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4010                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4011                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4012                 }
4013                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4014                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4015                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4016                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4017         }
4018         q_vector->cpu = cpu;
4019 out_no_update:
4020         put_cpu();
4021 }
4022
4023 static void igb_setup_dca(struct igb_adapter *adapter)
4024 {
4025         struct e1000_hw *hw = &adapter->hw;
4026         int i;
4027
4028         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4029                 return;
4030
4031         /* Always use CB2 mode, difference is masked in the CB driver. */
4032         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4033
4034         for (i = 0; i < adapter->num_q_vectors; i++) {
4035                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4036                 q_vector->cpu = -1;
4037                 igb_update_dca(q_vector);
4038         }
4039 }
4040
4041 static int __igb_notify_dca(struct device *dev, void *data)
4042 {
4043         struct net_device *netdev = dev_get_drvdata(dev);
4044         struct igb_adapter *adapter = netdev_priv(netdev);
4045         struct e1000_hw *hw = &adapter->hw;
4046         unsigned long event = *(unsigned long *)data;
4047
4048         switch (event) {
4049         case DCA_PROVIDER_ADD:
4050                 /* if already enabled, don't do it again */
4051                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4052                         break;
4053                 /* Always use CB2 mode, difference is masked
4054                  * in the CB driver. */
4055                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4056                 if (dca_add_requester(dev) == 0) {
4057                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4058                         dev_info(&adapter->pdev->dev, "DCA enabled\n");
4059                         igb_setup_dca(adapter);
4060                         break;
4061                 }
4062                 /* Fall Through since DCA is disabled. */
4063         case DCA_PROVIDER_REMOVE:
4064                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4065                         /* without this a class_device is left
4066                          * hanging around in the sysfs model */
4067                         dca_remove_requester(dev);
4068                         dev_info(&adapter->pdev->dev, "DCA disabled\n");
4069                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4070                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4071                 }
4072                 break;
4073         }
4074
4075         return 0;
4076 }
4077
4078 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4079                           void *p)
4080 {
4081         int ret_val;
4082
4083         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4084                                          __igb_notify_dca);
4085
4086         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4087 }
4088 #endif /* CONFIG_IGB_DCA */
4089
4090 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4091 {
4092         struct e1000_hw *hw = &adapter->hw;
4093         u32 ping;
4094         int i;
4095
4096         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4097                 ping = E1000_PF_CONTROL_MSG;
4098                 if (adapter->vf_data[i].clear_to_send)
4099                         ping |= E1000_VT_MSGTYPE_CTS;
4100                 igb_write_mbx(hw, &ping, 1, i);
4101         }
4102 }
4103
4104 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4105                                   u32 *msgbuf, u32 vf)
4106 {
4107         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4108         u16 *hash_list = (u16 *)&msgbuf[1];
4109         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4110         int i;
4111
4112         /* only up to 30 hash values supported */
4113         if (n > 30)
4114                 n = 30;
4115
4116         /* salt away the number of multicast addresses assigned
4117          * to this VF for later use to restore when the PF multicast
4118          * list changes
4119          */
4120         vf_data->num_vf_mc_hashes = n;
4121
4122         /* VFs are limited to using the MTA hash table for their multicast
4123          * addresses */
4124         for (i = 0; i < n; i++)
4125                 vf_data->vf_mc_hashes[i] = hash_list[i];
4126
4127         /* Flush and reset the mta with the new values */
4128         igb_set_rx_mode(adapter->netdev);
4129
4130         return 0;
4131 }
4132
4133 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4134 {
4135         struct e1000_hw *hw = &adapter->hw;
4136         struct vf_data_storage *vf_data;
4137         int i, j;
4138
4139         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4140                 vf_data = &adapter->vf_data[i];
4141                 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4142                         igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4143         }
4144 }
4145
4146 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4147 {
4148         struct e1000_hw *hw = &adapter->hw;
4149         u32 pool_mask, reg, vid;
4150         int i;
4151
4152         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4153
4154         /* Find the vlan filter for this id */
4155         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4156                 reg = rd32(E1000_VLVF(i));
4157
4158                 /* remove the vf from the pool */
4159                 reg &= ~pool_mask;
4160
4161                 /* if pool is empty then remove entry from vfta */
4162                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4163                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4164                         vid = reg & E1000_VLVF_VLANID_MASK;
4165                         igb_vfta_set(hw, vid, false);
4166                         reg = 0;
4167                 }
4168
4169                 wr32(E1000_VLVF(i), reg);
4170         }
4171
4172         adapter->vf_data[vf].vlans_enabled = 0;
4173 }
4174
4175 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4176 {
4177         struct e1000_hw *hw = &adapter->hw;
4178         u32 reg, i;
4179
4180         /* It is an error to call this function when VFs are not enabled */
4181         if (!adapter->vfs_allocated_count)
4182                 return -1;
4183
4184         /* Find the vlan filter for this id */
4185         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4186                 reg = rd32(E1000_VLVF(i));
4187                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4188                     vid == (reg & E1000_VLVF_VLANID_MASK))
4189                         break;
4190         }
4191
4192         if (add) {
4193                 if (i == E1000_VLVF_ARRAY_SIZE) {
4194                         /* Did not find a matching VLAN ID entry that was
4195                          * enabled.  Search for a free filter entry, i.e.
4196                          * one without the enable bit set
4197                          */
4198                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4199                                 reg = rd32(E1000_VLVF(i));
4200                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4201                                         break;
4202                         }
4203                 }
4204                 if (i < E1000_VLVF_ARRAY_SIZE) {
4205                         /* Found an enabled/available entry */
4206                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4207
4208                         /* if !enabled we need to set this up in vfta */
4209                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4210                                 /* add VID to filter table; if the bit is already
4211                                  * set, the PF must have added it outside of the table */
4212                                 if (igb_vfta_set(hw, vid, true))
4213                                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4214                                                 adapter->vfs_allocated_count);
4215                                 reg |= E1000_VLVF_VLANID_ENABLE;
4216                         }
4217                         reg &= ~E1000_VLVF_VLANID_MASK;
4218                         reg |= vid;
4219
4220                         wr32(E1000_VLVF(i), reg);
4221
4222                         /* do not modify RLPML for PF devices */
4223                         if (vf >= adapter->vfs_allocated_count)
4224                                 return 0;
4225
4226                         if (!adapter->vf_data[vf].vlans_enabled) {
4227                                 u32 size;
4228                                 reg = rd32(E1000_VMOLR(vf));
4229                                 size = reg & E1000_VMOLR_RLPML_MASK;
4230                                 size += 4;
4231                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4232                                 reg |= size;
4233                                 wr32(E1000_VMOLR(vf), reg);
4234                         }
4235                         adapter->vf_data[vf].vlans_enabled++;
4236
4237                         return 0;
4238                 }
4239         } else {
4240                 if (i < E1000_VLVF_ARRAY_SIZE) {
4241                         /* remove vf from the pool */
4242                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4243                         /* if pool is empty then remove entry from vfta */
4244                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4245                                 reg = 0;
4246                                 igb_vfta_set(hw, vid, false);
4247                         }
4248                         wr32(E1000_VLVF(i), reg);
4249
4250                         /* do not modify RLPML for PF devices */
4251                         if (vf >= adapter->vfs_allocated_count)
4252                                 return 0;
4253
4254                         adapter->vf_data[vf].vlans_enabled--;
4255                         if (!adapter->vf_data[vf].vlans_enabled) {
4256                                 u32 size;
4257                                 reg = rd32(E1000_VMOLR(vf));
4258                                 size = reg & E1000_VMOLR_RLPML_MASK;
4259                                 size -= 4;
4260                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4261                                 reg |= size;
4262                                 wr32(E1000_VMOLR(vf), reg);
4263                         }
4264                         return 0;
4265                 }
4266         }
4267         return -1;
4268 }
4269
4270 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4271 {
4272         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4273         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4274
4275         return igb_vlvf_set(adapter, vid, add, vf);
4276 }
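/*
 * For illustration only: based on the decoding above, a VF-side driver is
 * expected to encode a VLAN request roughly as below (assuming the same
 * mailbox definitions; the actual VF implementation lives elsewhere):
 *
 *	u32 msgbuf[2];
 *
 *	msgbuf[0] = E1000_VF_SET_VLAN | (add << E1000_VT_MSGINFO_SHIFT);
 *	msgbuf[1] = vid;
 *
 * followed by a mailbox write of the two words to the PF.
 */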
4277
4278 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4279 {
4280         struct e1000_hw *hw = &adapter->hw;
4281
4282         /* disable mailbox functionality for vf */
4283         adapter->vf_data[vf].clear_to_send = false;
4284
4285         /* reset offloads to defaults */
4286         igb_set_vmolr(hw, vf);
4287
4288         /* reset vlans for device */
4289         igb_clear_vf_vfta(adapter, vf);
4290
4291         /* reset multicast table array for vf */
4292         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4293
4294         /* Flush and reset the mta with the new values */
4295         igb_set_rx_mode(adapter->netdev);
4296 }
4297
4298 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4299 {
4300         struct e1000_hw *hw = &adapter->hw;
4301         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4302         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4303         u32 reg, msgbuf[3];
4304         u8 *addr = (u8 *)(&msgbuf[1]);
4305
4306         /* process all the same items cleared in a function level reset */
4307         igb_vf_reset_event(adapter, vf);
4308
4309         /* set vf mac address */
4310         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4311
4312         /* enable transmit and receive for vf */
4313         reg = rd32(E1000_VFTE);
4314         wr32(E1000_VFTE, reg | (1 << vf));
4315         reg = rd32(E1000_VFRE);
4316         wr32(E1000_VFRE, reg | (1 << vf));
4317
4318         /* enable mailbox functionality for vf */
4319         adapter->vf_data[vf].clear_to_send = true;
4320
4321         /* reply to reset with ack and vf mac address */
4322         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4323         memcpy(addr, vf_mac, 6);
4324         igb_write_mbx(hw, msgbuf, 3, vf);
4325 }
4326
4327 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4328 {
4329         unsigned char *addr = (unsigned char *)&msg[1];
4330         int err = -1;
4331
4332         if (is_valid_ether_addr(addr))
4333                 err = igb_set_vf_mac(adapter, vf, addr);
4334
4335         return err;
4336
4337 }
4338
4339 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4340 {
4341         struct e1000_hw *hw = &adapter->hw;
4342         u32 msg = E1000_VT_MSGTYPE_NACK;
4343
4344         /* if device isn't clear to send it shouldn't be reading either */
4345         if (!adapter->vf_data[vf].clear_to_send)
4346                 igb_write_mbx(hw, &msg, 1, vf);
4347 }
4348
4349
4350 static void igb_msg_task(struct igb_adapter *adapter)
4351 {
4352         struct e1000_hw *hw = &adapter->hw;
4353         u32 vf;
4354
4355         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4356                 /* process any reset requests */
4357                 if (!igb_check_for_rst(hw, vf)) {
4358                         adapter->vf_data[vf].clear_to_send = false;
4359                         igb_vf_reset_event(adapter, vf);
4360                 }
4361
4362                 /* process any messages pending */
4363                 if (!igb_check_for_msg(hw, vf))
4364                         igb_rcv_msg_from_vf(adapter, vf);
4365
4366                 /* process any acks */
4367                 if (!igb_check_for_ack(hw, vf))
4368                         igb_rcv_ack_from_vf(adapter, vf);
4369
4370         }
4371 }
4372
4373 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4374 {
4375         u32 mbx_size = E1000_VFMAILBOX_SIZE;
4376         u32 msgbuf[mbx_size];
4377         struct e1000_hw *hw = &adapter->hw;
4378         s32 retval;
4379
4380         retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4381
4382         if (retval)
4383                 dev_err(&adapter->pdev->dev,
4384                         "Error receiving message from VF\n");
4385
4386         /* this is a message we already processed, do nothing */
4387         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4388                 return retval;
4389
4390         /*
4391          * until the vf completes a reset it should not be
4392          * allowed to start any configuration.
4393          */
4394
4395         if (msgbuf[0] == E1000_VF_RESET) {
4396                 igb_vf_reset_msg(adapter, vf);
4397
4398                 return retval;
4399         }
4400
4401         if (!adapter->vf_data[vf].clear_to_send) {
4402                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4403                 igb_write_mbx(hw, msgbuf, 1, vf);
4404                 return retval;
4405         }
4406
4407         switch ((msgbuf[0] & 0xFFFF)) {
4408         case E1000_VF_SET_MAC_ADDR:
4409                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4410                 break;
4411         case E1000_VF_SET_MULTICAST:
4412                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4413                 break;
4414         case E1000_VF_SET_LPE:
4415                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4416                 break;
4417         case E1000_VF_SET_VLAN:
4418                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4419                 break;
4420         default:
4421                 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4422                 retval = -1;
4423                 break;
4424         }
4425
4426         /* notify the VF of the results of what it sent us */
4427         if (retval)
4428                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4429         else
4430                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4431
4432         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4433
4434         igb_write_mbx(hw, msgbuf, 1, vf);
4435
4436         return retval;
4437 }
4438
4439 /**
4440  *  igb_set_uta - Set unicast filter table address
4441  *  @adapter: board private structure
4442  *
4443  *  The unicast table address is a register array of 32-bit registers.
4444  *  The table is meant to be used in a way similar to how the MTA is used
4445  *  however, due to certain limitations in the hardware it is necessary to
4446  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4447  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4448  **/
4449 static void igb_set_uta(struct igb_adapter *adapter)
4450 {
4451         struct e1000_hw *hw = &adapter->hw;
4452         int i;
4453
4454         /* The UTA table only exists on 82576 hardware and newer */
4455         if (hw->mac.type < e1000_82576)
4456                 return;
4457
4458         /* we only need to do this if VMDq is enabled */
4459         if (!adapter->vfs_allocated_count)
4460                 return;
4461
4462         for (i = 0; i < hw->mac.uta_reg_count; i++)
4463                 array_wr32(E1000_UTA, i, ~0);
4464 }
4465
4466 /**
4467  * igb_intr_msi - Interrupt Handler
4468  * @irq: interrupt number
4469  * @data: pointer to a network interface device structure
4470  **/
4471 static irqreturn_t igb_intr_msi(int irq, void *data)
4472 {
4473         struct igb_adapter *adapter = data;
4474         struct igb_q_vector *q_vector = adapter->q_vector[0];
4475         struct e1000_hw *hw = &adapter->hw;
4476         /* read ICR disables interrupts using IAM */
4477         u32 icr = rd32(E1000_ICR);
4478
4479         igb_write_itr(q_vector);
4480
4481         if (icr & E1000_ICR_DOUTSYNC) {
4482                 /* HW is reporting DMA is out of sync */
4483                 adapter->stats.doosync++;
4484         }
4485
4486         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4487                 hw->mac.get_link_status = 1;
4488                 if (!test_bit(__IGB_DOWN, &adapter->state))
4489                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4490         }
4491
4492         napi_schedule(&q_vector->napi);
4493
4494         return IRQ_HANDLED;
4495 }
4496
4497 /**
4498  * igb_intr - Legacy Interrupt Handler
4499  * @irq: interrupt number
4500  * @data: pointer to a network interface device structure
4501  **/
4502 static irqreturn_t igb_intr(int irq, void *data)
4503 {
4504         struct igb_adapter *adapter = data;
4505         struct igb_q_vector *q_vector = adapter->q_vector[0];
4506         struct e1000_hw *hw = &adapter->hw;
4507         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4508          * need for the IMC write */
4509         u32 icr = rd32(E1000_ICR);
4510         if (!icr)
4511                 return IRQ_NONE;  /* Not our interrupt */
4512
4513         igb_write_itr(q_vector);
4514
4515         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4516          * not set, then the adapter didn't send an interrupt */
4517         if (!(icr & E1000_ICR_INT_ASSERTED))
4518                 return IRQ_NONE;
4519
4520         if (icr & E1000_ICR_DOUTSYNC) {
4521                 /* HW is reporting DMA is out of sync */
4522                 adapter->stats.doosync++;
4523         }
4524
4525         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4526                 hw->mac.get_link_status = 1;
4527                 /* guard against interrupt when we're going down */
4528                 if (!test_bit(__IGB_DOWN, &adapter->state))
4529                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4530         }
4531
4532         napi_schedule(&q_vector->napi);
4533
4534         return IRQ_HANDLED;
4535 }
4536
4537 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4538 {
4539         struct igb_adapter *adapter = q_vector->adapter;
4540         struct e1000_hw *hw = &adapter->hw;
4541
4542         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4543             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4544                 if (!adapter->msix_entries)
4545                         igb_set_itr(adapter);
4546                 else
4547                         igb_update_ring_itr(q_vector);
4548         }
4549
4550         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4551                 if (adapter->msix_entries)
4552                         wr32(E1000_EIMS, q_vector->eims_value);
4553                 else
4554                         igb_irq_enable(adapter);
4555         }
4556 }
4557
4558 /**
4559  * igb_poll - NAPI Rx polling callback
4560  * @napi: napi polling structure
4561  * @budget: count of how many packets we should handle
4562  **/
4563 static int igb_poll(struct napi_struct *napi, int budget)
4564 {
4565         struct igb_q_vector *q_vector = container_of(napi,
4566                                                      struct igb_q_vector,
4567                                                      napi);
4568         int tx_clean_complete = 1, work_done = 0;
4569
4570 #ifdef CONFIG_IGB_DCA
4571         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4572                 igb_update_dca(q_vector);
4573 #endif
4574         if (q_vector->tx_ring)
4575                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4576
4577         if (q_vector->rx_ring)
4578                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4579
4580         if (!tx_clean_complete)
4581                 work_done = budget;
4582
4583         /* If not enough Rx work done, exit the polling mode */
4584         if (work_done < budget) {
4585                 napi_complete(napi);
4586                 igb_ring_irq_enable(q_vector);
4587         }
4588
4589         return work_done;
4590 }
4591
4592 /**
4593  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4594  * @adapter: board private structure
4595  * @shhwtstamps: timestamp structure to update
4596  * @regval: unsigned 64bit system time value.
4597  *
4598  * We need to convert the system time value stored in the RX/TXSTMP registers
4599  * into a hwtstamp which can be used by the upper level timestamping functions
4600  */
4601 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4602                                    struct skb_shared_hwtstamps *shhwtstamps,
4603                                    u64 regval)
4604 {
4605         u64 ns;
4606
4607         ns = timecounter_cyc2time(&adapter->clock, regval);
4608         timecompare_update(&adapter->compare, ns);
4609         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4610         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4611         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4612 }
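/*
 * Both fields set here are visible to applications that enable
 * SO_TIMESTAMPING: hwtstamp carries the raw hardware clock converted to
 * nanoseconds, while syststamp is that value mapped into system time via
 * the timecompare estimator so the two clocks can be correlated.
 */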
4613
4614 /**
4615  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4616  * @q_vector: pointer to q_vector containing needed info
4617  * @skb: packet that was just sent
4618  *
4619  * If we were asked to do hardware stamping and such a time stamp is
4620  * available, then it must have been for this skb here because we only
4621  * allow one such packet into the queue.
4622  */
4623 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4624 {
4625         struct igb_adapter *adapter = q_vector->adapter;
4626         union skb_shared_tx *shtx = skb_tx(skb);
4627         struct e1000_hw *hw = &adapter->hw;
4628         struct skb_shared_hwtstamps shhwtstamps;
4629         u64 regval;
4630
4631         /* if skb does not support hw timestamp or TX stamp not valid exit */
4632         if (likely(!shtx->hardware) ||
4633             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4634                 return;
4635
4636         regval = rd32(E1000_TXSTMPL);
4637         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4638
4639         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4640         skb_tstamp_tx(skb, &shhwtstamps);
4641 }
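/*
 * A minimal userspace sketch (not part of the driver) of how this TX path
 * is exercised, following Documentation/networking/timestamping.txt; "fd"
 * is an already-bound UDP socket and "msg" a prepared struct msghdr:
 *
 *	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
 *		    SOF_TIMESTAMPING_SYS_HARDWARE |
 *		    SOF_TIMESTAMPING_RAW_HARDWARE;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
 *	send(fd, buf, len, 0);
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);
 *
 * The time stamp produced above comes back in the SCM_TIMESTAMPING control
 * message (struct timespec ts[3]) attached to the looped-back packet.
 */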
4642
4643 /**
4644  * igb_clean_tx_irq - Reclaim resources after transmit completes
4645  * @q_vector: pointer to q_vector containing needed info
4646  * returns true if ring is completely cleaned
4647  **/
4648 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4649 {
4650         struct igb_adapter *adapter = q_vector->adapter;
4651         struct igb_ring *tx_ring = q_vector->tx_ring;
4652         struct net_device *netdev = tx_ring->netdev;
4653         struct e1000_hw *hw = &adapter->hw;
4654         struct igb_buffer *buffer_info;
4655         struct sk_buff *skb;
4656         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4657         unsigned int total_bytes = 0, total_packets = 0;
4658         unsigned int i, eop, count = 0;
4659         bool cleaned = false;
4660
4661         i = tx_ring->next_to_clean;
4662         eop = tx_ring->buffer_info[i].next_to_watch;
4663         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4664
4665         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4666                (count < tx_ring->count)) {
4667                 for (cleaned = false; !cleaned; count++) {
4668                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4669                         buffer_info = &tx_ring->buffer_info[i];
4670                         cleaned = (i == eop);
4671                         skb = buffer_info->skb;
4672
4673                         if (skb) {
4674                                 unsigned int segs, bytecount;
4675                                 /* gso_segs is currently only valid for tcp */
4676                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4677                                 /* multiply data chunks by size of headers */
4678                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4679                                             skb->len;
4680                                 total_packets += segs;
4681                                 total_bytes += bytecount;
4682
4683                                 igb_tx_hwtstamp(q_vector, skb);
4684                         }
4685
4686                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4687                         tx_desc->wb.status = 0;
4688
4689                         i++;
4690                         if (i == tx_ring->count)
4691                                 i = 0;
4692                 }
4693                 eop = tx_ring->buffer_info[i].next_to_watch;
4694                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4695         }
4696
4697         tx_ring->next_to_clean = i;
4698
4699         if (unlikely(count &&
4700                      netif_carrier_ok(netdev) &&
4701                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4702                 /* Make sure that anybody stopping the queue after this
4703                  * sees the new next_to_clean.
4704                  */
4705                 smp_mb();
4706                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4707                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4708                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4709                         tx_ring->tx_stats.restart_queue++;
4710                 }
4711         }
4712
4713         if (tx_ring->detect_tx_hung) {
4714                 /* Detect a transmit hang in hardware, this serializes the
4715                  * check with the clearing of time_stamp and movement of i */
4716                 tx_ring->detect_tx_hung = false;
4717                 if (tx_ring->buffer_info[i].time_stamp &&
4718                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4719                                (adapter->tx_timeout_factor * HZ))
4720                     && !(rd32(E1000_STATUS) &
4721                          E1000_STATUS_TXOFF)) {
4722
4723                         /* detected Tx unit hang */
4724                         dev_err(&tx_ring->pdev->dev,
4725                                 "Detected Tx Unit Hang\n"
4726                                 "  Tx Queue             <%d>\n"
4727                                 "  TDH                  <%x>\n"
4728                                 "  TDT                  <%x>\n"
4729                                 "  next_to_use          <%x>\n"
4730                                 "  next_to_clean        <%x>\n"
4731                                 "buffer_info[next_to_clean]\n"
4732                                 "  time_stamp           <%lx>\n"
4733                                 "  next_to_watch        <%x>\n"
4734                                 "  jiffies              <%lx>\n"
4735                                 "  desc.status          <%x>\n",
4736                                 tx_ring->queue_index,
4737                                 readl(tx_ring->head),
4738                                 readl(tx_ring->tail),
4739                                 tx_ring->next_to_use,
4740                                 tx_ring->next_to_clean,
4741                                 tx_ring->buffer_info[i].time_stamp,
4742                                 eop,
4743                                 jiffies,
4744                                 eop_desc->wb.status);
4745                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4746                 }
4747         }
4748         tx_ring->total_bytes += total_bytes;
4749         tx_ring->total_packets += total_packets;
4750         tx_ring->tx_stats.bytes += total_bytes;
4751         tx_ring->tx_stats.packets += total_packets;
4752         netdev->stats.tx_bytes += total_bytes;
4753         netdev->stats.tx_packets += total_packets;
4754         return (count < tx_ring->count);
4755 }
4756
4757 /**
4758  * igb_receive_skb - helper function to handle rx indications
4759  * @q_vector: structure containing interrupt and ring information
4760  * @skb: packet to send up
4761  * @vlan_tag: vlan tag for packet
4762  **/
4763 static void igb_receive_skb(struct igb_q_vector *q_vector,
4764                             struct sk_buff *skb,
4765                             u16 vlan_tag)
4766 {
4767         struct igb_adapter *adapter = q_vector->adapter;
4768
4769         if (vlan_tag)
4770                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4771                                  vlan_tag, skb);
4772         else
4773                 napi_gro_receive(&q_vector->napi, skb);
4774 }
4775
4776 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4777                                        u32 status_err, struct sk_buff *skb)
4778 {
4779         skb->ip_summed = CHECKSUM_NONE;
4780
4781         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4782         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4783              (status_err & E1000_RXD_STAT_IXSM))
4784                 return;
4785
4786         /* TCP/UDP checksum error bit is set */
4787         if (status_err &
4788             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4789                 /*
4790                  * work around errata with sctp packets where the TCPE aka
4791                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4792                  * packets, (aka let the stack check the crc32c)
4793                  */
4794                 if (!((skb->len == 60) &&
4795                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)))
4796                         ring->rx_stats.csum_err++;
4797
4798                 /* let the stack verify checksum errors */
4799                 return;
4800         }
4801         /* It must be a TCP or UDP packet with a valid checksum */
4802         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4803                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4804
4805         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4806 }
4807
4808 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4809                                    struct sk_buff *skb)
4810 {
4811         struct igb_adapter *adapter = q_vector->adapter;
4812         struct e1000_hw *hw = &adapter->hw;
4813         u64 regval;
4814
4815         /*
4816          * If this bit is set, then the RX registers contain the time stamp. No
4817          * other packet will be time stamped until we read these registers, so
4818          * read the registers to make them available again. Because only one
4819          * packet can be time stamped at a time, we know that the register
4820          * values must belong to this one here and therefore we don't need to
4821          * compare any of the additional attributes stored for it.
4822          *
4823          * If nothing went wrong, then it should have a skb_shared_tx that we
4824          * can turn into a skb_shared_hwtstamps.
4825          */
4826         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4827                 return;
4828         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4829                 return;
4830
4831         regval = rd32(E1000_RXSTMPL);
4832         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4833
4834         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4835 }
4836 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4837                                union e1000_adv_rx_desc *rx_desc)
4838 {
4839         /* HW will not DMA in data larger than the given buffer, even if it
4840          * parses the (NFS, of course) header to be larger.  In that case, it
4841          * fills the header buffer and spills the rest into the page.
4842          */
4843         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4844                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4845         if (hlen > rx_ring->rx_buffer_len)
4846                 hlen = rx_ring->rx_buffer_len;
4847         return hlen;
4848 }
4849
4850 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4851                                  int *work_done, int budget)
4852 {
4853         struct igb_ring *rx_ring = q_vector->rx_ring;
4854         struct net_device *netdev = rx_ring->netdev;
4855         struct pci_dev *pdev = rx_ring->pdev;
4856         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4857         struct igb_buffer *buffer_info, *next_buffer;
4858         struct sk_buff *skb;
4859         bool cleaned = false;
4860         int cleaned_count = 0;
4861         unsigned int total_bytes = 0, total_packets = 0;
4862         unsigned int i;
4863         u32 staterr;
4864         u16 length;
4865         u16 vlan_tag;
4866
4867         i = rx_ring->next_to_clean;
4868         buffer_info = &rx_ring->buffer_info[i];
4869         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4870         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4871
4872         while (staterr & E1000_RXD_STAT_DD) {
4873                 if (*work_done >= budget)
4874                         break;
4875                 (*work_done)++;
4876
4877                 skb = buffer_info->skb;
4878                 prefetch(skb->data - NET_IP_ALIGN);
4879                 buffer_info->skb = NULL;
4880
4881                 i++;
4882                 if (i == rx_ring->count)
4883                         i = 0;
4884                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4885                 prefetch(next_rxd);
4886                 next_buffer = &rx_ring->buffer_info[i];
4887
4888                 length = le16_to_cpu(rx_desc->wb.upper.length);
4889                 cleaned = true;
4890                 cleaned_count++;
4891
4892                 if (buffer_info->dma) {
4893                         pci_unmap_single(pdev, buffer_info->dma,
4894                                          rx_ring->rx_buffer_len,
4895                                          PCI_DMA_FROMDEVICE);
4896                         buffer_info->dma = 0;
4897                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4898                                 skb_put(skb, length);
4899                                 goto send_up;
4900                         }
4901                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4902                 }
4903
4904                 if (length) {
4905                         pci_unmap_page(pdev, buffer_info->page_dma,
4906                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4907                         buffer_info->page_dma = 0;
4908
4909                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4910                                                 buffer_info->page,
4911                                                 buffer_info->page_offset,
4912                                                 length);
4913
4914                         if (page_count(buffer_info->page) != 1)
4915                                 buffer_info->page = NULL;
4916                         else
4917                                 get_page(buffer_info->page);
4918
4919                         skb->len += length;
4920                         skb->data_len += length;
4921
4922                         skb->truesize += length;
4923                 }
4924
4925                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4926                         buffer_info->skb = next_buffer->skb;
4927                         buffer_info->dma = next_buffer->dma;
4928                         next_buffer->skb = skb;
4929                         next_buffer->dma = 0;
4930                         goto next_desc;
4931                 }
4932 send_up:
4933                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4934                         dev_kfree_skb_irq(skb);
4935                         goto next_desc;
4936                 }
4937
4938                 igb_rx_hwtstamp(q_vector, staterr, skb);
4939                 total_bytes += skb->len;
4940                 total_packets++;
4941
4942                 igb_rx_checksum_adv(rx_ring, staterr, skb);
4943
4944                 skb->protocol = eth_type_trans(skb, netdev);
4945                 skb_record_rx_queue(skb, rx_ring->queue_index);
4946
4947                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4948                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4949
4950                 igb_receive_skb(q_vector, skb, vlan_tag);
4951
4952 next_desc:
4953                 rx_desc->wb.upper.status_error = 0;
4954
4955                 /* return some buffers to hardware, one at a time is too slow */
4956                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4957                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4958                         cleaned_count = 0;
4959                 }
4960
4961                 /* use prefetched values */
4962                 rx_desc = next_rxd;
4963                 buffer_info = next_buffer;
4964                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4965         }
4966
4967         rx_ring->next_to_clean = i;
4968         cleaned_count = igb_desc_unused(rx_ring);
4969
4970         if (cleaned_count)
4971                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4972
4973         rx_ring->total_packets += total_packets;
4974         rx_ring->total_bytes += total_bytes;
4975         rx_ring->rx_stats.packets += total_packets;
4976         rx_ring->rx_stats.bytes += total_bytes;
4977         netdev->stats.rx_bytes += total_bytes;
4978         netdev->stats.rx_packets += total_packets;
4979         return cleaned;
4980 }
4981
4982 /**
4983  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
4984  * @rx_ring: address of the ring structure to repopulate
 * @cleaned_count: number of buffers to replace
4985  **/
4986 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
4987 {
4988         struct net_device *netdev = rx_ring->netdev;
4989         union e1000_adv_rx_desc *rx_desc;
4990         struct igb_buffer *buffer_info;
4991         struct sk_buff *skb;
4992         unsigned int i;
4993         int bufsz;
4994
4995         i = rx_ring->next_to_use;
4996         buffer_info = &rx_ring->buffer_info[i];
4997
4998         bufsz = rx_ring->rx_buffer_len;
4999
5000         while (cleaned_count--) {
5001                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5002
5003                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5004                         if (!buffer_info->page) {
5005                                 buffer_info->page = alloc_page(GFP_ATOMIC);
5006                                 if (!buffer_info->page) {
5007                                         rx_ring->rx_stats.alloc_failed++;
5008                                         goto no_buffers;
5009                                 }
5010                                 buffer_info->page_offset = 0;
5011                         } else {
5012                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5013                         }
5014                         buffer_info->page_dma =
5015                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5016                                              buffer_info->page_offset,
5017                                              PAGE_SIZE / 2,
5018                                              PCI_DMA_FROMDEVICE);
5019                 }
5020
5021                 if (!buffer_info->skb) {
5022                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5023                         if (!skb) {
5024                                 rx_ring->rx_stats.alloc_failed++;
5025                                 goto no_buffers;
5026                         }
5027
5028                         buffer_info->skb = skb;
5029                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5030                                                           skb->data,
5031                                                           bufsz,
5032                                                           PCI_DMA_FROMDEVICE);
5033                 }
5034                 /* Refresh the desc even if buffer_addrs didn't change because
5035                  * each write-back erases this info. */
5036                 if (bufsz < IGB_RXBUFFER_1024) {
5037                         rx_desc->read.pkt_addr =
5038                              cpu_to_le64(buffer_info->page_dma);
5039                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5040                 } else {
5041                         rx_desc->read.pkt_addr =
5042                              cpu_to_le64(buffer_info->dma);
5043                         rx_desc->read.hdr_addr = 0;
5044                 }
5045
5046                 i++;
5047                 if (i == rx_ring->count)
5048                         i = 0;
5049                 buffer_info = &rx_ring->buffer_info[i];
5050         }
5051
5052 no_buffers:
5053         if (rx_ring->next_to_use != i) {
5054                 rx_ring->next_to_use = i;
5055                 if (i == 0)
5056                         i = (rx_ring->count - 1);
5057                 else
5058                         i--;
5059
5060                 /* Force memory writes to complete before letting h/w
5061                  * know there are new descriptors to fetch.  (Only
5062                  * applicable for weak-ordered memory model archs,
5063                  * such as IA-64). */
5064                 wmb();
5065                 writel(i, rx_ring->tail);
5066         }
5067 }
5068
5069 /**
5070  * igb_mii_ioctl - handle MII ioctls (PHY register access)
5071  * @netdev: network interface device structure
5072  * @ifr: pointer to the ioctl request
5073  * @cmd: one of SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG
5074  **/
5075 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5076 {
5077         struct igb_adapter *adapter = netdev_priv(netdev);
5078         struct mii_ioctl_data *data = if_mii(ifr);
5079
5080         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5081                 return -EOPNOTSUPP;
5082
5083         switch (cmd) {
5084         case SIOCGMIIPHY:
5085                 data->phy_id = adapter->hw.phy.addr;
5086                 break;
5087         case SIOCGMIIREG:
5088                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5089                                      &data->val_out))
5090                         return -EIO;
5091                 break;
5092         case SIOCSMIIREG:
5093         default:
5094                 return -EOPNOTSUPP;
5095         }
5096         return 0;
5097 }
5098
5099 /**
5100  * igb_hwtstamp_ioctl - control hardware time stamping
5101  * @netdev: network interface device structure
5102  * @ifr: interface request carrying the hwtstamp_config to apply
5103  * @cmd: ioctl command (SIOCSHWTSTAMP)
5104  *
5105  * Outgoing time stamping can be enabled and disabled. Play nice and
5106  * disable it when requested, although it shouldn't cause any overhead
5107  * when no packet needs it. At most one packet in the queue may be
5108  * marked for time stamping, otherwise it would be impossible to tell
5109  * for sure to which packet the hardware time stamp belongs.
5110  *
5111  * Incoming time stamping has to be configured via the hardware
5112  * filters. Not all combinations are supported, in particular event
5113  * type has to be specified. Matching the kind of event packet is
5114  * not supported, with the exception of "all V2 events regardless of
5115  * layer 2 or 4".
5116  *
5117  **/
5118 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5119                               struct ifreq *ifr, int cmd)
5120 {
5121         struct igb_adapter *adapter = netdev_priv(netdev);
5122         struct e1000_hw *hw = &adapter->hw;
5123         struct hwtstamp_config config;
5124         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5125         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5126         u32 tsync_rx_cfg = 0;
5127         bool is_l4 = false;
5128         bool is_l2 = false;
5129         u32 regval;
5130
5131         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5132                 return -EFAULT;
5133
5134         /* reserved for future extensions */
5135         if (config.flags)
5136                 return -EINVAL;
5137
5138         switch (config.tx_type) {
5139         case HWTSTAMP_TX_OFF:
5140                 tsync_tx_ctl = 0;  /* fall through */
5141         case HWTSTAMP_TX_ON:
5142                 break;
5143         default:
5144                 return -ERANGE;
5145         }
5146
5147         switch (config.rx_filter) {
5148         case HWTSTAMP_FILTER_NONE:
5149                 tsync_rx_ctl = 0;
5150                 break;
5151         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5152         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5153         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5154         case HWTSTAMP_FILTER_ALL:
5155                 /*
5156                  * register TSYNCRXCFG must be set, therefore it is not
5157                  * possible to time stamp both Sync and Delay_Req messages
5158                  * => fall back to time stamping all packets
5159                  */
5160                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5161                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5162                 break;
5163         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5164                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5165                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5166                 is_l4 = true;
5167                 break;
5168         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5169                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5170                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5171                 is_l4 = true;
5172                 break;
5173         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5174         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5175                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5176                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5177                 is_l2 = true;
5178                 is_l4 = true;
5179                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5180                 break;
5181         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5182         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5183                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5184                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5185                 is_l2 = true;
5186                 is_l4 = true;
5187                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5188                 break;
5189         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5190         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5191         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5192                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5193                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5194                 is_l2 = true;
5195                 break;
5196         default:
5197                 return -ERANGE;
5198         }
5199
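        /* per-packet hardware time stamping is not supported on 82575 */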
5200         if (hw->mac.type == e1000_82575) {
5201                 if (tsync_rx_ctl || tsync_tx_ctl)
5202                         return -EINVAL;
5203                 return 0;
5204         }
5205
5206         /* enable/disable TX */
5207         regval = rd32(E1000_TSYNCTXCTL);
5208         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5209         regval |= tsync_tx_ctl;
5210         wr32(E1000_TSYNCTXCTL, regval);
5211
5212         /* enable/disable RX */
5213         regval = rd32(E1000_TSYNCRXCTL);
5214         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5215         regval |= tsync_rx_ctl;
5216         wr32(E1000_TSYNCRXCTL, regval);
5217
5218         /* define which PTP packets are time stamped */
5219         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5220
5221         /* define ethertype filter for timestamped packets */
5222         if (is_l2)
5223                 wr32(E1000_ETQF(3),
5224                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5225                                  E1000_ETQF_1588 | /* enable timestamping */
5226                                  ETH_P_1588));     /* 1588 eth protocol type */
5227         else
5228                 wr32(E1000_ETQF(3), 0);
5229
5230 #define PTP_PORT 319
5231         /* L4 Queue Filter[3]: filter by destination port and protocol */
5232         if (is_l4) {
5233                 u32 ftqf = (IPPROTO_UDP /* UDP */
5234                         | E1000_FTQF_VF_BP /* VF not compared */
5235                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5236                         | E1000_FTQF_MASK); /* mask all inputs */
5237                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5238
5239                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5240                 wr32(E1000_IMIREXT(3),
5241                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5242                 if (hw->mac.type == e1000_82576) {
5243                         /* enable source port check */
5244                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5245                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5246                 }
5247                 wr32(E1000_FTQF(3), ftqf);
5248         } else {
5249                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5250         }
5251         wrfl();
5252
5253         adapter->hwtstamp_config = config;
5254
5255         /* clear TX/RX time stamp registers, just to be sure */
5256         regval = rd32(E1000_TXSTMPH);
5257         regval = rd32(E1000_RXSTMPH);
5258
5259         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5260                 -EFAULT : 0;
5261 }
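/*
 * A minimal sketch of the userspace side of this ioctl (e.g. from a PTP
 * daemon); everything referenced is standard uapi, "eth0" and "fd" are
 * placeholders:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return the driver may have widened cfg.rx_filter (e.g. to
 * HWTSTAMP_FILTER_ALL), as done above, to report what was actually enabled.
 */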
5262
5263 /**
5264  * igb_ioctl - dispatch device-specific ioctls
5265  * @netdev: network interface device structure
5266  * @ifr: pointer to the ioctl request
5267  * @cmd: ioctl command
5268  **/
5269 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5270 {
5271         switch (cmd) {
5272         case SIOCGMIIPHY:
5273         case SIOCGMIIREG:
5274         case SIOCSMIIREG:
5275                 return igb_mii_ioctl(netdev, ifr, cmd);
5276         case SIOCSHWTSTAMP:
5277                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5278         default:
5279                 return -EOPNOTSUPP;
5280         }
5281 }
5282
5283 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5284 {
5285         struct igb_adapter *adapter = hw->back;
5286         u16 cap_offset;
5287
5288         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5289         if (!cap_offset)
5290                 return -E1000_ERR_CONFIG;
5291
5292         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5293
5294         return 0;
5295 }
5296
5297 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5298 {
5299         struct igb_adapter *adapter = hw->back;
5300         u16 cap_offset;
5301
5302         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5303         if (!cap_offset)
5304                 return -E1000_ERR_CONFIG;
5305
5306         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5307
5308         return 0;
5309 }
5310
5311 static void igb_vlan_rx_register(struct net_device *netdev,
5312                                  struct vlan_group *grp)
5313 {
5314         struct igb_adapter *adapter = netdev_priv(netdev);
5315         struct e1000_hw *hw = &adapter->hw;
5316         u32 ctrl, rctl;
5317
5318         igb_irq_disable(adapter);
5319         adapter->vlgrp = grp;
5320
5321         if (grp) {
5322                 /* enable VLAN tag insert/strip */
5323                 ctrl = rd32(E1000_CTRL);
5324                 ctrl |= E1000_CTRL_VME;
5325                 wr32(E1000_CTRL, ctrl);
5326
5327                 /* enable VLAN receive filtering */
5328                 rctl = rd32(E1000_RCTL);
5329                 rctl &= ~E1000_RCTL_CFIEN;
5330                 wr32(E1000_RCTL, rctl);
5331                 igb_update_mng_vlan(adapter);
5332         } else {
5333                 /* disable VLAN tag insert/strip */
5334                 ctrl = rd32(E1000_CTRL);
5335                 ctrl &= ~E1000_CTRL_VME;
5336                 wr32(E1000_CTRL, ctrl);
5337
5338                 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5339                         igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5340                         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5341                 }
5342         }
5343
5344         igb_rlpml_set(adapter);
5345
5346         if (!test_bit(__IGB_DOWN, &adapter->state))
5347                 igb_irq_enable(adapter);
5348 }
5349
5350 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5351 {
5352         struct igb_adapter *adapter = netdev_priv(netdev);
5353         struct e1000_hw *hw = &adapter->hw;
5354         int pf_id = adapter->vfs_allocated_count;
5355
5356         if ((hw->mng_cookie.status &
5357              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5358             (vid == adapter->mng_vlan_id))
5359                 return;
5360
5361         /* add vid to vlvf if sr-iov is enabled,
5362          * if that fails add directly to filter table */
5363         if (igb_vlvf_set(adapter, vid, true, pf_id))
5364                 igb_vfta_set(hw, vid, true);
5365
5366 }
5367
5368 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5369 {
5370         struct igb_adapter *adapter = netdev_priv(netdev);
5371         struct e1000_hw *hw = &adapter->hw;
5372         int pf_id = adapter->vfs_allocated_count;
5373
5374         igb_irq_disable(adapter);
5375         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5376
5377         if (!test_bit(__IGB_DOWN, &adapter->state))
5378                 igb_irq_enable(adapter);
5379
5380         if ((adapter->hw.mng_cookie.status &
5381              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5382             (vid == adapter->mng_vlan_id)) {
5383                 /* release control to f/w */
5384                 igb_release_hw_control(adapter);
5385                 return;
5386         }
5387
5388         /* remove vid from vlvf if sr-iov is enabled,
5389          * if not in vlvf remove from vfta */
5390         if (igb_vlvf_set(adapter, vid, false, pf_id))
5391                 igb_vfta_set(hw, vid, false);
5392 }
5393
5394 static void igb_restore_vlan(struct igb_adapter *adapter)
5395 {
5396         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5397
5398         if (adapter->vlgrp) {
5399                 u16 vid;
5400                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5401                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5402                                 continue;
5403                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5404                 }
5405         }
5406 }
5407
5408 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5409 {
5410         struct e1000_mac_info *mac = &adapter->hw.mac;
5411
5412         mac->autoneg = 0;
5413
5414         switch (spddplx) {
5415         case SPEED_10 + DUPLEX_HALF:
5416                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5417                 break;
5418         case SPEED_10 + DUPLEX_FULL:
5419                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5420                 break;
5421         case SPEED_100 + DUPLEX_HALF:
5422                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5423                 break;
5424         case SPEED_100 + DUPLEX_FULL:
5425                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5426                 break;
5427         case SPEED_1000 + DUPLEX_FULL:
5428                 mac->autoneg = 1;
5429                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5430                 break;
5431         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5432         default:
5433                 dev_err(&adapter->pdev->dev,
5434                         "Unsupported Speed/Duplex configuration\n");
5435                 return -EINVAL;
5436         }
5437         return 0;
5438 }
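/*
 * The spddplx value handled above is the sum passed down from the ethtool
 * set_settings path (speed + duplex), so e.g.
 * "ethtool -s ethX speed 100 duplex full autoneg off" arrives here as
 * SPEED_100 + DUPLEX_FULL; forced gigabit is not supported and is mapped
 * back to autonegotiation advertising 1000/Full only.
 */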
5439
5440 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5441 {
5442         struct net_device *netdev = pci_get_drvdata(pdev);
5443         struct igb_adapter *adapter = netdev_priv(netdev);
5444         struct e1000_hw *hw = &adapter->hw;
5445         u32 ctrl, rctl, status;
5446         u32 wufc = adapter->wol;
5447 #ifdef CONFIG_PM
5448         int retval = 0;
5449 #endif
5450
5451         netif_device_detach(netdev);
5452
5453         if (netif_running(netdev))
5454                 igb_close(netdev);
5455
5456         igb_clear_interrupt_scheme(adapter);
5457
5458 #ifdef CONFIG_PM
5459         retval = pci_save_state(pdev);
5460         if (retval)
5461                 return retval;
5462 #endif
5463
5464         status = rd32(E1000_STATUS);
5465         if (status & E1000_STATUS_LU)
5466                 wufc &= ~E1000_WUFC_LNKC;
5467
5468         if (wufc) {
5469                 igb_setup_rctl(adapter);
5470                 igb_set_rx_mode(netdev);
5471
5472                 /* turn on all-multi mode if wake on multicast is enabled */
5473                 if (wufc & E1000_WUFC_MC) {
5474                         rctl = rd32(E1000_RCTL);
5475                         rctl |= E1000_RCTL_MPE;
5476                         wr32(E1000_RCTL, rctl);
5477                 }
5478
5479                 ctrl = rd32(E1000_CTRL);
5480                 /* advertise wake from D3Cold */
5481                 #define E1000_CTRL_ADVD3WUC 0x00100000
5482                 /* phy power management enable */
5483                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5484                 ctrl |= E1000_CTRL_ADVD3WUC;
5485                 wr32(E1000_CTRL, ctrl);
5486
5487                 /* Allow time for pending master requests to run */
5488                 igb_disable_pcie_master(&adapter->hw);
5489
5490                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5491                 wr32(E1000_WUFC, wufc);
5492         } else {
5493                 wr32(E1000_WUC, 0);
5494                 wr32(E1000_WUFC, 0);
5495         }
5496
5497         *enable_wake = wufc || adapter->en_mng_pt;
5498         if (!*enable_wake)
5499                 igb_shutdown_serdes_link_82575(hw);
5500
5501         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5502          * would have already happened in close and is redundant. */
5503         igb_release_hw_control(adapter);
5504
5505         pci_disable_device(pdev);
5506
5507         return 0;
5508 }
5509
5510 #ifdef CONFIG_PM
5511 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5512 {
5513         int retval;
5514         bool wake;
5515
5516         retval = __igb_shutdown(pdev, &wake);
5517         if (retval)
5518                 return retval;
5519
5520         if (wake) {
5521                 pci_prepare_to_sleep(pdev);
5522         } else {
5523                 pci_wake_from_d3(pdev, false);
5524                 pci_set_power_state(pdev, PCI_D3hot);
5525         }
5526
5527         return 0;
5528 }
5529
5530 static int igb_resume(struct pci_dev *pdev)
5531 {
5532         struct net_device *netdev = pci_get_drvdata(pdev);
5533         struct igb_adapter *adapter = netdev_priv(netdev);
5534         struct e1000_hw *hw = &adapter->hw;
5535         int err;
5536
5537         pci_set_power_state(pdev, PCI_D0);
5538         pci_restore_state(pdev);
5539
5540         err = pci_enable_device_mem(pdev);
5541         if (err) {
5542                 dev_err(&pdev->dev,
5543                         "igb: Cannot enable PCI device from suspend\n");
5544                 return err;
5545         }
5546         pci_set_master(pdev);
5547
5548         pci_enable_wake(pdev, PCI_D3hot, 0);
5549         pci_enable_wake(pdev, PCI_D3cold, 0);
5550
5551         if (igb_init_interrupt_scheme(adapter)) {
5552                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5553                 return -ENOMEM;
5554         }
5555
5556         /* e1000_power_up_phy(adapter); */
5557
5558         igb_reset(adapter);
5559
5560         /* let the f/w know that the h/w is now under the control of the
5561          * driver. */
5562         igb_get_hw_control(adapter);
5563
5564         wr32(E1000_WUS, ~0);
5565
5566         if (netif_running(netdev)) {
5567                 err = igb_open(netdev);
5568                 if (err)
5569                         return err;
5570         }
5571
5572         netif_device_attach(netdev);
5573
5574         return 0;
5575 }
5576 #endif
5577
5578 static void igb_shutdown(struct pci_dev *pdev)
5579 {
5580         bool wake;
5581
5582         __igb_shutdown(pdev, &wake);
5583
5584         if (system_state == SYSTEM_POWER_OFF) {
5585                 pci_wake_from_d3(pdev, wake);
5586                 pci_set_power_state(pdev, PCI_D3hot);
5587         }
5588 }
5589
5590 #ifdef CONFIG_NET_POLL_CONTROLLER
5591 /*
5592  * Polling 'interrupt' - used by things like netconsole to send skbs
5593  * without having to re-enable interrupts. It's not called while
5594  * the interrupt routine is executing.
5595  */
5596 static void igb_netpoll(struct net_device *netdev)
5597 {
5598         struct igb_adapter *adapter = netdev_priv(netdev);
5599         struct e1000_hw *hw = &adapter->hw;
5600         int i;
5601
5602         if (!adapter->msix_entries) {
5603                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5604                 igb_irq_disable(adapter);
5605                 napi_schedule(&q_vector->napi);
5606                 return;
5607         }
5608
5609         for (i = 0; i < adapter->num_q_vectors; i++) {
5610                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5611                 wr32(E1000_EIMC, q_vector->eims_value);
5612                 napi_schedule(&q_vector->napi);
5613         }
5614 }
5615 #endif /* CONFIG_NET_POLL_CONTROLLER */
5616
5617 /**
5618  * igb_io_error_detected - called when PCI error is detected
5619  * @pdev: Pointer to PCI device
5620  * @state: The current pci connection state
5621  *
5622  * This function is called after a PCI bus error affecting
5623  * this device has been detected.
5624  */
5625 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5626                                               pci_channel_state_t state)
5627 {
5628         struct net_device *netdev = pci_get_drvdata(pdev);
5629         struct igb_adapter *adapter = netdev_priv(netdev);
5630
5631         netif_device_detach(netdev);
5632
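        /* A permanent failure means the device is gone; tell the PCI core to
         * disconnect instead of attempting a reset. */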
5633         if (state == pci_channel_io_perm_failure)
5634                 return PCI_ERS_RESULT_DISCONNECT;
5635
5636         if (netif_running(netdev))
5637                 igb_down(adapter);
5638         pci_disable_device(pdev);
5639
5640         /* Request a slot reset. */
5641         return PCI_ERS_RESULT_NEED_RESET;
5642 }
5643
5644 /**
5645  * igb_io_slot_reset - called after the pci bus has been reset.
5646  * @pdev: Pointer to PCI device
5647  *
5648  * Restart the card from scratch, as if from a cold boot. Implementation
5649  * resembles the first half of the igb_resume routine.
5650  */
5651 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5652 {
5653         struct net_device *netdev = pci_get_drvdata(pdev);
5654         struct igb_adapter *adapter = netdev_priv(netdev);
5655         struct e1000_hw *hw = &adapter->hw;
5656         pci_ers_result_t result;
5657         int err;
5658
5659         if (pci_enable_device_mem(pdev)) {
5660                 dev_err(&pdev->dev,
5661                         "Cannot re-enable PCI device after reset.\n");
5662                 result = PCI_ERS_RESULT_DISCONNECT;
5663         } else {
5664                 pci_set_master(pdev);
5665                 pci_restore_state(pdev);
5666
5667                 pci_enable_wake(pdev, PCI_D3hot, 0);
5668                 pci_enable_wake(pdev, PCI_D3cold, 0);
5669
5670                 igb_reset(adapter);
5671                 wr32(E1000_WUS, ~0);
5672                 result = PCI_ERS_RESULT_RECOVERED;
5673         }
5674
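        /* Clear the AER uncorrectable error status recorded for this device;
         * a failure here is only logged and does not change the result. */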
5675         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5676         if (err) {
5677                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5678                         "failed 0x%0x\n", err);
5679                 /* non-fatal, continue */
5680         }
5681
5682         return result;
5683 }
5684
5685 /**
5686  * igb_io_resume - called when traffic can start flowing again.
5687  * @pdev: Pointer to PCI device
5688  *
5689  * This callback is called when the error recovery driver tells us that
5690  * it's OK to resume normal operation. Implementation resembles the
5691  * second half of the igb_resume routine.
5692  */
5693 static void igb_io_resume(struct pci_dev *pdev)
5694 {
5695         struct net_device *netdev = pci_get_drvdata(pdev);
5696         struct igb_adapter *adapter = netdev_priv(netdev);
5697
5698         if (netif_running(netdev)) {
5699                 if (igb_up(adapter)) {
5700                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5701                         return;
5702                 }
5703         }
5704
5705         netif_device_attach(netdev);
5706
5707         /* let the f/w know that the h/w is now under the control of the
5708          * driver. */
5709         igb_get_hw_control(adapter);
5710 }
5711
5712 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5713                              u8 qsel)
5714 {
5715         u32 rar_low, rar_high;
5716         struct e1000_hw *hw = &adapter->hw;
5717
5718         /* HW expects these in little endian so we reverse the byte order
5719          * from network order (big endian) to little endian
5720          */
5721         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5722                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5723         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5724
5725         /* Indicate to hardware the Address is Valid. */
5726         rar_high |= E1000_RAH_AV;
5727
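        /* Select the destination pool/queue: the 82575 encodes the queue
         * select as a value in the RAH pool field, while later parts set one
         * bit per pool. */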
5728         if (hw->mac.type == e1000_82575)
5729                 rar_high |= E1000_RAH_POOL_1 * qsel;
5730         else
5731                 rar_high |= E1000_RAH_POOL_1 << qsel;
5732
5733         wr32(E1000_RAL(index), rar_low);
5734         wrfl();
5735         wr32(E1000_RAH(index), rar_high);
5736         wrfl();
5737 }
5738
5739 static int igb_set_vf_mac(struct igb_adapter *adapter,
5740                           int vf, unsigned char *mac_addr)
5741 {
5742         struct e1000_hw *hw = &adapter->hw;
5743         /* VF MAC addresses start at the end of the receive addresses and move
5744          * toward the first, so a collision should not be possible */
5745         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5746
5747         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5748
5749         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5750
5751         return 0;
5752 }
5753
5754 static void igb_vmm_control(struct igb_adapter *adapter)
5755 {
5756         struct e1000_hw *hw = &adapter->hw;
5757         u32 reg;
5758
5759         /* replication is not supported for 82575 */
5760         if (hw->mac.type == e1000_82575)
5761                 return;
5762
5763         /* enable vlan tag stripping for replicated packets */
5764         reg = rd32(E1000_RPLOLR);
5765         reg |= E1000_RPLOLR_STRVLAN;
5766         wr32(E1000_RPLOLR, reg);
5767
5768         /* notify HW that the MAC is adding vlan tags */
5769         reg = rd32(E1000_DTXCTL);
5770         reg |= E1000_DTXCTL_VLAN_ADDED;
5771         wr32(E1000_DTXCTL, reg);
5772
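        /* Loopback and replication between pools are only useful when VFs
         * have actually been allocated. */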
5773         if (adapter->vfs_allocated_count) {
5774                 igb_vmdq_set_loopback_pf(hw, true);
5775                 igb_vmdq_set_replication_pf(hw, true);
5776         } else {
5777                 igb_vmdq_set_loopback_pf(hw, false);
5778                 igb_vmdq_set_replication_pf(hw, false);
5779         }
5780 }
5781
5782 /* igb_main.c */