igb: open up SCTP checksum offloads to all MACs 82576 and newer
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132 #ifdef CONFIG_PM
133 static int igb_suspend(struct pci_dev *, pm_message_t);
134 static int igb_resume(struct pci_dev *);
135 #endif
136 static void igb_shutdown(struct pci_dev *);
137 #ifdef CONFIG_IGB_DCA
138 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139 static struct notifier_block dca_notifier = {
140         .notifier_call  = igb_notify_dca,
141         .next           = NULL,
142         .priority       = 0
143 };
144 #endif
145 #ifdef CONFIG_NET_POLL_CONTROLLER
146 /* for netdump / net console */
147 static void igb_netpoll(struct net_device *);
148 #endif
149 #ifdef CONFIG_PCI_IOV
150 static unsigned int max_vfs = 0;
151 module_param(max_vfs, uint, 0);
152 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
153                  "per physical function");
154 #endif /* CONFIG_PCI_IOV */
155
156 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157                      pci_channel_state_t);
158 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159 static void igb_io_resume(struct pci_dev *);
160
161 static struct pci_error_handlers igb_err_handler = {
162         .error_detected = igb_io_error_detected,
163         .slot_reset = igb_io_slot_reset,
164         .resume = igb_io_resume,
165 };
166
167
168 static struct pci_driver igb_driver = {
169         .name     = igb_driver_name,
170         .id_table = igb_pci_tbl,
171         .probe    = igb_probe,
172         .remove   = __devexit_p(igb_remove),
173 #ifdef CONFIG_PM
174         /* Power Management Hooks */
175         .suspend  = igb_suspend,
176         .resume   = igb_resume,
177 #endif
178         .shutdown = igb_shutdown,
179         .err_handler = &igb_err_handler
180 };
181
182 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184 MODULE_LICENSE("GPL");
185 MODULE_VERSION(DRV_VERSION);
186
187 /**
188  * igb_read_clock - read raw cycle counter (to be used by time counter)
189  */
190 static cycle_t igb_read_clock(const struct cyclecounter *tc)
191 {
192         struct igb_adapter *adapter =
193                 container_of(tc, struct igb_adapter, cycles);
194         struct e1000_hw *hw = &adapter->hw;
195         u64 stamp = 0;
196         int shift = 0;
197
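        /*
         * SYSTIML is read before SYSTIMH; the hardware is expected to latch
         * the full 64-bit SYSTIM value on the low-register read, so the two
         * reads below yield a coherent timestamp.
         */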
198         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200         return stamp;
201 }
202
203 #ifdef DEBUG
204 /**
205  * igb_get_hw_dev_name - return device name string
206  * used by hardware layer to print debugging information
207  **/
208 char *igb_get_hw_dev_name(struct e1000_hw *hw)
209 {
210         struct igb_adapter *adapter = hw->back;
211         return adapter->netdev->name;
212 }
213
214 /**
215  * igb_get_time_str - format current NIC and system time as string
216  */
217 static char *igb_get_time_str(struct igb_adapter *adapter,
218                               char buffer[160])
219 {
220         cycle_t hw = adapter->cycles.read(&adapter->cycles);
221         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222         struct timespec sys;
223         struct timespec delta;
224         getnstimeofday(&sys);
225
226         delta = timespec_sub(nic, sys);
227
228         sprintf(buffer,
229                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230                 hw,
231                 (long)nic.tv_sec, nic.tv_nsec,
232                 (long)sys.tv_sec, sys.tv_nsec,
233                 (long)delta.tv_sec, delta.tv_nsec);
234
235         return buffer;
236 }
237 #endif
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
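/*
 * Q_IDX_82576(i) interleaves ring i between the low and high halves of the
 * 82576 queue register space: i = 0, 1, 2, 3, ... maps to 0, 8, 1, 9, ...
 * which matches the per-VF queue layout described in
 * igb_cache_ring_register() below.
 */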
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->num_rx_queues; i++)
300                                 adapter->rx_ring[i].reg_idx = rbase_offset +
301                                                               Q_IDX_82576(i);
302                         for (; j < adapter->num_tx_queues; j++)
303                                 adapter->tx_ring[j].reg_idx = rbase_offset +
304                                                               Q_IDX_82576(j);
305                 }
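                /* fall through: with no VFs the 82576 uses the same linear
                 * mapping as the 82575; with VFs the loops above have already
                 * consumed every queue, so the loops below do nothing */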
306         case e1000_82575:
307         default:
308                 for (; i < adapter->num_rx_queues; i++)
309                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
310                 for (; j < adapter->num_tx_queues; j++)
311                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
312                 break;
313         }
314 }
315
316 static void igb_free_queues(struct igb_adapter *adapter)
317 {
318         kfree(adapter->tx_ring);
319         kfree(adapter->rx_ring);
320
321         adapter->tx_ring = NULL;
322         adapter->rx_ring = NULL;
323
324         adapter->num_rx_queues = 0;
325         adapter->num_tx_queues = 0;
326 }
327
328 /**
329  * igb_alloc_queues - Allocate memory for all rings
330  * @adapter: board private structure to initialize
331  *
332  * We allocate one ring per queue at run-time since we don't know the
333  * number of queues at compile-time.
334  **/
335 static int igb_alloc_queues(struct igb_adapter *adapter)
336 {
337         int i;
338
339         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340                                    sizeof(struct igb_ring), GFP_KERNEL);
341         if (!adapter->tx_ring)
342                 goto err;
343
344         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345                                    sizeof(struct igb_ring), GFP_KERNEL);
346         if (!adapter->rx_ring)
347                 goto err;
348
349         for (i = 0; i < adapter->num_tx_queues; i++) {
350                 struct igb_ring *ring = &(adapter->tx_ring[i]);
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 struct igb_ring *ring = &(adapter->rx_ring[i]);
362                 ring->count = adapter->rx_ring_count;
363                 ring->queue_index = i;
364                 ring->pdev = adapter->pdev;
365                 ring->netdev = adapter->netdev;
366                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368                 /* set flag indicating ring supports SCTP checksum offload */
369                 if (adapter->hw.mac.type >= e1000_82576)
370                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371         }
372
373         igb_cache_ring_register(adapter);
374
375         return 0;
376
377 err:
378         igb_free_queues(adapter);
379
380         return -ENOMEM;
381 }
382
383 #define IGB_N0_QUEUE -1
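/* IGB_N0_QUEUE marks a q_vector with no Rx or Tx ring attached */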
384 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385 {
386         u32 msixbm = 0;
387         struct igb_adapter *adapter = q_vector->adapter;
388         struct e1000_hw *hw = &adapter->hw;
389         u32 ivar, index;
390         int rx_queue = IGB_N0_QUEUE;
391         int tx_queue = IGB_N0_QUEUE;
392
393         if (q_vector->rx_ring)
394                 rx_queue = q_vector->rx_ring->reg_idx;
395         if (q_vector->tx_ring)
396                 tx_queue = q_vector->tx_ring->reg_idx;
397
398         switch (hw->mac.type) {
399         case e1000_82575:
400                 /* The 82575 assigns vectors using a bitmask, which matches the
401                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
402                    or more queues to a vector, we write the appropriate bits
403                    into the MSIXBM register for that vector. */
404                 if (rx_queue > IGB_N0_QUEUE)
405                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406                 if (tx_queue > IGB_N0_QUEUE)
407                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409                 q_vector->eims_value = msixbm;
410                 break;
411         case e1000_82576:
412                 /* 82576 uses a table-based method for assigning vectors.
413                    Each queue has a single entry in the table to which we write
414                    a vector number along with a "valid" bit.  Sadly, the layout
415                    of the table is somewhat counterintuitive. */
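                /* Summary of the layout, derived from the masks below: each
                 * IVAR0 entry n carries four assignments, one per byte;
                 * byte 0 is Rx queue n, byte 1 is Tx queue n, byte 2 is
                 * Rx queue n+8 and byte 3 is Tx queue n+8, each OR'd with
                 * the "valid" bit. */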
416                 if (rx_queue > IGB_N0_QUEUE) {
417                         index = (rx_queue & 0x7);
418                         ivar = array_rd32(E1000_IVAR0, index);
419                         if (rx_queue < 8) {
420                                 /* vector goes into low byte of register */
421                                 ivar = ivar & 0xFFFFFF00;
422                                 ivar |= msix_vector | E1000_IVAR_VALID;
423                         } else {
424                                 /* vector goes into third byte of register */
425                                 ivar = ivar & 0xFF00FFFF;
426                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427                         }
428                         array_wr32(E1000_IVAR0, index, ivar);
429                 }
430                 if (tx_queue > IGB_N0_QUEUE) {
431                         index = (tx_queue & 0x7);
432                         ivar = array_rd32(E1000_IVAR0, index);
433                         if (tx_queue < 8) {
434                                 /* vector goes into second byte of register */
435                                 ivar = ivar & 0xFFFF00FF;
436                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437                         } else {
438                                 /* vector goes into high byte of register */
439                                 ivar = ivar & 0x00FFFFFF;
440                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441                         }
442                         array_wr32(E1000_IVAR0, index, ivar);
443                 }
444                 q_vector->eims_value = 1 << msix_vector;
445                 break;
446         default:
447                 BUG();
448                 break;
449         }
450 }
451
452 /**
453  * igb_configure_msix - Configure MSI-X hardware
454  *
455  * igb_configure_msix sets up the hardware to properly
456  * generate MSI-X interrupts.
457  **/
458 static void igb_configure_msix(struct igb_adapter *adapter)
459 {
460         u32 tmp;
461         int i, vector = 0;
462         struct e1000_hw *hw = &adapter->hw;
463
464         adapter->eims_enable_mask = 0;
465
466         /* set vector for other causes, i.e. link changes */
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 tmp = rd32(E1000_CTRL_EXT);
470                 /* enable MSI-X PBA support */
471                 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473                 /* Auto-Mask interrupts upon ICR read. */
474                 tmp |= E1000_CTRL_EXT_EIAME;
475                 tmp |= E1000_CTRL_EXT_IRCA;
476
477                 wr32(E1000_CTRL_EXT, tmp);
478
479                 /* enable msix_other interrupt */
480                 array_wr32(E1000_MSIXBM(0), vector++,
481                                       E1000_EIMS_OTHER);
482                 adapter->eims_other = E1000_EIMS_OTHER;
483
484                 break;
485
486         case e1000_82576:
487                 /* Turn on MSI-X capability first, or our settings
488                  * won't stick.  And it will take days to debug. */
489                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491                                 E1000_GPIE_NSICR);
492
493                 /* enable msix_other interrupt */
494                 adapter->eims_other = 1 << vector;
495                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497                 wr32(E1000_IVAR_MISC, tmp);
498                 break;
499         default:
500                 /* do nothing, since nothing else supports MSI-X */
501                 break;
502         } /* switch (hw->mac.type) */
503
504         adapter->eims_enable_mask |= adapter->eims_other;
505
506         for (i = 0; i < adapter->num_q_vectors; i++) {
507                 struct igb_q_vector *q_vector = adapter->q_vector[i];
508                 igb_assign_vector(q_vector, vector++);
509                 adapter->eims_enable_mask |= q_vector->eims_value;
510         }
511
512         wrfl();
513 }
514
515 /**
516  * igb_request_msix - Initialize MSI-X interrupts
517  *
518  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519  * kernel.
520  **/
521 static int igb_request_msix(struct igb_adapter *adapter)
522 {
523         struct net_device *netdev = adapter->netdev;
524         struct e1000_hw *hw = &adapter->hw;
525         int i, err = 0, vector = 0;
526
527         err = request_irq(adapter->msix_entries[vector].vector,
528                           &igb_msix_other, 0, netdev->name, adapter);
529         if (err)
530                 goto out;
531         vector++;
532
533         for (i = 0; i < adapter->num_q_vectors; i++) {
534                 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538                 if (q_vector->rx_ring && q_vector->tx_ring)
539                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540                                 q_vector->rx_ring->queue_index);
541                 else if (q_vector->tx_ring)
542                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543                                 q_vector->tx_ring->queue_index);
544                 else if (q_vector->rx_ring)
545                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546                                 q_vector->rx_ring->queue_index);
547                 else
548                         sprintf(q_vector->name, "%s-unused", netdev->name);
549
550                 err = request_irq(adapter->msix_entries[vector].vector,
551                                   &igb_msix_ring, 0, q_vector->name,
552                                   q_vector);
553                 if (err)
554                         goto out;
555                 vector++;
556         }
557
558         igb_configure_msix(adapter);
559         return 0;
560 out:
561         return err;
562 }
563
564 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565 {
566         if (adapter->msix_entries) {
567                 pci_disable_msix(adapter->pdev);
568                 kfree(adapter->msix_entries);
569                 adapter->msix_entries = NULL;
570         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571                 pci_disable_msi(adapter->pdev);
572         }
573 }
574
575 /**
576  * igb_free_q_vectors - Free memory allocated for interrupt vectors
577  * @adapter: board private structure to initialize
578  *
579  * This function frees the memory allocated to the q_vectors.  In addition if
580  * NAPI is enabled it will delete any references to the NAPI struct prior
581  * to freeing the q_vector.
582  **/
583 static void igb_free_q_vectors(struct igb_adapter *adapter)
584 {
585         int v_idx;
586
587         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589                 adapter->q_vector[v_idx] = NULL;
590                 netif_napi_del(&q_vector->napi);
591                 kfree(q_vector);
592         }
593         adapter->num_q_vectors = 0;
594 }
595
596 /**
597  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
598  *
599  * This function resets the device so that it has 0 rx queues, tx queues, and
600  * MSI-X interrupts allocated.
601  */
602 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
603 {
604         igb_free_queues(adapter);
605         igb_free_q_vectors(adapter);
606         igb_reset_interrupt_capability(adapter);
607 }
608
609 /**
610  * igb_set_interrupt_capability - set MSI or MSI-X if supported
611  *
612  * Attempt to configure interrupts using the best available
613  * capabilities of the hardware and kernel.
614  **/
615 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616 {
617         int err;
618         int numvecs, i;
619
620         /* Number of supported queues. */
621         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
622         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
623
624         /* start with one vector for every rx queue */
625         numvecs = adapter->num_rx_queues;
626
627         /* if tx handler is separate, add 1 for every tx queue */
628         numvecs += adapter->num_tx_queues;
629
630         /* store the number of vectors reserved for queues */
631         adapter->num_q_vectors = numvecs;
632
633         /* add 1 vector for link status interrupts */
634         numvecs++;
635         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
636                                         GFP_KERNEL);
637         if (!adapter->msix_entries)
638                 goto msi_only;
639
640         for (i = 0; i < numvecs; i++)
641                 adapter->msix_entries[i].entry = i;
642
643         err = pci_enable_msix(adapter->pdev,
644                               adapter->msix_entries,
645                               numvecs);
646         if (err == 0)
647                 goto out;
648
649         igb_reset_interrupt_capability(adapter);
650
651         /* If we can't do MSI-X, try MSI */
652 msi_only:
653 #ifdef CONFIG_PCI_IOV
654         /* disable SR-IOV for non MSI-X configurations */
655         if (adapter->vf_data) {
656                 struct e1000_hw *hw = &adapter->hw;
657                 /* disable iov and allow time for transactions to clear */
658                 pci_disable_sriov(adapter->pdev);
659                 msleep(500);
660
661                 kfree(adapter->vf_data);
662                 adapter->vf_data = NULL;
663                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
664                 msleep(100);
665                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
666         }
667 #endif
668         adapter->vfs_allocated_count = 0;
669         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
670         adapter->num_rx_queues = 1;
671         adapter->num_tx_queues = 1;
672         adapter->num_q_vectors = 1;
673         if (!pci_enable_msi(adapter->pdev))
674                 adapter->flags |= IGB_FLAG_HAS_MSI;
675 out:
676         /* Notify the stack of the (possibly) reduced Tx Queue count. */
677         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
678         return;
679 }
680
681 /**
682  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
683  * @adapter: board private structure to initialize
684  *
685  * We allocate one q_vector per queue interrupt.  If allocation fails we
686  * return -ENOMEM.
687  **/
688 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
689 {
690         struct igb_q_vector *q_vector;
691         struct e1000_hw *hw = &adapter->hw;
692         int v_idx;
693
694         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
695                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
696                 if (!q_vector)
697                         goto err_out;
698                 q_vector->adapter = adapter;
699                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
700                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
701                 q_vector->itr_val = IGB_START_ITR;
702                 q_vector->set_itr = 1;
703                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
704                 adapter->q_vector[v_idx] = q_vector;
705         }
706         return 0;
707
708 err_out:
709         while (v_idx) {
710                 v_idx--;
711                 q_vector = adapter->q_vector[v_idx];
712                 netif_napi_del(&q_vector->napi);
713                 kfree(q_vector);
714                 adapter->q_vector[v_idx] = NULL;
715         }
716         return -ENOMEM;
717 }
718
719 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
720                                       int ring_idx, int v_idx)
721 {
722         struct igb_q_vector *q_vector;
723
724         q_vector = adapter->q_vector[v_idx];
725         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
726         q_vector->rx_ring->q_vector = q_vector;
727         q_vector->itr_val = adapter->rx_itr_setting;
728         if (q_vector->itr_val && q_vector->itr_val <= 3)
729                 q_vector->itr_val = IGB_START_ITR;
730 }
731
732 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
733                                       int ring_idx, int v_idx)
734 {
735         struct igb_q_vector *q_vector;
736
737         q_vector = adapter->q_vector[v_idx];
738         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
739         q_vector->tx_ring->q_vector = q_vector;
740         q_vector->itr_val = adapter->tx_itr_setting;
741         if (q_vector->itr_val && q_vector->itr_val <= 3)
742                 q_vector->itr_val = IGB_START_ITR;
743 }
744
745 /**
746  * igb_map_ring_to_vector - maps allocated queues to vectors
747  *
748  * This function maps the recently allocated queues to vectors.
749  **/
750 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
751 {
752         int i;
753         int v_idx = 0;
754
755         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
756             (adapter->num_q_vectors < adapter->num_tx_queues))
757                 return -ENOMEM;
758
759         if (adapter->num_q_vectors >=
760             (adapter->num_rx_queues + adapter->num_tx_queues)) {
761                 for (i = 0; i < adapter->num_rx_queues; i++)
762                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
763                 for (i = 0; i < adapter->num_tx_queues; i++)
764                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
765         } else {
766                 for (i = 0; i < adapter->num_rx_queues; i++) {
767                         if (i < adapter->num_tx_queues)
768                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
769                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
770                 }
771                 for (; i < adapter->num_tx_queues; i++)
772                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
773         }
774         return 0;
775 }
776
777 /**
778  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
779  *
780  * This function initializes the interrupts and allocates all of the queues.
781  **/
782 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
783 {
784         struct pci_dev *pdev = adapter->pdev;
785         int err;
786
787         igb_set_interrupt_capability(adapter);
788
789         err = igb_alloc_q_vectors(adapter);
790         if (err) {
791                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
792                 goto err_alloc_q_vectors;
793         }
794
795         err = igb_alloc_queues(adapter);
796         if (err) {
797                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
798                 goto err_alloc_queues;
799         }
800
801         err = igb_map_ring_to_vector(adapter);
802         if (err) {
803                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
804                 goto err_map_queues;
805         }
806
807
808         return 0;
809 err_map_queues:
810         igb_free_queues(adapter);
811 err_alloc_queues:
812         igb_free_q_vectors(adapter);
813 err_alloc_q_vectors:
814         igb_reset_interrupt_capability(adapter);
815         return err;
816 }
817
818 /**
819  * igb_request_irq - initialize interrupts
820  *
821  * Attempts to configure interrupts using the best available
822  * capabilities of the hardware and kernel.
823  **/
824 static int igb_request_irq(struct igb_adapter *adapter)
825 {
826         struct net_device *netdev = adapter->netdev;
827         struct pci_dev *pdev = adapter->pdev;
828         struct e1000_hw *hw = &adapter->hw;
829         int err = 0;
830
831         if (adapter->msix_entries) {
832                 err = igb_request_msix(adapter);
833                 if (!err)
834                         goto request_done;
835                 /* fall back to MSI */
836                 igb_clear_interrupt_scheme(adapter);
837                 if (!pci_enable_msi(adapter->pdev))
838                         adapter->flags |= IGB_FLAG_HAS_MSI;
839                 igb_free_all_tx_resources(adapter);
840                 igb_free_all_rx_resources(adapter);
841                 adapter->num_tx_queues = 1;
842                 adapter->num_rx_queues = 1;
843                 adapter->num_q_vectors = 1;
844                 err = igb_alloc_q_vectors(adapter);
845                 if (err) {
846                         dev_err(&pdev->dev,
847                                 "Unable to allocate memory for vectors\n");
848                         goto request_done;
849                 }
850                 err = igb_alloc_queues(adapter);
851                 if (err) {
852                         dev_err(&pdev->dev,
853                                 "Unable to allocate memory for queues\n");
854                         igb_free_q_vectors(adapter);
855                         goto request_done;
856                 }
857                 igb_setup_all_tx_resources(adapter);
858                 igb_setup_all_rx_resources(adapter);
859         } else {
860                 switch (hw->mac.type) {
861                 case e1000_82575:
862                         wr32(E1000_MSIXBM(0),
863                              (E1000_EICR_RX_QUEUE0 |
864                               E1000_EICR_TX_QUEUE0 |
865                               E1000_EIMS_OTHER));
866                         break;
867                 case e1000_82576:
868                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
869                         break;
870                 default:
871                         break;
872                 }
873         }
874
875         if (adapter->flags & IGB_FLAG_HAS_MSI) {
876                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
877                                   netdev->name, adapter);
878                 if (!err)
879                         goto request_done;
880
881                 /* fall back to legacy interrupts */
882                 igb_reset_interrupt_capability(adapter);
883                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
884         }
885
886         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
887                           netdev->name, adapter);
888
889         if (err)
890                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
891                         err);
892
893 request_done:
894         return err;
895 }
896
897 static void igb_free_irq(struct igb_adapter *adapter)
898 {
899         if (adapter->msix_entries) {
900                 int vector = 0, i;
901
902                 free_irq(adapter->msix_entries[vector++].vector, adapter);
903
904                 for (i = 0; i < adapter->num_q_vectors; i++) {
905                         struct igb_q_vector *q_vector = adapter->q_vector[i];
906                         free_irq(adapter->msix_entries[vector++].vector,
907                                  q_vector);
908                 }
909         } else {
910                 free_irq(adapter->pdev->irq, adapter);
911         }
912 }
913
914 /**
915  * igb_irq_disable - Mask off interrupt generation on the NIC
916  * @adapter: board private structure
917  **/
918 static void igb_irq_disable(struct igb_adapter *adapter)
919 {
920         struct e1000_hw *hw = &adapter->hw;
921
922         /*
923          * we need to be careful when disabling interrupts.  The VFs are also
924          * mapped into these registers and so clearing the bits can cause
925          * issues for the VF drivers, so we only clear the bits we set
926          */
927         if (adapter->msix_entries) {
928                 u32 regval = rd32(E1000_EIAM);
929                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
930                 wr32(E1000_EIMC, adapter->eims_enable_mask);
931                 regval = rd32(E1000_EIAC);
932                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
933         }
934
935         wr32(E1000_IAM, 0);
936         wr32(E1000_IMC, ~0);
937         wrfl();
938         synchronize_irq(adapter->pdev->irq);
939 }
940
941 /**
942  * igb_irq_enable - Enable default interrupt generation settings
943  * @adapter: board private structure
944  **/
945 static void igb_irq_enable(struct igb_adapter *adapter)
946 {
947         struct e1000_hw *hw = &adapter->hw;
948
949         if (adapter->msix_entries) {
950                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
951                 u32 regval = rd32(E1000_EIAC);
952                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
953                 regval = rd32(E1000_EIAM);
954                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
955                 wr32(E1000_EIMS, adapter->eims_enable_mask);
956                 if (adapter->vfs_allocated_count) {
957                         wr32(E1000_MBVFIMR, 0xFF);
958                         ims |= E1000_IMS_VMMB;
959                 }
960                 wr32(E1000_IMS, ims);
961         } else {
962                 wr32(E1000_IMS, IMS_ENABLE_MASK);
963                 wr32(E1000_IAM, IMS_ENABLE_MASK);
964         }
965 }
966
967 static void igb_update_mng_vlan(struct igb_adapter *adapter)
968 {
969         struct e1000_hw *hw = &adapter->hw;
970         u16 vid = adapter->hw.mng_cookie.vlan_id;
971         u16 old_vid = adapter->mng_vlan_id;
972
973         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
974                 /* add VID to filter table */
975                 igb_vfta_set(hw, vid, true);
976                 adapter->mng_vlan_id = vid;
977         } else {
978                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
979         }
980
981         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
982             (vid != old_vid) &&
983             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
984                 /* remove VID from filter table */
985                 igb_vfta_set(hw, old_vid, false);
986         }
987 }
988
989 /**
990  * igb_release_hw_control - release control of the h/w to f/w
991  * @adapter: address of board private structure
992  *
993  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
994  * For ASF and Pass Through versions of f/w this means that the
995  * driver is no longer loaded.
996  *
997  **/
998 static void igb_release_hw_control(struct igb_adapter *adapter)
999 {
1000         struct e1000_hw *hw = &adapter->hw;
1001         u32 ctrl_ext;
1002
1003         /* Let firmware take over control of h/w */
1004         ctrl_ext = rd32(E1000_CTRL_EXT);
1005         wr32(E1000_CTRL_EXT,
1006                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1007 }
1008
1009
1010 /**
1011  * igb_get_hw_control - get control of the h/w from f/w
1012  * @adapter: address of board private structure
1013  *
1014  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1015  * For ASF and Pass Through versions of f/w this means that
1016  * the driver is loaded.
1017  *
1018  **/
1019 static void igb_get_hw_control(struct igb_adapter *adapter)
1020 {
1021         struct e1000_hw *hw = &adapter->hw;
1022         u32 ctrl_ext;
1023
1024         /* Let firmware know the driver has taken over */
1025         ctrl_ext = rd32(E1000_CTRL_EXT);
1026         wr32(E1000_CTRL_EXT,
1027                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1028 }
1029
1030 /**
1031  * igb_configure - configure the hardware for RX and TX
1032  * @adapter: private board structure
1033  **/
1034 static void igb_configure(struct igb_adapter *adapter)
1035 {
1036         struct net_device *netdev = adapter->netdev;
1037         int i;
1038
1039         igb_get_hw_control(adapter);
1040         igb_set_rx_mode(netdev);
1041
1042         igb_restore_vlan(adapter);
1043
1044         igb_setup_tctl(adapter);
1045         igb_setup_mrqc(adapter);
1046         igb_setup_rctl(adapter);
1047
1048         igb_configure_tx(adapter);
1049         igb_configure_rx(adapter);
1050
1051         igb_rx_fifo_flush_82575(&adapter->hw);
1052
1053         /* call igb_desc_unused which always leaves
1054          * at least 1 descriptor unused to make sure
1055          * next_to_use != next_to_clean */
1056         for (i = 0; i < adapter->num_rx_queues; i++) {
1057                 struct igb_ring *ring = &adapter->rx_ring[i];
1058                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1059         }
1060
1061
1062         adapter->tx_queue_len = netdev->tx_queue_len;
1063 }
1064
1065
1066 /**
1067  * igb_up - Open the interface and prepare it to handle traffic
1068  * @adapter: board private structure
1069  **/
1070
1071 int igb_up(struct igb_adapter *adapter)
1072 {
1073         struct e1000_hw *hw = &adapter->hw;
1074         int i;
1075
1076         /* hardware has been reset, we need to reload some things */
1077         igb_configure(adapter);
1078
1079         clear_bit(__IGB_DOWN, &adapter->state);
1080
1081         for (i = 0; i < adapter->num_q_vectors; i++) {
1082                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1083                 napi_enable(&q_vector->napi);
1084         }
1085         if (adapter->msix_entries)
1086                 igb_configure_msix(adapter);
1087
1088         /* Clear any pending interrupts. */
1089         rd32(E1000_ICR);
1090         igb_irq_enable(adapter);
1091
1092         /* notify VFs that reset has been completed */
1093         if (adapter->vfs_allocated_count) {
1094                 u32 reg_data = rd32(E1000_CTRL_EXT);
1095                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1096                 wr32(E1000_CTRL_EXT, reg_data);
1097         }
1098
1099         netif_tx_start_all_queues(adapter->netdev);
1100
1101         /* start the watchdog. */
1102         hw->mac.get_link_status = 1;
1103         schedule_work(&adapter->watchdog_task);
1104
1105         return 0;
1106 }
1107
1108 void igb_down(struct igb_adapter *adapter)
1109 {
1110         struct net_device *netdev = adapter->netdev;
1111         struct e1000_hw *hw = &adapter->hw;
1112         u32 tctl, rctl;
1113         int i;
1114
1115         /* signal that we're down so the interrupt handler does not
1116          * reschedule our watchdog timer */
1117         set_bit(__IGB_DOWN, &adapter->state);
1118
1119         /* disable receives in the hardware */
1120         rctl = rd32(E1000_RCTL);
1121         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1122         /* flush and sleep below */
1123
1124         netif_tx_stop_all_queues(netdev);
1125
1126         /* disable transmits in the hardware */
1127         tctl = rd32(E1000_TCTL);
1128         tctl &= ~E1000_TCTL_EN;
1129         wr32(E1000_TCTL, tctl);
1130         /* flush both disables and wait for them to finish */
1131         wrfl();
1132         msleep(10);
1133
1134         for (i = 0; i < adapter->num_q_vectors; i++) {
1135                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1136                 napi_disable(&q_vector->napi);
1137         }
1138
1139         igb_irq_disable(adapter);
1140
1141         del_timer_sync(&adapter->watchdog_timer);
1142         del_timer_sync(&adapter->phy_info_timer);
1143
1144         netdev->tx_queue_len = adapter->tx_queue_len;
1145         netif_carrier_off(netdev);
1146
1147         /* record the stats before reset */
1148         igb_update_stats(adapter);
1149
1150         adapter->link_speed = 0;
1151         adapter->link_duplex = 0;
1152
1153         if (!pci_channel_offline(adapter->pdev))
1154                 igb_reset(adapter);
1155         igb_clean_all_tx_rings(adapter);
1156         igb_clean_all_rx_rings(adapter);
1157 #ifdef CONFIG_IGB_DCA
1158
1159         /* since we reset the hardware, DCA settings were cleared */
1160         igb_setup_dca(adapter);
1161 #endif
1162 }
1163
1164 void igb_reinit_locked(struct igb_adapter *adapter)
1165 {
1166         WARN_ON(in_interrupt());
1167         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1168                 msleep(1);
1169         igb_down(adapter);
1170         igb_up(adapter);
1171         clear_bit(__IGB_RESETTING, &adapter->state);
1172 }
1173
1174 void igb_reset(struct igb_adapter *adapter)
1175 {
1176         struct pci_dev *pdev = adapter->pdev;
1177         struct e1000_hw *hw = &adapter->hw;
1178         struct e1000_mac_info *mac = &hw->mac;
1179         struct e1000_fc_info *fc = &hw->fc;
1180         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1181         u16 hwm;
1182
1183         /* Repartition the PBA for MTUs greater than 9k.
1184          * To take effect, CTRL.RST is required.
1185          */
1186         switch (mac->type) {
1187         case e1000_82576:
1188                 pba = rd32(E1000_RXPBS);
1189                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1190                 break;
1191         case e1000_82575:
1192         default:
1193                 pba = E1000_PBA_34K;
1194                 break;
1195         }
1196
1197         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1198             (mac->type < e1000_82576)) {
1199                 /* adjust PBA for jumbo frames */
1200                 wr32(E1000_PBA, pba);
1201
1202                 /* To maintain wire speed transmits, the Tx FIFO should be
1203                  * large enough to accommodate two full transmit packets,
1204                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1205                  * the Rx FIFO should be large enough to accommodate at least
1206                  * one full receive packet and is similarly rounded up and
1207                  * expressed in KB. */
1208                 pba = rd32(E1000_PBA);
1209                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1210                 tx_space = pba >> 16;
1211                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1212                 pba &= 0xffff;
1213                 /* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1214                  * but don't include the Ethernet FCS because hardware appends it */
1215                 min_tx_space = (adapter->max_frame_size +
1216                                 sizeof(union e1000_adv_tx_desc) -
1217                                 ETH_FCS_LEN) * 2;
1218                 min_tx_space = ALIGN(min_tx_space, 1024);
1219                 min_tx_space >>= 10;
1220                 /* software strips receive CRC, so leave room for it */
1221                 min_rx_space = adapter->max_frame_size;
1222                 min_rx_space = ALIGN(min_rx_space, 1024);
1223                 min_rx_space >>= 10;
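                /* Worked example, assuming max_frame_size = MTU + 18 and a
                 * 16-byte advanced Tx descriptor: for a 9000-byte MTU,
                 * max_frame_size = 9018, so min_tx_space = (9018 + 16 - 4) * 2
                 * = 18060, rounded up to 18432 and shifted to 18 KB, while
                 * min_rx_space = 9018, rounded up to 9216 and shifted to 9 KB.
                 */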
1224
1225                 /* If current Tx allocation is less than the min Tx FIFO size,
1226                  * and the min Tx FIFO size is less than the current Rx FIFO
1227                  * allocation, take space away from current Rx allocation */
1228                 if (tx_space < min_tx_space &&
1229                     ((min_tx_space - tx_space) < pba)) {
1230                         pba = pba - (min_tx_space - tx_space);
1231
1232                         /* if short on rx space, rx wins and must trump tx
1233                          * adjustment */
1234                         if (pba < min_rx_space)
1235                                 pba = min_rx_space;
1236                 }
1237                 wr32(E1000_PBA, pba);
1238         }
1239
1240         /* flow control settings */
1241         /* The high water mark must be low enough to fit one full frame
1242          * (or the size used for early receive) above it in the Rx FIFO.
1243          * Set it to the lower of:
1244          * - 90% of the Rx FIFO size, or
1245          * - the full Rx FIFO size minus one full frame */
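        /* Example, assuming E1000_PBA_34K is 34 KB: on an 82575 with a
         * 1518-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
         * = min(31334, 31780) = 31334, which the 8-byte granularity below
         * rounds down to 31328.
         */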
1246         hwm = min(((pba << 10) * 9 / 10),
1247                         ((pba << 10) - 2 * adapter->max_frame_size));
1248
1249         if (mac->type < e1000_82576) {
1250                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1251                 fc->low_water = fc->high_water - 8;
1252         } else {
1253                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1254                 fc->low_water = fc->high_water - 16;
1255         }
1256         fc->pause_time = 0xFFFF;
1257         fc->send_xon = 1;
1258         fc->current_mode = fc->requested_mode;
1259
1260         /* disable receive for all VFs and wait one second */
1261         if (adapter->vfs_allocated_count) {
1262                 int i;
1263                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1264                         adapter->vf_data[i].flags = 0;
1265
1266                 /* ping all the active vfs to let them know we are going down */
1267                 igb_ping_all_vfs(adapter);
1268
1269                 /* disable transmits and receives */
1270                 wr32(E1000_VFRE, 0);
1271                 wr32(E1000_VFTE, 0);
1272         }
1273
1274         /* Allow time for pending master requests to run */
1275         hw->mac.ops.reset_hw(hw);
1276         wr32(E1000_WUC, 0);
1277
1278         if (hw->mac.ops.init_hw(hw))
1279                 dev_err(&pdev->dev, "Hardware Error\n");
1280
1281         igb_update_mng_vlan(adapter);
1282
1283         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1284         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1285
1286         igb_reset_adaptive(hw);
1287         igb_get_phy_info(hw);
1288 }
1289
1290 static const struct net_device_ops igb_netdev_ops = {
1291         .ndo_open               = igb_open,
1292         .ndo_stop               = igb_close,
1293         .ndo_start_xmit         = igb_xmit_frame_adv,
1294         .ndo_get_stats          = igb_get_stats,
1295         .ndo_set_rx_mode        = igb_set_rx_mode,
1296         .ndo_set_multicast_list = igb_set_rx_mode,
1297         .ndo_set_mac_address    = igb_set_mac,
1298         .ndo_change_mtu         = igb_change_mtu,
1299         .ndo_do_ioctl           = igb_ioctl,
1300         .ndo_tx_timeout         = igb_tx_timeout,
1301         .ndo_validate_addr      = eth_validate_addr,
1302         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1303         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1304         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1305 #ifdef CONFIG_NET_POLL_CONTROLLER
1306         .ndo_poll_controller    = igb_netpoll,
1307 #endif
1308 };
1309
1310 /**
1311  * igb_probe - Device Initialization Routine
1312  * @pdev: PCI device information struct
1313  * @ent: entry in igb_pci_tbl
1314  *
1315  * Returns 0 on success, negative on failure
1316  *
1317  * igb_probe initializes an adapter identified by a pci_dev structure.
1318  * The OS initialization, configuring of the adapter private structure,
1319  * and a hardware reset occur.
1320  **/
1321 static int __devinit igb_probe(struct pci_dev *pdev,
1322                                const struct pci_device_id *ent)
1323 {
1324         struct net_device *netdev;
1325         struct igb_adapter *adapter;
1326         struct e1000_hw *hw;
1327         u16 eeprom_data = 0;
1328         static int global_quad_port_a; /* global quad port a indication */
1329         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1330         unsigned long mmio_start, mmio_len;
1331         int err, pci_using_dac;
1332         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1333         u32 part_num;
1334
1335         err = pci_enable_device_mem(pdev);
1336         if (err)
1337                 return err;
1338
1339         pci_using_dac = 0;
1340         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1341         if (!err) {
1342                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1343                 if (!err)
1344                         pci_using_dac = 1;
1345         } else {
1346                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1347                 if (err) {
1348                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1349                         if (err) {
1350                                 dev_err(&pdev->dev, "No usable DMA "
1351                                         "configuration, aborting\n");
1352                                 goto err_dma;
1353                         }
1354                 }
1355         }
1356
1357         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1358                                            IORESOURCE_MEM),
1359                                            igb_driver_name);
1360         if (err)
1361                 goto err_pci_reg;
1362
1363         pci_enable_pcie_error_reporting(pdev);
1364
1365         pci_set_master(pdev);
1366         pci_save_state(pdev);
1367
1368         err = -ENOMEM;
1369         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1370                                    IGB_ABS_MAX_TX_QUEUES);
1371         if (!netdev)
1372                 goto err_alloc_etherdev;
1373
1374         SET_NETDEV_DEV(netdev, &pdev->dev);
1375
1376         pci_set_drvdata(pdev, netdev);
1377         adapter = netdev_priv(netdev);
1378         adapter->netdev = netdev;
1379         adapter->pdev = pdev;
1380         hw = &adapter->hw;
1381         hw->back = adapter;
1382         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1383
1384         mmio_start = pci_resource_start(pdev, 0);
1385         mmio_len = pci_resource_len(pdev, 0);
1386
1387         err = -EIO;
1388         hw->hw_addr = ioremap(mmio_start, mmio_len);
1389         if (!hw->hw_addr)
1390                 goto err_ioremap;
1391
1392         netdev->netdev_ops = &igb_netdev_ops;
1393         igb_set_ethtool_ops(netdev);
1394         netdev->watchdog_timeo = 5 * HZ;
1395
1396         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1397
1398         netdev->mem_start = mmio_start;
1399         netdev->mem_end = mmio_start + mmio_len;
1400
1401         /* PCI config space info */
1402         hw->vendor_id = pdev->vendor;
1403         hw->device_id = pdev->device;
1404         hw->revision_id = pdev->revision;
1405         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1406         hw->subsystem_device_id = pdev->subsystem_device;
1407
1408         /* Copy the default MAC, PHY and NVM function pointers */
1409         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1410         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1411         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1412         /* Initialize skew-specific constants */
1413         err = ei->get_invariants(hw);
1414         if (err)
1415                 goto err_sw_init;
1416
1417         /* setup the private structure */
1418         err = igb_sw_init(adapter);
1419         if (err)
1420                 goto err_sw_init;
1421
1422         igb_get_bus_info_pcie(hw);
1423
1424         hw->phy.autoneg_wait_to_complete = false;
1425         hw->mac.adaptive_ifs = true;
1426
1427         /* Copper options */
1428         if (hw->phy.media_type == e1000_media_type_copper) {
1429                 hw->phy.mdix = AUTO_ALL_MODES;
1430                 hw->phy.disable_polarity_correction = false;
1431                 hw->phy.ms_type = e1000_ms_hw_default;
1432         }
1433
1434         if (igb_check_reset_block(hw))
1435                 dev_info(&pdev->dev,
1436                         "PHY reset is blocked due to SOL/IDER session.\n");
1437
1438         netdev->features = NETIF_F_SG |
1439                            NETIF_F_IP_CSUM |
1440                            NETIF_F_HW_VLAN_TX |
1441                            NETIF_F_HW_VLAN_RX |
1442                            NETIF_F_HW_VLAN_FILTER;
1443
1444         netdev->features |= NETIF_F_IPV6_CSUM;
1445         netdev->features |= NETIF_F_TSO;
1446         netdev->features |= NETIF_F_TSO6;
1447
1448         netdev->features |= NETIF_F_GRO;
1449
1450         netdev->vlan_features |= NETIF_F_TSO;
1451         netdev->vlan_features |= NETIF_F_TSO6;
1452         netdev->vlan_features |= NETIF_F_IP_CSUM;
1453         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1454         netdev->vlan_features |= NETIF_F_SG;
1455
1456         if (pci_using_dac)
1457                 netdev->features |= NETIF_F_HIGHDMA;
1458
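        /* 82576 and newer MACs can also offload the SCTP CRC32c checksum */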
1459         if (hw->mac.type >= e1000_82576)
1460                 netdev->features |= NETIF_F_SCTP_CSUM;
1461
1462         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1463
1464         /* before reading the NVM, reset the controller to put the device in a
1465          * known good starting state */
1466         hw->mac.ops.reset_hw(hw);
1467
1468         /* make sure the NVM is good */
1469         if (igb_validate_nvm_checksum(hw) < 0) {
1470                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1471                 err = -EIO;
1472                 goto err_eeprom;
1473         }
1474
1475         /* copy the MAC address out of the NVM */
1476         if (hw->mac.ops.read_mac_addr(hw))
1477                 dev_err(&pdev->dev, "NVM Read Error\n");
1478
1479         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1480         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1481
1482         if (!is_valid_ether_addr(netdev->perm_addr)) {
1483                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1484                 err = -EIO;
1485                 goto err_eeprom;
1486         }
1487
1488         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1489                     (unsigned long) adapter);
1490         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1491                     (unsigned long) adapter);
1492
1493         INIT_WORK(&adapter->reset_task, igb_reset_task);
1494         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1495
1496         /* Initialize link properties that are user-changeable */
1497         adapter->fc_autoneg = true;
1498         hw->mac.autoneg = true;
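        /* 0x2f advertises 10/100 half and full duplex plus 1000 full, i.e.
         * every speed/duplex combination except 1000 half */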
1499         hw->phy.autoneg_advertised = 0x2f;
1500
1501         hw->fc.requested_mode = e1000_fc_default;
1502         hw->fc.current_mode = e1000_fc_default;
1503
1504         igb_validate_mdi_setting(hw);
1505
1506         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1507          * enable the ACPI Magic Packet filter
1508          */
1509
1510         if (hw->bus.func == 0)
1511                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1512         else if (hw->bus.func == 1)
1513                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1514
1515         if (eeprom_data & eeprom_apme_mask)
1516                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1517
1518         /* now that we have the eeprom settings, apply the special cases where
1519          * the eeprom may be wrong or the board simply won't support wake on
1520          * lan on a particular port */
1521         switch (pdev->device) {
1522         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1523                 adapter->eeprom_wol = 0;
1524                 break;
1525         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1526         case E1000_DEV_ID_82576_FIBER:
1527         case E1000_DEV_ID_82576_SERDES:
1528                 /* Wake events only supported on port A for dual fiber
1529                  * regardless of eeprom setting */
1530                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1531                         adapter->eeprom_wol = 0;
1532                 break;
1533         case E1000_DEV_ID_82576_QUAD_COPPER:
1534                 /* if quad port adapter, disable WoL on all but port A */
1535                 if (global_quad_port_a != 0)
1536                         adapter->eeprom_wol = 0;
1537                 else
1538                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1539                 /* Reset for multiple quad port adapters */
1540                 if (++global_quad_port_a == 4)
1541                         global_quad_port_a = 0;
1542                 break;
1543         }
1544
1545         /* initialize the wol settings based on the eeprom settings */
1546         adapter->wol = adapter->eeprom_wol;
1547         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1548
1549         /* reset the hardware with the new settings */
1550         igb_reset(adapter);
1551
1552         /* let the f/w know that the h/w is now under the control of the
1553          * driver. */
1554         igb_get_hw_control(adapter);
1555
1556         strcpy(netdev->name, "eth%d");
1557         err = register_netdev(netdev);
1558         if (err)
1559                 goto err_register;
1560
1561         /* carrier off reporting is important to ethtool even BEFORE open */
1562         netif_carrier_off(netdev);
1563
1564 #ifdef CONFIG_IGB_DCA
1565         if (dca_add_requester(&pdev->dev) == 0) {
1566                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1567                 dev_info(&pdev->dev, "DCA enabled\n");
1568                 igb_setup_dca(adapter);
1569         }
1570
1571 #endif
1572
1573         switch (hw->mac.type) {
1574         case e1000_82576:
1575                 /*
1576                  * Initialize hardware timer: we keep it running just in case
1577                  * some program needs it later on.
1578                  */
1579                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1580                 adapter->cycles.read = igb_read_clock;
1581                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1582                 adapter->cycles.mult = 1;
1583                 /*
1584                  * Scale the NIC clock cycle by a large factor so that
1585                  * relatively small clock corrections can be added or
1586                  * subtracted at each clock tick. The drawbacks of a large
1587                  * factor are a) that the clock register overflows more quickly
1588                  * (not such a big deal) and b) that the increment per tick has
1589                  * to fit into 24 bits.  As a result we need to use a shift of
1590                  * 19 so we can fit a value of 16 into the TIMINCA register.
1591                  */
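                /* Worked example: 16 << 19 = 0x800000 still fits in the 24-bit
                 * increment field, while a shift of 20 would already overflow
                 * it (16 << 20 = 0x1000000). */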
1592                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1593                 wr32(E1000_TIMINCA,
1594                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1595                                 (16 << IGB_82576_TSYNC_SHIFT));
1596
1597                 /* Set registers so that rollover occurs soon to test this. */
1598                 wr32(E1000_SYSTIML, 0x00000000);
1599                 wr32(E1000_SYSTIMH, 0xFF800000);
1600                 wrfl();
1601
1602                 timecounter_init(&adapter->clock,
1603                                  &adapter->cycles,
1604                                  ktime_to_ns(ktime_get_real()));
1605                 /*
1606                  * Synchronize our NIC clock against system wall clock. NIC
1607                  * time stamp reading requires ~3us per sample, and each sample
1608                  * was pretty stable even under load, so only 10 samples are
1609                  * required for each offset comparison.
1610                  */
1611                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1612                 adapter->compare.source = &adapter->clock;
1613                 adapter->compare.target = ktime_get_real;
1614                 adapter->compare.num_samples = 10;
1615                 timecompare_update(&adapter->compare, 0);
1616                 break;
1617         case e1000_82575:
1618                 /* 82575 does not support timesync */
1619         default:
1620                 break;
1621         }
1622
1623         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1624         /* print bus type/speed/width info */
1625         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1626                  netdev->name,
1627                  ((hw->bus.speed == e1000_bus_speed_2500)
1628                   ? "2.5Gb/s" : "unknown"),
1629                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1630                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1631                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1632                    "unknown"),
1633                  netdev->dev_addr);
1634
1635         igb_read_part_num(hw, &part_num);
1636         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1637                 (part_num >> 8), (part_num & 0xff));
1638
1639         dev_info(&pdev->dev,
1640                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1641                 adapter->msix_entries ? "MSI-X" :
1642                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1643                 adapter->num_rx_queues, adapter->num_tx_queues);
1644
1645         return 0;
1646
1647 err_register:
1648         igb_release_hw_control(adapter);
1649 err_eeprom:
1650         if (!igb_check_reset_block(hw))
1651                 igb_reset_phy(hw);
1652
1653         if (hw->flash_address)
1654                 iounmap(hw->flash_address);
1655 err_sw_init:
1656         igb_clear_interrupt_scheme(adapter);
1657         iounmap(hw->hw_addr);
1658 err_ioremap:
1659         free_netdev(netdev);
1660 err_alloc_etherdev:
1661         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1662                                      IORESOURCE_MEM));
1663 err_pci_reg:
1664 err_dma:
1665         pci_disable_device(pdev);
1666         return err;
1667 }
1668
1669 /**
1670  * igb_remove - Device Removal Routine
1671  * @pdev: PCI device information struct
1672  *
1673  * igb_remove is called by the PCI subsystem to alert the driver
1674  * that it should release a PCI device.  This could be caused by a
1675  * Hot-Plug event, or because the driver is going to be removed from
1676  * memory.
1677  **/
1678 static void __devexit igb_remove(struct pci_dev *pdev)
1679 {
1680         struct net_device *netdev = pci_get_drvdata(pdev);
1681         struct igb_adapter *adapter = netdev_priv(netdev);
1682         struct e1000_hw *hw = &adapter->hw;
1683
1684         /* flush_scheduled_work() may reschedule our watchdog task, so
1685          * explicitly disable watchdog tasks from being rescheduled  */
1686         set_bit(__IGB_DOWN, &adapter->state);
1687         del_timer_sync(&adapter->watchdog_timer);
1688         del_timer_sync(&adapter->phy_info_timer);
1689
1690         flush_scheduled_work();
1691
1692 #ifdef CONFIG_IGB_DCA
1693         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1694                 dev_info(&pdev->dev, "DCA disabled\n");
1695                 dca_remove_requester(&pdev->dev);
1696                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1697                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1698         }
1699 #endif
1700
1701         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1702          * would have already happened in close and is redundant. */
1703         igb_release_hw_control(adapter);
1704
1705         unregister_netdev(netdev);
1706
1707         if (!igb_check_reset_block(hw))
1708                 igb_reset_phy(hw);
1709
1710         igb_clear_interrupt_scheme(adapter);
1711
1712 #ifdef CONFIG_PCI_IOV
1713         /* reclaim resources allocated to VFs */
1714         if (adapter->vf_data) {
1715                 /* disable iov and allow time for transactions to clear */
1716                 pci_disable_sriov(pdev);
1717                 msleep(500);
1718
1719                 kfree(adapter->vf_data);
1720                 adapter->vf_data = NULL;
1721                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1722                 msleep(100);
1723                 dev_info(&pdev->dev, "IOV Disabled\n");
1724         }
1725 #endif
1726         iounmap(hw->hw_addr);
1727         if (hw->flash_address)
1728                 iounmap(hw->flash_address);
1729         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1730                                      IORESOURCE_MEM));
1731
1732         free_netdev(netdev);
1733
1734         pci_disable_pcie_error_reporting(pdev);
1735
1736         pci_disable_device(pdev);
1737 }
1738
1739 /**
1740  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1741  * @adapter: board private structure to initialize
1742  *
1743  * This function initializes the VF-specific data storage and then attempts to
1744  * allocate the VFs.  The reason for ordering it this way is that it is much
1745  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1746  * the memory for the VFs.
1747  **/
1748 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1749 {
1750 #ifdef CONFIG_PCI_IOV
1751         struct pci_dev *pdev = adapter->pdev;
1752
1753         if (adapter->vfs_allocated_count > 7)
1754                 adapter->vfs_allocated_count = 7;
1755
1756         if (adapter->vfs_allocated_count) {
1757                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1758                                            sizeof(struct vf_data_storage),
1759                                            GFP_KERNEL);
1760                 /* if allocation failed then we do not support SR-IOV */
1761                 if (!adapter->vf_data) {
1762                         adapter->vfs_allocated_count = 0;
1763                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1764                                 "Data Storage\n");
1765                 }
1766         }
1767
1768         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1769                 kfree(adapter->vf_data);
1770                 adapter->vf_data = NULL;
1771 #endif /* CONFIG_PCI_IOV */
1772                 adapter->vfs_allocated_count = 0;
1773 #ifdef CONFIG_PCI_IOV
1774         } else {
1775                 unsigned char mac_addr[ETH_ALEN];
1776                 int i;
1777                 dev_info(&pdev->dev, "%d vfs allocated\n",
1778                          adapter->vfs_allocated_count);
1779                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1780                         random_ether_addr(mac_addr);
1781                         igb_set_vf_mac(adapter, i, mac_addr);
1782                 }
1783         }
1784 #endif /* CONFIG_PCI_IOV */
1785 }
1786
1787 /**
1788  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1789  * @adapter: board private structure to initialize
1790  *
1791  * igb_sw_init initializes the Adapter private data structure.
1792  * Fields are initialized based on PCI device information and
1793  * OS network device settings (MTU size).
1794  **/
1795 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1796 {
1797         struct e1000_hw *hw = &adapter->hw;
1798         struct net_device *netdev = adapter->netdev;
1799         struct pci_dev *pdev = adapter->pdev;
1800
1801         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1802
1803         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1804         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1805         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1806         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1807
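        /* frame size bounds: MTU plus Ethernet header and FCS on the upper
         * end, the minimum legal Ethernet frame plus FCS on the lower end */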
1808         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1809         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1810
1811 #ifdef CONFIG_PCI_IOV
1812         if (hw->mac.type == e1000_82576)
1813                 adapter->vfs_allocated_count = max_vfs;
1814
1815 #endif /* CONFIG_PCI_IOV */
1816         /* This call may decrease the number of queues */
1817         if (igb_init_interrupt_scheme(adapter)) {
1818                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1819                 return -ENOMEM;
1820         }
1821
1822         igb_probe_vfs(adapter);
1823
1824         /* Explicitly disable IRQ since the NIC can be in any state. */
1825         igb_irq_disable(adapter);
1826
1827         set_bit(__IGB_DOWN, &adapter->state);
1828         return 0;
1829 }
1830
1831 /**
1832  * igb_open - Called when a network interface is made active
1833  * @netdev: network interface device structure
1834  *
1835  * Returns 0 on success, negative value on failure
1836  *
1837  * The open entry point is called when a network interface is made
1838  * active by the system (IFF_UP).  At this point all resources needed
1839  * for transmit and receive operations are allocated, the interrupt
1840  * handler is registered with the OS, the watchdog timer is started,
1841  * and the stack is notified that the interface is ready.
1842  **/
1843 static int igb_open(struct net_device *netdev)
1844 {
1845         struct igb_adapter *adapter = netdev_priv(netdev);
1846         struct e1000_hw *hw = &adapter->hw;
1847         int err;
1848         int i;
1849
1850         /* disallow open during test */
1851         if (test_bit(__IGB_TESTING, &adapter->state))
1852                 return -EBUSY;
1853
1854         netif_carrier_off(netdev);
1855
1856         /* allocate transmit descriptors */
1857         err = igb_setup_all_tx_resources(adapter);
1858         if (err)
1859                 goto err_setup_tx;
1860
1861         /* allocate receive descriptors */
1862         err = igb_setup_all_rx_resources(adapter);
1863         if (err)
1864                 goto err_setup_rx;
1865
1866         /* e1000_power_up_phy(adapter); */
1867
1868         /* before we allocate an interrupt, we must be ready to handle it.
1869          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1870          * as soon as we call pci_request_irq, so we have to setup our
1871          * clean_rx handler before we do so.  */
1872         igb_configure(adapter);
1873
1874         err = igb_request_irq(adapter);
1875         if (err)
1876                 goto err_req_irq;
1877
1878         /* From here on the code is the same as igb_up() */
1879         clear_bit(__IGB_DOWN, &adapter->state);
1880
1881         for (i = 0; i < adapter->num_q_vectors; i++) {
1882                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1883                 napi_enable(&q_vector->napi);
1884         }
1885
1886         /* Clear any pending interrupts. */
1887         rd32(E1000_ICR);
1888
1889         igb_irq_enable(adapter);
1890
1891         /* notify VFs that reset has been completed */
1892         if (adapter->vfs_allocated_count) {
1893                 u32 reg_data = rd32(E1000_CTRL_EXT);
1894                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1895                 wr32(E1000_CTRL_EXT, reg_data);
1896         }
1897
1898         netif_tx_start_all_queues(netdev);
1899
1900         /* start the watchdog. */
1901         hw->mac.get_link_status = 1;
1902         schedule_work(&adapter->watchdog_task);
1903
1904         return 0;
1905
1906 err_req_irq:
1907         igb_release_hw_control(adapter);
1908         /* e1000_power_down_phy(adapter); */
1909         igb_free_all_rx_resources(adapter);
1910 err_setup_rx:
1911         igb_free_all_tx_resources(adapter);
1912 err_setup_tx:
1913         igb_reset(adapter);
1914
1915         return err;
1916 }
1917
1918 /**
1919  * igb_close - Disables a network interface
1920  * @netdev: network interface device structure
1921  *
1922  * Returns 0, this is not allowed to fail
1923  *
1924  * The close entry point is called when an interface is de-activated
1925  * by the OS.  The hardware is still under the driver's control, but
1926  * needs to be disabled.  A global MAC reset is issued to stop the
1927  * hardware, and all transmit and receive resources are freed.
1928  **/
1929 static int igb_close(struct net_device *netdev)
1930 {
1931         struct igb_adapter *adapter = netdev_priv(netdev);
1932
1933         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1934         igb_down(adapter);
1935
1936         igb_free_irq(adapter);
1937
1938         igb_free_all_tx_resources(adapter);
1939         igb_free_all_rx_resources(adapter);
1940
1941         return 0;
1942 }
1943
1944 /**
1945  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1946  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1947  *
1948  * Return 0 on success, negative on failure
1949  **/
1950 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1951 {
1952         struct pci_dev *pdev = tx_ring->pdev;
1953         int size;
1954
1955         size = sizeof(struct igb_buffer) * tx_ring->count;
1956         tx_ring->buffer_info = vmalloc(size);
1957         if (!tx_ring->buffer_info)
1958                 goto err;
1959         memset(tx_ring->buffer_info, 0, size);
1960
1961         /* round up to nearest 4K */
1962         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1963         tx_ring->size = ALIGN(tx_ring->size, 4096);
1964
1965         tx_ring->desc = pci_alloc_consistent(pdev,
1966                                              tx_ring->size,
1967                                              &tx_ring->dma);
1968
1969         if (!tx_ring->desc)
1970                 goto err;
1971
1972         tx_ring->next_to_use = 0;
1973         tx_ring->next_to_clean = 0;
1974         return 0;
1975
1976 err:
1977         vfree(tx_ring->buffer_info);
1978         dev_err(&pdev->dev,
1979                 "Unable to allocate memory for the transmit descriptor ring\n");
1980         return -ENOMEM;
1981 }
1982
1983 /**
1984  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
1985  *                                (Descriptors) for all queues
1986  * @adapter: board private structure
1987  *
1988  * Return 0 on success, negative on failure
1989  **/
1990 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
1991 {
1992         struct pci_dev *pdev = adapter->pdev;
1993         int i, err = 0;
1994
1995         for (i = 0; i < adapter->num_tx_queues; i++) {
1996                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
1997                 if (err) {
1998                         dev_err(&pdev->dev,
1999                                 "Allocation for Tx Queue %u failed\n", i);
2000                         for (i--; i >= 0; i--)
2001                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2002                         break;
2003                 }
2004         }
2005
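        /* map every possible hardware queue index onto one of the rings that
         * were actually allocated, wrapping round-robin */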
2006         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2007                 int r_idx = i % adapter->num_tx_queues;
2008                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2009         }
2010         return err;
2011 }
2012
2013 /**
2014  * igb_setup_tctl - configure the transmit control registers
2015  * @adapter: Board private structure
2016  **/
2017 void igb_setup_tctl(struct igb_adapter *adapter)
2018 {
2019         struct e1000_hw *hw = &adapter->hw;
2020         u32 tctl;
2021
2022         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2023         wr32(E1000_TXDCTL(0), 0);
2024
2025         /* Program the Transmit Control Register */
2026         tctl = rd32(E1000_TCTL);
2027         tctl &= ~E1000_TCTL_CT;
2028         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2029                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2030
2031         igb_config_collision_dist(hw);
2032
2033         /* Enable transmits */
2034         tctl |= E1000_TCTL_EN;
2035
2036         wr32(E1000_TCTL, tctl);
2037 }
2038
2039 /**
2040  * igb_configure_tx_ring - Configure transmit ring after Reset
2041  * @adapter: board private structure
2042  * @ring: tx ring to configure
2043  *
2044  * Configure a transmit ring after a reset.
2045  **/
2046 void igb_configure_tx_ring(struct igb_adapter *adapter,
2047                            struct igb_ring *ring)
2048 {
2049         struct e1000_hw *hw = &adapter->hw;
2050         u32 txdctl;
2051         u64 tdba = ring->dma;
2052         int reg_idx = ring->reg_idx;
2053
2054         /* disable the queue */
2055         txdctl = rd32(E1000_TXDCTL(reg_idx));
2056         wr32(E1000_TXDCTL(reg_idx),
2057                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2058         wrfl();
2059         mdelay(10);
2060
2061         wr32(E1000_TDLEN(reg_idx),
2062                         ring->count * sizeof(union e1000_adv_tx_desc));
2063         wr32(E1000_TDBAL(reg_idx),
2064                         tdba & 0x00000000ffffffffULL);
2065         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2066
2067         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2068         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2069         writel(0, ring->head);
2070         writel(0, ring->tail);
2071
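        /* program the descriptor prefetch (PTHRESH), host (HTHRESH) and
         * write-back (WTHRESH) thresholds; PTHRESH occupies the low-order
         * bits of TXDCTL, HTHRESH and WTHRESH the fields at bit offsets 8
         * and 16 */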
2072         txdctl |= IGB_TX_PTHRESH;
2073         txdctl |= IGB_TX_HTHRESH << 8;
2074         txdctl |= IGB_TX_WTHRESH << 16;
2075
2076         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2077         wr32(E1000_TXDCTL(reg_idx), txdctl);
2078 }
2079
2080 /**
2081  * igb_configure_tx - Configure transmit Unit after Reset
2082  * @adapter: board private structure
2083  *
2084  * Configure the Tx unit of the MAC after a reset.
2085  **/
2086 static void igb_configure_tx(struct igb_adapter *adapter)
2087 {
2088         int i;
2089
2090         for (i = 0; i < adapter->num_tx_queues; i++)
2091                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2092 }
2093
2094 /**
2095  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2096  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2097  *
2098  * Returns 0 on success, negative on failure
2099  **/
2100 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2101 {
2102         struct pci_dev *pdev = rx_ring->pdev;
2103         int size, desc_len;
2104
2105         size = sizeof(struct igb_buffer) * rx_ring->count;
2106         rx_ring->buffer_info = vmalloc(size);
2107         if (!rx_ring->buffer_info)
2108                 goto err;
2109         memset(rx_ring->buffer_info, 0, size);
2110
2111         desc_len = sizeof(union e1000_adv_rx_desc);
2112
2113         /* Round up to nearest 4K */
2114         rx_ring->size = rx_ring->count * desc_len;
2115         rx_ring->size = ALIGN(rx_ring->size, 4096);
2116
2117         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2118                                              &rx_ring->dma);
2119
2120         if (!rx_ring->desc)
2121                 goto err;
2122
2123         rx_ring->next_to_clean = 0;
2124         rx_ring->next_to_use = 0;
2125
2126         return 0;
2127
2128 err:
2129         vfree(rx_ring->buffer_info);
2130         rx_ring->buffer_info = NULL;
2131         dev_err(&pdev->dev, "Unable to allocate memory for "
2132                 "the receive descriptor ring\n");
2133         return -ENOMEM;
2134 }
2135
2136 /**
2137  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2138  *                                (Descriptors) for all queues
2139  * @adapter: board private structure
2140  *
2141  * Return 0 on success, negative on failure
2142  **/
2143 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2144 {
2145         struct pci_dev *pdev = adapter->pdev;
2146         int i, err = 0;
2147
2148         for (i = 0; i < adapter->num_rx_queues; i++) {
2149                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2150                 if (err) {
2151                         dev_err(&pdev->dev,
2152                                 "Allocation for Rx Queue %u failed\n", i);
2153                         for (i--; i >= 0; i--)
2154                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2155                         break;
2156                 }
2157         }
2158
2159         return err;
2160 }
2161
2162 /**
2163  * igb_setup_mrqc - configure the multiple receive queue control registers
2164  * @adapter: Board private structure
2165  **/
2166 static void igb_setup_mrqc(struct igb_adapter *adapter)
2167 {
2168         struct e1000_hw *hw = &adapter->hw;
2169         u32 mrqc, rxcsum;
2170         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2171         union e1000_reta {
2172                 u32 dword;
2173                 u8  bytes[4];
2174         } reta;
2175         static const u8 rsshash[40] = {
2176                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2177                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2178                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2179                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2180
2181         /* Fill out hash function seeds */
2182         for (j = 0; j < 10; j++) {
2183                 u32 rsskey = rsshash[(j * 4)];
2184                 rsskey |= rsshash[(j * 4) + 1] << 8;
2185                 rsskey |= rsshash[(j * 4) + 2] << 16;
2186                 rsskey |= rsshash[(j * 4) + 3] << 24;
2187                 array_wr32(E1000_RSSRK(0), j, rsskey);
2188         }
2189
2190         num_rx_queues = adapter->num_rx_queues;
2191
2192         if (adapter->vfs_allocated_count) {
2193                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2194                 switch (hw->mac.type) {
2195                 case e1000_82576:
2196                         shift = 3;
2197                         num_rx_queues = 2;
2198                         break;
2199                 case e1000_82575:
2200                         shift = 2;
2201                         shift2 = 6;
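                        /* fall through */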
2202                 default:
2203                         break;
2204                 }
2205         } else {
2206                 if (hw->mac.type == e1000_82575)
2207                         shift = 6;
2208         }
2209
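        /* populate the 128-entry redirection table (32 RETA registers of four
         * byte-wide entries each), spreading hash buckets round-robin across
         * the active rx queues */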
2210         for (j = 0; j < (32 * 4); j++) {
2211                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2212                 if (shift2)
2213                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2214                 if ((j & 3) == 3)
2215                         wr32(E1000_RETA(j >> 2), reta.dword);
2216         }
2217
2218         /*
2219          * Disable raw packet checksumming so that RSS hash is placed in
2220          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2221          * offloads as they are enabled by default
2222          */
2223         rxcsum = rd32(E1000_RXCSUM);
2224         rxcsum |= E1000_RXCSUM_PCSD;
2225
2226         if (adapter->hw.mac.type >= e1000_82576)
2227                 /* Enable Receive Checksum Offload for SCTP */
2228                 rxcsum |= E1000_RXCSUM_CRCOFL;
2229
2230         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2231         wr32(E1000_RXCSUM, rxcsum);
2232
2233         /* If VMDq is enabled then we set the appropriate mode for that, else
2234          * we default to RSS so that an RSS hash is calculated per packet even
2235          * if we are only using one queue */
2236         if (adapter->vfs_allocated_count) {
2237                 if (hw->mac.type > e1000_82575) {
2238                         /* Set the default pool for the PF's first queue */
2239                         u32 vtctl = rd32(E1000_VT_CTL);
2240                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2241                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2242                         vtctl |= adapter->vfs_allocated_count <<
2243                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2244                         wr32(E1000_VT_CTL, vtctl);
2245                 }
2246                 if (adapter->num_rx_queues > 1)
2247                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2248                 else
2249                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2250         } else {
2251                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2252         }
2253         igb_vmm_control(adapter);
2254
2255         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2256                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2257         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2258                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2259         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2260                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2261         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2262                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2263
2264         wr32(E1000_MRQC, mrqc);
2265 }
2266
2267 /**
2268  * igb_setup_rctl - configure the receive control registers
2269  * @adapter: Board private structure
2270  **/
2271 void igb_setup_rctl(struct igb_adapter *adapter)
2272 {
2273         struct e1000_hw *hw = &adapter->hw;
2274         u32 rctl;
2275
2276         rctl = rd32(E1000_RCTL);
2277
2278         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2279         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2280
2281         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2282                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2283
2284         /*
2285          * enable stripping of CRC. It's unlikely this will break BMC
2286          * redirection as it did with e1000. Newer features require
2287          * that the HW strips the CRC.
2288          */
2289         rctl |= E1000_RCTL_SECRC;
2290
2291         /*
2292          * disable store bad packets and clear size bits.
2293          */
2294         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2295
2296         /* enable LPE to prevent packets larger than max_frame_size */
2297         rctl |= E1000_RCTL_LPE;
2298
2299         /* disable queue 0 to prevent tail write w/o re-config */
2300         wr32(E1000_RXDCTL(0), 0);
2301
2302         /* Attention!!!  For SR-IOV PF driver operations you must enable
2303          * queue drop for all VF and PF queues to prevent head-of-line blocking
2304          * if an untrusted VF does not provide descriptors to hardware.
2305          */
2306         if (adapter->vfs_allocated_count) {
2307                 /* set all queue drop enable bits */
2308                 wr32(E1000_QDE, ALL_QUEUES);
2309         }
2310
2311         wr32(E1000_RCTL, rctl);
2312 }
2313
2314 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2315                                    int vfn)
2316 {
2317         struct e1000_hw *hw = &adapter->hw;
2318         u32 vmolr;
2319
2320         /* if it isn't the PF, check to see if VFs are enabled and
2321          * increase the size to support VLAN tags */
2322         if (vfn < adapter->vfs_allocated_count &&
2323             adapter->vf_data[vfn].vlans_enabled)
2324                 size += VLAN_TAG_SIZE;
2325
2326         vmolr = rd32(E1000_VMOLR(vfn));
2327         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2328         vmolr |= size | E1000_VMOLR_LPE;
2329         wr32(E1000_VMOLR(vfn), vmolr);
2330
2331         return 0;
2332 }
2333
2334 /**
2335  * igb_rlpml_set - set maximum receive packet size
2336  * @adapter: board private structure
2337  *
2338  * Configure maximum receivable packet size.
2339  **/
2340 static void igb_rlpml_set(struct igb_adapter *adapter)
2341 {
2342         u32 max_frame_size = adapter->max_frame_size;
2343         struct e1000_hw *hw = &adapter->hw;
2344         u16 pf_id = adapter->vfs_allocated_count;
2345
2346         if (adapter->vlgrp)
2347                 max_frame_size += VLAN_TAG_SIZE;
2348
2349         /* if vfs are enabled we set RLPML to the largest possible request
2350          * size and set the VMOLR RLPML to the size we need */
2351         if (pf_id) {
2352                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2353                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2354         }
2355
2356         wr32(E1000_RLPML, max_frame_size);
2357 }
2358
2359 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2360 {
2361         struct e1000_hw *hw = &adapter->hw;
2362         u32 vmolr;
2363
2364         /*
2365          * This register exists only on 82576 and newer, so on older MACs we
2366          * should exit and do nothing.
2367          */
2368         if (hw->mac.type < e1000_82576)
2369                 return;
2370
2371         vmolr = rd32(E1000_VMOLR(vfn));
2372         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2373                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2374
2375         /* clear all bits that might not be set */
2376         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2377
2378         if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
2379                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2380         /*
2381          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2382          * multicast packets
2383          */
2384         if (vfn <= adapter->vfs_allocated_count)
2385                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2386
2387         wr32(E1000_VMOLR(vfn), vmolr);
2388 }
2389
2390 /**
2391  * igb_configure_rx_ring - Configure a receive ring after Reset
2392  * @adapter: board private structure
2393  * @ring: receive ring to be configured
2394  *
2395  * Configure the Rx unit of the MAC after a reset.
2396  **/
2397 void igb_configure_rx_ring(struct igb_adapter *adapter,
2398                            struct igb_ring *ring)
2399 {
2400         struct e1000_hw *hw = &adapter->hw;
2401         u64 rdba = ring->dma;
2402         int reg_idx = ring->reg_idx;
2403         u32 srrctl, rxdctl;
2404
2405         /* disable the queue */
2406         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2407         wr32(E1000_RXDCTL(reg_idx),
2408                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2409
2410         /* Set DMA base address registers */
2411         wr32(E1000_RDBAL(reg_idx),
2412              rdba & 0x00000000ffffffffULL);
2413         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2414         wr32(E1000_RDLEN(reg_idx),
2415                        ring->count * sizeof(union e1000_adv_rx_desc));
2416
2417         /* initialize head and tail */
2418         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2419         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2420         writel(0, ring->head);
2421         writel(0, ring->tail);
2422
2423         /* set descriptor configuration */
2424         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2425                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2426                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2427 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2428                 srrctl |= IGB_RXBUFFER_16384 >>
2429                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2430 #else
2431                 srrctl |= (PAGE_SIZE / 2) >>
2432                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2433 #endif
2434                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2435         } else {
2436                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2437                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2438                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2439         }
2440
2441         wr32(E1000_SRRCTL(reg_idx), srrctl);
2442
2443         /* set filtering for VMDQ pools */
2444         igb_set_vmolr(adapter, reg_idx & 0x7);
2445
2446         /* enable receive descriptor fetching */
2447         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2448         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
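        /* keep the upper control bits but clear bits 19:0 before programming
         * the new prefetch/host/write-back thresholds below */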
2449         rxdctl &= 0xFFF00000;
2450         rxdctl |= IGB_RX_PTHRESH;
2451         rxdctl |= IGB_RX_HTHRESH << 8;
2452         rxdctl |= IGB_RX_WTHRESH << 16;
2453         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2454 }
2455
2456 /**
2457  * igb_configure_rx - Configure receive Unit after Reset
2458  * @adapter: board private structure
2459  *
2460  * Configure the Rx unit of the MAC after a reset.
2461  **/
2462 static void igb_configure_rx(struct igb_adapter *adapter)
2463 {
2464         int i;
2465
2466         /* set UTA to appropriate mode */
2467         igb_set_uta(adapter);
2468
2469         /* set the correct pool for the PF default MAC address in entry 0 */
2470         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2471                          adapter->vfs_allocated_count);
2472
2473         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2474          * the Base and Length of the Rx Descriptor Ring */
2475         for (i = 0; i < adapter->num_rx_queues; i++)
2476                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2477 }
2478
2479 /**
2480  * igb_free_tx_resources - Free Tx Resources per Queue
2481  * @tx_ring: Tx descriptor ring for a specific queue
2482  *
2483  * Free all transmit software resources
2484  **/
2485 void igb_free_tx_resources(struct igb_ring *tx_ring)
2486 {
2487         igb_clean_tx_ring(tx_ring);
2488
2489         vfree(tx_ring->buffer_info);
2490         tx_ring->buffer_info = NULL;
2491
2492         /* if not set, then don't free */
2493         if (!tx_ring->desc)
2494                 return;
2495
2496         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2497                             tx_ring->desc, tx_ring->dma);
2498
2499         tx_ring->desc = NULL;
2500 }
2501
2502 /**
2503  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2504  * @adapter: board private structure
2505  *
2506  * Free all transmit software resources
2507  **/
2508 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2509 {
2510         int i;
2511
2512         for (i = 0; i < adapter->num_tx_queues; i++)
2513                 igb_free_tx_resources(&adapter->tx_ring[i]);
2514 }
2515
2516 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2517                                     struct igb_buffer *buffer_info)
2518 {
2519         buffer_info->dma = 0;
2520         if (buffer_info->skb) {
2521                 skb_dma_unmap(&tx_ring->pdev->dev,
2522                               buffer_info->skb,
2523                               DMA_TO_DEVICE);
2524                 dev_kfree_skb_any(buffer_info->skb);
2525                 buffer_info->skb = NULL;
2526         }
2527         buffer_info->time_stamp = 0;
2528         /* buffer_info must be completely set up in the transmit path */
2529 }
2530
2531 /**
2532  * igb_clean_tx_ring - Free Tx Buffers
2533  * @tx_ring: ring to be cleaned
2534  **/
2535 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2536 {
2537         struct igb_buffer *buffer_info;
2538         unsigned long size;
2539         unsigned int i;
2540
2541         if (!tx_ring->buffer_info)
2542                 return;
2543         /* Free all the Tx ring sk_buffs */
2544
2545         for (i = 0; i < tx_ring->count; i++) {
2546                 buffer_info = &tx_ring->buffer_info[i];
2547                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2548         }
2549
2550         size = sizeof(struct igb_buffer) * tx_ring->count;
2551         memset(tx_ring->buffer_info, 0, size);
2552
2553         /* Zero out the descriptor ring */
2554         memset(tx_ring->desc, 0, tx_ring->size);
2555
2556         tx_ring->next_to_use = 0;
2557         tx_ring->next_to_clean = 0;
2558 }
2559
2560 /**
2561  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2562  * @adapter: board private structure
2563  **/
2564 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2565 {
2566         int i;
2567
2568         for (i = 0; i < adapter->num_tx_queues; i++)
2569                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2570 }
2571
2572 /**
2573  * igb_free_rx_resources - Free Rx Resources
2574  * @rx_ring: ring to clean the resources from
2575  *
2576  * Free all receive software resources
2577  **/
2578 void igb_free_rx_resources(struct igb_ring *rx_ring)
2579 {
2580         igb_clean_rx_ring(rx_ring);
2581
2582         vfree(rx_ring->buffer_info);
2583         rx_ring->buffer_info = NULL;
2584
2585         /* if not set, then don't free */
2586         if (!rx_ring->desc)
2587                 return;
2588
2589         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2590                             rx_ring->desc, rx_ring->dma);
2591
2592         rx_ring->desc = NULL;
2593 }
2594
2595 /**
2596  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2597  * @adapter: board private structure
2598  *
2599  * Free all receive software resources
2600  **/
2601 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2602 {
2603         int i;
2604
2605         for (i = 0; i < adapter->num_rx_queues; i++)
2606                 igb_free_rx_resources(&adapter->rx_ring[i]);
2607 }
2608
2609 /**
2610  * igb_clean_rx_ring - Free Rx Buffers per Queue
2611  * @rx_ring: ring to free buffers from
2612  **/
2613 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2614 {
2615         struct igb_buffer *buffer_info;
2616         unsigned long size;
2617         unsigned int i;
2618
2619         if (!rx_ring->buffer_info)
2620                 return;
2621
2622         /* Free all the Rx ring sk_buffs */
2623         for (i = 0; i < rx_ring->count; i++) {
2624                 buffer_info = &rx_ring->buffer_info[i];
2625                 if (buffer_info->dma) {
2626                         pci_unmap_single(rx_ring->pdev,
2627                                          buffer_info->dma,
2628                                          rx_ring->rx_buffer_len,
2629                                          PCI_DMA_FROMDEVICE);
2630                         buffer_info->dma = 0;
2631                 }
2632
2633                 if (buffer_info->skb) {
2634                         dev_kfree_skb(buffer_info->skb);
2635                         buffer_info->skb = NULL;
2636                 }
2637                 if (buffer_info->page_dma) {
2638                         pci_unmap_page(rx_ring->pdev,
2639                                        buffer_info->page_dma,
2640                                        PAGE_SIZE / 2,
2641                                        PCI_DMA_FROMDEVICE);
2642                         buffer_info->page_dma = 0;
2643                 }
2644                 if (buffer_info->page) {
2645                         put_page(buffer_info->page);
2646                         buffer_info->page = NULL;
2647                         buffer_info->page_offset = 0;
2648                 }
2649         }
2650
2651         size = sizeof(struct igb_buffer) * rx_ring->count;
2652         memset(rx_ring->buffer_info, 0, size);
2653
2654         /* Zero out the descriptor ring */
2655         memset(rx_ring->desc, 0, rx_ring->size);
2656
2657         rx_ring->next_to_clean = 0;
2658         rx_ring->next_to_use = 0;
2659 }
2660
2661 /**
2662  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2663  * @adapter: board private structure
2664  **/
2665 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2666 {
2667         int i;
2668
2669         for (i = 0; i < adapter->num_rx_queues; i++)
2670                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2671 }
2672
2673 /**
2674  * igb_set_mac - Change the Ethernet Address of the NIC
2675  * @netdev: network interface device structure
2676  * @p: pointer to an address structure
2677  *
2678  * Returns 0 on success, negative on failure
2679  **/
2680 static int igb_set_mac(struct net_device *netdev, void *p)
2681 {
2682         struct igb_adapter *adapter = netdev_priv(netdev);
2683         struct e1000_hw *hw = &adapter->hw;
2684         struct sockaddr *addr = p;
2685
2686         if (!is_valid_ether_addr(addr->sa_data))
2687                 return -EADDRNOTAVAIL;
2688
2689         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2690         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2691
2692         /* set the correct pool for the new PF MAC address in entry 0 */
2693         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2694                          adapter->vfs_allocated_count);
2695
2696         return 0;
2697 }
2698
2699 /**
2700  * igb_write_mc_addr_list - write multicast addresses to MTA
2701  * @netdev: network interface device structure
2702  *
2703  * Writes multicast address list to the MTA hash table.
2704  * Returns: -ENOMEM on failure
2705  *                0 on no addresses written
2706  *                X on writing X addresses to MTA
2707  **/
2708 static int igb_write_mc_addr_list(struct net_device *netdev)
2709 {
2710         struct igb_adapter *adapter = netdev_priv(netdev);
2711         struct e1000_hw *hw = &adapter->hw;
2712         struct dev_mc_list *mc_ptr = netdev->mc_list;
2713         u8  *mta_list;
2714         u32 vmolr = 0;
2715         int i;
2716
2717         if (!netdev->mc_count) {
2718                 /* nothing to program, so clear mc list */
2719                 igb_update_mc_addr_list(hw, NULL, 0);
2720                 igb_restore_vf_multicasts(adapter);
2721                 return 0;
2722         }
2723
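        /* the list is a packed array of 6-byte (ETH_ALEN) MAC addresses */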
2724         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2725         if (!mta_list)
2726                 return -ENOMEM;
2727
2728         /* set vmolr receive overflow multicast bit */
2729         vmolr |= E1000_VMOLR_ROMPE;
2730
2731         /* The shared function expects a packed array of only addresses. */
2732         mc_ptr = netdev->mc_list;
2733
2734         for (i = 0; i < netdev->mc_count; i++) {
2735                 if (!mc_ptr)
2736                         break;
2737                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2738                 mc_ptr = mc_ptr->next;
2739         }
2740         igb_update_mc_addr_list(hw, mta_list, i);
2741         kfree(mta_list);
2742
2743         return netdev->mc_count;
2744 }
2745
2746 /**
2747  * igb_write_uc_addr_list - write unicast addresses to RAR table
2748  * @netdev: network interface device structure
2749  *
2750  * Writes unicast address list to the RAR table.
2751  * Returns: -ENOMEM on failure/insufficient address space
2752  *                0 on no addresses written
2753  *                X on writing X addresses to the RAR table
2754  **/
2755 static int igb_write_uc_addr_list(struct net_device *netdev)
2756 {
2757         struct igb_adapter *adapter = netdev_priv(netdev);
2758         struct e1000_hw *hw = &adapter->hw;
2759         unsigned int vfn = adapter->vfs_allocated_count;
2760         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2761         int count = 0;
2762
2763         /* return ENOMEM indicating insufficient memory for addresses */
2764         if (netdev->uc.count > rar_entries)
2765                 return -ENOMEM;
2766
2767         if (netdev->uc.count && rar_entries) {
2768                 struct netdev_hw_addr *ha;
2769                 list_for_each_entry(ha, &netdev->uc.list, list) {
2770                         if (!rar_entries)
2771                                 break;
2772                         igb_rar_set_qsel(adapter, ha->addr,
2773                                          rar_entries--,
2774                                          vfn);
2775                         count++;
2776                 }
2777         }
2778         /* write the addresses in reverse order to avoid write combining */
2779         for (; rar_entries > 0 ; rar_entries--) {
2780                 wr32(E1000_RAH(rar_entries), 0);
2781                 wr32(E1000_RAL(rar_entries), 0);
2782         }
2783         wrfl();
2784
2785         return count;
2786 }
2787
2788 /**
2789  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2790  * @netdev: network interface device structure
2791  *
2792  * The set_rx_mode entry point is called whenever the unicast or multicast
2793  * address lists or the network interface flags are updated.  This routine is
2794  * responsible for configuring the hardware for proper unicast, multicast,
2795  * promiscuous mode, and all-multi behavior.
2796  **/
2797 static void igb_set_rx_mode(struct net_device *netdev)
2798 {
2799         struct igb_adapter *adapter = netdev_priv(netdev);
2800         struct e1000_hw *hw = &adapter->hw;
2801         unsigned int vfn = adapter->vfs_allocated_count;
2802         u32 rctl, vmolr = 0;
2803         int count;
2804
2805         /* Check for Promiscuous and All Multicast modes */
2806         rctl = rd32(E1000_RCTL);
2807
2808         /* clear the affected bits */
2809         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2810
2811         if (netdev->flags & IFF_PROMISC) {
2812                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2813                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2814         } else {
2815                 if (netdev->flags & IFF_ALLMULTI) {
2816                         rctl |= E1000_RCTL_MPE;
2817                         vmolr |= E1000_VMOLR_MPME;
2818                 } else {
2819                         /*
2820                          * Write addresses to the MTA; if the attempt fails
2821                          * then we should just turn on promiscuous mode so
2822                          * that we can at least receive multicast traffic
2823                          */
2824                         count = igb_write_mc_addr_list(netdev);
2825                         if (count < 0) {
2826                                 rctl |= E1000_RCTL_MPE;
2827                                 vmolr |= E1000_VMOLR_MPME;
2828                         } else if (count) {
2829                                 vmolr |= E1000_VMOLR_ROMPE;
2830                         }
2831                 }
2832                 /*
2833                  * Write addresses to available RAR registers; if there is not
2834                  * sufficient space to store all the addresses then enable
2835                  * unicast promiscuous mode
2836                  */
2837                 count = igb_write_uc_addr_list(netdev);
2838                 if (count < 0) {
2839                         rctl |= E1000_RCTL_UPE;
2840                         vmolr |= E1000_VMOLR_ROPE;
2841                 }
2842                 rctl |= E1000_RCTL_VFE;
2843         }
2844         wr32(E1000_RCTL, rctl);
2845
2846         /*
2847          * In order to support SR-IOV and eventually VMDq it is necessary to set
2848          * the VMOLR to enable the appropriate modes.  Without this workaround
2849          * we will have issues with VLAN tag stripping not being done for frames
2850          * that are only arriving because we are the default pool
2851          */
2852         if (hw->mac.type < e1000_82576)
2853                 return;
2854
2855         vmolr |= rd32(E1000_VMOLR(vfn)) &
2856                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2857         wr32(E1000_VMOLR(vfn), vmolr);
2858         igb_restore_vf_multicasts(adapter);
2859 }
2860
2861 /* Need to wait a few seconds after link up to get diagnostic information from
2862  * the phy */
2863 static void igb_update_phy_info(unsigned long data)
2864 {
2865         struct igb_adapter *adapter = (struct igb_adapter *) data;
2866         igb_get_phy_info(&adapter->hw);
2867 }
2868
2869 /**
2870  * igb_has_link - check shared code for link and determine up/down
2871  * @adapter: pointer to driver private info
2872  **/
2873 static bool igb_has_link(struct igb_adapter *adapter)
2874 {
2875         struct e1000_hw *hw = &adapter->hw;
2876         bool link_active = false;
2877         s32 ret_val = 0;
2878
2879         /* get_link_status is set on LSC (link status) interrupt or
2880          * rx sequence error interrupt.  get_link_status will stay
2881          * set until e1000_check_for_link establishes link
2882          * for copper adapters ONLY
2883          */
2884         switch (hw->phy.media_type) {
2885         case e1000_media_type_copper:
2886                 if (hw->mac.get_link_status) {
2887                         ret_val = hw->mac.ops.check_for_link(hw);
2888                         link_active = !hw->mac.get_link_status;
2889                 } else {
2890                         link_active = true;
2891                 }
2892                 break;
2893         case e1000_media_type_internal_serdes:
2894                 ret_val = hw->mac.ops.check_for_link(hw);
2895                 link_active = hw->mac.serdes_has_link;
2896                 break;
2897         default:
2898         case e1000_media_type_unknown:
2899                 break;
2900         }
2901
2902         return link_active;
2903 }
2904
2905 /**
2906  * igb_watchdog - Timer Call-back
2907  * @data: pointer to adapter cast into an unsigned long
2908  **/
2909 static void igb_watchdog(unsigned long data)
2910 {
2911         struct igb_adapter *adapter = (struct igb_adapter *)data;
2912         /* Do the rest outside of interrupt context */
2913         schedule_work(&adapter->watchdog_task);
2914 }
2915
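/**
 * igb_watchdog_task - check link, update statistics and detect tx hangs
 * @work: pointer to the watchdog_task work_struct embedded in the adapter
 **/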
2916 static void igb_watchdog_task(struct work_struct *work)
2917 {
2918         struct igb_adapter *adapter = container_of(work,
2919                                         struct igb_adapter, watchdog_task);
2920         struct e1000_hw *hw = &adapter->hw;
2921         struct net_device *netdev = adapter->netdev;
2922         struct igb_ring *tx_ring = adapter->tx_ring;
2923         u32 link;
2924         int i;
2925
2926         link = igb_has_link(adapter);
2927         if (link) {
2928                 if (!netif_carrier_ok(netdev)) {
2929                         u32 ctrl;
2930                         hw->mac.ops.get_speed_and_duplex(hw,
2931                                                          &adapter->link_speed,
2932                                                          &adapter->link_duplex);
2933
2934                         ctrl = rd32(E1000_CTRL);
2935                         /* Link status message must follow this format */
2936                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2937                                  "Flow Control: %s\n",
2938                                  netdev->name,
2939                                  adapter->link_speed,
2940                                  adapter->link_duplex == FULL_DUPLEX ?
2941                                  "Full Duplex" : "Half Duplex",
2942                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2943                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2944                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2945                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2946
2947                         /* tweak tx_queue_len according to speed/duplex and
2948                          * adjust the timeout factor */
2949                         netdev->tx_queue_len = adapter->tx_queue_len;
2950                         adapter->tx_timeout_factor = 1;
2951                         switch (adapter->link_speed) {
2952                         case SPEED_10:
2953                                 netdev->tx_queue_len = 10;
2954                                 adapter->tx_timeout_factor = 14;
2955                                 break;
2956                         case SPEED_100:
2957                                 netdev->tx_queue_len = 100;
2958                                 /* maybe add some timeout factor ? */
2959                                 break;
2960                         }
2961
2962                         netif_carrier_on(netdev);
2963
2964                         igb_ping_all_vfs(adapter);
2965
2966                         /* link state has changed, schedule phy info update */
2967                         if (!test_bit(__IGB_DOWN, &adapter->state))
2968                                 mod_timer(&adapter->phy_info_timer,
2969                                           round_jiffies(jiffies + 2 * HZ));
2970                 }
2971         } else {
2972                 if (netif_carrier_ok(netdev)) {
2973                         adapter->link_speed = 0;
2974                         adapter->link_duplex = 0;
2975                         /* Link status message must follow this format */
2976                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2977                                netdev->name);
2978                         netif_carrier_off(netdev);
2979
2980                         igb_ping_all_vfs(adapter);
2981
2982                         /* link state has changed, schedule phy info update */
2983                         if (!test_bit(__IGB_DOWN, &adapter->state))
2984                                 mod_timer(&adapter->phy_info_timer,
2985                                           round_jiffies(jiffies + 2 * HZ));
2986                 }
2987         }
2988
2989         igb_update_stats(adapter);
2990         igb_update_adaptive(hw);
2991
2992         if (!netif_carrier_ok(netdev)) {
2993                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2994                         /* We've lost link, so the controller stops DMA,
2995                          * but we've got queued Tx work that's never going
2996                          * to get done, so reset controller to flush Tx.
2997                          * (Do the reset outside of interrupt context). */
2998                         adapter->tx_timeout_count++;
2999                         schedule_work(&adapter->reset_task);
3000                         /* return immediately since reset is imminent */
3001                         return;
3002                 }
3003         }
3004
3005         /* Force detection of hung controller every watchdog period */
3006         for (i = 0; i < adapter->num_tx_queues; i++)
3007                 adapter->tx_ring[i].detect_tx_hung = true;
3008
3009         /* Cause software interrupt to ensure rx ring is cleaned */
3010         if (adapter->msix_entries) {
3011                 u32 eics = 0;
3012                 for (i = 0; i < adapter->num_q_vectors; i++) {
3013                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3014                         eics |= q_vector->eims_value;
3015                 }
3016                 wr32(E1000_EICS, eics);
3017         } else {
3018                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3019         }
3020
3021         /* Reset the timer */
3022         if (!test_bit(__IGB_DOWN, &adapter->state))
3023                 mod_timer(&adapter->watchdog_timer,
3024                           round_jiffies(jiffies + 2 * HZ));
3025 }
3026
3027 enum latency_range {
3028         lowest_latency = 0,
3029         low_latency = 1,
3030         bulk_latency = 2,
3031         latency_invalid = 255
3032 };
3033
3034 /**
3035  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3036  *
3037  *      Stores a new ITR value based strictly on packet size.  This
3038  *      algorithm is less sophisticated than that used in igb_update_itr,
3039  *      due to the difficulty of synchronizing statistics across multiple
3040  *      receive rings.  The divisors and thresholds used by this function
3041  *      were determined based on theoretical maximum wire speed and testing
3042  *      data, in order to minimize response time while increasing bulk
3043  *      throughput.
3044  *      This functionality is controlled by the InterruptThrottleRate module
3045  *      parameter (see igb_param.c)
3046  *      NOTE:  This function is called only when operating in a multiqueue
3047  *             receive environment.
3048  * @q_vector: pointer to q_vector
3049  **/
3050 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3051 {
3052         int new_val = q_vector->itr_val;
3053         int avg_wire_size = 0;
3054         struct igb_adapter *adapter = q_vector->adapter;
3055
3056         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3057          * ints/sec - ITR timer value of 120 ticks.
3058          */
3059         if (adapter->link_speed != SPEED_1000) {
3060                 new_val = 976;
3061                 goto set_itr_val;
3062         }
3063
3064         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3065                 struct igb_ring *ring = q_vector->rx_ring;
3066                 avg_wire_size = ring->total_bytes / ring->total_packets;
3067         }
3068
3069         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3070                 struct igb_ring *ring = q_vector->tx_ring;
3071                 avg_wire_size = max_t(u32, avg_wire_size,
3072                                       (ring->total_bytes /
3073                                        ring->total_packets));
3074         }
3075
3076         /* if avg_wire_size isn't set no work was done */
3077         if (!avg_wire_size)
3078                 goto clear_counts;
3079
3080         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3081         avg_wire_size += 24;
3082
3083         /* Don't starve jumbo frames */
3084         avg_wire_size = min(avg_wire_size, 3000);
3085
3086         /* Give a little boost to mid-size frames */
3087         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3088                 new_val = avg_wire_size / 3;
3089         else
3090                 new_val = avg_wire_size / 2;
3091
3092 set_itr_val:
3093         if (new_val != q_vector->itr_val) {
3094                 q_vector->itr_val = new_val;
3095                 q_vector->set_itr = 1;
3096         }
3097 clear_counts:
3098         if (q_vector->rx_ring) {
3099                 q_vector->rx_ring->total_bytes = 0;
3100                 q_vector->rx_ring->total_packets = 0;
3101         }
3102         if (q_vector->tx_ring) {
3103                 q_vector->tx_ring->total_bytes = 0;
3104                 q_vector->tx_ring->total_packets = 0;
3105         }
3106 }
3107
3108 /**
3109  * igb_update_itr - update the dynamic ITR value based on statistics
3110  *      Stores a new ITR value based on packets and byte
3111  *      counts during the last interrupt.  The advantage of per interrupt
3112  *      computation is faster updates and more accurate ITR for the current
3113  *      traffic pattern.  Constants in this function were computed
3114  *      based on theoretical maximum wire speed and thresholds were set based
3115  *      on testing data as well as attempting to minimize response time
3116  *      while increasing bulk throughput.
3117  *      This functionality is controlled by the InterruptThrottleRate module
3118  *      parameter (see igb_param.c)
3119  *      NOTE:  These calculations are only valid when operating in a single-
3120  *             queue environment.
3121  * @adapter: pointer to adapter
3122  * @itr_setting: current q_vector->itr_val
3123  * @packets: the number of packets during this measurement interval
3124  * @bytes: the number of bytes during this measurement interval
3125  **/
3126 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3127                                    int packets, int bytes)
3128 {
3129         unsigned int retval = itr_setting;
3130
3131         if (packets == 0)
3132                 goto update_itr_done;
3133
3134         switch (itr_setting) {
3135         case lowest_latency:
3136                 /* handle TSO and jumbo frames */
3137                 if (bytes/packets > 8000)
3138                         retval = bulk_latency;
3139                 else if ((packets < 5) && (bytes > 512))
3140                         retval = low_latency;
3141                 break;
3142         case low_latency:  /* 50 usec aka 20000 ints/s */
3143                 if (bytes > 10000) {
3144                         /* this if handles the TSO accounting */
3145                         if (bytes/packets > 8000) {
3146                                 retval = bulk_latency;
3147                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3148                                 retval = bulk_latency;
3149                         } else if (packets > 35) {
3150                                 retval = lowest_latency;
3151                         }
3152                 } else if (bytes/packets > 2000) {
3153                         retval = bulk_latency;
3154                 } else if (packets <= 2 && bytes < 512) {
3155                         retval = lowest_latency;
3156                 }
3157                 break;
3158         case bulk_latency: /* 250 usec aka 4000 ints/s */
3159                 if (bytes > 25000) {
3160                         if (packets > 35)
3161                                 retval = low_latency;
3162                 } else if (bytes < 1500) {
3163                         retval = low_latency;
3164                 }
3165                 break;
3166         }
3167
3168 update_itr_done:
3169         return retval;
3170 }
3171
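/**
 * igb_set_itr - update the dynamic ITR value for the single-queue case
 * @adapter: board private structure
 *
 * Combines the rx and tx measurements from igb_update_itr and stores the
 * resulting interrupt rate in q_vector[0] to be written on the next interrupt.
 **/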
3172 static void igb_set_itr(struct igb_adapter *adapter)
3173 {
3174         struct igb_q_vector *q_vector = adapter->q_vector[0];
3175         u16 current_itr;
3176         u32 new_itr = q_vector->itr_val;
3177
3178         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3179         if (adapter->link_speed != SPEED_1000) {
3180                 current_itr = 0;
3181                 new_itr = 4000;
3182                 goto set_itr_now;
3183         }
3184
3185         adapter->rx_itr = igb_update_itr(adapter,
3186                                     adapter->rx_itr,
3187                                     adapter->rx_ring->total_packets,
3188                                     adapter->rx_ring->total_bytes);
3189
3190         adapter->tx_itr = igb_update_itr(adapter,
3191                                     adapter->tx_itr,
3192                                     adapter->tx_ring->total_packets,
3193                                     adapter->tx_ring->total_bytes);
3194         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3195
3196         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3197         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3198                 current_itr = low_latency;
3199
3200         switch (current_itr) {
3201         /* counts and packets in update_itr are dependent on these numbers */
3202         case lowest_latency:
3203                 new_itr = 56;  /* aka 70,000 ints/sec */
3204                 break;
3205         case low_latency:
3206                 new_itr = 196; /* aka 20,000 ints/sec */
3207                 break;
3208         case bulk_latency:
3209                 new_itr = 980; /* aka 4,000 ints/sec */
3210                 break;
3211         default:
3212                 break;
3213         }
3214
3215 set_itr_now:
3216         adapter->rx_ring->total_bytes = 0;
3217         adapter->rx_ring->total_packets = 0;
3218         adapter->tx_ring->total_bytes = 0;
3219         adapter->tx_ring->total_packets = 0;
3220
3221         if (new_itr != q_vector->itr_val) {
3222                 /* this attempts to bias the interrupt rate towards Bulk
3223                  * by adding intermediate steps when interrupt rate is
3224                  * increasing */
3225                 new_itr = new_itr > q_vector->itr_val ?
3226                              max((new_itr * q_vector->itr_val) /
3227                                  (new_itr + (q_vector->itr_val >> 2)),
3228                                  new_itr) :
3229                              new_itr;
3230                 /* Don't write the value here; it resets the adapter's
3231                  * internal timer, and causes us to delay far longer than
3232                  * we should between interrupts.  Instead, we write the ITR
3233                  * value at the beginning of the next interrupt so the timing
3234                  * ends up being correct.
3235                  */
3236                 q_vector->itr_val = new_itr;
3237                 q_vector->set_itr = 1;
3238         }
3239
3240         return;
3241 }
3242
3243 #define IGB_TX_FLAGS_CSUM               0x00000001
3244 #define IGB_TX_FLAGS_VLAN               0x00000002
3245 #define IGB_TX_FLAGS_TSO                0x00000004
3246 #define IGB_TX_FLAGS_IPV4               0x00000008
3247 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3248 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3249 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3250
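/**
 * igb_tso_adv - build an advanced context descriptor for a TSO packet
 * @tx_ring: ring the context descriptor is placed on
 * @skb: packet being segmented
 * @tx_flags: IGB_TX_FLAGS_* gathered so far for this packet
 * @hdr_len: returns the total MAC/IP/TCP header length
 *
 * Returns true once the descriptor has been queued, or a negative errno
 * if a cloned header could not be expanded.
 **/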
3251 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3252                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3253 {
3254         struct e1000_adv_tx_context_desc *context_desc;
3255         unsigned int i;
3256         int err;
3257         struct igb_buffer *buffer_info;
3258         u32 info = 0, tu_cmd = 0;
3259         u32 mss_l4len_idx, l4len;
3260         *hdr_len = 0;
3261
3262         if (skb_header_cloned(skb)) {
3263                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3264                 if (err)
3265                         return err;
3266         }
3267
3268         l4len = tcp_hdrlen(skb);
3269         *hdr_len += l4len;
3270
3271         if (skb->protocol == htons(ETH_P_IP)) {
3272                 struct iphdr *iph = ip_hdr(skb);
3273                 iph->tot_len = 0;
3274                 iph->check = 0;
3275                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3276                                                          iph->daddr, 0,
3277                                                          IPPROTO_TCP,
3278                                                          0);
3279         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3280                 ipv6_hdr(skb)->payload_len = 0;
3281                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3282                                                        &ipv6_hdr(skb)->daddr,
3283                                                        0, IPPROTO_TCP, 0);
3284         }
3285
3286         i = tx_ring->next_to_use;
3287
3288         buffer_info = &tx_ring->buffer_info[i];
3289         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3290         /* VLAN MACLEN IPLEN */
3291         if (tx_flags & IGB_TX_FLAGS_VLAN)
3292                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3293         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3294         *hdr_len += skb_network_offset(skb);
3295         info |= skb_network_header_len(skb);
3296         *hdr_len += skb_network_header_len(skb);
3297         context_desc->vlan_macip_lens = cpu_to_le32(info);
3298
3299         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3300         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3301
3302         if (skb->protocol == htons(ETH_P_IP))
3303                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3304         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3305
3306         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3307
3308         /* MSS L4LEN IDX */
3309         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3310         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3311
3312         /* For 82575, context index must be unique per ring. */
3313         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3314                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3315
3316         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3317         context_desc->seqnum_seed = 0;
3318
3319         buffer_info->time_stamp = jiffies;
3320         buffer_info->next_to_watch = i;
3321         buffer_info->dma = 0;
3322         i++;
3323         if (i == tx_ring->count)
3324                 i = 0;
3325
3326         tx_ring->next_to_use = i;
3327
3328         return true;
3329 }
3330
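/**
 * igb_tx_csum_adv - build a context descriptor for checksum offload
 * @tx_ring: ring the context descriptor is placed on
 * @skb: packet requiring checksum offload and/or VLAN insertion
 * @tx_flags: IGB_TX_FLAGS_* gathered so far for this packet
 *
 * Returns true if a context descriptor was queued, false if the packet
 * needs neither checksum offload nor a VLAN tag.
 **/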
3331 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3332                                    struct sk_buff *skb, u32 tx_flags)
3333 {
3334         struct e1000_adv_tx_context_desc *context_desc;
3335         struct pci_dev *pdev = tx_ring->pdev;
3336         struct igb_buffer *buffer_info;
3337         u32 info = 0, tu_cmd = 0;
3338         unsigned int i;
3339
3340         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3341             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3342                 i = tx_ring->next_to_use;
3343                 buffer_info = &tx_ring->buffer_info[i];
3344                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3345
3346                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3347                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3348
3349                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3350                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3351                         info |= skb_network_header_len(skb);
3352
3353                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3354
3355                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3356
3357                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3358                         __be16 protocol;
3359
3360                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3361                                 const struct vlan_ethhdr *vhdr =
3362                                           (const struct vlan_ethhdr*)skb->data;
3363
3364                                 protocol = vhdr->h_vlan_encapsulated_proto;
3365                         } else {
3366                                 protocol = skb->protocol;
3367                         }
3368
3369                         switch (protocol) {
3370                         case cpu_to_be16(ETH_P_IP):
3371                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3372                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3373                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3374                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3375                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3376                                 break;
3377                         case cpu_to_be16(ETH_P_IPV6):
3378                                 /* XXX what about other V6 headers?? */
3379                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3380                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3381                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3382                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3383                                 break;
3384                         default:
3385                                 if (unlikely(net_ratelimit()))
3386                                         dev_warn(&pdev->dev,
3387                                             "partial checksum but proto=%x!\n",
3388                                             skb->protocol);
3389                                 break;
3390                         }
3391                 }
3392
3393                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3394                 context_desc->seqnum_seed = 0;
3395                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3396                         context_desc->mss_l4len_idx =
3397                                 cpu_to_le32(tx_ring->reg_idx << 4);
3398
3399                 buffer_info->time_stamp = jiffies;
3400                 buffer_info->next_to_watch = i;
3401                 buffer_info->dma = 0;
3402
3403                 i++;
3404                 if (i == tx_ring->count)
3405                         i = 0;
3406                 tx_ring->next_to_use = i;
3407
3408                 return true;
3409         }
3410         return false;
3411 }
3412
3413 #define IGB_MAX_TXD_PWR 16
3414 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3415
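/**
 * igb_tx_map_adv - map the skb head and fragments for DMA
 * @tx_ring: ring the buffers belong to
 * @skb: packet being transmitted
 * @first: index of the first descriptor used for this packet
 *
 * Returns the number of buffers mapped, or 0 if the DMA mapping failed.
 **/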
3416 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3417                                  unsigned int first)
3418 {
3419         struct igb_buffer *buffer_info;
3420         struct pci_dev *pdev = tx_ring->pdev;
3421         unsigned int len = skb_headlen(skb);
3422         unsigned int count = 0, i;
3423         unsigned int f;
3424         dma_addr_t *map;
3425
3426         i = tx_ring->next_to_use;
3427
3428         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3429                 dev_err(&pdev->dev, "TX DMA map failed\n");
3430                 return 0;
3431         }
3432
3433         map = skb_shinfo(skb)->dma_maps;
3434
3435         buffer_info = &tx_ring->buffer_info[i];
3436         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3437         buffer_info->length = len;
3438         /* set time_stamp *before* dma to help avoid a possible race */
3439         buffer_info->time_stamp = jiffies;
3440         buffer_info->next_to_watch = i;
3441         buffer_info->dma = skb_shinfo(skb)->dma_head;
3442
3443         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3444                 struct skb_frag_struct *frag;
3445
3446                 i++;
3447                 if (i == tx_ring->count)
3448                         i = 0;
3449
3450                 frag = &skb_shinfo(skb)->frags[f];
3451                 len = frag->size;
3452
3453                 buffer_info = &tx_ring->buffer_info[i];
3454                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3455                 buffer_info->length = len;
3456                 buffer_info->time_stamp = jiffies;
3457                 buffer_info->next_to_watch = i;
3458                 buffer_info->dma = map[count];
3459                 count++;
3460         }
3461
3462         tx_ring->buffer_info[i].skb = skb;
3463         tx_ring->buffer_info[first].next_to_watch = i;
3464
3465         return ++count;
3466 }
3467
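/**
 * igb_tx_queue_adv - write the mapped buffers as advanced data descriptors
 * @tx_ring: ring to place the descriptors on
 * @tx_flags: IGB_TX_FLAGS_* describing the offloads for this packet
 * @count: number of mapped buffers to describe
 * @paylen: total length of the packet
 * @hdr_len: header length reported by TSO, zero otherwise
 **/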
3468 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3469                                     int tx_flags, int count, u32 paylen,
3470                                     u8 hdr_len)
3471 {
3472         union e1000_adv_tx_desc *tx_desc;
3473         struct igb_buffer *buffer_info;
3474         u32 olinfo_status = 0, cmd_type_len;
3475         unsigned int i = tx_ring->next_to_use;
3476
3477         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3478                         E1000_ADVTXD_DCMD_DEXT);
3479
3480         if (tx_flags & IGB_TX_FLAGS_VLAN)
3481                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3482
3483         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3484                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3485
3486         if (tx_flags & IGB_TX_FLAGS_TSO) {
3487                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3488
3489                 /* insert tcp checksum */
3490                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3491
3492                 /* insert ip checksum */
3493                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3494                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3495
3496         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3497                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3498         }
3499
3500         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3501             (tx_flags & (IGB_TX_FLAGS_CSUM |
3502                          IGB_TX_FLAGS_TSO |
3503                          IGB_TX_FLAGS_VLAN)))
3504                 olinfo_status |= tx_ring->reg_idx << 4;
3505
3506         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3507
3508         do {
3509                 buffer_info = &tx_ring->buffer_info[i];
3510                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3511                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3512                 tx_desc->read.cmd_type_len =
3513                         cpu_to_le32(cmd_type_len | buffer_info->length);
3514                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3515                 count--;
3516                 i++;
3517                 if (i == tx_ring->count)
3518                         i = 0;
3519         } while (count > 0);
3520
3521         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3522         /* Force memory writes to complete before letting h/w
3523          * know there are new descriptors to fetch.  (Only
3524          * applicable for weak-ordered memory model archs,
3525          * such as IA-64). */
3526         wmb();
3527
3528         tx_ring->next_to_use = i;
3529         writel(i, tx_ring->tail);
3530         /* we need this if more than one processor can write to our tail
3531          * at a time, it synchronizes IO on IA64/Altix systems */
3532         mmiowb();
3533 }
3534
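/**
 * __igb_maybe_stop_tx - stop the subqueue and re-check for free descriptors
 * @tx_ring: ring that is running short of descriptors
 * @size: number of descriptors needed for the next transmit
 *
 * Returns -EBUSY if the queue must remain stopped, or 0 if it was re-woken
 * because another CPU freed descriptors in the meantime.
 **/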
3535 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3536 {
3537         struct net_device *netdev = tx_ring->netdev;
3538
3539         netif_stop_subqueue(netdev, tx_ring->queue_index);
3540
3541         /* Herbert's original patch had:
3542          *  smp_mb__after_netif_stop_queue();
3543          * but since that doesn't exist yet, just open code it. */
3544         smp_mb();
3545
3546         /* We need to check again in case another CPU has just
3547          * made room available. */
3548         if (igb_desc_unused(tx_ring) < size)
3549                 return -EBUSY;
3550
3551         /* A reprieve! */
3552         netif_wake_subqueue(netdev, tx_ring->queue_index);
3553         tx_ring->tx_stats.restart_queue++;
3554         return 0;
3555 }
3556
3557 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3558 {
3559         if (igb_desc_unused(tx_ring) >= size)
3560                 return 0;
3561         return __igb_maybe_stop_tx(tx_ring, size);
3562 }
3563
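/**
 * igb_xmit_frame_ring_adv - prepare and queue a frame on a given tx ring
 * @skb: packet to transmit
 * @tx_ring: ring to place the descriptors on
 *
 * Checks descriptor availability, then sets up hardware timestamping,
 * VLAN tagging, TSO and checksum offload before mapping and queueing
 * the buffers.
 **/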
3564 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3565                                     struct igb_ring *tx_ring)
3566 {
3567         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3568         unsigned int first;
3569         unsigned int tx_flags = 0;
3570         u8 hdr_len = 0;
3571         int tso = 0, count;
3572         union skb_shared_tx *shtx = skb_tx(skb);
3573
3574         /* need: 1 descriptor per page,
3575          *       + 2 desc gap to keep tail from touching head,
3576          *       + 1 desc for skb->data,
3577          *       + 1 desc for context descriptor,
3578          * otherwise try next time */
3579         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3580                 /* this is a hard error */
3581                 return NETDEV_TX_BUSY;
3582         }
3583
3584         if (unlikely(shtx->hardware)) {
3585                 shtx->in_progress = 1;
3586                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3587         }
3588
3589         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3590                 tx_flags |= IGB_TX_FLAGS_VLAN;
3591                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3592         }
3593
3594         if (skb->protocol == htons(ETH_P_IP))
3595                 tx_flags |= IGB_TX_FLAGS_IPV4;
3596
3597         first = tx_ring->next_to_use;
3598         if (skb_is_gso(skb)) {
3599                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3600
3601                 if (tso < 0) {
3602                         dev_kfree_skb_any(skb);
3603                         return NETDEV_TX_OK;
3604                 }
3605         }
3606
3607         if (tso)
3608                 tx_flags |= IGB_TX_FLAGS_TSO;
3609         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3610                  (skb->ip_summed == CHECKSUM_PARTIAL))
3611                 tx_flags |= IGB_TX_FLAGS_CSUM;
3612
3613         /*
3614          * count reflects descriptors mapped, if 0 or less then a mapping error
3615          * has occurred and we need to rewind the descriptor queue
3616          */
3617         count = igb_tx_map_adv(tx_ring, skb, first);
3618         if (count <= 0) {
3619                 dev_kfree_skb_any(skb);
3620                 tx_ring->buffer_info[first].time_stamp = 0;
3621                 tx_ring->next_to_use = first;
3622                 return NETDEV_TX_OK;
3623         }
3624
3625         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3626
3627         /* Make sure there is space in the ring for the next send. */
3628         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3629
3630         return NETDEV_TX_OK;
3631 }
3632
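/**
 * igb_xmit_frame_adv - select a tx ring for the frame and hand it off
 * @skb: packet to transmit
 * @netdev: network interface device structure
 **/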
3633 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3634                                       struct net_device *netdev)
3635 {
3636         struct igb_adapter *adapter = netdev_priv(netdev);
3637         struct igb_ring *tx_ring;
3638         int r_idx = 0;
3639
3640         if (test_bit(__IGB_DOWN, &adapter->state)) {
3641                 dev_kfree_skb_any(skb);
3642                 return NETDEV_TX_OK;
3643         }
3644
3645         if (skb->len <= 0) {
3646                 dev_kfree_skb_any(skb);
3647                 return NETDEV_TX_OK;
3648         }
3649
3650         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3651         tx_ring = adapter->multi_tx_table[r_idx];
3652
3653         /* This goes back to the question of how to logically map a tx queue
3654          * to a flow.  Right now, performance suffers slightly when using
3655          * multiple tx queues.  If the stack breaks away from a
3656          * single qdisc implementation, we can look at this again. */
3657         return igb_xmit_frame_ring_adv(skb, tx_ring);
3658 }
3659
3660 /**
3661  * igb_tx_timeout - Respond to a Tx Hang
3662  * @netdev: network interface device structure
3663  **/
3664 static void igb_tx_timeout(struct net_device *netdev)
3665 {
3666         struct igb_adapter *adapter = netdev_priv(netdev);
3667         struct e1000_hw *hw = &adapter->hw;
3668
3669         /* Do the reset outside of interrupt context */
3670         adapter->tx_timeout_count++;
3671
3672         schedule_work(&adapter->reset_task);
3673         wr32(E1000_EICS,
3674              (adapter->eims_enable_mask & ~adapter->eims_other));
3675 }
3676
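/**
 * igb_reset_task - reinitialize the adapter outside of interrupt context
 * @work: pointer to the reset_task work_struct embedded in the adapter
 **/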
3677 static void igb_reset_task(struct work_struct *work)
3678 {
3679         struct igb_adapter *adapter;
3680         adapter = container_of(work, struct igb_adapter, reset_task);
3681
3682         igb_reinit_locked(adapter);
3683 }
3684
3685 /**
3686  * igb_get_stats - Get System Network Statistics
3687  * @netdev: network interface device structure
3688  *
3689  * Returns the address of the device statistics structure.
3690  * The statistics are actually updated from the timer callback.
3691  **/
3692 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3693 {
3694         /* only return the current stats */
3695         return &netdev->stats;
3696 }
3697
3698 /**
3699  * igb_change_mtu - Change the Maximum Transfer Unit
3700  * @netdev: network interface device structure
3701  * @new_mtu: new value for maximum frame size
3702  *
3703  * Returns 0 on success, negative on failure
3704  **/
3705 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3706 {
3707         struct igb_adapter *adapter = netdev_priv(netdev);
3708         struct pci_dev *pdev = adapter->pdev;
3709         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3710         u32 rx_buffer_len, i;
3711
3712         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3713                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3714                 return -EINVAL;
3715         }
3716
3717         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3718                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3719                 return -EINVAL;
3720         }
3721
3722         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3723                 msleep(1);
3724
3725         /* igb_down has a dependency on max_frame_size */
3726         adapter->max_frame_size = max_frame;
3727         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3728          * means we reserve 2 more, this pushes us to allocate from the next
3729          * larger slab size.
3730          * i.e. RXBUFFER_2048 --> size-4096 slab
3731          */
3732
3733         if (max_frame <= IGB_RXBUFFER_1024)
3734                 rx_buffer_len = IGB_RXBUFFER_1024;
3735         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3736                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3737         else
3738                 rx_buffer_len = IGB_RXBUFFER_128;
3739
3740         if (netif_running(netdev))
3741                 igb_down(adapter);
3742
3743         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3744                  netdev->mtu, new_mtu);
3745         netdev->mtu = new_mtu;
3746
3747         for (i = 0; i < adapter->num_rx_queues; i++)
3748                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3749
3750         if (netif_running(netdev))
3751                 igb_up(adapter);
3752         else
3753                 igb_reset(adapter);
3754
3755         clear_bit(__IGB_RESETTING, &adapter->state);
3756
3757         return 0;
3758 }
3759
3760 /**
3761  * igb_update_stats - Update the board statistics counters
3762  * @adapter: board private structure
3763  **/
3764
3765 void igb_update_stats(struct igb_adapter *adapter)
3766 {
3767         struct net_device *netdev = adapter->netdev;
3768         struct e1000_hw *hw = &adapter->hw;
3769         struct pci_dev *pdev = adapter->pdev;
3770         u32 rnbc;
3771         u16 phy_tmp;
3772         int i;
3773         u64 bytes, packets;
3774
3775 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3776
3777         /*
3778          * Prevent stats update while adapter is being reset, or if the pci
3779          * connection is down.
3780          */
3781         if (adapter->link_speed == 0)
3782                 return;
3783         if (pci_channel_offline(pdev))
3784                 return;
3785
3786         bytes = 0;
3787         packets = 0;
3788         for (i = 0; i < adapter->num_rx_queues; i++) {
3789                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3790                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3791                 netdev->stats.rx_fifo_errors += rqdpc_tmp;
3792                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3793                 packets += adapter->rx_ring[i].rx_stats.packets;
3794         }
3795
3796         netdev->stats.rx_bytes = bytes;
3797         netdev->stats.rx_packets = packets;
3798
3799         bytes = 0;
3800         packets = 0;
3801         for (i = 0; i < adapter->num_tx_queues; i++) {
3802                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3803                 packets += adapter->tx_ring[i].tx_stats.packets;
3804         }
3805         netdev->stats.tx_bytes = bytes;
3806         netdev->stats.tx_packets = packets;
3807
3808         /* read stats registers */
3809         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3810         adapter->stats.gprc += rd32(E1000_GPRC);
3811         adapter->stats.gorc += rd32(E1000_GORCL);
3812         rd32(E1000_GORCH); /* clear GORCL */
3813         adapter->stats.bprc += rd32(E1000_BPRC);
3814         adapter->stats.mprc += rd32(E1000_MPRC);
3815         adapter->stats.roc += rd32(E1000_ROC);
3816
3817         adapter->stats.prc64 += rd32(E1000_PRC64);
3818         adapter->stats.prc127 += rd32(E1000_PRC127);
3819         adapter->stats.prc255 += rd32(E1000_PRC255);
3820         adapter->stats.prc511 += rd32(E1000_PRC511);
3821         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3822         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3823         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3824         adapter->stats.sec += rd32(E1000_SEC);
3825
3826         adapter->stats.mpc += rd32(E1000_MPC);
3827         adapter->stats.scc += rd32(E1000_SCC);
3828         adapter->stats.ecol += rd32(E1000_ECOL);
3829         adapter->stats.mcc += rd32(E1000_MCC);
3830         adapter->stats.latecol += rd32(E1000_LATECOL);
3831         adapter->stats.dc += rd32(E1000_DC);
3832         adapter->stats.rlec += rd32(E1000_RLEC);
3833         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3834         adapter->stats.xontxc += rd32(E1000_XONTXC);
3835         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3836         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3837         adapter->stats.fcruc += rd32(E1000_FCRUC);
3838         adapter->stats.gptc += rd32(E1000_GPTC);
3839         adapter->stats.gotc += rd32(E1000_GOTCL);
3840         rd32(E1000_GOTCH); /* clear GOTCL */
3841         rnbc = rd32(E1000_RNBC);
3842         adapter->stats.rnbc += rnbc;
3843         netdev->stats.rx_fifo_errors += rnbc;
3844         adapter->stats.ruc += rd32(E1000_RUC);
3845         adapter->stats.rfc += rd32(E1000_RFC);
3846         adapter->stats.rjc += rd32(E1000_RJC);
3847         adapter->stats.tor += rd32(E1000_TORH);
3848         adapter->stats.tot += rd32(E1000_TOTH);
3849         adapter->stats.tpr += rd32(E1000_TPR);
3850
3851         adapter->stats.ptc64 += rd32(E1000_PTC64);
3852         adapter->stats.ptc127 += rd32(E1000_PTC127);
3853         adapter->stats.ptc255 += rd32(E1000_PTC255);
3854         adapter->stats.ptc511 += rd32(E1000_PTC511);
3855         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3856         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3857
3858         adapter->stats.mptc += rd32(E1000_MPTC);
3859         adapter->stats.bptc += rd32(E1000_BPTC);
3860
3861         /* used for adaptive IFS */
3862         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3863         adapter->stats.tpt += hw->mac.tx_packet_delta;
3864         hw->mac.collision_delta = rd32(E1000_COLC);
3865         adapter->stats.colc += hw->mac.collision_delta;
3866
3867         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3868         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3869         adapter->stats.tncrs += rd32(E1000_TNCRS);
3870         adapter->stats.tsctc += rd32(E1000_TSCTC);
3871         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3872
3873         adapter->stats.iac += rd32(E1000_IAC);
3874         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3875         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3876         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3877         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3878         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3879         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3880         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3881         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3882
3883         /* Fill out the OS statistics structure */
3884         netdev->stats.multicast = adapter->stats.mprc;
3885         netdev->stats.collisions = adapter->stats.colc;
3886
3887         /* Rx Errors */
3888
3889         /* RLEC on some newer hardware can be incorrect so build
3890          * our own version based on RUC and ROC */
3891         netdev->stats.rx_errors = adapter->stats.rxerrc +
3892                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3893                 adapter->stats.ruc + adapter->stats.roc +
3894                 adapter->stats.cexterr;
3895         netdev->stats.rx_length_errors = adapter->stats.ruc +
3896                                               adapter->stats.roc;
3897         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3898         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3899         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3900
3901         /* Tx Errors */
3902         netdev->stats.tx_errors = adapter->stats.ecol +
3903                                        adapter->stats.latecol;
3904         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3905         netdev->stats.tx_window_errors = adapter->stats.latecol;
3906         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3907
3908         /* Tx Dropped needs to be maintained elsewhere */
3909
3910         /* Phy Stats */
3911         if (hw->phy.media_type == e1000_media_type_copper) {
3912                 if ((adapter->link_speed == SPEED_1000) &&
3913                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3914                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3915                         adapter->phy_stats.idle_errors += phy_tmp;
3916                 }
3917         }
3918
3919         /* Management Stats */
3920         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3921         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3922         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3923 }
3924
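/**
 * igb_msix_other - handle the "other causes" MSI-X vector
 * @irq: interrupt number
 * @data: pointer to our adapter
 *
 * Services DMA out-of-sync events, VF mailbox messages and link status
 * changes, then re-arms the corresponding interrupt causes.
 **/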
3925 static irqreturn_t igb_msix_other(int irq, void *data)
3926 {
3927         struct igb_adapter *adapter = data;
3928         struct e1000_hw *hw = &adapter->hw;
3929         u32 icr = rd32(E1000_ICR);
3930         /* reading ICR causes bit 31 of EICR to be cleared */
3931
3932         if (icr & E1000_ICR_DOUTSYNC) {
3933                 /* HW is reporting DMA is out of sync */
3934                 adapter->stats.doosync++;
3935         }
3936
3937         /* Check for a mailbox event */
3938         if (icr & E1000_ICR_VMMB)
3939                 igb_msg_task(adapter);
3940
3941         if (icr & E1000_ICR_LSC) {
3942                 hw->mac.get_link_status = 1;
3943                 /* guard against interrupt when we're going down */
3944                 if (!test_bit(__IGB_DOWN, &adapter->state))
3945                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3946         }
3947
3948         if (adapter->vfs_allocated_count)
3949                 wr32(E1000_IMS, E1000_IMS_LSC |
3950                                 E1000_IMS_VMMB |
3951                                 E1000_IMS_DOUTSYNC);
3952         else
3953                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3954         wr32(E1000_EIMS, adapter->eims_other);
3955
3956         return IRQ_HANDLED;
3957 }
3958
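/**
 * igb_write_itr - write a pending ITR value to the vector's ITR register
 * @q_vector: vector whose itr_val should be programmed
 **/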
3959 static void igb_write_itr(struct igb_q_vector *q_vector)
3960 {
3961         u32 itr_val = q_vector->itr_val & 0x7FFC;
3962
3963         if (!q_vector->set_itr)
3964                 return;
3965
3966         if (!itr_val)
3967                 itr_val = 0x4;
3968
3969         if (q_vector->itr_shift)
3970                 itr_val |= itr_val << q_vector->itr_shift;
3971         else
3972                 itr_val |= 0x8000000;
3973
3974         writel(itr_val, q_vector->itr_register);
3975         q_vector->set_itr = 0;
3976 }
3977
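/**
 * igb_msix_ring - MSI-X handler for a single queue vector
 * @irq: interrupt number
 * @data: pointer to the q_vector for this interrupt
 **/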
3978 static irqreturn_t igb_msix_ring(int irq, void *data)
3979 {
3980         struct igb_q_vector *q_vector = data;
3981
3982         /* Write the ITR value calculated from the previous interrupt. */
3983         igb_write_itr(q_vector);
3984
3985         napi_schedule(&q_vector->napi);
3986
3987         return IRQ_HANDLED;
3988 }
3989
3990 #ifdef CONFIG_IGB_DCA
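/**
 * igb_update_dca - retarget DCA tags to the CPU servicing this vector
 * @q_vector: vector whose tx/rx rings should be updated
 **/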
3991 static void igb_update_dca(struct igb_q_vector *q_vector)
3992 {
3993         struct igb_adapter *adapter = q_vector->adapter;
3994         struct e1000_hw *hw = &adapter->hw;
3995         int cpu = get_cpu();
3996
3997         if (q_vector->cpu == cpu)
3998                 goto out_no_update;
3999
4000         if (q_vector->tx_ring) {
4001                 int q = q_vector->tx_ring->reg_idx;
4002                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4003                 if (hw->mac.type == e1000_82575) {
4004                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4005                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4006                 } else {
4007                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4008                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4009                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4010                 }
4011                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4012                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4013         }
4014         if (q_vector->rx_ring) {
4015                 int q = q_vector->rx_ring->reg_idx;
4016                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4017                 if (hw->mac.type == e1000_82575) {
4018                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4019                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4020                 } else {
4021                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4022                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4023                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4024                 }
4025                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4026                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4027                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4028                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4029         }
4030         q_vector->cpu = cpu;
4031 out_no_update:
4032         put_cpu();
4033 }
4034
4035 static void igb_setup_dca(struct igb_adapter *adapter)
4036 {
4037         struct e1000_hw *hw = &adapter->hw;
4038         int i;
4039
4040         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4041                 return;
4042
4043         /* Always use CB2 mode, difference is masked in the CB driver. */
4044         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4045
4046         for (i = 0; i < adapter->num_q_vectors; i++) {
4047                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4048                 q_vector->cpu = -1;
4049                 igb_update_dca(q_vector);
4050         }
4051 }
4052
4053 static int __igb_notify_dca(struct device *dev, void *data)
4054 {
4055         struct net_device *netdev = dev_get_drvdata(dev);
4056         struct igb_adapter *adapter = netdev_priv(netdev);
4057         struct pci_dev *pdev = adapter->pdev;
4058         struct e1000_hw *hw = &adapter->hw;
4059         unsigned long event = *(unsigned long *)data;
4060
4061         switch (event) {
4062         case DCA_PROVIDER_ADD:
4063                 /* if already enabled, don't do it again */
4064                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4065                         break;
4066                 if (dca_add_requester(dev) == 0) {
4067                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4068                         dev_info(&pdev->dev, "DCA enabled\n");
4069                         igb_setup_dca(adapter);
4070                         break;
4071                 }
4072                 /* Fall Through since DCA is disabled. */
4073         case DCA_PROVIDER_REMOVE:
4074                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4075                         /* without this a class_device is left
4076                          * hanging around in the sysfs model */
4077                         dca_remove_requester(dev);
4078                         dev_info(&pdev->dev, "DCA disabled\n");
4079                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4080                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4081                 }
4082                 break;
4083         }
4084
4085         return 0;
4086 }
4087
4088 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4089                           void *p)
4090 {
4091         int ret_val;
4092
4093         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4094                                          __igb_notify_dca);
4095
4096         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4097 }
4098 #endif /* CONFIG_IGB_DCA */
4099
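/**
 * igb_ping_all_vfs - send a control message to every allocated VF
 * @adapter: board private structure
 *
 * The CTS bit is included for VFs that have completed the mailbox
 * handshake.
 **/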
4100 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4101 {
4102         struct e1000_hw *hw = &adapter->hw;
4103         u32 ping;
4104         int i;
4105
4106         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4107                 ping = E1000_PF_CONTROL_MSG;
4108                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4109                         ping |= E1000_VT_MSGTYPE_CTS;
4110                 igb_write_mbx(hw, &ping, 1, i);
4111         }
4112 }
4113
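/**
 * igb_set_vf_promisc - handle a VF request to change its promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF index
 *
 * Returns 0 on success or -EINVAL if the message carries flags that are
 * not supported.
 **/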
4114 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4115 {
4116         struct e1000_hw *hw = &adapter->hw;
4117         u32 vmolr = rd32(E1000_VMOLR(vf));
4118         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4119
4120         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4121                             IGB_VF_FLAG_MULTI_PROMISC);
4122         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4123
4124         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4125                 vmolr |= E1000_VMOLR_MPME;
4126                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4127         } else {
4128                 /*
4129                  * if we have hashes and we are clearing a multicast promisc
4130                  * flag we need to write the hashes to the MTA as this step
4131                  * was previously skipped
4132                  */
4133                 if (vf_data->num_vf_mc_hashes > 30) {
4134                         vmolr |= E1000_VMOLR_MPME;
4135                 } else if (vf_data->num_vf_mc_hashes) {
4136                         int j;
4137                         vmolr |= E1000_VMOLR_ROMPE;
4138                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4139                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4140                 }
4141         }
4142
4143         wr32(E1000_VMOLR(vf), vmolr);
4144
4145         /* if there are flags left unprocessed then they are likely not supported */
4146         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4147                 return -EINVAL;
4148
4149         return 0;
4150
4151 }
4152
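/**
 * igb_set_vf_multicasts - store a VF's multicast hashes and update the MTA
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash values
 * @vf: VF index
 **/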
4153 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4154                                   u32 *msgbuf, u32 vf)
4155 {
4156         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4157         u16 *hash_list = (u16 *)&msgbuf[1];
4158         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4159         int i;
4160
4161         /* salt away the number of multicast addresses assigned
4162          * to this VF for later use to restore when the PF multicast
4163          * list changes
4164          */
4165         vf_data->num_vf_mc_hashes = n;
4166
4167         /* only up to 30 hash values supported */
4168         if (n > 30)
4169                 n = 30;
4170
4171         /* store the hashes for later use */
4172         for (i = 0; i < n; i++)
4173                 vf_data->vf_mc_hashes[i] = hash_list[i];
4174
4175         /* Flush and reset the mta with the new values */
4176         igb_set_rx_mode(adapter->netdev);
4177
4178         return 0;
4179 }
4180
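/**
 * igb_restore_vf_multicasts - rewrite the stored VF multicast filters
 * @adapter: board private structure
 *
 * Called when the PF multicast list changes so that the hashes saved for
 * each VF are written back to the MTA.
 **/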
4181 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4182 {
4183         struct e1000_hw *hw = &adapter->hw;
4184         struct vf_data_storage *vf_data;
4185         int i, j;
4186
4187         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4188                 u32 vmolr = rd32(E1000_VMOLR(i));
4189                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4190
4191                 vf_data = &adapter->vf_data[i];
4192
4193                 if ((vf_data->num_vf_mc_hashes > 30) ||
4194                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4195                         vmolr |= E1000_VMOLR_MPME;
4196                 } else if (vf_data->num_vf_mc_hashes) {
4197                         vmolr |= E1000_VMOLR_ROMPE;
4198                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4199                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4200                 }
4201                 wr32(E1000_VMOLR(i), vmolr);
4202         }
4203 }
4204
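/**
 * igb_clear_vf_vfta - remove a VF from every VLAN filter (VLVF) entry
 * @adapter: board private structure
 * @vf: VF index to remove from the pools
 **/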
4205 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4206 {
4207         struct e1000_hw *hw = &adapter->hw;
4208         u32 pool_mask, reg, vid;
4209         int i;
4210
4211         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4212
4213         /* Find the vlan filter for this id */
4214         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4215                 reg = rd32(E1000_VLVF(i));
4216
4217                 /* remove the vf from the pool */
4218                 reg &= ~pool_mask;
4219
4220                 /* if pool is empty then remove entry from vfta */
4221                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4222                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4223                         vid = reg & E1000_VLVF_VLANID_MASK;
4224                         igb_vfta_set(hw, vid, false);
4225                         reg = 0;
4226                 }
4227
4228                 wr32(E1000_VLVF(i), reg);
4229         }
4230
4231         adapter->vf_data[vf].vlans_enabled = 0;
4232 }
4233
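/**
 * igb_vlvf_set - add or remove a VF pool from a VLAN filter (VLVF) entry
 * @adapter: board private structure
 * @vid: VLAN id
 * @add: true to add the pool to the filter, false to remove it
 * @vf: VF (or PF pool) index
 *
 * Returns 0 on success, negative otherwise.
 **/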
4234 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4235 {
4236         struct e1000_hw *hw = &adapter->hw;
4237         u32 reg, i;
4238
4239         /* The vlvf table only exists on 82576 hardware and newer */
4240         if (hw->mac.type < e1000_82576)
4241                 return -1;
4242
4243         /* we only need to do this if VMDq is enabled */
4244         if (!adapter->vfs_allocated_count)
4245                 return -1;
4246
4247         /* Find the vlan filter for this id */
4248         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4249                 reg = rd32(E1000_VLVF(i));
4250                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4251                     vid == (reg & E1000_VLVF_VLANID_MASK))
4252                         break;
4253         }
4254
4255         if (add) {
4256                 if (i == E1000_VLVF_ARRAY_SIZE) {
4257                         /* Did not find a matching VLAN ID entry that was
4258                          * enabled.  Search for a free filter entry, i.e.
4259                          * one without the enable bit set
4260                          */
4261                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4262                                 reg = rd32(E1000_VLVF(i));
4263                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4264                                         break;
4265                         }
4266                 }
4267                 if (i < E1000_VLVF_ARRAY_SIZE) {
4268                         /* Found an enabled/available entry */
4269                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4270
4271                         /* if !enabled we need to set this up in vfta */
4272                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4273                                 /* add VID to filter table */
4274                                 igb_vfta_set(hw, vid, true);
4275                                 reg |= E1000_VLVF_VLANID_ENABLE;
4276                         }
4277                         reg &= ~E1000_VLVF_VLANID_MASK;
4278                         reg |= vid;
4279                         wr32(E1000_VLVF(i), reg);
4280
4281                         /* do not modify RLPML for PF devices */
4282                         if (vf >= adapter->vfs_allocated_count)
4283                                 return 0;
4284
4285                         if (!adapter->vf_data[vf].vlans_enabled) {
4286                                 u32 size;
4287                                 reg = rd32(E1000_VMOLR(vf));
4288                                 size = reg & E1000_VMOLR_RLPML_MASK;
4289                                 size += 4;
4290                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4291                                 reg |= size;
4292                                 wr32(E1000_VMOLR(vf), reg);
4293                         }
4294
4295                         adapter->vf_data[vf].vlans_enabled++;
4296                         return 0;
4297                 }
4298         } else {
4299                 if (i < E1000_VLVF_ARRAY_SIZE) {
4300                         /* remove vf from the pool */
4301                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4302                         /* if pool is empty then remove entry from vfta */
4303                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4304                                 reg = 0;
4305                                 igb_vfta_set(hw, vid, false);
4306                         }
4307                         wr32(E1000_VLVF(i), reg);
4308
4309                         /* do not modify RLPML for PF devices */
4310                         if (vf >= adapter->vfs_allocated_count)
4311                                 return 0;
4312
4313                         adapter->vf_data[vf].vlans_enabled--;
4314                         if (!adapter->vf_data[vf].vlans_enabled) {
4315                                 u32 size;
4316                                 reg = rd32(E1000_VMOLR(vf));
4317                                 size = reg & E1000_VMOLR_RLPML_MASK;
4318                                 size -= 4;
4319                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4320                                 reg |= size;
4321                                 wr32(E1000_VMOLR(vf), reg);
4322                         }
4323                         return 0;
4324                 }
4325         }
4326         return -1;
4327 }
4328
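/**
 * igb_set_vf_vlan - handle a VLAN add/remove request from a VF mailbox
 * @adapter: board private structure
 * @msgbuf: mailbox message holding the VLAN id and the add/remove flag
 * @vf: VF that sent the request
 **/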
4329 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4330 {
4331         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4332         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4333
4334         return igb_vlvf_set(adapter, vid, add, vf);
4335 }
4336
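/**
 * igb_vf_reset - return a VF to its default configuration
 * @adapter: board private structure
 * @vf: VF to reset
 *
 * Clears the VF flags, restores default offloads, drops its VLAN filters
 * and multicast hashes, then rewrites the receive mode to flush the MTA.
 **/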
4337 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4338 {
4339         /* clear all flags */
4340         adapter->vf_data[vf].flags = 0;
4341         adapter->vf_data[vf].last_nack = jiffies;
4342
4343         /* reset offloads to defaults */
4344         igb_set_vmolr(adapter, vf);
4345
4346         /* reset vlans for device */
4347         igb_clear_vf_vfta(adapter, vf);
4348
4349         /* reset multicast table array for vf */
4350         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4351
4352         /* Flush and reset the mta with the new values */
4353         igb_set_rx_mode(adapter->netdev);
4354 }
4355
4356 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4357 {
4358         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4359
4360         /* generate a new mac address as we were hotplug removed/added */
4361         random_ether_addr(vf_mac);
4362
4363         /* process remaining reset events */
4364         igb_vf_reset(adapter, vf);
4365 }
4366
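/**
 * igb_vf_reset_msg - handle a reset request received from a VF
 * @adapter: board private structure
 * @vf: VF requesting the reset
 *
 * Performs the common reset work, programs the VF MAC address into its
 * receive address register, re-enables the VF transmit and receive queues
 * and replies with an ACK carrying the MAC address.
 **/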
4367 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4368 {
4369         struct e1000_hw *hw = &adapter->hw;
4370         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4371         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4372         u32 reg, msgbuf[3];
4373         u8 *addr = (u8 *)(&msgbuf[1]);
4374
4375         /* process all the same items cleared in a function level reset */
4376         igb_vf_reset(adapter, vf);
4377
4378         /* set vf mac address */
4379         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4380
4381         /* enable transmit and receive for vf */
4382         reg = rd32(E1000_VFTE);
4383         wr32(E1000_VFTE, reg | (1 << vf));
4384         reg = rd32(E1000_VFRE);
4385         wr32(E1000_VFRE, reg | (1 << vf));
4386
4387         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4388
4389         /* reply to reset with ack and vf mac address */
4390         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4391         memcpy(addr, vf_mac, 6);
4392         igb_write_mbx(hw, msgbuf, 3, vf);
4393 }
4394
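/**
 * igb_set_vf_mac_addr - handle a MAC address change request from a VF
 * @adapter: board private structure
 * @msg: mailbox message containing the requested address
 * @vf: VF that sent the request
 **/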
4395 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4396 {
4397         unsigned char *addr = (unsigned char *)&msg[1];
4398         int err = -1;
4399
4400         if (is_valid_ether_addr(addr))
4401                 err = igb_set_vf_mac(adapter, vf, addr);
4402
4403         return err;
4404 }
4405
4406 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4407 {
4408         struct e1000_hw *hw = &adapter->hw;
4409         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4410         u32 msg = E1000_VT_MSGTYPE_NACK;
4411
4412         /* if device isn't clear to send it shouldn't be reading either */
4413         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4414             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4415                 igb_write_mbx(hw, &msg, 1, vf);
4416                 vf_data->last_nack = jiffies;
4417         }
4418 }
4419
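/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF whose mailbox holds a pending message
 *
 * Reset requests are handled immediately; configuration requests from a VF
 * that has not completed a reset are NACKed.  All other message types are
 * dispatched to their handlers and answered with an ACK or NACK.
 **/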
4420 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4421 {
4422         struct pci_dev *pdev = adapter->pdev;
4423         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4424         struct e1000_hw *hw = &adapter->hw;
4425         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4426         s32 retval;
4427
4428         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4429
4430         if (retval)
4431                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4432
4433         /* this is a message we already processed, do nothing */
4434         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4435                 return;
4436
4437         /*
4438          * until the vf completes a reset it should not be
4439          * allowed to start any configuration.
4440          */
4441
4442         if (msgbuf[0] == E1000_VF_RESET) {
4443                 igb_vf_reset_msg(adapter, vf);
4444                 return;
4445         }
4446
4447         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4448                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4449                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4450                         igb_write_mbx(hw, msgbuf, 1, vf);
4451                         vf_data->last_nack = jiffies;
4452                 }
4453                 return;
4454         }
4455
4456         switch ((msgbuf[0] & 0xFFFF)) {
4457         case E1000_VF_SET_MAC_ADDR:
4458                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4459                 break;
4460         case E1000_VF_SET_PROMISC:
4461                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4462                 break;
4463         case E1000_VF_SET_MULTICAST:
4464                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4465                 break;
4466         case E1000_VF_SET_LPE:
4467                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4468                 break;
4469         case E1000_VF_SET_VLAN:
4470                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4471                 break;
4472         default:
4473                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4474                 retval = -1;
4475                 break;
4476         }
4477
4478         /* notify the VF of the results of what it sent us */
4479         if (retval)
4480                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4481         else
4482                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4483
4484         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4485
4486         igb_write_mbx(hw, msgbuf, 1, vf);
4487 }
4488
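/**
 * igb_msg_task - service pending mailbox events for all VFs
 * @adapter: board private structure
 **/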
4489 static void igb_msg_task(struct igb_adapter *adapter)
4490 {
4491         struct e1000_hw *hw = &adapter->hw;
4492         u32 vf;
4493
4494         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4495                 /* process any reset requests */
4496                 if (!igb_check_for_rst(hw, vf))
4497                         igb_vf_reset_event(adapter, vf);
4498
4499                 /* process any messages pending */
4500                 if (!igb_check_for_msg(hw, vf))
4501                         igb_rcv_msg_from_vf(adapter, vf);
4502
4503                 /* process any acks */
4504                 if (!igb_check_for_ack(hw, vf))
4505                         igb_rcv_ack_from_vf(adapter, vf);
4506         }
4507 }
4508
4509 /**
4510  *  igb_set_uta - Set unicast filter table address
4511  *  @adapter: board private structure
4512  *
4513  *  The unicast table address is a register array of 32-bit registers.
4514  *  The table is meant to be used in a way similar to how the MTA is used
4515  *  however, due to certain limitations in the hardware it is necessary to
4516  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4517  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4518  **/
4519 static void igb_set_uta(struct igb_adapter *adapter)
4520 {
4521         struct e1000_hw *hw = &adapter->hw;
4522         int i;
4523
4524         /* The UTA table only exists on 82576 hardware and newer */
4525         if (hw->mac.type < e1000_82576)
4526                 return;
4527
4528         /* we only need to do this if VMDq is enabled */
4529         if (!adapter->vfs_allocated_count)
4530                 return;
4531
4532         for (i = 0; i < hw->mac.uta_reg_count; i++)
4533                 array_wr32(E1000_UTA, i, ~0);
4534 }
4535
4536 /**
4537  * igb_intr_msi - Interrupt Handler
4538  * @irq: interrupt number
4539  * @data: pointer to a network interface device structure
4540  **/
4541 static irqreturn_t igb_intr_msi(int irq, void *data)
4542 {
4543         struct igb_adapter *adapter = data;
4544         struct igb_q_vector *q_vector = adapter->q_vector[0];
4545         struct e1000_hw *hw = &adapter->hw;
4546         /* read ICR disables interrupts using IAM */
4547         u32 icr = rd32(E1000_ICR);
4548
4549         igb_write_itr(q_vector);
4550
4551         if (icr & E1000_ICR_DOUTSYNC) {
4552                 /* HW is reporting DMA is out of sync */
4553                 adapter->stats.doosync++;
4554         }
4555
4556         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4557                 hw->mac.get_link_status = 1;
4558                 if (!test_bit(__IGB_DOWN, &adapter->state))
4559                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4560         }
4561
4562         napi_schedule(&q_vector->napi);
4563
4564         return IRQ_HANDLED;
4565 }
4566
4567 /**
4568  * igb_intr - Legacy Interrupt Handler
4569  * @irq: interrupt number
4570  * @data: pointer to a network interface device structure
4571  **/
4572 static irqreturn_t igb_intr(int irq, void *data)
4573 {
4574         struct igb_adapter *adapter = data;
4575         struct igb_q_vector *q_vector = adapter->q_vector[0];
4576         struct e1000_hw *hw = &adapter->hw;
4577         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4578          * need for the IMC write */
4579         u32 icr = rd32(E1000_ICR);
4580         if (!icr)
4581                 return IRQ_NONE;  /* Not our interrupt */
4582
4583         igb_write_itr(q_vector);
4584
4585         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4586          * not set, then the adapter didn't send an interrupt */
4587         if (!(icr & E1000_ICR_INT_ASSERTED))
4588                 return IRQ_NONE;
4589
4590         if (icr & E1000_ICR_DOUTSYNC) {
4591                 /* HW is reporting DMA is out of sync */
4592                 adapter->stats.doosync++;
4593         }
4594
4595         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4596                 hw->mac.get_link_status = 1;
4597                 /* guard against interrupt when we're going down */
4598                 if (!test_bit(__IGB_DOWN, &adapter->state))
4599                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4600         }
4601
4602         napi_schedule(&q_vector->napi);
4603
4604         return IRQ_HANDLED;
4605 }
4606
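/**
 * igb_ring_irq_enable - re-arm interrupts for a q_vector after polling
 * @q_vector: q_vector whose interrupt should be re-enabled
 *
 * Updates the interrupt throttle rate if dynamic moderation is configured,
 * then re-enables either the vector's EIMS bit (MSI-X) or the global
 * interrupt mask.
 **/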
4607 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4608 {
4609         struct igb_adapter *adapter = q_vector->adapter;
4610         struct e1000_hw *hw = &adapter->hw;
4611
4612         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4613             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4614                 if (!adapter->msix_entries)
4615                         igb_set_itr(adapter);
4616                 else
4617                         igb_update_ring_itr(q_vector);
4618         }
4619
4620         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4621                 if (adapter->msix_entries)
4622                         wr32(E1000_EIMS, q_vector->eims_value);
4623                 else
4624                         igb_irq_enable(adapter);
4625         }
4626 }
4627
4628 /**
4629  * igb_poll - NAPI Rx polling callback
4630  * @napi: napi polling structure
4631  * @budget: count of how many packets we should handle
4632  **/
4633 static int igb_poll(struct napi_struct *napi, int budget)
4634 {
4635         struct igb_q_vector *q_vector = container_of(napi,
4636                                                      struct igb_q_vector,
4637                                                      napi);
4638         int tx_clean_complete = 1, work_done = 0;
4639
4640 #ifdef CONFIG_IGB_DCA
4641         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4642                 igb_update_dca(q_vector);
4643 #endif
4644         if (q_vector->tx_ring)
4645                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4646
4647         if (q_vector->rx_ring)
4648                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4649
4650         if (!tx_clean_complete)
4651                 work_done = budget;
4652
4653         /* If not enough Rx work done, exit the polling mode */
4654         if (work_done < budget) {
4655                 napi_complete(napi);
4656                 igb_ring_irq_enable(q_vector);
4657         }
4658
4659         return work_done;
4660 }
4661
4662 /**
4663  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4664  * @adapter: board private structure
4665  * @shhwtstamps: timestamp structure to update
4666  * @regval: unsigned 64bit system time value.
4667  *
4668  * We need to convert the system time value stored in the RX/TXSTMP registers
4669  * into a hwtstamp which can be used by the upper level timestamping functions
4670  */
4671 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4672                                    struct skb_shared_hwtstamps *shhwtstamps,
4673                                    u64 regval)
4674 {
4675         u64 ns;
4676
4677         ns = timecounter_cyc2time(&adapter->clock, regval);
4678         timecompare_update(&adapter->compare, ns);
4679         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4680         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4681         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4682 }
4683
4684 /**
4685  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4686  * @q_vector: pointer to q_vector containing needed info
4687  * @skb: packet that was just sent
4688  *
4689  * If we were asked to do hardware stamping and such a time stamp is
4690  * available, then it must have been for this skb here because we only
4691  * allow one such packet into the queue.
4692  */
4693 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4694 {
4695         struct igb_adapter *adapter = q_vector->adapter;
4696         union skb_shared_tx *shtx = skb_tx(skb);
4697         struct e1000_hw *hw = &adapter->hw;
4698         struct skb_shared_hwtstamps shhwtstamps;
4699         u64 regval;
4700
4701         /* if skb does not support hw timestamp or TX stamp not valid exit */
4702         /* exit if the skb didn't request a hw timestamp or no valid TX timestamp is available */
4703             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4704                 return;
4705
4706         regval = rd32(E1000_TXSTMPL);
4707         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4708
4709         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4710         skb_tstamp_tx(skb, &shhwtstamps);
4711 }
4712
4713 /**
4714  * igb_clean_tx_irq - Reclaim resources after transmit completes
4715  * @q_vector: pointer to q_vector containing needed info
4716  * returns true if ring is completely cleaned
4717  **/
4718 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4719 {
4720         struct igb_adapter *adapter = q_vector->adapter;
4721         struct igb_ring *tx_ring = q_vector->tx_ring;
4722         struct net_device *netdev = tx_ring->netdev;
4723         struct e1000_hw *hw = &adapter->hw;
4724         struct igb_buffer *buffer_info;
4725         struct sk_buff *skb;
4726         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4727         unsigned int total_bytes = 0, total_packets = 0;
4728         unsigned int i, eop, count = 0;
4729         bool cleaned = false;
4730
4731         i = tx_ring->next_to_clean;
4732         eop = tx_ring->buffer_info[i].next_to_watch;
4733         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4734
4735         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4736                (count < tx_ring->count)) {
4737                 for (cleaned = false; !cleaned; count++) {
4738                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4739                         buffer_info = &tx_ring->buffer_info[i];
4740                         cleaned = (i == eop);
4741                         skb = buffer_info->skb;
4742
4743                         if (skb) {
4744                                 unsigned int segs, bytecount;
4745                                 /* gso_segs is currently only valid for tcp */
4746                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4747                                 /* multiply data chunks by size of headers */
4748                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4749                                             skb->len;
4750                                 total_packets += segs;
4751                                 total_bytes += bytecount;
4752
4753                                 igb_tx_hwtstamp(q_vector, skb);
4754                         }
4755
4756                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4757                         tx_desc->wb.status = 0;
4758
4759                         i++;
4760                         if (i == tx_ring->count)
4761                                 i = 0;
4762                 }
4763                 eop = tx_ring->buffer_info[i].next_to_watch;
4764                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4765         }
4766
4767         tx_ring->next_to_clean = i;
4768
4769         if (unlikely(count &&
4770                      netif_carrier_ok(netdev) &&
4771                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4772                 /* Make sure that anybody stopping the queue after this
4773                  * sees the new next_to_clean.
4774                  */
4775                 smp_mb();
4776                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4777                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4778                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4779                         tx_ring->tx_stats.restart_queue++;
4780                 }
4781         }
4782
4783         if (tx_ring->detect_tx_hung) {
4784                 /* Detect a transmit hang in hardware, this serializes the
4785                  * check with the clearing of time_stamp and movement of i */
4786                 tx_ring->detect_tx_hung = false;
4787                 if (tx_ring->buffer_info[i].time_stamp &&
4788                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4789                                (adapter->tx_timeout_factor * HZ))
4790                     && !(rd32(E1000_STATUS) &
4791                          E1000_STATUS_TXOFF)) {
4792
4793                         /* detected Tx unit hang */
4794                         dev_err(&tx_ring->pdev->dev,
4795                                 "Detected Tx Unit Hang\n"
4796                                 "  Tx Queue             <%d>\n"
4797                                 "  TDH                  <%x>\n"
4798                                 "  TDT                  <%x>\n"
4799                                 "  next_to_use          <%x>\n"
4800                                 "  next_to_clean        <%x>\n"
4801                                 "buffer_info[next_to_clean]\n"
4802                                 "  time_stamp           <%lx>\n"
4803                                 "  next_to_watch        <%x>\n"
4804                                 "  jiffies              <%lx>\n"
4805                                 "  desc.status          <%x>\n",
4806                                 tx_ring->queue_index,
4807                                 readl(tx_ring->head),
4808                                 readl(tx_ring->tail),
4809                                 tx_ring->next_to_use,
4810                                 tx_ring->next_to_clean,
4811                                 tx_ring->buffer_info[eop].time_stamp,
4812                                 eop,
4813                                 jiffies,
4814                                 eop_desc->wb.status);
4815                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4816                 }
4817         }
4818         tx_ring->total_bytes += total_bytes;
4819         tx_ring->total_packets += total_packets;
4820         tx_ring->tx_stats.bytes += total_bytes;
4821         tx_ring->tx_stats.packets += total_packets;
4822         return (count < tx_ring->count);
4823 }
4824
4825 /**
4826  * igb_receive_skb - helper function to handle rx indications
4827  * @q_vector: structure containing interrupt and ring information
4828  * @skb: packet to send up
4829  * @vlan_tag: vlan tag for packet
4830  **/
4831 static void igb_receive_skb(struct igb_q_vector *q_vector,
4832                             struct sk_buff *skb,
4833                             u16 vlan_tag)
4834 {
4835         struct igb_adapter *adapter = q_vector->adapter;
4836
4837         if (vlan_tag)
4838                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4839                                  vlan_tag, skb);
4840         else
4841                 napi_gro_receive(&q_vector->napi, skb);
4842 }
4843
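/**
 * igb_rx_checksum_adv - report hardware receive checksum status to the stack
 * @ring: ring the descriptor was received on
 * @status_err: status/error field from the receive descriptor
 * @skb: packet being processed
 **/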
4844 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4845                                        u32 status_err, struct sk_buff *skb)
4846 {
4847         skb->ip_summed = CHECKSUM_NONE;
4848
4849         /* bail if the Ignore Checksum bit is set or checksum is disabled through ethtool */
4850         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4851              (status_err & E1000_RXD_STAT_IXSM))
4852                 return;
4853
4854         /* TCP/UDP checksum error bit is set */
4855         if (status_err &
4856             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4857                 /*
4858                  * work around errata with sctp packets where the TCPE aka
4859                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4860                  * packets, (aka let the stack check the crc32c)
4861                  */
4862                 if ((skb->len == 60) &&
4863                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4864                         ring->rx_stats.csum_err++;
4865
4866                 /* let the stack verify checksum errors */
4867                 return;
4868         }
4869         /* It must be a TCP or UDP packet with a valid checksum */
4870         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4871                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4872
4873         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4874 }
4875
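/**
 * igb_rx_hwtstamp - retrieve the hardware receive timestamp, if one is latched
 * @q_vector: q_vector the packet arrived on
 * @staterr: status/error field from the receive descriptor
 * @skb: packet the timestamp belongs to
 **/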
4876 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4877                                    struct sk_buff *skb)
4878 {
4879         struct igb_adapter *adapter = q_vector->adapter;
4880         struct e1000_hw *hw = &adapter->hw;
4881         u64 regval;
4882
4883         /*
4884          * If this bit is set, then the RX registers contain the time stamp. No
4885          * other packet will be time stamped until we read these registers, so
4886          * read the registers to make them available again. Because only one
4887          * packet can be time stamped at a time, we know that the register
4888          * values must belong to this one here and therefore we don't need to
4889          * compare any of the additional attributes stored for it.
4890          *
4891          * If nothing went wrong, then it should have a skb_shared_tx that we
4892          * can turn into a skb_shared_hwtstamps.
4893          */
4894         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4895                 return;
4896         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4897                 return;
4898
4899         regval = rd32(E1000_RXSTMPL);
4900         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4901
4902         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4903 }
4904 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4905                                union e1000_adv_rx_desc *rx_desc)
4906 {
4907         /* HW will not DMA in data larger than the given buffer, even if it
4908          * parses the (NFS, of course) header to be larger.  In that case, it
4909          * fills the header buffer and spills the rest into the page.
4910          */
4911         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4912                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4913         if (hlen > rx_ring->rx_buffer_len)
4914                 hlen = rx_ring->rx_buffer_len;
4915         return hlen;
4916 }
4917
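/**
 * igb_clean_rx_irq_adv - process received packets, up to @budget of them
 * @q_vector: q_vector owning the receive ring
 * @work_done: incremented for every packet handed to the stack
 * @budget: maximum number of packets to clean in this call
 *
 * Unmaps completed buffers, reassembles header-split and multi-descriptor
 * frames, handles checksum and timestamp reporting and replenishes receive
 * buffers as descriptors are consumed.
 **/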
4918 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4919                                  int *work_done, int budget)
4920 {
4921         struct igb_ring *rx_ring = q_vector->rx_ring;
4922         struct net_device *netdev = rx_ring->netdev;
4923         struct pci_dev *pdev = rx_ring->pdev;
4924         union e1000_adv_rx_desc *rx_desc , *next_rxd;
4925         struct igb_buffer *buffer_info , *next_buffer;
4926         struct sk_buff *skb;
4927         bool cleaned = false;
4928         int cleaned_count = 0;
4929         unsigned int total_bytes = 0, total_packets = 0;
4930         unsigned int i;
4931         u32 staterr;
4932         u16 length;
4933         u16 vlan_tag;
4934
4935         i = rx_ring->next_to_clean;
4936         buffer_info = &rx_ring->buffer_info[i];
4937         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4938         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4939
4940         while (staterr & E1000_RXD_STAT_DD) {
4941                 if (*work_done >= budget)
4942                         break;
4943                 (*work_done)++;
4944
4945                 skb = buffer_info->skb;
4946                 prefetch(skb->data - NET_IP_ALIGN);
4947                 buffer_info->skb = NULL;
4948
4949                 i++;
4950                 if (i == rx_ring->count)
4951                         i = 0;
4952
4953                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4954                 prefetch(next_rxd);
4955                 next_buffer = &rx_ring->buffer_info[i];
4956
4957                 length = le16_to_cpu(rx_desc->wb.upper.length);
4958                 cleaned = true;
4959                 cleaned_count++;
4960
4961                 if (buffer_info->dma) {
4962                         pci_unmap_single(pdev, buffer_info->dma,
4963                                          rx_ring->rx_buffer_len,
4964                                          PCI_DMA_FROMDEVICE);
4965                         buffer_info->dma = 0;
4966                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4967                                 skb_put(skb, length);
4968                                 goto send_up;
4969                         }
4970                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4971                 }
4972
4973                 if (length) {
4974                         pci_unmap_page(pdev, buffer_info->page_dma,
4975                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4976                         buffer_info->page_dma = 0;
4977
4978                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4979                                                 buffer_info->page,
4980                                                 buffer_info->page_offset,
4981                                                 length);
4982
4983                         if (page_count(buffer_info->page) != 1)
4984                                 buffer_info->page = NULL;
4985                         else
4986                                 get_page(buffer_info->page);
4987
4988                         skb->len += length;
4989                         skb->data_len += length;
4990                         skb->truesize += length;
4991                 }
4992
4993                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4994                         buffer_info->skb = next_buffer->skb;
4995                         buffer_info->dma = next_buffer->dma;
4996                         next_buffer->skb = skb;
4997                         next_buffer->dma = 0;
4998                         goto next_desc;
4999                 }
5000 send_up:
5001                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5002                         dev_kfree_skb_irq(skb);
5003                         goto next_desc;
5004                 }
5005
5006                 igb_rx_hwtstamp(q_vector, staterr, skb);
5007                 total_bytes += skb->len;
5008                 total_packets++;
5009
5010                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5011
5012                 skb->protocol = eth_type_trans(skb, netdev);
5013                 skb_record_rx_queue(skb, rx_ring->queue_index);
5014
5015                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5016                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5017
5018                 igb_receive_skb(q_vector, skb, vlan_tag);
5019
5020 next_desc:
5021                 rx_desc->wb.upper.status_error = 0;
5022
5023                 /* return some buffers to hardware, one at a time is too slow */
5024                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5025                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5026                         cleaned_count = 0;
5027                 }
5028
5029                 /* use prefetched values */
5030                 rx_desc = next_rxd;
5031                 buffer_info = next_buffer;
5032                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5033         }
5034
5035         rx_ring->next_to_clean = i;
5036         cleaned_count = igb_desc_unused(rx_ring);
5037
5038         if (cleaned_count)
5039                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5040
5041         rx_ring->total_packets += total_packets;
5042         rx_ring->total_bytes += total_bytes;
5043         rx_ring->rx_stats.packets += total_packets;
5044         rx_ring->rx_stats.bytes += total_bytes;
5045         return cleaned;
5046 }
5047
5048 /**
5049  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5050  * @rx_ring: pointer to the ring to place buffers on
 * @cleaned_count: number of buffers to replace
5051  **/
5052 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5053 {
5054         struct net_device *netdev = rx_ring->netdev;
5055         union e1000_adv_rx_desc *rx_desc;
5056         struct igb_buffer *buffer_info;
5057         struct sk_buff *skb;
5058         unsigned int i;
5059         int bufsz;
5060
5061         i = rx_ring->next_to_use;
5062         buffer_info = &rx_ring->buffer_info[i];
5063
5064         bufsz = rx_ring->rx_buffer_len;
5065
5066         while (cleaned_count--) {
5067                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5068
5069                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5070                         if (!buffer_info->page) {
5071                                 buffer_info->page = netdev_alloc_page(netdev);
5072                                 if (!buffer_info->page) {
5073                                         rx_ring->rx_stats.alloc_failed++;
5074                                         goto no_buffers;
5075                                 }
5076                                 buffer_info->page_offset = 0;
5077                         } else {
5078                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5079                         }
5080                         buffer_info->page_dma =
5081                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5082                                              buffer_info->page_offset,
5083                                              PAGE_SIZE / 2,
5084                                              PCI_DMA_FROMDEVICE);
5085                         if (pci_dma_mapping_error(rx_ring->pdev,
5086                                                   buffer_info->page_dma)) {
5087                                 buffer_info->page_dma = 0;
5088                                 rx_ring->rx_stats.alloc_failed++;
5089                                 goto no_buffers;
5090                         }
5091                 }
5092
5093                 skb = buffer_info->skb;
5094                 if (!skb) {
5095                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5096                         if (!skb) {
5097                                 rx_ring->rx_stats.alloc_failed++;
5098                                 goto no_buffers;
5099                         }
5100
5101                         buffer_info->skb = skb;
5102                 }
5103                 if (!buffer_info->dma) {
5104                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5105                                                           skb->data,
5106                                                           bufsz,
5107                                                           PCI_DMA_FROMDEVICE);
5108                         if (pci_dma_mapping_error(rx_ring->pdev,
5109                                                   buffer_info->dma)) {
5110                                 buffer_info->dma = 0;
5111                                 rx_ring->rx_stats.alloc_failed++;
5112                                 goto no_buffers;
5113                         }
5114                 }
5115                 /* Refresh the desc even if buffer_addrs didn't change because
5116                  * each write-back erases this info. */
5117                 if (bufsz < IGB_RXBUFFER_1024) {
5118                         rx_desc->read.pkt_addr =
5119                              cpu_to_le64(buffer_info->page_dma);
5120                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5121                 } else {
5122                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5123                         rx_desc->read.hdr_addr = 0;
5124                 }
5125
5126                 i++;
5127                 if (i == rx_ring->count)
5128                         i = 0;
5129                 buffer_info = &rx_ring->buffer_info[i];
5130         }
5131
5132 no_buffers:
5133         if (rx_ring->next_to_use != i) {
5134                 rx_ring->next_to_use = i;
5135                 if (i == 0)
5136                         i = (rx_ring->count - 1);
5137                 else
5138                         i--;
5139
5140                 /* Force memory writes to complete before letting h/w
5141                  * know there are new descriptors to fetch.  (Only
5142                  * applicable for weak-ordered memory model archs,
5143                  * such as IA-64). */
5144                 wmb();
5145                 writel(i, rx_ring->tail);
5146         }
5147 }
5148
5149 /**
5150  * igb_mii_ioctl - Handle MII register ioctls
5151  * @netdev: network interface device structure
5152  * @ifr: ifreq structure carrying the MII register data
5153  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG)
5154  **/
5155 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5156 {
5157         struct igb_adapter *adapter = netdev_priv(netdev);
5158         struct mii_ioctl_data *data = if_mii(ifr);
5159
5160         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5161                 return -EOPNOTSUPP;
5162
5163         switch (cmd) {
5164         case SIOCGMIIPHY:
5165                 data->phy_id = adapter->hw.phy.addr;
5166                 break;
5167         case SIOCGMIIREG:
5168                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5169                                      &data->val_out))
5170                         return -EIO;
5171                 break;
5172         case SIOCSMIIREG:
5173         default:
5174                 return -EOPNOTSUPP;
5175         }
5176         return 0;
5177 }
5178
5179 /**
5180  * igb_hwtstamp_ioctl - control hardware time stamping
5181  * @netdev: network interface device structure
5182  * @ifr: ifreq structure carrying the hwtstamp_config from user space
5183  * @cmd: ioctl command (SIOCSHWTSTAMP)
5184  *
5185  * Outgoing time stamping can be enabled and disabled. Play nice and
5186  * disable it when requested, although it shouldn't cause any overhead
5187  * when no packet needs it. At most one packet in the queue may be
5188  * marked for time stamping, otherwise it would be impossible to tell
5189  * for sure to which packet the hardware time stamp belongs.
5190  *
5191  * Incoming time stamping has to be configured via the hardware
5192  * filters. Not all combinations are supported, in particular event
5193  * type has to be specified. Matching the kind of event packet is
5194  * not supported, with the exception of "all V2 events regardless of
5195  * layer 2 or 4".
5196  *
5197  **/
5198 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5199                               struct ifreq *ifr, int cmd)
5200 {
5201         struct igb_adapter *adapter = netdev_priv(netdev);
5202         struct e1000_hw *hw = &adapter->hw;
5203         struct hwtstamp_config config;
5204         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5205         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5206         u32 tsync_rx_cfg = 0;
5207         bool is_l4 = false;
5208         bool is_l2 = false;
5209         u32 regval;
5210
5211         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5212                 return -EFAULT;
5213
5214         /* reserved for future extensions */
5215         if (config.flags)
5216                 return -EINVAL;
5217
5218         switch (config.tx_type) {
5219         case HWTSTAMP_TX_OFF:
5220                 tsync_tx_ctl = 0;
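                /* fall through */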
5221         case HWTSTAMP_TX_ON:
5222                 break;
5223         default:
5224                 return -ERANGE;
5225         }
5226
5227         switch (config.rx_filter) {
5228         case HWTSTAMP_FILTER_NONE:
5229                 tsync_rx_ctl = 0;
5230                 break;
5231         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5232         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5233         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5234         case HWTSTAMP_FILTER_ALL:
5235                 /*
5236                  * register TSYNCRXCFG must be set, therefore it is not
5237                  * possible to time stamp both Sync and Delay_Req messages
5238                  * => fall back to time stamping all packets
5239                  */
5240                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5241                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5242                 break;
5243         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5244                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5245                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5246                 is_l4 = true;
5247                 break;
5248         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5249                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5250                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5251                 is_l4 = true;
5252                 break;
5253         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5254         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5255                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5256                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5257                 is_l2 = true;
5258                 is_l4 = true;
5259                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5260                 break;
5261         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5262         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5263                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5264                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5265                 is_l2 = true;
5266                 is_l4 = true;
5267                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5268                 break;
5269         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5270         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5271         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5272                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5273                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5274                 is_l2 = true;
5275                 break;
5276         default:
5277                 return -ERANGE;
5278         }
5279
5280         if (hw->mac.type == e1000_82575) {
5281                 if (tsync_rx_ctl | tsync_tx_ctl)
5282                         return -EINVAL;
5283                 return 0;
5284         }
5285
5286         /* enable/disable TX */
5287         regval = rd32(E1000_TSYNCTXCTL);
5288         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5289         regval |= tsync_tx_ctl;
5290         wr32(E1000_TSYNCTXCTL, regval);
5291
5292         /* enable/disable RX */
5293         regval = rd32(E1000_TSYNCRXCTL);
5294         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5295         regval |= tsync_rx_ctl;
5296         wr32(E1000_TSYNCRXCTL, regval);
5297
5298         /* define which PTP packets are time stamped */
5299         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5300
5301         /* define ethertype filter for timestamped packets */
5302         if (is_l2)
5303                 wr32(E1000_ETQF(3),
5304                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5305                                  E1000_ETQF_1588 | /* enable timestamping */
5306                                  ETH_P_1588));     /* 1588 eth protocol type */
5307         else
5308                 wr32(E1000_ETQF(3), 0);
5309
5310 #define PTP_PORT 319
5311         /* L4 Queue Filter[3]: filter by destination port and protocol */
5312         if (is_l4) {
5313                 u32 ftqf = (IPPROTO_UDP /* UDP */
5314                         | E1000_FTQF_VF_BP /* VF not compared */
5315                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5316                         | E1000_FTQF_MASK); /* mask all inputs */
5317                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5318
5319                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5320                 wr32(E1000_IMIREXT(3),
5321                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5322                 if (hw->mac.type == e1000_82576) {
5323                         /* enable source port check */
5324                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5325                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5326                 }
5327                 wr32(E1000_FTQF(3), ftqf);
5328         } else {
5329                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5330         }
5331         wrfl();
5332
5333         adapter->hwtstamp_config = config;
5334
5335         /* clear TX/RX time stamp registers, just to be sure */
5336         regval = rd32(E1000_TXSTMPH);
5337         regval = rd32(E1000_RXSTMPH);
5338
5339         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5340                 -EFAULT : 0;
5341 }
5342
5343 /**
5344  * igb_ioctl - Dispatch device ioctls
5345  * @netdev: network interface device structure
5346  * @ifr: ifreq structure for the request
5347  * @cmd: ioctl command
5348  **/
5349 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5350 {
5351         switch (cmd) {
5352         case SIOCGMIIPHY:
5353         case SIOCGMIIREG:
5354         case SIOCSMIIREG:
5355                 return igb_mii_ioctl(netdev, ifr, cmd);
5356         case SIOCSHWTSTAMP:
5357                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5358         default:
5359                 return -EOPNOTSUPP;
5360         }
5361 }
5362
5363 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5364 {
5365         struct igb_adapter *adapter = hw->back;
5366         u16 cap_offset;
5367
5368         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5369         if (!cap_offset)
5370                 return -E1000_ERR_CONFIG;
5371
5372         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5373
5374         return 0;
5375 }
5376
5377 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5378 {
5379         struct igb_adapter *adapter = hw->back;
5380         u16 cap_offset;
5381
5382         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5383         if (!cap_offset)
5384                 return -E1000_ERR_CONFIG;
5385
5386         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5387
5388         return 0;
5389 }
5390
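/**
 * igb_vlan_rx_register - enable or disable VLAN tag stripping
 * @netdev: network interface device structure
 * @grp: VLAN group from the stack, or NULL when VLAN support is torn down
 *
 * Toggles the CTRL.VME bit, disables the CFI check while a group is
 * registered and updates the maximum receive packet length.
 **/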
5391 static void igb_vlan_rx_register(struct net_device *netdev,
5392                                  struct vlan_group *grp)
5393 {
5394         struct igb_adapter *adapter = netdev_priv(netdev);
5395         struct e1000_hw *hw = &adapter->hw;
5396         u32 ctrl, rctl;
5397
5398         igb_irq_disable(adapter);
5399         adapter->vlgrp = grp;
5400
5401         if (grp) {
5402                 /* enable VLAN tag insert/strip */
5403                 ctrl = rd32(E1000_CTRL);
5404                 ctrl |= E1000_CTRL_VME;
5405                 wr32(E1000_CTRL, ctrl);
5406
5407                 /* Disable CFI check */
5408                 rctl = rd32(E1000_RCTL);
5409                 rctl &= ~E1000_RCTL_CFIEN;
5410                 wr32(E1000_RCTL, rctl);
5411         } else {
5412                 /* disable VLAN tag insert/strip */
5413                 ctrl = rd32(E1000_CTRL);
5414                 ctrl &= ~E1000_CTRL_VME;
5415                 wr32(E1000_CTRL, ctrl);
5416         }
5417
5418         igb_rlpml_set(adapter);
5419
5420         if (!test_bit(__IGB_DOWN, &adapter->state))
5421                 igb_irq_enable(adapter);
5422 }
5423
5424 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5425 {
5426         struct igb_adapter *adapter = netdev_priv(netdev);
5427         struct e1000_hw *hw = &adapter->hw;
5428         int pf_id = adapter->vfs_allocated_count;
5429
5430         /* attempt to add filter to vlvf array */
5431         igb_vlvf_set(adapter, vid, true, pf_id);
5432
5433         /* add the filter since PF can receive vlans w/o entry in vlvf */
5434         igb_vfta_set(hw, vid, true);
5435 }
5436
5437 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5438 {
5439         struct igb_adapter *adapter = netdev_priv(netdev);
5440         struct e1000_hw *hw = &adapter->hw;
5441         int pf_id = adapter->vfs_allocated_count;
5442         s32 err;
5443
5444         igb_irq_disable(adapter);
5445         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5446
5447         if (!test_bit(__IGB_DOWN, &adapter->state))
5448                 igb_irq_enable(adapter);
5449
5450         /* remove vlan from VLVF table array */
5451         err = igb_vlvf_set(adapter, vid, false, pf_id);
5452
5453         /* if vid was not present in VLVF just remove it from table */
5454         if (err)
5455                 igb_vfta_set(hw, vid, false);
5456 }
5457
5458 static void igb_restore_vlan(struct igb_adapter *adapter)
5459 {
5460         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5461
5462         if (adapter->vlgrp) {
5463                 u16 vid;
5464                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5465                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5466                                 continue;
5467                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5468                 }
5469         }
5470 }
5471
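/**
 * igb_set_spd_dplx - force link speed and duplex
 * @adapter: board private structure
 * @spddplx: requested SPEED_* + DUPLEX_* combination
 *
 * 1000 Mbps full duplex is handled by restricting autonegotiation to that
 * speed; other supported combinations disable autonegotiation and force the
 * link.  Unsupported combinations return -EINVAL.
 **/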
5472 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5473 {
5474         struct pci_dev *pdev = adapter->pdev;
5475         struct e1000_mac_info *mac = &adapter->hw.mac;
5476
5477         mac->autoneg = 0;
5478
5479         switch (spddplx) {
5480         case SPEED_10 + DUPLEX_HALF:
5481                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5482                 break;
5483         case SPEED_10 + DUPLEX_FULL:
5484                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5485                 break;
5486         case SPEED_100 + DUPLEX_HALF:
5487                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5488                 break;
5489         case SPEED_100 + DUPLEX_FULL:
5490                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5491                 break;
5492         case SPEED_1000 + DUPLEX_FULL:
5493                 mac->autoneg = 1;
5494                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5495                 break;
5496         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5497         default:
5498                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5499                 return -EINVAL;
5500         }
5501         return 0;
5502 }
5503
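/**
 * __igb_shutdown - common suspend/shutdown path
 * @pdev: PCI device being suspended or shut down
 * @enable_wake: set to true when Wake-on-LAN filters or manageability
 *               require the device to stay powered
 *
 * Stops the interface, programs the requested wake-up filters and releases
 * hardware control before the device is powered down.
 **/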
5504 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5505 {
5506         struct net_device *netdev = pci_get_drvdata(pdev);
5507         struct igb_adapter *adapter = netdev_priv(netdev);
5508         struct e1000_hw *hw = &adapter->hw;
5509         u32 ctrl, rctl, status;
5510         u32 wufc = adapter->wol;
5511 #ifdef CONFIG_PM
5512         int retval = 0;
5513 #endif
5514
5515         netif_device_detach(netdev);
5516
5517         if (netif_running(netdev))
5518                 igb_close(netdev);
5519
5520         igb_clear_interrupt_scheme(adapter);
5521
5522 #ifdef CONFIG_PM
5523         retval = pci_save_state(pdev);
5524         if (retval)
5525                 return retval;
5526 #endif
5527
5528         status = rd32(E1000_STATUS);
5529         if (status & E1000_STATUS_LU)
5530                 wufc &= ~E1000_WUFC_LNKC;
5531
5532         if (wufc) {
5533                 igb_setup_rctl(adapter);
5534                 igb_set_rx_mode(netdev);
5535
5536                 /* turn on all-multi mode if wake on multicast is enabled */
5537                 if (wufc & E1000_WUFC_MC) {
5538                         rctl = rd32(E1000_RCTL);
5539                         rctl |= E1000_RCTL_MPE;
5540                         wr32(E1000_RCTL, rctl);
5541                 }
5542
5543                 ctrl = rd32(E1000_CTRL);
5544                 /* advertise wake from D3Cold */
5545                 #define E1000_CTRL_ADVD3WUC 0x00100000
5546                 /* phy power management enable */
5547                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5548                 ctrl |= E1000_CTRL_ADVD3WUC;
5549                 wr32(E1000_CTRL, ctrl);
5550
5551                 /* Allow time for pending master requests to run */
5552                 igb_disable_pcie_master(hw);
5553
5554                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5555                 wr32(E1000_WUFC, wufc);
5556         } else {
5557                 wr32(E1000_WUC, 0);
5558                 wr32(E1000_WUFC, 0);
5559         }
5560
5561         *enable_wake = wufc || adapter->en_mng_pt;
5562         if (!*enable_wake)
5563                 igb_shutdown_serdes_link_82575(hw);
5564
5565         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5566          * would have already happened in close and is redundant. */
5567         igb_release_hw_control(adapter);
5568
5569         pci_disable_device(pdev);
5570
5571         return 0;
5572 }
5573
5574 #ifdef CONFIG_PM
5575 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5576 {
5577         int retval;
5578         bool wake;
5579
5580         retval = __igb_shutdown(pdev, &wake);
5581         if (retval)
5582                 return retval;
5583
5584         if (wake) {
5585                 pci_prepare_to_sleep(pdev);
5586         } else {
5587                 pci_wake_from_d3(pdev, false);
5588                 pci_set_power_state(pdev, PCI_D3hot);
5589         }
5590
5591         return 0;
5592 }
5593
5594 static int igb_resume(struct pci_dev *pdev)
5595 {
5596         struct net_device *netdev = pci_get_drvdata(pdev);
5597         struct igb_adapter *adapter = netdev_priv(netdev);
5598         struct e1000_hw *hw = &adapter->hw;
5599         u32 err;
5600
5601         pci_set_power_state(pdev, PCI_D0);
5602         pci_restore_state(pdev);
5603
5604         err = pci_enable_device_mem(pdev);
5605         if (err) {
5606                 dev_err(&pdev->dev,
5607                         "igb: Cannot enable PCI device from suspend\n");
5608                 return err;
5609         }
5610         pci_set_master(pdev);
5611
5612         pci_enable_wake(pdev, PCI_D3hot, 0);
5613         pci_enable_wake(pdev, PCI_D3cold, 0);
5614
5615         if (igb_init_interrupt_scheme(adapter)) {
5616                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5617                 return -ENOMEM;
5618         }
5619
5620         /* e1000_power_up_phy(adapter); */
5621
5622         igb_reset(adapter);
5623
5624         /* let the f/w know that the h/w is now under the control of the
5625          * driver. */
5626         igb_get_hw_control(adapter);
5627
5628         wr32(E1000_WUS, ~0);
5629
5630         if (netif_running(netdev)) {
5631                 err = igb_open(netdev);
5632                 if (err)
5633                         return err;
5634         }
5635
5636         netif_device_attach(netdev);
5637
5638         return 0;
5639 }
5640 #endif
5641
5642 static void igb_shutdown(struct pci_dev *pdev)
5643 {
5644         bool wake;
5645
5646         __igb_shutdown(pdev, &wake);
5647
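        /* arm (or disarm) wake and drop to D3hot only when the system is powering off */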
5648         if (system_state == SYSTEM_POWER_OFF) {
5649                 pci_wake_from_d3(pdev, wake);
5650                 pci_set_power_state(pdev, PCI_D3hot);
5651         }
5652 }
5653
5654 #ifdef CONFIG_NET_POLL_CONTROLLER
5655 /*
5656  * Polling 'interrupt' - used by things like netconsole to send skbs
5657  * without having to re-enable interrupts. It's not called while
5658  * the interrupt routine is executing.
5659  */
5660 static void igb_netpoll(struct net_device *netdev)
5661 {
5662         struct igb_adapter *adapter = netdev_priv(netdev);
5663         struct e1000_hw *hw = &adapter->hw;
5664         int i;
5665
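        /* legacy/MSI mode: a single q_vector handles the device, so mask the
         * interrupt and let its NAPI poll run */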
5666         if (!adapter->msix_entries) {
5667                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5668                 igb_irq_disable(adapter);
5669                 napi_schedule(&q_vector->napi);
5670                 return;
5671         }
5672
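        /* MSI-X mode: mask each vector via EIMC and schedule its NAPI poll */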
5673         for (i = 0; i < adapter->num_q_vectors; i++) {
5674                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5675                 wr32(E1000_EIMC, q_vector->eims_value);
5676                 napi_schedule(&q_vector->napi);
5677         }
5678 }
5679 #endif /* CONFIG_NET_POLL_CONTROLLER */
5680
5681 /**
5682  * igb_io_error_detected - called when PCI error is detected
5683  * @pdev: Pointer to PCI device
5684  * @state: The current pci connection state
5685  *
5686  * This function is called after a PCI bus error affecting
5687  * this device has been detected.
5688  */
5689 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5690                                               pci_channel_state_t state)
5691 {
5692         struct net_device *netdev = pci_get_drvdata(pdev);
5693         struct igb_adapter *adapter = netdev_priv(netdev);
5694
5695         netif_device_detach(netdev);
5696
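        /* a permanent channel failure cannot be recovered; ask the AER core to disconnect */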
5697         if (state == pci_channel_io_perm_failure)
5698                 return PCI_ERS_RESULT_DISCONNECT;
5699
5700         if (netif_running(netdev))
5701                 igb_down(adapter);
5702         pci_disable_device(pdev);
5703
5704         /* Request a slot reset. */
5705         return PCI_ERS_RESULT_NEED_RESET;
5706 }
5707
5708 /**
5709  * igb_io_slot_reset - called after the pci bus has been reset.
5710  * @pdev: Pointer to PCI device
5711  *
5712  * Restart the card from scratch, as if from a cold boot. Implementation
5713  * resembles the first half of the igb_resume routine.
5714  */
5715 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5716 {
5717         struct net_device *netdev = pci_get_drvdata(pdev);
5718         struct igb_adapter *adapter = netdev_priv(netdev);
5719         struct e1000_hw *hw = &adapter->hw;
5720         pci_ers_result_t result;
5721         int err;
5722
5723         if (pci_enable_device_mem(pdev)) {
5724                 dev_err(&pdev->dev,
5725                         "Cannot re-enable PCI device after reset.\n");
5726                 result = PCI_ERS_RESULT_DISCONNECT;
5727         } else {
5728                 pci_set_master(pdev);
5729                 pci_restore_state(pdev);
5730
5731                 pci_enable_wake(pdev, PCI_D3hot, 0);
5732                 pci_enable_wake(pdev, PCI_D3cold, 0);
5733
5734                 igb_reset(adapter);
5735                 wr32(E1000_WUS, ~0);
5736                 result = PCI_ERS_RESULT_RECOVERED;
5737         }
5738
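        /* clear any uncorrectable AER status logged against this device */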
5739         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5740         if (err) {
5741                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5742                         "failed 0x%0x\n", err);
5743                 /* non-fatal, continue */
5744         }
5745
5746         return result;
5747 }
5748
5749 /**
5750  * igb_io_resume - called when traffic can start flowing again.
5751  * @pdev: Pointer to PCI device
5752  *
5753  * This callback is called when the error recovery driver tells us that
5754  * it's OK to resume normal operation. Implementation resembles the
5755  * second half of the igb_resume routine.
5756  */
5757 static void igb_io_resume(struct pci_dev *pdev)
5758 {
5759         struct net_device *netdev = pci_get_drvdata(pdev);
5760         struct igb_adapter *adapter = netdev_priv(netdev);
5761
5762         if (netif_running(netdev)) {
5763                 if (igb_up(adapter)) {
5764                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5765                         return;
5766                 }
5767         }
5768
5769         netif_device_attach(netdev);
5770
5771         /* let the f/w know that the h/w is now under the control of the
5772          * driver. */
5773         igb_get_hw_control(adapter);
5774 }
5775
5776 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5777                              u8 qsel)
5778 {
5779         u32 rar_low, rar_high;
5780         struct e1000_hw *hw = &adapter->hw;
5781
5782         /* HW expects these in little endian so we reverse the byte order
5783          * from network order (big endian) to little endian
5784          */
5785         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5786                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5787         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5788
5789         /* Indicate to hardware the Address is Valid. */
5790         rar_high |= E1000_RAH_AV;
5791
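        /* 82575 encodes the target pool as a multiple of the POOL_1 bit; later
         * MACs treat the pool field as a bit mask, one bit per pool */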
5792         if (hw->mac.type == e1000_82575)
5793                 rar_high |= E1000_RAH_POOL_1 * qsel;
5794         else
5795                 rar_high |= E1000_RAH_POOL_1 << qsel;
5796
5797         wr32(E1000_RAL(index), rar_low);
5798         wrfl();
5799         wr32(E1000_RAH(index), rar_high);
5800         wrfl();
5801 }
5802
5803 static int igb_set_vf_mac(struct igb_adapter *adapter,
5804                           int vf, unsigned char *mac_addr)
5805 {
5806         struct e1000_hw *hw = &adapter->hw;
5807         /* VF MAC addresses start at the end of the receive address registers
5808          * and move towards the first, so a collision should not be possible */
5809         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5810
5811         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5812
5813         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5814
5815         return 0;
5816 }
5817
5818 static void igb_vmm_control(struct igb_adapter *adapter)
5819 {
5820         struct e1000_hw *hw = &adapter->hw;
5821         u32 reg;
5822
5823         /* replication is not supported for 82575 */
5824         if (hw->mac.type == e1000_82575)
5825                 return;
5826
5827         /* enable replication vlan tag stripping */
5828         reg = rd32(E1000_RPLOLR);
5829         reg |= E1000_RPLOLR_STRVLAN;
5830         wr32(E1000_RPLOLR, reg);
5831
5832         /* notify HW that the MAC is adding vlan tags */
5833         reg = rd32(E1000_DTXCTL);
5834         reg |= E1000_DTXCTL_VLAN_ADDED;
5835         wr32(E1000_DTXCTL, reg);
5836
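        /* VMDq loopback and replication are only enabled while VFs are allocated */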
5837         if (adapter->vfs_allocated_count) {
5838                 igb_vmdq_set_loopback_pf(hw, true);
5839                 igb_vmdq_set_replication_pf(hw, true);
5840         } else {
5841                 igb_vmdq_set_loopback_pf(hw, false);
5842                 igb_vmdq_set_replication_pf(hw, false);
5843         }
5844 }
5845
5846 /* igb_main.c */