igb: Rework how netdev->stats is handled
drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132 #ifdef CONFIG_PM
133 static int igb_suspend(struct pci_dev *, pm_message_t);
134 static int igb_resume(struct pci_dev *);
135 #endif
136 static void igb_shutdown(struct pci_dev *);
137 #ifdef CONFIG_IGB_DCA
138 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139 static struct notifier_block dca_notifier = {
140         .notifier_call  = igb_notify_dca,
141         .next           = NULL,
142         .priority       = 0
143 };
144 #endif
145 #ifdef CONFIG_NET_POLL_CONTROLLER
146 /* for netdump / net console */
147 static void igb_netpoll(struct net_device *);
148 #endif
149 #ifdef CONFIG_PCI_IOV
150 static unsigned int max_vfs = 0;
151 module_param(max_vfs, uint, 0);
152 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
153                  "per physical function");
154 #endif /* CONFIG_PCI_IOV */
155
156 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157                      pci_channel_state_t);
158 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159 static void igb_io_resume(struct pci_dev *);
160
161 static struct pci_error_handlers igb_err_handler = {
162         .error_detected = igb_io_error_detected,
163         .slot_reset = igb_io_slot_reset,
164         .resume = igb_io_resume,
165 };
166
167
168 static struct pci_driver igb_driver = {
169         .name     = igb_driver_name,
170         .id_table = igb_pci_tbl,
171         .probe    = igb_probe,
172         .remove   = __devexit_p(igb_remove),
173 #ifdef CONFIG_PM
174         /* Power Management Hooks */
175         .suspend  = igb_suspend,
176         .resume   = igb_resume,
177 #endif
178         .shutdown = igb_shutdown,
179         .err_handler = &igb_err_handler
180 };
181
182 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184 MODULE_LICENSE("GPL");
185 MODULE_VERSION(DRV_VERSION);
186
187 /**
188  * igb_read_clock - read raw cycle counter (to be used by time counter)
189  */
190 static cycle_t igb_read_clock(const struct cyclecounter *tc)
191 {
192         struct igb_adapter *adapter =
193                 container_of(tc, struct igb_adapter, cycles);
194         struct e1000_hw *hw = &adapter->hw;
195         u64 stamp = 0;
196         int shift = 0;
197
198         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200         return stamp;
201 }
202
203 #ifdef DEBUG
204 /**
205  * igb_get_hw_dev_name - return device name string
206  * used by hardware layer to print debugging information
207  **/
208 char *igb_get_hw_dev_name(struct e1000_hw *hw)
209 {
210         struct igb_adapter *adapter = hw->back;
211         return adapter->netdev->name;
212 }
213
214 /**
215  * igb_get_time_str - format current NIC and system time as string
216  */
217 static char *igb_get_time_str(struct igb_adapter *adapter,
218                               char buffer[160])
219 {
220         cycle_t hw = adapter->cycles.read(&adapter->cycles);
221         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222         struct timespec sys;
223         struct timespec delta;
224         getnstimeofday(&sys);
225
226         delta = timespec_sub(nic, sys);
227
228         sprintf(buffer,
229                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230                 hw,
231                 (long)nic.tv_sec, nic.tv_nsec,
232                 (long)sys.tv_sec, sys.tv_nsec,
233                 (long)delta.tv_sec, delta.tv_nsec);
234
235         return buffer;
236 }
237 #endif
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
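/* Q_IDX_82576(i) maps consecutive ring indices to register offsets eight
 * apart: 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, 4 -> 2, ... which matches the
 * VF queue layout described in igb_cache_ring_register() below. */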
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->rss_queues; i++)
300                                 adapter->rx_ring[i].reg_idx = rbase_offset +
301                                                               Q_IDX_82576(i);
302                         for (; j < adapter->rss_queues; j++)
303                                 adapter->tx_ring[j].reg_idx = rbase_offset +
304                                                               Q_IDX_82576(j);
305                 }
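                /* intentional fall through: rings not mapped above use the
                 * linear register mapping below */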
306         case e1000_82575:
307         default:
308                 for (; i < adapter->num_rx_queues; i++)
309                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
310                 for (; j < adapter->num_tx_queues; j++)
311                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
312                 break;
313         }
314 }
315
316 static void igb_free_queues(struct igb_adapter *adapter)
317 {
318         kfree(adapter->tx_ring);
319         kfree(adapter->rx_ring);
320
321         adapter->tx_ring = NULL;
322         adapter->rx_ring = NULL;
323
324         adapter->num_rx_queues = 0;
325         adapter->num_tx_queues = 0;
326 }
327
328 /**
329  * igb_alloc_queues - Allocate memory for all rings
330  * @adapter: board private structure to initialize
331  *
332  * We allocate one ring per queue at run-time since we don't know the
333  * number of queues at compile-time.
334  **/
335 static int igb_alloc_queues(struct igb_adapter *adapter)
336 {
337         int i;
338
339         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340                                    sizeof(struct igb_ring), GFP_KERNEL);
341         if (!adapter->tx_ring)
342                 goto err;
343
344         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345                                    sizeof(struct igb_ring), GFP_KERNEL);
346         if (!adapter->rx_ring)
347                 goto err;
348
349         for (i = 0; i < adapter->num_tx_queues; i++) {
350                 struct igb_ring *ring = &(adapter->tx_ring[i]);
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 struct igb_ring *ring = &(adapter->rx_ring[i]);
362                 ring->count = adapter->rx_ring_count;
363                 ring->queue_index = i;
364                 ring->pdev = adapter->pdev;
365                 ring->netdev = adapter->netdev;
366                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368                 /* set flag indicating ring supports SCTP checksum offload */
369                 if (adapter->hw.mac.type >= e1000_82576)
370                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371         }
372
373         igb_cache_ring_register(adapter);
374
375         return 0;
376
377 err:
378         igb_free_queues(adapter);
379
380         return -ENOMEM;
381 }
382
383 #define IGB_N0_QUEUE -1
384 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385 {
386         u32 msixbm = 0;
387         struct igb_adapter *adapter = q_vector->adapter;
388         struct e1000_hw *hw = &adapter->hw;
389         u32 ivar, index;
390         int rx_queue = IGB_N0_QUEUE;
391         int tx_queue = IGB_N0_QUEUE;
392
393         if (q_vector->rx_ring)
394                 rx_queue = q_vector->rx_ring->reg_idx;
395         if (q_vector->tx_ring)
396                 tx_queue = q_vector->tx_ring->reg_idx;
397
398         switch (hw->mac.type) {
399         case e1000_82575:
400                 /* The 82575 assigns vectors using a bitmask, which matches the
401                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
402                    or more queues to a vector, we write the appropriate bits
403                    into the MSIXBM register for that vector. */
404                 if (rx_queue > IGB_N0_QUEUE)
405                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406                 if (tx_queue > IGB_N0_QUEUE)
407                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409                 q_vector->eims_value = msixbm;
410                 break;
411         case e1000_82576:
412                 /* 82576 uses a table-based method for assigning vectors.
413                    Each queue has a single entry in the table to which we write
414                    a vector number along with a "valid" bit.  Sadly, the layout
415                    of the table is somewhat counterintuitive. */
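                /* Derived from the masks below: entry IVAR0[n] holds, per
                 * byte, Rx queue n (bits 7:0), Tx queue n (bits 15:8),
                 * Rx queue n+8 (bits 23:16) and Tx queue n+8 (bits 31:24). */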
416                 if (rx_queue > IGB_N0_QUEUE) {
417                         index = (rx_queue & 0x7);
418                         ivar = array_rd32(E1000_IVAR0, index);
419                         if (rx_queue < 8) {
420                                 /* vector goes into low byte of register */
421                                 ivar = ivar & 0xFFFFFF00;
422                                 ivar |= msix_vector | E1000_IVAR_VALID;
423                         } else {
424                                 /* vector goes into third byte of register */
425                                 ivar = ivar & 0xFF00FFFF;
426                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427                         }
428                         array_wr32(E1000_IVAR0, index, ivar);
429                 }
430                 if (tx_queue > IGB_N0_QUEUE) {
431                         index = (tx_queue & 0x7);
432                         ivar = array_rd32(E1000_IVAR0, index);
433                         if (tx_queue < 8) {
434                                 /* vector goes into second byte of register */
435                                 ivar = ivar & 0xFFFF00FF;
436                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437                         } else {
438                                 /* vector goes into high byte of register */
439                                 ivar = ivar & 0x00FFFFFF;
440                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441                         }
442                         array_wr32(E1000_IVAR0, index, ivar);
443                 }
444                 q_vector->eims_value = 1 << msix_vector;
445                 break;
446         default:
447                 BUG();
448                 break;
449         }
450 }
451
452 /**
453  * igb_configure_msix - Configure MSI-X hardware
454  *
455  * igb_configure_msix sets up the hardware to properly
456  * generate MSI-X interrupts.
457  **/
458 static void igb_configure_msix(struct igb_adapter *adapter)
459 {
460         u32 tmp;
461         int i, vector = 0;
462         struct e1000_hw *hw = &adapter->hw;
463
464         adapter->eims_enable_mask = 0;
465
466         /* set vector for other causes, i.e. link changes */
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 tmp = rd32(E1000_CTRL_EXT);
470                 /* enable MSI-X PBA support */
471                 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473                 /* Auto-Mask interrupts upon ICR read. */
474                 tmp |= E1000_CTRL_EXT_EIAME;
475                 tmp |= E1000_CTRL_EXT_IRCA;
476
477                 wr32(E1000_CTRL_EXT, tmp);
478
479                 /* enable msix_other interrupt */
480                 array_wr32(E1000_MSIXBM(0), vector++,
481                                       E1000_EIMS_OTHER);
482                 adapter->eims_other = E1000_EIMS_OTHER;
483
484                 break;
485
486         case e1000_82576:
487                 /* Turn on MSI-X capability first, or our settings
488                  * won't stick.  And it will take days to debug. */
489                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491                                 E1000_GPIE_NSICR);
492
493                 /* enable msix_other interrupt */
494                 adapter->eims_other = 1 << vector;
495                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497                 wr32(E1000_IVAR_MISC, tmp);
498                 break;
499         default:
500                 /* do nothing, since nothing else supports MSI-X */
501                 break;
502         } /* switch (hw->mac.type) */
503
504         adapter->eims_enable_mask |= adapter->eims_other;
505
506         for (i = 0; i < adapter->num_q_vectors; i++) {
507                 struct igb_q_vector *q_vector = adapter->q_vector[i];
508                 igb_assign_vector(q_vector, vector++);
509                 adapter->eims_enable_mask |= q_vector->eims_value;
510         }
511
512         wrfl();
513 }
514
515 /**
516  * igb_request_msix - Initialize MSI-X interrupts
517  *
518  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519  * kernel.
520  **/
521 static int igb_request_msix(struct igb_adapter *adapter)
522 {
523         struct net_device *netdev = adapter->netdev;
524         struct e1000_hw *hw = &adapter->hw;
525         int i, err = 0, vector = 0;
526
527         err = request_irq(adapter->msix_entries[vector].vector,
528                           &igb_msix_other, 0, netdev->name, adapter);
529         if (err)
530                 goto out;
531         vector++;
532
533         for (i = 0; i < adapter->num_q_vectors; i++) {
534                 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538                 if (q_vector->rx_ring && q_vector->tx_ring)
539                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540                                 q_vector->rx_ring->queue_index);
541                 else if (q_vector->tx_ring)
542                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543                                 q_vector->tx_ring->queue_index);
544                 else if (q_vector->rx_ring)
545                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546                                 q_vector->rx_ring->queue_index);
547                 else
548                         sprintf(q_vector->name, "%s-unused", netdev->name);
549
550                 err = request_irq(adapter->msix_entries[vector].vector,
551                                   &igb_msix_ring, 0, q_vector->name,
552                                   q_vector);
553                 if (err)
554                         goto out;
555                 vector++;
556         }
557
558         igb_configure_msix(adapter);
559         return 0;
560 out:
561         return err;
562 }
563
564 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565 {
566         if (adapter->msix_entries) {
567                 pci_disable_msix(adapter->pdev);
568                 kfree(adapter->msix_entries);
569                 adapter->msix_entries = NULL;
570         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571                 pci_disable_msi(adapter->pdev);
572         }
573 }
574
575 /**
576  * igb_free_q_vectors - Free memory allocated for interrupt vectors
577  * @adapter: board private structure to initialize
578  *
579  * This function frees the memory allocated to the q_vectors.  In addition if
580  * NAPI is enabled it will delete any references to the NAPI struct prior
581  * to freeing the q_vector.
582  **/
583 static void igb_free_q_vectors(struct igb_adapter *adapter)
584 {
585         int v_idx;
586
587         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589                 adapter->q_vector[v_idx] = NULL;
590                 netif_napi_del(&q_vector->napi);
591                 kfree(q_vector);
592         }
593         adapter->num_q_vectors = 0;
594 }
595
596 /**
597  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
598  *
599  * This function resets the device so that it has 0 rx queues, tx queues, and
600  * MSI-X interrupts allocated.
601  */
602 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
603 {
604         igb_free_queues(adapter);
605         igb_free_q_vectors(adapter);
606         igb_reset_interrupt_capability(adapter);
607 }
608
609 /**
610  * igb_set_interrupt_capability - set MSI or MSI-X if supported
611  *
612  * Attempt to configure interrupts using the best available
613  * capabilities of the hardware and kernel.
614  **/
615 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616 {
617         int err;
618         int numvecs, i;
619
620         /* Number of supported queues. */
621         adapter->num_rx_queues = adapter->rss_queues;
622         adapter->num_tx_queues = adapter->rss_queues;
623
624         /* start with one vector for every rx queue */
625         numvecs = adapter->num_rx_queues;
626
627         /* if tx handler is separate, add 1 for every tx queue */
628         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
629                 numvecs += adapter->num_tx_queues;
630
631         /* store the number of vectors reserved for queues */
632         adapter->num_q_vectors = numvecs;
633
634         /* add 1 vector for link status interrupts */
635         numvecs++;
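        /* e.g. four RSS queues with queue pairing disabled need
         * 4 Rx + 4 Tx + 1 other = 9 MSI-X vectors */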
636         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
637                                         GFP_KERNEL);
638         if (!adapter->msix_entries)
639                 goto msi_only;
640
641         for (i = 0; i < numvecs; i++)
642                 adapter->msix_entries[i].entry = i;
643
644         err = pci_enable_msix(adapter->pdev,
645                               adapter->msix_entries,
646                               numvecs);
647         if (err == 0)
648                 goto out;
649
650         igb_reset_interrupt_capability(adapter);
651
652         /* If we can't do MSI-X, try MSI */
653 msi_only:
654 #ifdef CONFIG_PCI_IOV
655         /* disable SR-IOV for non MSI-X configurations */
656         if (adapter->vf_data) {
657                 struct e1000_hw *hw = &adapter->hw;
658                 /* disable iov and allow time for transactions to clear */
659                 pci_disable_sriov(adapter->pdev);
660                 msleep(500);
661
662                 kfree(adapter->vf_data);
663                 adapter->vf_data = NULL;
664                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
665                 msleep(100);
666                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
667         }
668 #endif
669         adapter->vfs_allocated_count = 0;
670         adapter->rss_queues = 1;
671         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
672         adapter->num_rx_queues = 1;
673         adapter->num_tx_queues = 1;
674         adapter->num_q_vectors = 1;
675         if (!pci_enable_msi(adapter->pdev))
676                 adapter->flags |= IGB_FLAG_HAS_MSI;
677 out:
678         /* Notify the stack of the (possibly) reduced Tx Queue count. */
679         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
680         return;
681 }
682
683 /**
684  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
685  * @adapter: board private structure to initialize
686  *
687  * We allocate one q_vector per queue interrupt.  If allocation fails we
688  * return -ENOMEM.
689  **/
690 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
691 {
692         struct igb_q_vector *q_vector;
693         struct e1000_hw *hw = &adapter->hw;
694         int v_idx;
695
696         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
697                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
698                 if (!q_vector)
699                         goto err_out;
700                 q_vector->adapter = adapter;
701                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
702                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
703                 q_vector->itr_val = IGB_START_ITR;
704                 q_vector->set_itr = 1;
705                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
706                 adapter->q_vector[v_idx] = q_vector;
707         }
708         return 0;
709
710 err_out:
711         while (v_idx) {
712                 v_idx--;
713                 q_vector = adapter->q_vector[v_idx];
714                 netif_napi_del(&q_vector->napi);
715                 kfree(q_vector);
716                 adapter->q_vector[v_idx] = NULL;
717         }
718         return -ENOMEM;
719 }
720
721 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
722                                       int ring_idx, int v_idx)
723 {
724         struct igb_q_vector *q_vector;
725
726         q_vector = adapter->q_vector[v_idx];
727         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
728         q_vector->rx_ring->q_vector = q_vector;
729         q_vector->itr_val = adapter->rx_itr_setting;
730         if (q_vector->itr_val && q_vector->itr_val <= 3)
731                 q_vector->itr_val = IGB_START_ITR;
732 }
733
734 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
735                                       int ring_idx, int v_idx)
736 {
737         struct igb_q_vector *q_vector;
738
739         q_vector = adapter->q_vector[v_idx];
740         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
741         q_vector->tx_ring->q_vector = q_vector;
742         q_vector->itr_val = adapter->tx_itr_setting;
743         if (q_vector->itr_val && q_vector->itr_val <= 3)
744                 q_vector->itr_val = IGB_START_ITR;
745 }
746
747 /**
748  * igb_map_ring_to_vector - maps allocated queues to vectors
749  *
750  * This function maps the recently allocated queues to vectors.
751  **/
752 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
753 {
754         int i;
755         int v_idx = 0;
756
757         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
758             (adapter->num_q_vectors < adapter->num_tx_queues))
759                 return -ENOMEM;
760
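        /* With enough vectors (e.g. 4 Rx + 4 Tx queues and 8 q_vectors) every
         * ring gets its own vector; otherwise Rx/Tx rings with the same index
         * share a vector and any leftover Tx rings take the remaining ones. */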
761         if (adapter->num_q_vectors >=
762             (adapter->num_rx_queues + adapter->num_tx_queues)) {
763                 for (i = 0; i < adapter->num_rx_queues; i++)
764                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
765                 for (i = 0; i < adapter->num_tx_queues; i++)
766                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
767         } else {
768                 for (i = 0; i < adapter->num_rx_queues; i++) {
769                         if (i < adapter->num_tx_queues)
770                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
771                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
772                 }
773                 for (; i < adapter->num_tx_queues; i++)
774                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
775         }
776         return 0;
777 }
778
779 /**
780  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
781  *
782  * This function initializes the interrupts and allocates all of the queues.
783  **/
784 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
785 {
786         struct pci_dev *pdev = adapter->pdev;
787         int err;
788
789         igb_set_interrupt_capability(adapter);
790
791         err = igb_alloc_q_vectors(adapter);
792         if (err) {
793                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
794                 goto err_alloc_q_vectors;
795         }
796
797         err = igb_alloc_queues(adapter);
798         if (err) {
799                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
800                 goto err_alloc_queues;
801         }
802
803         err = igb_map_ring_to_vector(adapter);
804         if (err) {
805                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
806                 goto err_map_queues;
807         }
808
809
810         return 0;
811 err_map_queues:
812         igb_free_queues(adapter);
813 err_alloc_queues:
814         igb_free_q_vectors(adapter);
815 err_alloc_q_vectors:
816         igb_reset_interrupt_capability(adapter);
817         return err;
818 }
819
820 /**
821  * igb_request_irq - initialize interrupts
822  *
823  * Attempts to configure interrupts using the best available
824  * capabilities of the hardware and kernel.
825  **/
826 static int igb_request_irq(struct igb_adapter *adapter)
827 {
828         struct net_device *netdev = adapter->netdev;
829         struct pci_dev *pdev = adapter->pdev;
830         struct e1000_hw *hw = &adapter->hw;
831         int err = 0;
832
833         if (adapter->msix_entries) {
834                 err = igb_request_msix(adapter);
835                 if (!err)
836                         goto request_done;
837                 /* fall back to MSI */
838                 igb_clear_interrupt_scheme(adapter);
839                 if (!pci_enable_msi(adapter->pdev))
840                         adapter->flags |= IGB_FLAG_HAS_MSI;
841                 igb_free_all_tx_resources(adapter);
842                 igb_free_all_rx_resources(adapter);
843                 adapter->num_tx_queues = 1;
844                 adapter->num_rx_queues = 1;
845                 adapter->num_q_vectors = 1;
846                 err = igb_alloc_q_vectors(adapter);
847                 if (err) {
848                         dev_err(&pdev->dev,
849                                 "Unable to allocate memory for vectors\n");
850                         goto request_done;
851                 }
852                 err = igb_alloc_queues(adapter);
853                 if (err) {
854                         dev_err(&pdev->dev,
855                                 "Unable to allocate memory for queues\n");
856                         igb_free_q_vectors(adapter);
857                         goto request_done;
858                 }
859                 igb_setup_all_tx_resources(adapter);
860                 igb_setup_all_rx_resources(adapter);
861         } else {
862                 switch (hw->mac.type) {
863                 case e1000_82575:
864                         wr32(E1000_MSIXBM(0),
865                              (E1000_EICR_RX_QUEUE0 |
866                               E1000_EICR_TX_QUEUE0 |
867                               E1000_EIMS_OTHER));
868                         break;
869                 case e1000_82576:
870                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
871                         break;
872                 default:
873                         break;
874                 }
875         }
876
877         if (adapter->flags & IGB_FLAG_HAS_MSI) {
878                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
879                                   netdev->name, adapter);
880                 if (!err)
881                         goto request_done;
882
883                 /* fall back to legacy interrupts */
884                 igb_reset_interrupt_capability(adapter);
885                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
886         }
887
888         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
889                           netdev->name, adapter);
890
891         if (err)
892                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
893                         err);
894
895 request_done:
896         return err;
897 }
898
899 static void igb_free_irq(struct igb_adapter *adapter)
900 {
901         if (adapter->msix_entries) {
902                 int vector = 0, i;
903
904                 free_irq(adapter->msix_entries[vector++].vector, adapter);
905
906                 for (i = 0; i < adapter->num_q_vectors; i++) {
907                         struct igb_q_vector *q_vector = adapter->q_vector[i];
908                         free_irq(adapter->msix_entries[vector++].vector,
909                                  q_vector);
910                 }
911         } else {
912                 free_irq(adapter->pdev->irq, adapter);
913         }
914 }
915
916 /**
917  * igb_irq_disable - Mask off interrupt generation on the NIC
918  * @adapter: board private structure
919  **/
920 static void igb_irq_disable(struct igb_adapter *adapter)
921 {
922         struct e1000_hw *hw = &adapter->hw;
923
924         /*
925          * we need to be careful when disabling interrupts.  The VFs are also
926          * mapped into these registers and so clearing the bits can cause
927          * issues on the VF drivers so we only need to clear what we set
928          */
929         if (adapter->msix_entries) {
930                 u32 regval = rd32(E1000_EIAM);
931                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
932                 wr32(E1000_EIMC, adapter->eims_enable_mask);
933                 regval = rd32(E1000_EIAC);
934                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
935         }
936
937         wr32(E1000_IAM, 0);
938         wr32(E1000_IMC, ~0);
939         wrfl();
940         synchronize_irq(adapter->pdev->irq);
941 }
942
943 /**
944  * igb_irq_enable - Enable default interrupt generation settings
945  * @adapter: board private structure
946  **/
947 static void igb_irq_enable(struct igb_adapter *adapter)
948 {
949         struct e1000_hw *hw = &adapter->hw;
950
951         if (adapter->msix_entries) {
952                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
953                 u32 regval = rd32(E1000_EIAC);
954                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
955                 regval = rd32(E1000_EIAM);
956                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
957                 wr32(E1000_EIMS, adapter->eims_enable_mask);
958                 if (adapter->vfs_allocated_count) {
959                         wr32(E1000_MBVFIMR, 0xFF);
960                         ims |= E1000_IMS_VMMB;
961                 }
962                 wr32(E1000_IMS, ims);
963         } else {
964                 wr32(E1000_IMS, IMS_ENABLE_MASK);
965                 wr32(E1000_IAM, IMS_ENABLE_MASK);
966         }
967 }
968
969 static void igb_update_mng_vlan(struct igb_adapter *adapter)
970 {
971         struct e1000_hw *hw = &adapter->hw;
972         u16 vid = adapter->hw.mng_cookie.vlan_id;
973         u16 old_vid = adapter->mng_vlan_id;
974
975         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
976                 /* add VID to filter table */
977                 igb_vfta_set(hw, vid, true);
978                 adapter->mng_vlan_id = vid;
979         } else {
980                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
981         }
982
983         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
984             (vid != old_vid) &&
985             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
986                 /* remove VID from filter table */
987                 igb_vfta_set(hw, old_vid, false);
988         }
989 }
990
991 /**
992  * igb_release_hw_control - release control of the h/w to f/w
993  * @adapter: address of board private structure
994  *
995  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
996  * For ASF and Pass Through versions of f/w this means that the
997  * driver is no longer loaded.
998  *
999  **/
1000 static void igb_release_hw_control(struct igb_adapter *adapter)
1001 {
1002         struct e1000_hw *hw = &adapter->hw;
1003         u32 ctrl_ext;
1004
1005         /* Let firmware take over control of h/w */
1006         ctrl_ext = rd32(E1000_CTRL_EXT);
1007         wr32(E1000_CTRL_EXT,
1008                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1009 }
1010
1011 /**
1012  * igb_get_hw_control - get control of the h/w from f/w
1013  * @adapter: address of board private structure
1014  *
1015  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1016  * For ASF and Pass Through versions of f/w this means that
1017  * the driver is loaded.
1018  *
1019  **/
1020 static void igb_get_hw_control(struct igb_adapter *adapter)
1021 {
1022         struct e1000_hw *hw = &adapter->hw;
1023         u32 ctrl_ext;
1024
1025         /* Let firmware know the driver has taken over */
1026         ctrl_ext = rd32(E1000_CTRL_EXT);
1027         wr32(E1000_CTRL_EXT,
1028                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1029 }
1030
1031 /**
1032  * igb_configure - configure the hardware for RX and TX
1033  * @adapter: private board structure
1034  **/
1035 static void igb_configure(struct igb_adapter *adapter)
1036 {
1037         struct net_device *netdev = adapter->netdev;
1038         int i;
1039
1040         igb_get_hw_control(adapter);
1041         igb_set_rx_mode(netdev);
1042
1043         igb_restore_vlan(adapter);
1044
1045         igb_setup_tctl(adapter);
1046         igb_setup_mrqc(adapter);
1047         igb_setup_rctl(adapter);
1048
1049         igb_configure_tx(adapter);
1050         igb_configure_rx(adapter);
1051
1052         igb_rx_fifo_flush_82575(&adapter->hw);
1053
1054         /* call igb_desc_unused which always leaves
1055          * at least 1 descriptor unused to make sure
1056          * next_to_use != next_to_clean */
1057         for (i = 0; i < adapter->num_rx_queues; i++) {
1058                 struct igb_ring *ring = &adapter->rx_ring[i];
1059                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1060         }
1061
1062
1063         adapter->tx_queue_len = netdev->tx_queue_len;
1064 }
1065
1066
1067 /**
1068  * igb_up - Open the interface and prepare it to handle traffic
1069  * @adapter: board private structure
1070  **/
1071 int igb_up(struct igb_adapter *adapter)
1072 {
1073         struct e1000_hw *hw = &adapter->hw;
1074         int i;
1075
1076         /* hardware has been reset, we need to reload some things */
1077         igb_configure(adapter);
1078
1079         clear_bit(__IGB_DOWN, &adapter->state);
1080
1081         for (i = 0; i < adapter->num_q_vectors; i++) {
1082                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1083                 napi_enable(&q_vector->napi);
1084         }
1085         if (adapter->msix_entries)
1086                 igb_configure_msix(adapter);
1087
1088         /* Clear any pending interrupts. */
1089         rd32(E1000_ICR);
1090         igb_irq_enable(adapter);
1091
1092         /* notify VFs that reset has been completed */
1093         if (adapter->vfs_allocated_count) {
1094                 u32 reg_data = rd32(E1000_CTRL_EXT);
1095                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1096                 wr32(E1000_CTRL_EXT, reg_data);
1097         }
1098
1099         netif_tx_start_all_queues(adapter->netdev);
1100
1101         /* start the watchdog. */
1102         hw->mac.get_link_status = 1;
1103         schedule_work(&adapter->watchdog_task);
1104
1105         return 0;
1106 }
1107
1108 void igb_down(struct igb_adapter *adapter)
1109 {
1110         struct net_device *netdev = adapter->netdev;
1111         struct e1000_hw *hw = &adapter->hw;
1112         u32 tctl, rctl;
1113         int i;
1114
1115         /* signal that we're down so the interrupt handler does not
1116          * reschedule our watchdog timer */
1117         set_bit(__IGB_DOWN, &adapter->state);
1118
1119         /* disable receives in the hardware */
1120         rctl = rd32(E1000_RCTL);
1121         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1122         /* flush and sleep below */
1123
1124         netif_tx_stop_all_queues(netdev);
1125
1126         /* disable transmits in the hardware */
1127         tctl = rd32(E1000_TCTL);
1128         tctl &= ~E1000_TCTL_EN;
1129         wr32(E1000_TCTL, tctl);
1130         /* flush both disables and wait for them to finish */
1131         wrfl();
1132         msleep(10);
1133
1134         for (i = 0; i < adapter->num_q_vectors; i++) {
1135                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1136                 napi_disable(&q_vector->napi);
1137         }
1138
1139         igb_irq_disable(adapter);
1140
1141         del_timer_sync(&adapter->watchdog_timer);
1142         del_timer_sync(&adapter->phy_info_timer);
1143
1144         netdev->tx_queue_len = adapter->tx_queue_len;
1145         netif_carrier_off(netdev);
1146
1147         /* record the stats before reset */
1148         igb_update_stats(adapter);
1149
1150         adapter->link_speed = 0;
1151         adapter->link_duplex = 0;
1152
1153         if (!pci_channel_offline(adapter->pdev))
1154                 igb_reset(adapter);
1155         igb_clean_all_tx_rings(adapter);
1156         igb_clean_all_rx_rings(adapter);
1157 #ifdef CONFIG_IGB_DCA
1158
1159         /* since we reset the hardware, DCA settings were cleared */
1160         igb_setup_dca(adapter);
1161 #endif
1162 }
1163
1164 void igb_reinit_locked(struct igb_adapter *adapter)
1165 {
1166         WARN_ON(in_interrupt());
1167         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1168                 msleep(1);
1169         igb_down(adapter);
1170         igb_up(adapter);
1171         clear_bit(__IGB_RESETTING, &adapter->state);
1172 }
1173
1174 void igb_reset(struct igb_adapter *adapter)
1175 {
1176         struct pci_dev *pdev = adapter->pdev;
1177         struct e1000_hw *hw = &adapter->hw;
1178         struct e1000_mac_info *mac = &hw->mac;
1179         struct e1000_fc_info *fc = &hw->fc;
1180         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1181         u16 hwm;
1182
1183         /* Repartition the PBA for MTUs greater than 9k.
1184          * CTRL.RST is required for this to take effect.
1185          */
1186         switch (mac->type) {
1187         case e1000_82576:
1188                 pba = rd32(E1000_RXPBS);
1189                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1190                 break;
1191         case e1000_82575:
1192         default:
1193                 pba = E1000_PBA_34K;
1194                 break;
1195         }
1196
1197         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1198             (mac->type < e1000_82576)) {
1199                 /* adjust PBA for jumbo frames */
1200                 wr32(E1000_PBA, pba);
1201
1202                 /* To maintain wire speed transmits, the Tx FIFO should be
1203                  * large enough to accommodate two full transmit packets,
1204                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1205                  * the Rx FIFO should be large enough to accommodate at least
1206                  * one full receive packet and is similarly rounded up and
1207                  * expressed in KB. */
1208                 pba = rd32(E1000_PBA);
1209                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1210                 tx_space = pba >> 16;
1211                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1212                 pba &= 0xffff;
1213                 /* the Tx FIFO also stores 16 bytes of information about each Tx
1214                  * packet, but we don't include the Ethernet FCS because hardware appends it */
1215                 min_tx_space = (adapter->max_frame_size +
1216                                 sizeof(union e1000_adv_tx_desc) -
1217                                 ETH_FCS_LEN) * 2;
1218                 min_tx_space = ALIGN(min_tx_space, 1024);
1219                 min_tx_space >>= 10;
1220                 /* software strips receive CRC, so leave room for it */
1221                 min_rx_space = adapter->max_frame_size;
1222                 min_rx_space = ALIGN(min_rx_space, 1024);
1223                 min_rx_space >>= 10;
1224
1225                 /* If current Tx allocation is less than the min Tx FIFO size,
1226                  * and the min Tx FIFO size is less than the current Rx FIFO
1227                  * allocation, take space away from current Rx allocation */
1228                 if (tx_space < min_tx_space &&
1229                     ((min_tx_space - tx_space) < pba)) {
1230                         pba = pba - (min_tx_space - tx_space);
1231
1232                         /* if short on rx space, rx wins and must trump tx
1233                          * adjustment */
1234                         if (pba < min_rx_space)
1235                                 pba = min_rx_space;
1236                 }
1237                 wr32(E1000_PBA, pba);
1238         }
1239
1240         /* flow control settings */
1241         /* The high water mark must be low enough to fit one full frame
1242          * (or the size used for early receive) above it in the Rx FIFO.
1243          * Set it to the lower of:
1244          * - 90% of the Rx FIFO size, or
1245          * - the full Rx FIFO size minus one full frame */
1246         hwm = min(((pba << 10) * 9 / 10),
1247                         ((pba << 10) - 2 * adapter->max_frame_size));
1248
1249         if (mac->type < e1000_82576) {
1250                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1251                 fc->low_water = fc->high_water - 8;
1252         } else {
1253                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1254                 fc->low_water = fc->high_water - 16;
1255         }
1256         fc->pause_time = 0xFFFF;
1257         fc->send_xon = 1;
1258         fc->current_mode = fc->requested_mode;
1259
1260         /* disable receive for all VFs and wait one second */
1261         if (adapter->vfs_allocated_count) {
1262                 int i;
1263                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1264                         adapter->vf_data[i].flags = 0;
1265
1266                 /* ping all the active vfs to let them know we are going down */
1267                 igb_ping_all_vfs(adapter);
1268
1269                 /* disable transmits and receives */
1270                 wr32(E1000_VFRE, 0);
1271                 wr32(E1000_VFTE, 0);
1272         }
1273
1274         /* Allow time for pending master requests to run */
1275         hw->mac.ops.reset_hw(hw);
1276         wr32(E1000_WUC, 0);
1277
1278         if (hw->mac.ops.init_hw(hw))
1279                 dev_err(&pdev->dev, "Hardware Error\n");
1280
1281         igb_update_mng_vlan(adapter);
1282
1283         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1284         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1285
1286         igb_reset_adaptive(hw);
1287         igb_get_phy_info(hw);
1288 }
1289
1290 static const struct net_device_ops igb_netdev_ops = {
1291         .ndo_open               = igb_open,
1292         .ndo_stop               = igb_close,
1293         .ndo_start_xmit         = igb_xmit_frame_adv,
1294         .ndo_get_stats          = igb_get_stats,
1295         .ndo_set_rx_mode        = igb_set_rx_mode,
1296         .ndo_set_multicast_list = igb_set_rx_mode,
1297         .ndo_set_mac_address    = igb_set_mac,
1298         .ndo_change_mtu         = igb_change_mtu,
1299         .ndo_do_ioctl           = igb_ioctl,
1300         .ndo_tx_timeout         = igb_tx_timeout,
1301         .ndo_validate_addr      = eth_validate_addr,
1302         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1303         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1304         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1305 #ifdef CONFIG_NET_POLL_CONTROLLER
1306         .ndo_poll_controller    = igb_netpoll,
1307 #endif
1308 };
1309
1310 /**
1311  * igb_probe - Device Initialization Routine
1312  * @pdev: PCI device information struct
1313  * @ent: entry in igb_pci_tbl
1314  *
1315  * Returns 0 on success, negative on failure
1316  *
1317  * igb_probe initializes an adapter identified by a pci_dev structure.
1318  * The OS initialization, configuring of the adapter private structure,
1319  * and a hardware reset occur.
1320  **/
1321 static int __devinit igb_probe(struct pci_dev *pdev,
1322                                const struct pci_device_id *ent)
1323 {
1324         struct net_device *netdev;
1325         struct igb_adapter *adapter;
1326         struct e1000_hw *hw;
1327         u16 eeprom_data = 0;
1328         static int global_quad_port_a; /* global quad port a indication */
1329         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1330         unsigned long mmio_start, mmio_len;
1331         int err, pci_using_dac;
1332         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1333         u32 part_num;
1334
1335         err = pci_enable_device_mem(pdev);
1336         if (err)
1337                 return err;
1338
1339         pci_using_dac = 0;
1340         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1341         if (!err) {
1342                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1343                 if (!err)
1344                         pci_using_dac = 1;
1345         } else {
1346                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1347                 if (err) {
1348                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1349                         if (err) {
1350                                 dev_err(&pdev->dev, "No usable DMA "
1351                                         "configuration, aborting\n");
1352                                 goto err_dma;
1353                         }
1354                 }
1355         }
1356
1357         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1358                                            IORESOURCE_MEM),
1359                                            igb_driver_name);
1360         if (err)
1361                 goto err_pci_reg;
1362
1363         pci_enable_pcie_error_reporting(pdev);
1364
1365         pci_set_master(pdev);
1366         pci_save_state(pdev);
1367
1368         err = -ENOMEM;
1369         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1370                                    IGB_ABS_MAX_TX_QUEUES);
1371         if (!netdev)
1372                 goto err_alloc_etherdev;
1373
1374         SET_NETDEV_DEV(netdev, &pdev->dev);
1375
1376         pci_set_drvdata(pdev, netdev);
1377         adapter = netdev_priv(netdev);
1378         adapter->netdev = netdev;
1379         adapter->pdev = pdev;
1380         hw = &adapter->hw;
1381         hw->back = adapter;
1382         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1383
1384         mmio_start = pci_resource_start(pdev, 0);
1385         mmio_len = pci_resource_len(pdev, 0);
1386
1387         err = -EIO;
1388         hw->hw_addr = ioremap(mmio_start, mmio_len);
1389         if (!hw->hw_addr)
1390                 goto err_ioremap;
1391
1392         netdev->netdev_ops = &igb_netdev_ops;
1393         igb_set_ethtool_ops(netdev);
1394         netdev->watchdog_timeo = 5 * HZ;
1395
1396         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1397
1398         netdev->mem_start = mmio_start;
1399         netdev->mem_end = mmio_start + mmio_len;
1400
1401         /* PCI config space info */
1402         hw->vendor_id = pdev->vendor;
1403         hw->device_id = pdev->device;
1404         hw->revision_id = pdev->revision;
1405         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1406         hw->subsystem_device_id = pdev->subsystem_device;
1407
1408         /* Copy the default MAC, PHY and NVM function pointers */
1409         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1410         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1411         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1412         /* Initialize skew-specific constants */
1413         err = ei->get_invariants(hw);
1414         if (err)
1415                 goto err_sw_init;
1416
1417         /* setup the private structure */
1418         err = igb_sw_init(adapter);
1419         if (err)
1420                 goto err_sw_init;
1421
1422         igb_get_bus_info_pcie(hw);
1423
1424         hw->phy.autoneg_wait_to_complete = false;
1425         hw->mac.adaptive_ifs = true;
1426
1427         /* Copper options */
1428         if (hw->phy.media_type == e1000_media_type_copper) {
1429                 hw->phy.mdix = AUTO_ALL_MODES;
1430                 hw->phy.disable_polarity_correction = false;
1431                 hw->phy.ms_type = e1000_ms_hw_default;
1432         }
1433
1434         if (igb_check_reset_block(hw))
1435                 dev_info(&pdev->dev,
1436                         "PHY reset is blocked due to SOL/IDER session.\n");
1437
1438         netdev->features = NETIF_F_SG |
1439                            NETIF_F_IP_CSUM |
1440                            NETIF_F_HW_VLAN_TX |
1441                            NETIF_F_HW_VLAN_RX |
1442                            NETIF_F_HW_VLAN_FILTER;
1443
1444         netdev->features |= NETIF_F_IPV6_CSUM;
1445         netdev->features |= NETIF_F_TSO;
1446         netdev->features |= NETIF_F_TSO6;
1447         netdev->features |= NETIF_F_GRO;
1448
1449         netdev->vlan_features |= NETIF_F_TSO;
1450         netdev->vlan_features |= NETIF_F_TSO6;
1451         netdev->vlan_features |= NETIF_F_IP_CSUM;
1452         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1453         netdev->vlan_features |= NETIF_F_SG;
1454
1455         if (pci_using_dac)
1456                 netdev->features |= NETIF_F_HIGHDMA;
1457
1458         if (hw->mac.type >= e1000_82576)
1459                 netdev->features |= NETIF_F_SCTP_CSUM;
1460
1461         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1462
1463         /* before reading the NVM, reset the controller to put the device in a
1464          * known good starting state */
1465         hw->mac.ops.reset_hw(hw);
1466
1467         /* make sure the NVM is good */
1468         if (igb_validate_nvm_checksum(hw) < 0) {
1469                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1470                 err = -EIO;
1471                 goto err_eeprom;
1472         }
1473
1474         /* copy the MAC address out of the NVM */
1475         if (hw->mac.ops.read_mac_addr(hw))
1476                 dev_err(&pdev->dev, "NVM Read Error\n");
1477
1478         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1479         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1480
1481         if (!is_valid_ether_addr(netdev->perm_addr)) {
1482                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1483                 err = -EIO;
1484                 goto err_eeprom;
1485         }
1486
1487         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1488                     (unsigned long) adapter);
1489         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1490                     (unsigned long) adapter);
1491
1492         INIT_WORK(&adapter->reset_task, igb_reset_task);
1493         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1494
1495         /* Initialize link properties that are user-changeable */
1496         adapter->fc_autoneg = true;
1497         hw->mac.autoneg = true;
1498         hw->phy.autoneg_advertised = 0x2f;
1499
1500         hw->fc.requested_mode = e1000_fc_default;
1501         hw->fc.current_mode = e1000_fc_default;
1502
1503         igb_validate_mdi_setting(hw);
1504
1505         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1506          * enable the ACPI Magic Packet filter.
1507          */
1508
1509         if (hw->bus.func == 0)
1510                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1511         else if (hw->bus.func == 1)
1512                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1513
1514         if (eeprom_data & eeprom_apme_mask)
1515                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1516
1517         /* now that we have the eeprom settings, apply the special cases where
1518          * the eeprom may be wrong or the board simply won't support wake on
1519          * lan on a particular port */
1520         switch (pdev->device) {
1521         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1522                 adapter->eeprom_wol = 0;
1523                 break;
1524         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1525         case E1000_DEV_ID_82576_FIBER:
1526         case E1000_DEV_ID_82576_SERDES:
1527                 /* Wake events only supported on port A for dual fiber
1528                  * regardless of eeprom setting */
1529                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1530                         adapter->eeprom_wol = 0;
1531                 break;
1532         case E1000_DEV_ID_82576_QUAD_COPPER:
1533                 /* if quad port adapter, disable WoL on all but port A */
1534                 if (global_quad_port_a != 0)
1535                         adapter->eeprom_wol = 0;
1536                 else
1537                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1538                 /* Reset for multiple quad port adapters */
1539                 if (++global_quad_port_a == 4)
1540                         global_quad_port_a = 0;
1541                 break;
1542         }
1543
1544         /* initialize the wol settings based on the eeprom settings */
1545         adapter->wol = adapter->eeprom_wol;
1546         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1547
1548         /* reset the hardware with the new settings */
1549         igb_reset(adapter);
1550
1551         /* let the f/w know that the h/w is now under the control of the
1552          * driver. */
1553         igb_get_hw_control(adapter);
1554
1555         strcpy(netdev->name, "eth%d");
1556         err = register_netdev(netdev);
1557         if (err)
1558                 goto err_register;
1559
1560         /* carrier off reporting is important to ethtool even BEFORE open */
1561         netif_carrier_off(netdev);
1562
1563 #ifdef CONFIG_IGB_DCA
1564         if (dca_add_requester(&pdev->dev) == 0) {
1565                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1566                 dev_info(&pdev->dev, "DCA enabled\n");
1567                 igb_setup_dca(adapter);
1568         }
1569
1570 #endif
1571         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1572         /* print bus type/speed/width info */
1573         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1574                  netdev->name,
1575                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1576                                                             "unknown"),
1577                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1578                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1579                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1580                    "unknown"),
1581                  netdev->dev_addr);
1582
1583         igb_read_part_num(hw, &part_num);
1584         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1585                 (part_num >> 8), (part_num & 0xff));
1586
1587         dev_info(&pdev->dev,
1588                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1589                 adapter->msix_entries ? "MSI-X" :
1590                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1591                 adapter->num_rx_queues, adapter->num_tx_queues);
1592
1593         return 0;
1594
1595 err_register:
1596         igb_release_hw_control(adapter);
1597 err_eeprom:
1598         if (!igb_check_reset_block(hw))
1599                 igb_reset_phy(hw);
1600
1601         if (hw->flash_address)
1602                 iounmap(hw->flash_address);
1603 err_sw_init:
1604         igb_clear_interrupt_scheme(adapter);
1605         iounmap(hw->hw_addr);
1606 err_ioremap:
1607         free_netdev(netdev);
1608 err_alloc_etherdev:
1609         pci_release_selected_regions(pdev,
1610                                      pci_select_bars(pdev, IORESOURCE_MEM));
1611 err_pci_reg:
1612 err_dma:
1613         pci_disable_device(pdev);
1614         return err;
1615 }
1616
1617 /**
1618  * igb_remove - Device Removal Routine
1619  * @pdev: PCI device information struct
1620  *
1621  * igb_remove is called by the PCI subsystem to alert the driver
1622  * that it should release a PCI device.  This could be caused by a
1623  * Hot-Plug event, or because the driver is going to be removed from
1624  * memory.
1625  **/
1626 static void __devexit igb_remove(struct pci_dev *pdev)
1627 {
1628         struct net_device *netdev = pci_get_drvdata(pdev);
1629         struct igb_adapter *adapter = netdev_priv(netdev);
1630         struct e1000_hw *hw = &adapter->hw;
1631
1632         /* flush_scheduled_work() may reschedule our watchdog task, so
1633          * explicitly prevent the watchdog task from being rescheduled  */
1634         set_bit(__IGB_DOWN, &adapter->state);
1635         del_timer_sync(&adapter->watchdog_timer);
1636         del_timer_sync(&adapter->phy_info_timer);
1637
1638         flush_scheduled_work();
1639
1640 #ifdef CONFIG_IGB_DCA
1641         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1642                 dev_info(&pdev->dev, "DCA disabled\n");
1643                 dca_remove_requester(&pdev->dev);
1644                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1645                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1646         }
1647 #endif
1648
1649         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1650          * would have already happened in close and is redundant. */
1651         igb_release_hw_control(adapter);
1652
1653         unregister_netdev(netdev);
1654
1655         if (!igb_check_reset_block(hw))
1656                 igb_reset_phy(hw);
1657
1658         igb_clear_interrupt_scheme(adapter);
1659
1660 #ifdef CONFIG_PCI_IOV
1661         /* reclaim resources allocated to VFs */
1662         if (adapter->vf_data) {
1663                 /* disable iov and allow time for transactions to clear */
1664                 pci_disable_sriov(pdev);
1665                 msleep(500);
1666
1667                 kfree(adapter->vf_data);
1668                 adapter->vf_data = NULL;
1669                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1670                 msleep(100);
1671                 dev_info(&pdev->dev, "IOV Disabled\n");
1672         }
1673 #endif
1674
1675         iounmap(hw->hw_addr);
1676         if (hw->flash_address)
1677                 iounmap(hw->flash_address);
1678         pci_release_selected_regions(pdev,
1679                                      pci_select_bars(pdev, IORESOURCE_MEM));
1680
1681         free_netdev(netdev);
1682
1683         pci_disable_pcie_error_reporting(pdev);
1684
1685         pci_disable_device(pdev);
1686 }
1687
1688 /**
1689  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1690  * @adapter: board private structure to initialize
1691  *
1692  * This function initializes the vf specific data storage and then attempts to
1693  * allocate the VFs.  The reason for ordering it this way is that it is much
1694  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1695  * the memory for the VFs.
1696  **/
1697 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1698 {
1699 #ifdef CONFIG_PCI_IOV
1700         struct pci_dev *pdev = adapter->pdev;
1701
1702         if (adapter->vfs_allocated_count > 7)
1703                 adapter->vfs_allocated_count = 7;
1704
1705         if (adapter->vfs_allocated_count) {
1706                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1707                                            sizeof(struct vf_data_storage),
1708                                            GFP_KERNEL);
1709                 /* if allocation failed then we do not support SR-IOV */
1710                 if (!adapter->vf_data) {
1711                         adapter->vfs_allocated_count = 0;
1712                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1713                                 "Data Storage\n");
1714                 }
1715         }
1716
1717         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1718                 kfree(adapter->vf_data);
1719                 adapter->vf_data = NULL;
1720 #endif /* CONFIG_PCI_IOV */
1721                 adapter->vfs_allocated_count = 0;
1722 #ifdef CONFIG_PCI_IOV
1723         } else {
1724                 unsigned char mac_addr[ETH_ALEN];
1725                 int i;
1726                 dev_info(&pdev->dev, "%d vfs allocated\n",
1727                          adapter->vfs_allocated_count);
1728                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1729                         random_ether_addr(mac_addr);
1730                         igb_set_vf_mac(adapter, i, mac_addr);
1731                 }
1732         }
1733 #endif /* CONFIG_PCI_IOV */
1734 }
1735
1736
1737 /**
1738  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1739  * @adapter: board private structure to initialize
1740  *
1741  * igb_init_hw_timer initializes the function pointers and values for the
1742  * hardware timer.
1743  **/
1744 static void igb_init_hw_timer(struct igb_adapter *adapter)
1745 {
1746         struct e1000_hw *hw = &adapter->hw;
1747
1748         switch (hw->mac.type) {
1749         case e1000_82576:
1750                 /*
1751                  * Initialize hardware timer: we keep it running just in case
1752                  * some program needs it later on.
1753                  */
1754                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1755                 adapter->cycles.read = igb_read_clock;
1756                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1757                 adapter->cycles.mult = 1;
1758                 /*
1759                  * Scale the NIC clock cycle by a large factor so that
1760                  * relatively small clock corrections can be added or
1761                  * subtracted at each clock tick. The drawbacks of a large
1762                  * factor are a) that the clock register overflows more quickly
1763                  * (not such a big deal) and b) that the increment per tick has
1764                  * to fit into 24 bits.  As a result we need to use a shift of
1765                  * 19 so we can fit a value of 16 into the TIMINCA register.
1766                  */
1767                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1768                 wr32(E1000_TIMINCA,
1769                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1770                                 (16 << IGB_82576_TSYNC_SHIFT));
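                /*
                 * Worked example: the timecounter converts a raw delta as
                 * ns = (delta * mult) >> shift, so with mult = 1 and
                 * shift = 19 the increment value of 16 << 19 programmed
                 * above converts back to exactly 16 ns per hardware
                 * increment period.
                 */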
1771
1772                 /* Set registers so that rollover occurs soon to test this. */
1773                 wr32(E1000_SYSTIML, 0x00000000);
1774                 wr32(E1000_SYSTIMH, 0xFF800000);
1775                 wrfl();
1776
1777                 timecounter_init(&adapter->clock,
1778                                  &adapter->cycles,
1779                                  ktime_to_ns(ktime_get_real()));
1780                 /*
1781                  * Synchronize our NIC clock against the system wall clock.
1782                  * NIC time stamp reading requires ~3us per sample and each
1783                  * sample is stable even under load, so 10 samples per offset
1784                  * comparison are sufficient.
1785                  */
1786                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1787                 adapter->compare.source = &adapter->clock;
1788                 adapter->compare.target = ktime_get_real;
1789                 adapter->compare.num_samples = 10;
1790                 timecompare_update(&adapter->compare, 0);
1791                 break;
1792         case e1000_82575:
1793                 /* 82575 does not support timesync */
1794         default:
1795                 break;
1796         }
1797
1798 }
1799
1800 /**
1801  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1802  * @adapter: board private structure to initialize
1803  *
1804  * igb_sw_init initializes the Adapter private data structure.
1805  * Fields are initialized based on PCI device information and
1806  * OS network device settings (MTU size).
1807  **/
1808 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1809 {
1810         struct e1000_hw *hw = &adapter->hw;
1811         struct net_device *netdev = adapter->netdev;
1812         struct pci_dev *pdev = adapter->pdev;
1813
1814         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1815
1816         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1817         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1818         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1819         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1820
1821         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1822         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1823
1824 #ifdef CONFIG_PCI_IOV
1825         if (hw->mac.type == e1000_82576)
1826                 adapter->vfs_allocated_count = max_vfs;
1827
1828 #endif /* CONFIG_PCI_IOV */
1829         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1830
1831         /*
1832          * if rss_queues > 4, or if more than 6 VFs are allocated while
1833          * multiple rss queues are in use, combine the queues into queue
1834          * pairs in order to conserve the limited supply of interrupt vectors
1835          */
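        /* e.g. with 8 rss queues, pairing puts each Tx ring on the same
         * q_vector as an Rx ring, roughly halving the number of MSI-X
         * vectors requested (one per q_vector plus one for other causes) */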
1836         if ((adapter->rss_queues > 4) ||
1837             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1838                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1839
1840         /* This call may decrease the number of queues */
1841         if (igb_init_interrupt_scheme(adapter)) {
1842                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1843                 return -ENOMEM;
1844         }
1845
1846         igb_init_hw_timer(adapter);
1847         igb_probe_vfs(adapter);
1848
1849         /* Explicitly disable IRQ since the NIC can be in any state. */
1850         igb_irq_disable(adapter);
1851
1852         set_bit(__IGB_DOWN, &adapter->state);
1853         return 0;
1854 }
1855
1856 /**
1857  * igb_open - Called when a network interface is made active
1858  * @netdev: network interface device structure
1859  *
1860  * Returns 0 on success, negative value on failure
1861  *
1862  * The open entry point is called when a network interface is made
1863  * active by the system (IFF_UP).  At this point all resources needed
1864  * for transmit and receive operations are allocated, the interrupt
1865  * handler is registered with the OS, the watchdog timer is started,
1866  * and the stack is notified that the interface is ready.
1867  **/
1868 static int igb_open(struct net_device *netdev)
1869 {
1870         struct igb_adapter *adapter = netdev_priv(netdev);
1871         struct e1000_hw *hw = &adapter->hw;
1872         int err;
1873         int i;
1874
1875         /* disallow open during test */
1876         if (test_bit(__IGB_TESTING, &adapter->state))
1877                 return -EBUSY;
1878
1879         netif_carrier_off(netdev);
1880
1881         /* allocate transmit descriptors */
1882         err = igb_setup_all_tx_resources(adapter);
1883         if (err)
1884                 goto err_setup_tx;
1885
1886         /* allocate receive descriptors */
1887         err = igb_setup_all_rx_resources(adapter);
1888         if (err)
1889                 goto err_setup_rx;
1890
1891         /* e1000_power_up_phy(adapter); */
1892
1893         /* before we allocate an interrupt, we must be ready to handle it.
1894          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1895          * as soon as we call pci_request_irq, so we have to set up our
1896          * clean_rx handler before we do so.  */
1897         igb_configure(adapter);
1898
1899         err = igb_request_irq(adapter);
1900         if (err)
1901                 goto err_req_irq;
1902
1903         /* From here on the code is the same as igb_up() */
1904         clear_bit(__IGB_DOWN, &adapter->state);
1905
1906         for (i = 0; i < adapter->num_q_vectors; i++) {
1907                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1908                 napi_enable(&q_vector->napi);
1909         }
1910
1911         /* Clear any pending interrupts. */
1912         rd32(E1000_ICR);
1913
1914         igb_irq_enable(adapter);
1915
1916         /* notify VFs that reset has been completed */
1917         if (adapter->vfs_allocated_count) {
1918                 u32 reg_data = rd32(E1000_CTRL_EXT);
1919                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1920                 wr32(E1000_CTRL_EXT, reg_data);
1921         }
1922
1923         netif_tx_start_all_queues(netdev);
1924
1925         /* start the watchdog. */
1926         hw->mac.get_link_status = 1;
1927         schedule_work(&adapter->watchdog_task);
1928
1929         return 0;
1930
1931 err_req_irq:
1932         igb_release_hw_control(adapter);
1933         /* e1000_power_down_phy(adapter); */
1934         igb_free_all_rx_resources(adapter);
1935 err_setup_rx:
1936         igb_free_all_tx_resources(adapter);
1937 err_setup_tx:
1938         igb_reset(adapter);
1939
1940         return err;
1941 }
1942
1943 /**
1944  * igb_close - Disables a network interface
1945  * @netdev: network interface device structure
1946  *
1947  * Returns 0, this is not allowed to fail
1948  *
1949  * The close entry point is called when an interface is de-activated
1950  * by the OS.  The hardware is still under the driver's control, but
1951  * needs to be disabled.  A global MAC reset is issued to stop the
1952  * hardware, and all transmit and receive resources are freed.
1953  **/
1954 static int igb_close(struct net_device *netdev)
1955 {
1956         struct igb_adapter *adapter = netdev_priv(netdev);
1957
1958         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1959         igb_down(adapter);
1960
1961         igb_free_irq(adapter);
1962
1963         igb_free_all_tx_resources(adapter);
1964         igb_free_all_rx_resources(adapter);
1965
1966         return 0;
1967 }
1968
1969 /**
1970  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1971  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1972  *
1973  * Return 0 on success, negative on failure
1974  **/
1975 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1976 {
1977         struct pci_dev *pdev = tx_ring->pdev;
1978         int size;
1979
1980         size = sizeof(struct igb_buffer) * tx_ring->count;
1981         tx_ring->buffer_info = vmalloc(size);
1982         if (!tx_ring->buffer_info)
1983                 goto err;
1984         memset(tx_ring->buffer_info, 0, size);
1985
1986         /* round up to nearest 4K */
1987         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1988         tx_ring->size = ALIGN(tx_ring->size, 4096);
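        /* e.g. the default of 256 descriptors (IGB_DEFAULT_TXD) at 16 bytes
         * per advanced descriptor is exactly 4096 bytes, so the rounding is
         * a no-op in that case */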
1989
1990         tx_ring->desc = pci_alloc_consistent(pdev,
1991                                              tx_ring->size,
1992                                              &tx_ring->dma);
1993
1994         if (!tx_ring->desc)
1995                 goto err;
1996
1997         tx_ring->next_to_use = 0;
1998         tx_ring->next_to_clean = 0;
1999         return 0;
2000
2001 err:
2002         vfree(tx_ring->buffer_info);
2003         dev_err(&pdev->dev,
2004                 "Unable to allocate memory for the transmit descriptor ring\n");
2005         return -ENOMEM;
2006 }
2007
2008 /**
2009  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2010  *                                (Descriptors) for all queues
2011  * @adapter: board private structure
2012  *
2013  * Return 0 on success, negative on failure
2014  **/
2015 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2016 {
2017         struct pci_dev *pdev = adapter->pdev;
2018         int i, err = 0;
2019
2020         for (i = 0; i < adapter->num_tx_queues; i++) {
2021                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2022                 if (err) {
2023                         dev_err(&pdev->dev,
2024                                 "Allocation for Tx Queue %u failed\n", i);
2025                         for (i--; i >= 0; i--)
2026                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2027                         break;
2028                 }
2029         }
2030
2031         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2032                 int r_idx = i % adapter->num_tx_queues;
2033                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2034         }
2035         return err;
2036 }
2037
2038 /**
2039  * igb_setup_tctl - configure the transmit control registers
2040  * @adapter: Board private structure
2041  **/
2042 void igb_setup_tctl(struct igb_adapter *adapter)
2043 {
2044         struct e1000_hw *hw = &adapter->hw;
2045         u32 tctl;
2046
2047         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2048         wr32(E1000_TXDCTL(0), 0);
2049
2050         /* Program the Transmit Control Register */
2051         tctl = rd32(E1000_TCTL);
2052         tctl &= ~E1000_TCTL_CT;
2053         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2054                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2055
2056         igb_config_collision_dist(hw);
2057
2058         /* Enable transmits */
2059         tctl |= E1000_TCTL_EN;
2060
2061         wr32(E1000_TCTL, tctl);
2062 }
2063
2064 /**
2065  * igb_configure_tx_ring - Configure transmit ring after Reset
2066  * @adapter: board private structure
2067  * @ring: tx ring to configure
2068  *
2069  * Configure a transmit ring after a reset.
2070  **/
2071 void igb_configure_tx_ring(struct igb_adapter *adapter,
2072                            struct igb_ring *ring)
2073 {
2074         struct e1000_hw *hw = &adapter->hw;
2075         u32 txdctl;
2076         u64 tdba = ring->dma;
2077         int reg_idx = ring->reg_idx;
2078
2079         /* disable the queue */
2080         txdctl = rd32(E1000_TXDCTL(reg_idx));
2081         wr32(E1000_TXDCTL(reg_idx),
2082                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2083         wrfl();
2084         mdelay(10);
2085
2086         wr32(E1000_TDLEN(reg_idx),
2087                         ring->count * sizeof(union e1000_adv_tx_desc));
2088         wr32(E1000_TDBAL(reg_idx),
2089                         tdba & 0x00000000ffffffffULL);
2090         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2091
2092         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2093         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2094         writel(0, ring->head);
2095         writel(0, ring->tail);
2096
2097         txdctl |= IGB_TX_PTHRESH;
2098         txdctl |= IGB_TX_HTHRESH << 8;
2099         txdctl |= IGB_TX_WTHRESH << 16;
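        /* the prefetch, host and write-back thresholds occupy successive
         * byte lanes of TXDCTL, hence the shifts of 8 and 16 above */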
2100
2101         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2102         wr32(E1000_TXDCTL(reg_idx), txdctl);
2103 }
2104
2105 /**
2106  * igb_configure_tx - Configure transmit Unit after Reset
2107  * @adapter: board private structure
2108  *
2109  * Configure the Tx unit of the MAC after a reset.
2110  **/
2111 static void igb_configure_tx(struct igb_adapter *adapter)
2112 {
2113         int i;
2114
2115         for (i = 0; i < adapter->num_tx_queues; i++)
2116                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2117 }
2118
2119 /**
2120  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2121  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2122  *
2123  * Returns 0 on success, negative on failure
2124  **/
2125 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2126 {
2127         struct pci_dev *pdev = rx_ring->pdev;
2128         int size, desc_len;
2129
2130         size = sizeof(struct igb_buffer) * rx_ring->count;
2131         rx_ring->buffer_info = vmalloc(size);
2132         if (!rx_ring->buffer_info)
2133                 goto err;
2134         memset(rx_ring->buffer_info, 0, size);
2135
2136         desc_len = sizeof(union e1000_adv_rx_desc);
2137
2138         /* Round up to nearest 4K */
2139         rx_ring->size = rx_ring->count * desc_len;
2140         rx_ring->size = ALIGN(rx_ring->size, 4096);
2141
2142         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2143                                              &rx_ring->dma);
2144
2145         if (!rx_ring->desc)
2146                 goto err;
2147
2148         rx_ring->next_to_clean = 0;
2149         rx_ring->next_to_use = 0;
2150
2151         return 0;
2152
2153 err:
2154         vfree(rx_ring->buffer_info);
2155         rx_ring->buffer_info = NULL;
2156         dev_err(&pdev->dev, "Unable to allocate memory for "
2157                 "the receive descriptor ring\n");
2158         return -ENOMEM;
2159 }
2160
2161 /**
2162  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2163  *                                (Descriptors) for all queues
2164  * @adapter: board private structure
2165  *
2166  * Return 0 on success, negative on failure
2167  **/
2168 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2169 {
2170         struct pci_dev *pdev = adapter->pdev;
2171         int i, err = 0;
2172
2173         for (i = 0; i < adapter->num_rx_queues; i++) {
2174                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2175                 if (err) {
2176                         dev_err(&pdev->dev,
2177                                 "Allocation for Rx Queue %u failed\n", i);
2178                         for (i--; i >= 0; i--)
2179                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2180                         break;
2181                 }
2182         }
2183
2184         return err;
2185 }
2186
2187 /**
2188  * igb_setup_mrqc - configure the multiple receive queue control registers
2189  * @adapter: Board private structure
2190  **/
2191 static void igb_setup_mrqc(struct igb_adapter *adapter)
2192 {
2193         struct e1000_hw *hw = &adapter->hw;
2194         u32 mrqc, rxcsum;
2195         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2196         union e1000_reta {
2197                 u32 dword;
2198                 u8  bytes[4];
2199         } reta;
2200         static const u8 rsshash[40] = {
2201                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2202                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2203                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2204                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2205
2206         /* Fill out hash function seeds */
2207         for (j = 0; j < 10; j++) {
2208                 u32 rsskey = rsshash[(j * 4)];
2209                 rsskey |= rsshash[(j * 4) + 1] << 8;
2210                 rsskey |= rsshash[(j * 4) + 2] << 16;
2211                 rsskey |= rsshash[(j * 4) + 3] << 24;
2212                 array_wr32(E1000_RSSRK(0), j, rsskey);
2213         }
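        /* each RSSRK register holds four key bytes in little-endian order,
         * e.g. RSSRK(0) ends up as 0xda565a6d from the first four bytes above */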
2214
2215         num_rx_queues = adapter->rss_queues;
2216
2217         if (adapter->vfs_allocated_count) {
2218                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2219                 switch (hw->mac.type) {
2220                 case e1000_82576:
2221                         shift = 3;
2222                         num_rx_queues = 2;
2223                         break;
2224                 case e1000_82575:
2225                         shift = 2;
2226                         shift2 = 6;
2227                 default:
2228                         break;
2229                 }
2230         } else {
2231                 if (hw->mac.type == e1000_82575)
2232                         shift = 6;
2233         }
2234
2235         for (j = 0; j < (32 * 4); j++) {
2236                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2237                 if (shift2)
2238                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2239                 if ((j & 3) == 3)
2240                         wr32(E1000_RETA(j >> 2), reta.dword);
2241         }
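        /* e.g. with no VFs on 82576 (shift = shift2 = 0) and four RSS queues,
         * the 128 RETA bytes repeat the pattern 0,1,2,3 so hash results are
         * spread evenly across the queues */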
2242
2243         /*
2244          * Disable raw packet checksumming so that RSS hash is placed in
2245          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2246          * offloads as they are enabled by default
2247          */
2248         rxcsum = rd32(E1000_RXCSUM);
2249         rxcsum |= E1000_RXCSUM_PCSD;
2250
2251         if (adapter->hw.mac.type >= e1000_82576)
2252                 /* Enable Receive Checksum Offload for SCTP */
2253                 rxcsum |= E1000_RXCSUM_CRCOFL;
2254
2255         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2256         wr32(E1000_RXCSUM, rxcsum);
2257
2258         /* If VMDq is enabled then we set the appropriate mode for that, else
2259          * we default to RSS so that an RSS hash is calculated per packet even
2260          * if we are only using one queue */
2261         if (adapter->vfs_allocated_count) {
2262                 if (hw->mac.type > e1000_82575) {
2263                         /* Set the default pool for the PF's first queue */
2264                         u32 vtctl = rd32(E1000_VT_CTL);
2265                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2266                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2267                         vtctl |= adapter->vfs_allocated_count <<
2268                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2269                         wr32(E1000_VT_CTL, vtctl);
2270                 }
2271                 if (adapter->rss_queues > 1)
2272                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2273                 else
2274                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2275         } else {
2276                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2277         }
2278         igb_vmm_control(adapter);
2279
2280         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2281                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2282         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2283                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2284         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2285                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2286         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2287                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2288
2289         wr32(E1000_MRQC, mrqc);
2290 }
2291
2292 /**
2293  * igb_setup_rctl - configure the receive control registers
2294  * @adapter: Board private structure
2295  **/
2296 void igb_setup_rctl(struct igb_adapter *adapter)
2297 {
2298         struct e1000_hw *hw = &adapter->hw;
2299         u32 rctl;
2300
2301         rctl = rd32(E1000_RCTL);
2302
2303         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2304         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2305
2306         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2307                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2308
2309         /*
2310          * enable stripping of CRC. It's unlikely this will break BMC
2311          * redirection as it did with e1000. Newer features require
2312          * that the HW strips the CRC.
2313          */
2314         rctl |= E1000_RCTL_SECRC;
2315
2316         /* disable store bad packets and clear size bits. */
2317         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2318
2319         /* enable LPE to prevent packets larger than max_frame_size */
2320         rctl |= E1000_RCTL_LPE;
2321
2322         /* disable queue 0 to prevent tail write w/o re-config */
2323         wr32(E1000_RXDCTL(0), 0);
2324
2325         /* Attention!!!  For SR-IOV PF driver operations you must enable
2326          * queue drop for all VF and PF queues to prevent head of line blocking
2327          * if an untrusted VF does not provide descriptors to hardware.
2328          */
2329         if (adapter->vfs_allocated_count) {
2330                 /* set all queue drop enable bits */
2331                 wr32(E1000_QDE, ALL_QUEUES);
2332         }
2333
2334         wr32(E1000_RCTL, rctl);
2335 }
2336
2337 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2338                                    int vfn)
2339 {
2340         struct e1000_hw *hw = &adapter->hw;
2341         u32 vmolr;
2342
2343         /* if it isn't the PF, check whether the VF has VLANs enabled and,
2344          * if so, increase the size to make room for a VLAN tag */
2345         if (vfn < adapter->vfs_allocated_count &&
2346             adapter->vf_data[vfn].vlans_enabled)
2347                 size += VLAN_TAG_SIZE;
2348
2349         vmolr = rd32(E1000_VMOLR(vfn));
2350         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2351         vmolr |= size | E1000_VMOLR_LPE;
2352         wr32(E1000_VMOLR(vfn), vmolr);
2353
2354         return 0;
2355 }
2356
2357 /**
2358  * igb_rlpml_set - set maximum receive packet size
2359  * @adapter: board private structure
2360  *
2361  * Configure maximum receivable packet size.
2362  **/
2363 static void igb_rlpml_set(struct igb_adapter *adapter)
2364 {
2365         u32 max_frame_size = adapter->max_frame_size;
2366         struct e1000_hw *hw = &adapter->hw;
2367         u16 pf_id = adapter->vfs_allocated_count;
2368
2369         if (adapter->vlgrp)
2370                 max_frame_size += VLAN_TAG_SIZE;
2371
2372         /* if vfs are enabled we set RLPML to the largest possible request
2373          * size and set the VMOLR RLPML to the size we need */
2374         if (pf_id) {
2375                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2376                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2377         }
2378
2379         wr32(E1000_RLPML, max_frame_size);
2380 }
2381
2382 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2383 {
2384         struct e1000_hw *hw = &adapter->hw;
2385         u32 vmolr;
2386
2387         /*
2388          * This register exists only on 82576 and newer, so on older MACs
2389          * exit and do nothing
2390          */
2391         if (hw->mac.type < e1000_82576)
2392                 return;
2393
2394         vmolr = rd32(E1000_VMOLR(vfn));
2395         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2396                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2397
2398         /* clear all bits that might not be set */
2399         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2400
2401         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2402                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2403         /*
2404          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2405          * multicast packets
2406          */
2407         if (vfn <= adapter->vfs_allocated_count)
2408                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2409
2410         wr32(E1000_VMOLR(vfn), vmolr);
2411 }
2412
2413 /**
2414  * igb_configure_rx_ring - Configure a receive ring after Reset
2415  * @adapter: board private structure
2416  * @ring: receive ring to be configured
2417  *
2418  * Configure the Rx unit of the MAC after a reset.
2419  **/
2420 void igb_configure_rx_ring(struct igb_adapter *adapter,
2421                            struct igb_ring *ring)
2422 {
2423         struct e1000_hw *hw = &adapter->hw;
2424         u64 rdba = ring->dma;
2425         int reg_idx = ring->reg_idx;
2426         u32 srrctl, rxdctl;
2427
2428         /* disable the queue */
2429         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2430         wr32(E1000_RXDCTL(reg_idx),
2431                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2432
2433         /* Set DMA base address registers */
2434         wr32(E1000_RDBAL(reg_idx),
2435              rdba & 0x00000000ffffffffULL);
2436         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2437         wr32(E1000_RDLEN(reg_idx),
2438                        ring->count * sizeof(union e1000_adv_rx_desc));
2439
2440         /* initialize head and tail */
2441         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2442         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2443         writel(0, ring->head);
2444         writel(0, ring->tail);
2445
2446         /* set descriptor configuration */
2447         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2448                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2449                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2450 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2451                 srrctl |= IGB_RXBUFFER_16384 >>
2452                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2453 #else
2454                 srrctl |= (PAGE_SIZE / 2) >>
2455                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2456 #endif
2457                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2458         } else {
2459                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2460                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2461                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2462         }
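        /* e.g. a 2048 byte buffer lands in the else branch above and programs
         * BSIZEPKT as 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT (1 KB units), i.e. 2,
         * with one-buffer advanced descriptors */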
2463
2464         wr32(E1000_SRRCTL(reg_idx), srrctl);
2465
2466         /* set filtering for VMDQ pools */
2467         igb_set_vmolr(adapter, reg_idx & 0x7);
2468
2469         /* enable receive descriptor fetching */
2470         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2471         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2472         rxdctl &= 0xFFF00000;
2473         rxdctl |= IGB_RX_PTHRESH;
2474         rxdctl |= IGB_RX_HTHRESH << 8;
2475         rxdctl |= IGB_RX_WTHRESH << 16;
2476         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2477 }
2478
2479 /**
2480  * igb_configure_rx - Configure receive Unit after Reset
2481  * @adapter: board private structure
2482  *
2483  * Configure the Rx unit of the MAC after a reset.
2484  **/
2485 static void igb_configure_rx(struct igb_adapter *adapter)
2486 {
2487         int i;
2488
2489         /* set UTA to appropriate mode */
2490         igb_set_uta(adapter);
2491
2492         /* set the correct pool for the PF default MAC address in entry 0 */
2493         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2494                          adapter->vfs_allocated_count);
2495
2496         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2497          * the Base and Length of the Rx Descriptor Ring */
2498         for (i = 0; i < adapter->num_rx_queues; i++)
2499                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2500 }
2501
2502 /**
2503  * igb_free_tx_resources - Free Tx Resources per Queue
2504  * @tx_ring: Tx descriptor ring for a specific queue
2505  *
2506  * Free all transmit software resources
2507  **/
2508 void igb_free_tx_resources(struct igb_ring *tx_ring)
2509 {
2510         igb_clean_tx_ring(tx_ring);
2511
2512         vfree(tx_ring->buffer_info);
2513         tx_ring->buffer_info = NULL;
2514
2515         /* if not set, then don't free */
2516         if (!tx_ring->desc)
2517                 return;
2518
2519         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2520                             tx_ring->desc, tx_ring->dma);
2521
2522         tx_ring->desc = NULL;
2523 }
2524
2525 /**
2526  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2527  * @adapter: board private structure
2528  *
2529  * Free all transmit software resources
2530  **/
2531 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2532 {
2533         int i;
2534
2535         for (i = 0; i < adapter->num_tx_queues; i++)
2536                 igb_free_tx_resources(&adapter->tx_ring[i]);
2537 }
2538
2539 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2540                                     struct igb_buffer *buffer_info)
2541 {
2542         buffer_info->dma = 0;
2543         if (buffer_info->skb) {
2544                 skb_dma_unmap(&tx_ring->pdev->dev,
2545                               buffer_info->skb,
2546                               DMA_TO_DEVICE);
2547                 dev_kfree_skb_any(buffer_info->skb);
2548                 buffer_info->skb = NULL;
2549         }
2550         buffer_info->time_stamp = 0;
2551         /* buffer_info must be completely set up in the transmit path */
2552 }
2553
2554 /**
2555  * igb_clean_tx_ring - Free Tx Buffers
2556  * @tx_ring: ring to be cleaned
2557  **/
2558 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2559 {
2560         struct igb_buffer *buffer_info;
2561         unsigned long size;
2562         unsigned int i;
2563
2564         if (!tx_ring->buffer_info)
2565                 return;
2566         /* Free all the Tx ring sk_buffs */
2567
2568         for (i = 0; i < tx_ring->count; i++) {
2569                 buffer_info = &tx_ring->buffer_info[i];
2570                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2571         }
2572
2573         size = sizeof(struct igb_buffer) * tx_ring->count;
2574         memset(tx_ring->buffer_info, 0, size);
2575
2576         /* Zero out the descriptor ring */
2577         memset(tx_ring->desc, 0, tx_ring->size);
2578
2579         tx_ring->next_to_use = 0;
2580         tx_ring->next_to_clean = 0;
2581 }
2582
2583 /**
2584  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2585  * @adapter: board private structure
2586  **/
2587 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2588 {
2589         int i;
2590
2591         for (i = 0; i < adapter->num_tx_queues; i++)
2592                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2593 }
2594
2595 /**
2596  * igb_free_rx_resources - Free Rx Resources
2597  * @rx_ring: ring to clean the resources from
2598  *
2599  * Free all receive software resources
2600  **/
2601 void igb_free_rx_resources(struct igb_ring *rx_ring)
2602 {
2603         igb_clean_rx_ring(rx_ring);
2604
2605         vfree(rx_ring->buffer_info);
2606         rx_ring->buffer_info = NULL;
2607
2608         /* if not set, then don't free */
2609         if (!rx_ring->desc)
2610                 return;
2611
2612         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2613                             rx_ring->desc, rx_ring->dma);
2614
2615         rx_ring->desc = NULL;
2616 }
2617
2618 /**
2619  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2620  * @adapter: board private structure
2621  *
2622  * Free all receive software resources
2623  **/
2624 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2625 {
2626         int i;
2627
2628         for (i = 0; i < adapter->num_rx_queues; i++)
2629                 igb_free_rx_resources(&adapter->rx_ring[i]);
2630 }
2631
2632 /**
2633  * igb_clean_rx_ring - Free Rx Buffers per Queue
2634  * @rx_ring: ring to free buffers from
2635  **/
2636 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2637 {
2638         struct igb_buffer *buffer_info;
2639         unsigned long size;
2640         unsigned int i;
2641
2642         if (!rx_ring->buffer_info)
2643                 return;
2644
2645         /* Free all the Rx ring sk_buffs */
2646         for (i = 0; i < rx_ring->count; i++) {
2647                 buffer_info = &rx_ring->buffer_info[i];
2648                 if (buffer_info->dma) {
2649                         pci_unmap_single(rx_ring->pdev,
2650                                          buffer_info->dma,
2651                                          rx_ring->rx_buffer_len,
2652                                          PCI_DMA_FROMDEVICE);
2653                         buffer_info->dma = 0;
2654                 }
2655
2656                 if (buffer_info->skb) {
2657                         dev_kfree_skb(buffer_info->skb);
2658                         buffer_info->skb = NULL;
2659                 }
2660                 if (buffer_info->page_dma) {
2661                         pci_unmap_page(rx_ring->pdev,
2662                                        buffer_info->page_dma,
2663                                        PAGE_SIZE / 2,
2664                                        PCI_DMA_FROMDEVICE);
2665                         buffer_info->page_dma = 0;
2666                 }
2667                 if (buffer_info->page) {
2668                         put_page(buffer_info->page);
2669                         buffer_info->page = NULL;
2670                         buffer_info->page_offset = 0;
2671                 }
2672         }
2673
2674         size = sizeof(struct igb_buffer) * rx_ring->count;
2675         memset(rx_ring->buffer_info, 0, size);
2676
2677         /* Zero out the descriptor ring */
2678         memset(rx_ring->desc, 0, rx_ring->size);
2679
2680         rx_ring->next_to_clean = 0;
2681         rx_ring->next_to_use = 0;
2682 }
2683
2684 /**
2685  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2686  * @adapter: board private structure
2687  **/
2688 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2689 {
2690         int i;
2691
2692         for (i = 0; i < adapter->num_rx_queues; i++)
2693                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2694 }
2695
2696 /**
2697  * igb_set_mac - Change the Ethernet Address of the NIC
2698  * @netdev: network interface device structure
2699  * @p: pointer to an address structure
2700  *
2701  * Returns 0 on success, negative on failure
2702  **/
2703 static int igb_set_mac(struct net_device *netdev, void *p)
2704 {
2705         struct igb_adapter *adapter = netdev_priv(netdev);
2706         struct e1000_hw *hw = &adapter->hw;
2707         struct sockaddr *addr = p;
2708
2709         if (!is_valid_ether_addr(addr->sa_data))
2710                 return -EADDRNOTAVAIL;
2711
2712         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2713         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2714
2715         /* set the correct pool for the new PF MAC address in entry 0 */
2716         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2717                          adapter->vfs_allocated_count);
2718
2719         return 0;
2720 }
2721
2722 /**
2723  * igb_write_mc_addr_list - write multicast addresses to MTA
2724  * @netdev: network interface device structure
2725  *
2726  * Writes multicast address list to the MTA hash table.
2727  * Returns: -ENOMEM on failure
2728  *                0 on no addresses written
2729  *                X on writing X addresses to MTA
2730  **/
2731 static int igb_write_mc_addr_list(struct net_device *netdev)
2732 {
2733         struct igb_adapter *adapter = netdev_priv(netdev);
2734         struct e1000_hw *hw = &adapter->hw;
2735         struct dev_mc_list *mc_ptr = netdev->mc_list;
2736         u8  *mta_list;
2737         u32 vmolr = 0;
2738         int i;
2739
2740         if (!netdev->mc_count) {
2741                 /* nothing to program, so clear mc list */
2742                 igb_update_mc_addr_list(hw, NULL, 0);
2743                 igb_restore_vf_multicasts(adapter);
2744                 return 0;
2745         }
2746
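        /* 6 bytes (ETH_ALEN) per packed address, as expected by the shared code */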
2747         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2748         if (!mta_list)
2749                 return -ENOMEM;
2750
2751         /* set vmolr receive overflow multicast bit */
2752         vmolr |= E1000_VMOLR_ROMPE;
2753
2754         /* The shared function expects a packed array of only addresses. */
2755         mc_ptr = netdev->mc_list;
2756
2757         for (i = 0; i < netdev->mc_count; i++) {
2758                 if (!mc_ptr)
2759                         break;
2760                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2761                 mc_ptr = mc_ptr->next;
2762         }
2763         igb_update_mc_addr_list(hw, mta_list, i);
2764         kfree(mta_list);
2765
2766         return netdev->mc_count;
2767 }
2768
2769 /**
2770  * igb_write_uc_addr_list - write unicast addresses to RAR table
2771  * @netdev: network interface device structure
2772  *
2773  * Writes unicast address list to the RAR table.
2774  * Returns: -ENOMEM on failure/insufficient address space
2775  *                0 on no addresses written
2776  *                X on writing X addresses to the RAR table
2777  **/
2778 static int igb_write_uc_addr_list(struct net_device *netdev)
2779 {
2780         struct igb_adapter *adapter = netdev_priv(netdev);
2781         struct e1000_hw *hw = &adapter->hw;
2782         unsigned int vfn = adapter->vfs_allocated_count;
2783         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
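        /* one RAR entry is reserved per VF MAC plus entry 0 for the PF
         * default MAC, hence the vfn + 1 subtracted above */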
2784         int count = 0;
2785
2786         /* return ENOMEM indicating insufficient memory for addresses */
2787         if (netdev->uc.count > rar_entries)
2788                 return -ENOMEM;
2789
2790         if (netdev->uc.count && rar_entries) {
2791                 struct netdev_hw_addr *ha;
2792                 list_for_each_entry(ha, &netdev->uc.list, list) {
2793                         if (!rar_entries)
2794                                 break;
2795                         igb_rar_set_qsel(adapter, ha->addr,
2796                                          rar_entries--,
2797                                          vfn);
2798                         count++;
2799                 }
2800         }
2801         /* clear any remaining RAR entries in reverse order to avoid write combining */
2802         for (; rar_entries > 0 ; rar_entries--) {
2803                 wr32(E1000_RAH(rar_entries), 0);
2804                 wr32(E1000_RAL(rar_entries), 0);
2805         }
2806         wrfl();
2807
2808         return count;
2809 }
2810
2811 /**
2812  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2813  * @netdev: network interface device structure
2814  *
2815  * The set_rx_mode entry point is called whenever the unicast or multicast
2816  * address lists or the network interface flags are updated.  This routine is
2817  * responsible for configuring the hardware for proper unicast, multicast,
2818  * promiscuous mode, and all-multi behavior.
2819  **/
2820 static void igb_set_rx_mode(struct net_device *netdev)
2821 {
2822         struct igb_adapter *adapter = netdev_priv(netdev);
2823         struct e1000_hw *hw = &adapter->hw;
2824         unsigned int vfn = adapter->vfs_allocated_count;
2825         u32 rctl, vmolr = 0;
2826         int count;
2827
2828         /* Check for Promiscuous and All Multicast modes */
2829         rctl = rd32(E1000_RCTL);
2830
2831         /* clear the affected bits */
2832         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2833
2834         if (netdev->flags & IFF_PROMISC) {
2835                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2836                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2837         } else {
2838                 if (netdev->flags & IFF_ALLMULTI) {
2839                         rctl |= E1000_RCTL_MPE;
2840                         vmolr |= E1000_VMOLR_MPME;
2841                 } else {
2842                         /*
2843                          * Write addresses to the MTA; if the attempt fails
2844                          * then we should just turn on promiscuous mode so
2845                          * that we can at least receive multicast traffic
2846                          */
2847                         count = igb_write_mc_addr_list(netdev);
2848                         if (count < 0) {
2849                                 rctl |= E1000_RCTL_MPE;
2850                                 vmolr |= E1000_VMOLR_MPME;
2851                         } else if (count) {
2852                                 vmolr |= E1000_VMOLR_ROMPE;
2853                         }
2854                 }
2855                 /*
2856                  * Write addresses to available RAR registers; if there is not
2857                  * sufficient space to store all the addresses then enable
2858                  * unicast promiscuous mode
2859                  */
2860                 count = igb_write_uc_addr_list(netdev);
2861                 if (count < 0) {
2862                         rctl |= E1000_RCTL_UPE;
2863                         vmolr |= E1000_VMOLR_ROPE;
2864                 }
2865                 rctl |= E1000_RCTL_VFE;
2866         }
2867         wr32(E1000_RCTL, rctl);
2868
2869         /*
2870          * In order to support SR-IOV and eventually VMDq it is necessary to set
2871          * the VMOLR to enable the appropriate modes.  Without this workaround
2872          * we will have issues with VLAN tag stripping not being done for frames
2873          * that arrive only because we are the default pool
2874          */
2875         if (hw->mac.type < e1000_82576)
2876                 return;
2877
2878         vmolr |= rd32(E1000_VMOLR(vfn)) &
2879                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2880         wr32(E1000_VMOLR(vfn), vmolr);
2881         igb_restore_vf_multicasts(adapter);
2882 }
2883
2884 /* Need to wait a few seconds after link up to get diagnostic information from
2885  * the phy */
2886 static void igb_update_phy_info(unsigned long data)
2887 {
2888         struct igb_adapter *adapter = (struct igb_adapter *) data;
2889         igb_get_phy_info(&adapter->hw);
2890 }
2891
2892 /**
2893  * igb_has_link - check shared code for link and determine up/down
2894  * @adapter: pointer to driver private info
2895  **/
2896 static bool igb_has_link(struct igb_adapter *adapter)
2897 {
2898         struct e1000_hw *hw = &adapter->hw;
2899         bool link_active = false;
2900         s32 ret_val = 0;
2901
2902         /* get_link_status is set on LSC (link status) interrupt or
2903          * rx sequence error interrupt.  get_link_status will stay
2904          * false until the e1000_check_for_link establishes link
2905          * for copper adapters ONLY
2906          */
2907         switch (hw->phy.media_type) {
2908         case e1000_media_type_copper:
2909                 if (hw->mac.get_link_status) {
2910                         ret_val = hw->mac.ops.check_for_link(hw);
2911                         link_active = !hw->mac.get_link_status;
2912                 } else {
2913                         link_active = true;
2914                 }
2915                 break;
2916         case e1000_media_type_internal_serdes:
2917                 ret_val = hw->mac.ops.check_for_link(hw);
2918                 link_active = hw->mac.serdes_has_link;
2919                 break;
2920         default:
2921         case e1000_media_type_unknown:
2922                 break;
2923         }
2924
2925         return link_active;
2926 }
2927
2928 /**
2929  * igb_watchdog - Timer Call-back
2930  * @data: pointer to adapter cast into an unsigned long
2931  **/
2932 static void igb_watchdog(unsigned long data)
2933 {
2934         struct igb_adapter *adapter = (struct igb_adapter *)data;
2935         /* Do the rest outside of interrupt context */
2936         schedule_work(&adapter->watchdog_task);
2937 }
2938
2939 static void igb_watchdog_task(struct work_struct *work)
2940 {
2941         struct igb_adapter *adapter = container_of(work,
2942                                                    struct igb_adapter,
2943                                                    watchdog_task);
2944         struct e1000_hw *hw = &adapter->hw;
2945         struct net_device *netdev = adapter->netdev;
2946         struct igb_ring *tx_ring = adapter->tx_ring;
2947         u32 link;
2948         int i;
2949
2950         link = igb_has_link(adapter);
2951         if (link) {
2952                 if (!netif_carrier_ok(netdev)) {
2953                         u32 ctrl;
2954                         hw->mac.ops.get_speed_and_duplex(hw,
2955                                                          &adapter->link_speed,
2956                                                          &adapter->link_duplex);
2957
2958                         ctrl = rd32(E1000_CTRL);
2959                         /* Link status message must follow this format */
2960                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2961                                  "Flow Control: %s\n",
2962                                netdev->name,
2963                                adapter->link_speed,
2964                                adapter->link_duplex == FULL_DUPLEX ?
2965                                  "Full Duplex" : "Half Duplex",
2966                                ((ctrl & E1000_CTRL_TFCE) &&
2967                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
2968                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
2969                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
2970
2971                         /* tweak tx_queue_len according to speed/duplex and
2972                          * adjust the timeout factor */
2973                         netdev->tx_queue_len = adapter->tx_queue_len;
2974                         adapter->tx_timeout_factor = 1;
2975                         switch (adapter->link_speed) {
2976                         case SPEED_10:
2977                                 netdev->tx_queue_len = 10;
2978                                 adapter->tx_timeout_factor = 14;
2979                                 break;
2980                         case SPEED_100:
2981                                 netdev->tx_queue_len = 100;
2982                                 /* maybe add some timeout factor ? */
2983                                 break;
2984                         }
2985
2986                         netif_carrier_on(netdev);
2987
2988                         igb_ping_all_vfs(adapter);
2989
2990                         /* link state has changed, schedule phy info update */
2991                         if (!test_bit(__IGB_DOWN, &adapter->state))
2992                                 mod_timer(&adapter->phy_info_timer,
2993                                           round_jiffies(jiffies + 2 * HZ));
2994                 }
2995         } else {
2996                 if (netif_carrier_ok(netdev)) {
2997                         adapter->link_speed = 0;
2998                         adapter->link_duplex = 0;
2999                         /* Link status message must follow this format */
3000                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3001                                netdev->name);
3002                         netif_carrier_off(netdev);
3003
3004                         igb_ping_all_vfs(adapter);
3005
3006                         /* link state has changed, schedule phy info update */
3007                         if (!test_bit(__IGB_DOWN, &adapter->state))
3008                                 mod_timer(&adapter->phy_info_timer,
3009                                           round_jiffies(jiffies + 2 * HZ));
3010                 }
3011         }
3012
3013         igb_update_stats(adapter);
3014         igb_update_adaptive(hw);
3015
3016         if (!netif_carrier_ok(netdev)) {
3017                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3018                         /* We've lost link, so the controller stops DMA,
3019                          * but we've got queued Tx work that's never going
3020                          * to get done, so reset controller to flush Tx.
3021                          * (Do the reset outside of interrupt context). */
3022                         adapter->tx_timeout_count++;
3023                         schedule_work(&adapter->reset_task);
3024                         /* return immediately since reset is imminent */
3025                         return;
3026                 }
3027         }
3028
3029         /* Force detection of hung controller every watchdog period */
3030         for (i = 0; i < adapter->num_tx_queues; i++)
3031                 adapter->tx_ring[i].detect_tx_hung = true;
3032
3033         /* Cause software interrupt to ensure rx ring is cleaned */
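             /* In MSI-X mode every queue vector is kicked individually via
              * EICS; in legacy/MSI mode a single RXDMT0 (rx descriptor
              * minimum threshold) cause is used instead. */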
3034         if (adapter->msix_entries) {
3035                 u32 eics = 0;
3036                 for (i = 0; i < adapter->num_q_vectors; i++) {
3037                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3038                         eics |= q_vector->eims_value;
3039                 }
3040                 wr32(E1000_EICS, eics);
3041         } else {
3042                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3043         }
3044
3045         /* Reset the timer */
3046         if (!test_bit(__IGB_DOWN, &adapter->state))
3047                 mod_timer(&adapter->watchdog_timer,
3048                           round_jiffies(jiffies + 2 * HZ));
3049 }
3050
3051 enum latency_range {
3052         lowest_latency = 0,
3053         low_latency = 1,
3054         bulk_latency = 2,
3055         latency_invalid = 255
3056 };
3057
3058 /**
3059  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3060  *
3061  *      Stores a new ITR value based strictly on packet size.  This
3062  *      algorithm is less sophisticated than that used in igb_update_itr,
3063  *      due to the difficulty of synchronizing statistics across multiple
3064  *      receive rings.  The divisors and thresholds used by this function
3065  *      were determined based on theoretical maximum wire speed and testing
3066  *      data, in order to minimize response time while increasing bulk
3067  *      throughput.
3068  *      This functionality is controlled by the InterruptThrottleRate module
3069  *      parameter (see igb_param.c)
3070  *      NOTE:  This function is called only when operating in a multiqueue
3071  *             receive environment.
3072  * @q_vector: pointer to q_vector
3073  **/
3074 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3075 {
3076         int new_val = q_vector->itr_val;
3077         int avg_wire_size = 0;
3078         struct igb_adapter *adapter = q_vector->adapter;
3079
3080         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3081          * ints/sec - ITR timer value of 976 (~250 usec between interrupts).
3082          */
3083         if (adapter->link_speed != SPEED_1000) {
3084                 new_val = 976;
3085                 goto set_itr_val;
3086         }
3087
3088         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3089                 struct igb_ring *ring = q_vector->rx_ring;
3090                 avg_wire_size = ring->total_bytes / ring->total_packets;
3091         }
3092
3093         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3094                 struct igb_ring *ring = q_vector->tx_ring;
3095                 avg_wire_size = max_t(u32, avg_wire_size,
3096                                       (ring->total_bytes /
3097                                        ring->total_packets));
3098         }
3099
3100         /* if avg_wire_size isn't set no work was done */
3101         if (!avg_wire_size)
3102                 goto clear_counts;
3103
3104         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3105         avg_wire_size += 24;
3106
3107         /* Don't starve jumbo frames */
3108         avg_wire_size = min(avg_wire_size, 3000);
3109
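             /*
              * The divisors below turn bytes/packet into an EITR interval in
              * ~256 ns units, e.g. an average of 450 bytes on the wire gives
              * 450 / 3 = 150, i.e. roughly 38 us between interrupts (about
              * 26,000 ints/sec).
              */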
3110         /* Give a little boost to mid-size frames */
3111         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3112                 new_val = avg_wire_size / 3;
3113         else
3114                 new_val = avg_wire_size / 2;
3115
3116 set_itr_val:
3117         if (new_val != q_vector->itr_val) {
3118                 q_vector->itr_val = new_val;
3119                 q_vector->set_itr = 1;
3120         }
3121 clear_counts:
3122         if (q_vector->rx_ring) {
3123                 q_vector->rx_ring->total_bytes = 0;
3124                 q_vector->rx_ring->total_packets = 0;
3125         }
3126         if (q_vector->tx_ring) {
3127                 q_vector->tx_ring->total_bytes = 0;
3128                 q_vector->tx_ring->total_packets = 0;
3129         }
3130 }
3131
3132 /**
3133  * igb_update_itr - update the dynamic ITR value based on statistics
3134  *      Stores a new ITR value based on packets and byte
3135  *      counts during the last interrupt.  The advantage of per interrupt
3136  *      computation is faster updates and more accurate ITR for the current
3137  *      traffic pattern.  Constants in this function were computed
3138  *      based on theoretical maximum wire speed and thresholds were set based
3139  *      on testing data as well as attempting to minimize response time
3140  *      while increasing bulk throughput.
3141  *      This functionality is controlled by the InterruptThrottleRate module
3142  *      parameter (see igb_param.c)
3143  *      NOTE:  These calculations are only valid when operating in a single-
3144  *             queue environment.
3145  * @adapter: pointer to adapter
3146  * @itr_setting: current q_vector->itr_val
3147  * @packets: the number of packets during this measurement interval
3148  * @bytes: the number of bytes during this measurement interval
3149  **/
3150 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3151                                    int packets, int bytes)
3152 {
3153         unsigned int retval = itr_setting;
3154
3155         if (packets == 0)
3156                 goto update_itr_done;
3157
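             /*
              * Example: while in low_latency, an interval of 40 packets
              * totalling 24,000 bytes (600 bytes/packet) drops to
              * lowest_latency, whereas 40 packets totalling 60,000 bytes
              * (1,500 bytes/packet) moves up to bulk_latency.
              */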
3158         switch (itr_setting) {
3159         case lowest_latency:
3160                 /* handle TSO and jumbo frames */
3161                 if (bytes/packets > 8000)
3162                         retval = bulk_latency;
3163                 else if ((packets < 5) && (bytes > 512))
3164                         retval = low_latency;
3165                 break;
3166         case low_latency:  /* 50 usec aka 20000 ints/s */
3167                 if (bytes > 10000) {
3168                         /* this if handles the TSO accounting */
3169                         if (bytes/packets > 8000) {
3170                                 retval = bulk_latency;
3171                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3172                                 retval = bulk_latency;
3173                         } else if (packets > 35) {
3174                                 retval = lowest_latency;
3175                         }
3176                 } else if (bytes/packets > 2000) {
3177                         retval = bulk_latency;
3178                 } else if (packets <= 2 && bytes < 512) {
3179                         retval = lowest_latency;
3180                 }
3181                 break;
3182         case bulk_latency: /* 250 usec aka 4000 ints/s */
3183                 if (bytes > 25000) {
3184                         if (packets > 35)
3185                                 retval = low_latency;
3186                 } else if (bytes < 1500) {
3187                         retval = low_latency;
3188                 }
3189                 break;
3190         }
3191
3192 update_itr_done:
3193         return retval;
3194 }
3195
3196 static void igb_set_itr(struct igb_adapter *adapter)
3197 {
3198         struct igb_q_vector *q_vector = adapter->q_vector[0];
3199         u16 current_itr;
3200         u32 new_itr = q_vector->itr_val;
3201
3202         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3203         if (adapter->link_speed != SPEED_1000) {
3204                 current_itr = 0;
3205                 new_itr = 4000;
3206                 goto set_itr_now;
3207         }
3208
3209         adapter->rx_itr = igb_update_itr(adapter,
3210                                     adapter->rx_itr,
3211                                     adapter->rx_ring->total_packets,
3212                                     adapter->rx_ring->total_bytes);
3213
3214         adapter->tx_itr = igb_update_itr(adapter,
3215                                     adapter->tx_itr,
3216                                     adapter->tx_ring->total_packets,
3217                                     adapter->tx_ring->total_bytes);
3218         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3219
3220         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3221         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3222                 current_itr = low_latency;
3223
3224         switch (current_itr) {
3225         /* counts and packets in update_itr are dependent on these numbers */
3226         case lowest_latency:
3227                 new_itr = 56;  /* aka 70,000 ints/sec */
3228                 break;
3229         case low_latency:
3230                 new_itr = 196; /* aka 20,000 ints/sec */
3231                 break;
3232         case bulk_latency:
3233                 new_itr = 980; /* aka 4,000 ints/sec */
3234                 break;
3235         default:
3236                 break;
3237         }
3238
3239 set_itr_now:
3240         adapter->rx_ring->total_bytes = 0;
3241         adapter->rx_ring->total_packets = 0;
3242         adapter->tx_ring->total_bytes = 0;
3243         adapter->tx_ring->total_packets = 0;
3244
3245         if (new_itr != q_vector->itr_val) {
3246                 /* this attempts to bias the interrupt rate towards Bulk
3247                  * by adding intermediate steps when interrupt rate is
3248                  * increasing */
3249                 new_itr = new_itr > q_vector->itr_val ?
3250                              max((new_itr * q_vector->itr_val) /
3251                                  (new_itr + (q_vector->itr_val >> 2)),
3252                                  new_itr) :
3253                              new_itr;
3254                 /* Don't write the value here; it resets the adapter's
3255                  * internal timer, and causes us to delay far longer than
3256                  * we should between interrupts.  Instead, we write the ITR
3257                  * value at the beginning of the next interrupt so the timing
3258                  * ends up being correct.
3259                  */
3260                 q_vector->itr_val = new_itr;
3261                 q_vector->set_itr = 1;
3262         }
3263
3264         return;
3265 }
3266
3267 #define IGB_TX_FLAGS_CSUM               0x00000001
3268 #define IGB_TX_FLAGS_VLAN               0x00000002
3269 #define IGB_TX_FLAGS_TSO                0x00000004
3270 #define IGB_TX_FLAGS_IPV4               0x00000008
3271 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3272 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3273 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3274
3275 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3276                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3277 {
3278         struct e1000_adv_tx_context_desc *context_desc;
3279         unsigned int i;
3280         int err;
3281         struct igb_buffer *buffer_info;
3282         u32 info = 0, tu_cmd = 0;
3283         u32 mss_l4len_idx, l4len;
3284         *hdr_len = 0;
3285
3286         if (skb_header_cloned(skb)) {
3287                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3288                 if (err)
3289                         return err;
3290         }
3291
3292         l4len = tcp_hdrlen(skb);
3293         *hdr_len += l4len;
3294
3295         if (skb->protocol == htons(ETH_P_IP)) {
3296                 struct iphdr *iph = ip_hdr(skb);
3297                 iph->tot_len = 0;
3298                 iph->check = 0;
3299                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3300                                                          iph->daddr, 0,
3301                                                          IPPROTO_TCP,
3302                                                          0);
3303         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3304                 ipv6_hdr(skb)->payload_len = 0;
3305                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3306                                                        &ipv6_hdr(skb)->daddr,
3307                                                        0, IPPROTO_TCP, 0);
3308         }
3309
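             /* hdr_len ends up holding the full L2 + L3 + L4 header length
              * (l4len above plus the MAC and IP header lengths added below);
              * igb_tx_queue_adv() subtracts it from paylen to get the TSO
              * payload size. */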
3310         i = tx_ring->next_to_use;
3311
3312         buffer_info = &tx_ring->buffer_info[i];
3313         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3314         /* VLAN MACLEN IPLEN */
3315         if (tx_flags & IGB_TX_FLAGS_VLAN)
3316                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3317         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3318         *hdr_len += skb_network_offset(skb);
3319         info |= skb_network_header_len(skb);
3320         *hdr_len += skb_network_header_len(skb);
3321         context_desc->vlan_macip_lens = cpu_to_le32(info);
3322
3323         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3324         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3325
3326         if (skb->protocol == htons(ETH_P_IP))
3327                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3328         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3329
3330         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3331
3332         /* MSS L4LEN IDX */
3333         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3334         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3335
3336         /* For 82575, context index must be unique per ring. */
3337         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3338                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3339
3340         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3341         context_desc->seqnum_seed = 0;
3342
3343         buffer_info->time_stamp = jiffies;
3344         buffer_info->next_to_watch = i;
3345         buffer_info->dma = 0;
3346         i++;
3347         if (i == tx_ring->count)
3348                 i = 0;
3349
3350         tx_ring->next_to_use = i;
3351
3352         return true;
3353 }
3354
3355 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3356                                    struct sk_buff *skb, u32 tx_flags)
3357 {
3358         struct e1000_adv_tx_context_desc *context_desc;
3359         struct pci_dev *pdev = tx_ring->pdev;
3360         struct igb_buffer *buffer_info;
3361         u32 info = 0, tu_cmd = 0;
3362         unsigned int i;
3363
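             /* A context descriptor is only needed when hardware checksum
              * insertion (CHECKSUM_PARTIAL) or VLAN tag insertion is
              * requested; otherwise nothing is written and false is
              * returned. */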
3364         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3365             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3366                 i = tx_ring->next_to_use;
3367                 buffer_info = &tx_ring->buffer_info[i];
3368                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3369
3370                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3371                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3372
3373                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3374                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3375                         info |= skb_network_header_len(skb);
3376
3377                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3378
3379                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3380
3381                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3382                         __be16 protocol;
3383
3384                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3385                                 const struct vlan_ethhdr *vhdr =
3386                                           (const struct vlan_ethhdr*)skb->data;
3387
3388                                 protocol = vhdr->h_vlan_encapsulated_proto;
3389                         } else {
3390                                 protocol = skb->protocol;
3391                         }
3392
3393                         switch (protocol) {
3394                         case cpu_to_be16(ETH_P_IP):
3395                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3396                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3397                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3398                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3399                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3400                                 break;
3401                         case cpu_to_be16(ETH_P_IPV6):
3402                                 /* XXX what about other V6 headers?? */
3403                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3404                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3405                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3406                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3407                                 break;
3408                         default:
3409                                 if (unlikely(net_ratelimit()))
3410                                         dev_warn(&pdev->dev,
3411                                             "partial checksum but proto=%x!\n",
3412                                             skb->protocol);
3413                                 break;
3414                         }
3415                 }
3416
3417                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3418                 context_desc->seqnum_seed = 0;
3419                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3420                         context_desc->mss_l4len_idx =
3421                                 cpu_to_le32(tx_ring->reg_idx << 4);
3422
3423                 buffer_info->time_stamp = jiffies;
3424                 buffer_info->next_to_watch = i;
3425                 buffer_info->dma = 0;
3426
3427                 i++;
3428                 if (i == tx_ring->count)
3429                         i = 0;
3430                 tx_ring->next_to_use = i;
3431
3432                 return true;
3433         }
3434         return false;
3435 }
3436
3437 #define IGB_MAX_TXD_PWR 16
3438 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3439
3440 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3441                                  unsigned int first)
3442 {
3443         struct igb_buffer *buffer_info;
3444         struct pci_dev *pdev = tx_ring->pdev;
3445         unsigned int len = skb_headlen(skb);
3446         unsigned int count = 0, i;
3447         unsigned int f;
3448         dma_addr_t *map;
3449
3450         i = tx_ring->next_to_use;
3451
3452         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3453                 dev_err(&pdev->dev, "TX DMA map failed\n");
3454                 return 0;
3455         }
3456
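             /* skb_dma_map() above mapped the linear data (dma_head) and
              * every paged fragment (dma_maps[]); the code below only records
              * those addresses in the ring's buffer_info entries. */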
3457         map = skb_shinfo(skb)->dma_maps;
3458
3459         buffer_info = &tx_ring->buffer_info[i];
3460         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3461         buffer_info->length = len;
3462         /* set time_stamp *before* dma to help avoid a possible race */
3463         buffer_info->time_stamp = jiffies;
3464         buffer_info->next_to_watch = i;
3465         buffer_info->dma = skb_shinfo(skb)->dma_head;
3466
3467         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3468                 struct skb_frag_struct *frag;
3469
3470                 i++;
3471                 if (i == tx_ring->count)
3472                         i = 0;
3473
3474                 frag = &skb_shinfo(skb)->frags[f];
3475                 len = frag->size;
3476
3477                 buffer_info = &tx_ring->buffer_info[i];
3478                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3479                 buffer_info->length = len;
3480                 buffer_info->time_stamp = jiffies;
3481                 buffer_info->next_to_watch = i;
3482                 buffer_info->dma = map[count];
3483                 count++;
3484         }
3485
3486         tx_ring->buffer_info[i].skb = skb;
3487         tx_ring->buffer_info[first].next_to_watch = i;
3488
3489         return ++count;
3490 }
3491
3492 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3493                                     int tx_flags, int count, u32 paylen,
3494                                     u8 hdr_len)
3495 {
3496         union e1000_adv_tx_desc *tx_desc;
3497         struct igb_buffer *buffer_info;
3498         u32 olinfo_status = 0, cmd_type_len;
3499         unsigned int i = tx_ring->next_to_use;
3500
3501         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3502                         E1000_ADVTXD_DCMD_DEXT);
3503
3504         if (tx_flags & IGB_TX_FLAGS_VLAN)
3505                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3506
3507         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3508                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3509
3510         if (tx_flags & IGB_TX_FLAGS_TSO) {
3511                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3512
3513                 /* insert tcp checksum */
3514                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3515
3516                 /* insert ip checksum */
3517                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3518                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3519
3520         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3521                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3522         }
3523
3524         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3525             (tx_flags & (IGB_TX_FLAGS_CSUM |
3526                          IGB_TX_FLAGS_TSO |
3527                          IGB_TX_FLAGS_VLAN)))
3528                 olinfo_status |= tx_ring->reg_idx << 4;
3529
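             /* PAYLEN is the frame length minus the headers the hardware
              * replicates for each TSO segment; for non-TSO frames hdr_len
              * is 0, so this is simply skb->len. */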
3530         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3531
3532         do {
3533                 buffer_info = &tx_ring->buffer_info[i];
3534                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3535                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3536                 tx_desc->read.cmd_type_len =
3537                         cpu_to_le32(cmd_type_len | buffer_info->length);
3538                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3539                 count--;
3540                 i++;
3541                 if (i == tx_ring->count)
3542                         i = 0;
3543         } while (count > 0);
3544
3545         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3546         /* Force memory writes to complete before letting h/w
3547          * know there are new descriptors to fetch.  (Only
3548          * applicable for weak-ordered memory model archs,
3549          * such as IA-64). */
3550         wmb();
3551
3552         tx_ring->next_to_use = i;
3553         writel(i, tx_ring->tail);
3554         /* we need this if more than one processor can write to our tail
3555          * at a time, it synchronizes IO on IA64/Altix systems */
3556         mmiowb();
3557 }
3558
3559 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3560 {
3561         struct net_device *netdev = tx_ring->netdev;
3562
3563         netif_stop_subqueue(netdev, tx_ring->queue_index);
3564
3565         /* Herbert's original patch had:
3566          *  smp_mb__after_netif_stop_queue();
3567          * but since that doesn't exist yet, just open code it. */
3568         smp_mb();
3569
3570         /* We need to check again in case another CPU has just
3571          * made room available. */
3572         if (igb_desc_unused(tx_ring) < size)
3573                 return -EBUSY;
3574
3575         /* A reprieve! */
3576         netif_wake_subqueue(netdev, tx_ring->queue_index);
3577         tx_ring->tx_stats.restart_queue++;
3578         return 0;
3579 }
3580
3581 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3582 {
3583         if (igb_desc_unused(tx_ring) >= size)
3584                 return 0;
3585         return __igb_maybe_stop_tx(tx_ring, size);
3586 }
3587
3588 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3589                                     struct igb_ring *tx_ring)
3590 {
3591         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3592         unsigned int first;
3593         unsigned int tx_flags = 0;
3594         u8 hdr_len = 0;
3595         int tso = 0, count;
3596         union skb_shared_tx *shtx = skb_tx(skb);
3597
3598         /* need: 1 descriptor per page,
3599          *       + 2 desc gap to keep tail from touching head,
3600          *       + 1 desc for skb->data,
3601          *       + 1 desc for context descriptor,
3602          * otherwise try next time */
3603         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3604                 /* this is a hard error */
3605                 return NETDEV_TX_BUSY;
3606         }
3607
3608         if (unlikely(shtx->hardware)) {
3609                 shtx->in_progress = 1;
3610                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3611         }
3612
3613         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3614                 tx_flags |= IGB_TX_FLAGS_VLAN;
3615                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3616         }
3617
3618         if (skb->protocol == htons(ETH_P_IP))
3619                 tx_flags |= IGB_TX_FLAGS_IPV4;
3620
3621         first = tx_ring->next_to_use;
3622         if (skb_is_gso(skb)) {
3623                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3624
3625                 if (tso < 0) {
3626                         dev_kfree_skb_any(skb);
3627                         return NETDEV_TX_OK;
3628                 }
3629         }
3630
3631         if (tso)
3632                 tx_flags |= IGB_TX_FLAGS_TSO;
3633         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3634                  (skb->ip_summed == CHECKSUM_PARTIAL))
3635                 tx_flags |= IGB_TX_FLAGS_CSUM;
3636
3637         /*
3638          * count reflects descriptors mapped, if 0 or less then mapping error
3640          * has occurred and we need to rewind the descriptor queue
3640          */
3641         count = igb_tx_map_adv(tx_ring, skb, first);
3642         if (count <= 0) {
3643                 dev_kfree_skb_any(skb);
3644                 tx_ring->buffer_info[first].time_stamp = 0;
3645                 tx_ring->next_to_use = first;
3646                 return NETDEV_TX_OK;
3647         }
3648
3649         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3650
3651         /* Make sure there is space in the ring for the next send. */
3652         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3653
3654         return NETDEV_TX_OK;
3655 }
3656
3657 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3658                                       struct net_device *netdev)
3659 {
3660         struct igb_adapter *adapter = netdev_priv(netdev);
3661         struct igb_ring *tx_ring;
3662         int r_idx = 0;
3663
3664         if (test_bit(__IGB_DOWN, &adapter->state)) {
3665                 dev_kfree_skb_any(skb);
3666                 return NETDEV_TX_OK;
3667         }
3668
3669         if (skb->len <= 0) {
3670                 dev_kfree_skb_any(skb);
3671                 return NETDEV_TX_OK;
3672         }
3673
3674         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3675         tx_ring = adapter->multi_tx_table[r_idx];
3676
3677         /* This goes back to the question of how to logically map a tx queue
3678          * to a flow.  Right now, performance is impacted slightly negatively
3679          * if using multiple tx queues.  If the stack breaks away from a
3680          * single qdisc implementation, we can look at this again. */
3681         return igb_xmit_frame_ring_adv(skb, tx_ring);
3682 }
3683
3684 /**
3685  * igb_tx_timeout - Respond to a Tx Hang
3686  * @netdev: network interface device structure
3687  **/
3688 static void igb_tx_timeout(struct net_device *netdev)
3689 {
3690         struct igb_adapter *adapter = netdev_priv(netdev);
3691         struct e1000_hw *hw = &adapter->hw;
3692
3693         /* Do the reset outside of interrupt context */
3694         adapter->tx_timeout_count++;
3695
3696         schedule_work(&adapter->reset_task);
3697         wr32(E1000_EICS,
3698              (adapter->eims_enable_mask & ~adapter->eims_other));
3699 }
3700
3701 static void igb_reset_task(struct work_struct *work)
3702 {
3703         struct igb_adapter *adapter;
3704         adapter = container_of(work, struct igb_adapter, reset_task);
3705
3706         igb_reinit_locked(adapter);
3707 }
3708
3709 /**
3710  * igb_get_stats - Get System Network Statistics
3711  * @netdev: network interface device structure
3712  *
3713  * Returns the address of the device statistics structure.
3714  * The statistics are actually updated from the timer callback.
3715  **/
3716 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3717 {
3718         /* only return the current stats */
3719         return &netdev->stats;
3720 }
3721
3722 /**
3723  * igb_change_mtu - Change the Maximum Transfer Unit
3724  * @netdev: network interface device structure
3725  * @new_mtu: new value for maximum frame size
3726  *
3727  * Returns 0 on success, negative on failure
3728  **/
3729 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3730 {
3731         struct igb_adapter *adapter = netdev_priv(netdev);
3732         struct pci_dev *pdev = adapter->pdev;
3733         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3734         u32 rx_buffer_len, i;
3735
3736         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3737                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3738                 return -EINVAL;
3739         }
3740
3741         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3742                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3743                 return -EINVAL;
3744         }
3745
3746         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3747                 msleep(1);
3748
3749         /* igb_down has a dependency on max_frame_size */
3750         adapter->max_frame_size = max_frame;
3751
3752         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3753          * means we reserve 2 more, this pushes us to allocate from the next
3754          * larger slab size.
3755          * i.e. RXBUFFER_2048 --> size-4096 slab
3756          */
3757
3758         if (max_frame <= IGB_RXBUFFER_1024)
3759                 rx_buffer_len = IGB_RXBUFFER_1024;
3760         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3761                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3762         else
3763                 rx_buffer_len = IGB_RXBUFFER_128;
3764
3765         if (netif_running(netdev))
3766                 igb_down(adapter);
3767
3768         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3769                  netdev->mtu, new_mtu);
3770         netdev->mtu = new_mtu;
3771
3772         for (i = 0; i < adapter->num_rx_queues; i++)
3773                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3774
3775         if (netif_running(netdev))
3776                 igb_up(adapter);
3777         else
3778                 igb_reset(adapter);
3779
3780         clear_bit(__IGB_RESETTING, &adapter->state);
3781
3782         return 0;
3783 }
3784
3785 /**
3786  * igb_update_stats - Update the board statistics counters
3787  * @adapter: board private structure
3788  **/
3789
3790 void igb_update_stats(struct igb_adapter *adapter)
3791 {
3792         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3793         struct e1000_hw *hw = &adapter->hw;
3794         struct pci_dev *pdev = adapter->pdev;
3795         u32 rnbc;
3796         u16 phy_tmp;
3797         int i;
3798         u64 bytes, packets;
3799
3800 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3801
3802         /*
3803          * Prevent stats update while adapter is being reset, or if the pci
3804          * connection is down.
3805          */
3806         if (adapter->link_speed == 0)
3807                 return;
3808         if (pci_channel_offline(pdev))
3809                 return;
3810
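             /* Fold the software per-ring byte/packet counters and the
              * hardware per-queue drop counter (RQDPC) into the netdev
              * statistics. */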
3811         bytes = 0;
3812         packets = 0;
3813         for (i = 0; i < adapter->num_rx_queues; i++) {
3814                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3815                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3816                 net_stats->rx_fifo_errors += rqdpc_tmp;
3817                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3818                 packets += adapter->rx_ring[i].rx_stats.packets;
3819         }
3820
3821         net_stats->rx_bytes = bytes;
3822         net_stats->rx_packets = packets;
3823
3824         bytes = 0;
3825         packets = 0;
3826         for (i = 0; i < adapter->num_tx_queues; i++) {
3827                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3828                 packets += adapter->tx_ring[i].tx_stats.packets;
3829         }
3830         net_stats->tx_bytes = bytes;
3831         net_stats->tx_packets = packets;
3832
3833         /* read stats registers */
3834         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3835         adapter->stats.gprc += rd32(E1000_GPRC);
3836         adapter->stats.gorc += rd32(E1000_GORCL);
3837         rd32(E1000_GORCH); /* clear GORCL */
3838         adapter->stats.bprc += rd32(E1000_BPRC);
3839         adapter->stats.mprc += rd32(E1000_MPRC);
3840         adapter->stats.roc += rd32(E1000_ROC);
3841
3842         adapter->stats.prc64 += rd32(E1000_PRC64);
3843         adapter->stats.prc127 += rd32(E1000_PRC127);
3844         adapter->stats.prc255 += rd32(E1000_PRC255);
3845         adapter->stats.prc511 += rd32(E1000_PRC511);
3846         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3847         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3848         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3849         adapter->stats.sec += rd32(E1000_SEC);
3850
3851         adapter->stats.mpc += rd32(E1000_MPC);
3852         adapter->stats.scc += rd32(E1000_SCC);
3853         adapter->stats.ecol += rd32(E1000_ECOL);
3854         adapter->stats.mcc += rd32(E1000_MCC);
3855         adapter->stats.latecol += rd32(E1000_LATECOL);
3856         adapter->stats.dc += rd32(E1000_DC);
3857         adapter->stats.rlec += rd32(E1000_RLEC);
3858         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3859         adapter->stats.xontxc += rd32(E1000_XONTXC);
3860         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3861         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3862         adapter->stats.fcruc += rd32(E1000_FCRUC);
3863         adapter->stats.gptc += rd32(E1000_GPTC);
3864         adapter->stats.gotc += rd32(E1000_GOTCL);
3865         rd32(E1000_GOTCH); /* clear GOTCL */
3866         rnbc = rd32(E1000_RNBC);
3867         adapter->stats.rnbc += rnbc;
3868         net_stats->rx_fifo_errors += rnbc;
3869         adapter->stats.ruc += rd32(E1000_RUC);
3870         adapter->stats.rfc += rd32(E1000_RFC);
3871         adapter->stats.rjc += rd32(E1000_RJC);
3872         adapter->stats.tor += rd32(E1000_TORH);
3873         adapter->stats.tot += rd32(E1000_TOTH);
3874         adapter->stats.tpr += rd32(E1000_TPR);
3875
3876         adapter->stats.ptc64 += rd32(E1000_PTC64);
3877         adapter->stats.ptc127 += rd32(E1000_PTC127);
3878         adapter->stats.ptc255 += rd32(E1000_PTC255);
3879         adapter->stats.ptc511 += rd32(E1000_PTC511);
3880         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3881         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3882
3883         adapter->stats.mptc += rd32(E1000_MPTC);
3884         adapter->stats.bptc += rd32(E1000_BPTC);
3885
3886         /* used for adaptive IFS */
3887         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3888         adapter->stats.tpt += hw->mac.tx_packet_delta;
3889         hw->mac.collision_delta = rd32(E1000_COLC);
3890         adapter->stats.colc += hw->mac.collision_delta;
3891
3892         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3893         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3894         adapter->stats.tncrs += rd32(E1000_TNCRS);
3895         adapter->stats.tsctc += rd32(E1000_TSCTC);
3896         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3897
3898         adapter->stats.iac += rd32(E1000_IAC);
3899         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3900         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3901         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3902         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3903         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3904         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3905         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3906         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3907
3908         /* Fill out the OS statistics structure */
3909         net_stats->multicast = adapter->stats.mprc;
3910         net_stats->collisions = adapter->stats.colc;
3911
3912         /* Rx Errors */
3913
3914         /* RLEC on some newer hardware can be incorrect so build
3915          * our own version based on RUC and ROC */
3916         net_stats->rx_errors = adapter->stats.rxerrc +
3917                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3918                 adapter->stats.ruc + adapter->stats.roc +
3919                 adapter->stats.cexterr;
3920         net_stats->rx_length_errors = adapter->stats.ruc +
3921                                       adapter->stats.roc;
3922         net_stats->rx_crc_errors = adapter->stats.crcerrs;
3923         net_stats->rx_frame_errors = adapter->stats.algnerrc;
3924         net_stats->rx_missed_errors = adapter->stats.mpc;
3925
3926         /* Tx Errors */
3927         net_stats->tx_errors = adapter->stats.ecol +
3928                                adapter->stats.latecol;
3929         net_stats->tx_aborted_errors = adapter->stats.ecol;
3930         net_stats->tx_window_errors = adapter->stats.latecol;
3931         net_stats->tx_carrier_errors = adapter->stats.tncrs;
3932
3933         /* Tx Dropped needs to be maintained elsewhere */
3934
3935         /* Phy Stats */
3936         if (hw->phy.media_type == e1000_media_type_copper) {
3937                 if ((adapter->link_speed == SPEED_1000) &&
3938                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3939                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3940                         adapter->phy_stats.idle_errors += phy_tmp;
3941                 }
3942         }
3943
3944         /* Management Stats */
3945         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3946         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3947         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3948 }
3949
3950 static irqreturn_t igb_msix_other(int irq, void *data)
3951 {
3952         struct igb_adapter *adapter = data;
3953         struct e1000_hw *hw = &adapter->hw;
3954         u32 icr = rd32(E1000_ICR);
3955         /* reading ICR causes bit 31 of EICR to be cleared */
3956
3957         if (icr & E1000_ICR_DOUTSYNC) {
3958                 /* HW is reporting DMA is out of sync */
3959                 adapter->stats.doosync++;
3960         }
3961
3962         /* Check for a mailbox event */
3963         if (icr & E1000_ICR_VMMB)
3964                 igb_msg_task(adapter);
3965
3966         if (icr & E1000_ICR_LSC) {
3967                 hw->mac.get_link_status = 1;
3968                 /* guard against interrupt when we're going down */
3969                 if (!test_bit(__IGB_DOWN, &adapter->state))
3970                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3971         }
3972
3973         if (adapter->vfs_allocated_count)
3974                 wr32(E1000_IMS, E1000_IMS_LSC |
3975                                 E1000_IMS_VMMB |
3976                                 E1000_IMS_DOUTSYNC);
3977         else
3978                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3979         wr32(E1000_EIMS, adapter->eims_other);
3980
3981         return IRQ_HANDLED;
3982 }
3983
3984 static void igb_write_itr(struct igb_q_vector *q_vector)
3985 {
3986         u32 itr_val = q_vector->itr_val & 0x7FFC;
3987
3988         if (!q_vector->set_itr)
3989                 return;
3990
3991         if (!itr_val)
3992                 itr_val = 0x4;
3993
3994         if (q_vector->itr_shift)
3995                 itr_val |= itr_val << q_vector->itr_shift;
3996         else
3997                 itr_val |= 0x8000000;
3998
3999         writel(itr_val, q_vector->itr_register);
4000         q_vector->set_itr = 0;
4001 }
4002
4003 static irqreturn_t igb_msix_ring(int irq, void *data)
4004 {
4005         struct igb_q_vector *q_vector = data;
4006
4007         /* Write the ITR value calculated from the previous interrupt. */
4008         igb_write_itr(q_vector);
4009
4010         napi_schedule(&q_vector->napi);
4011
4012         return IRQ_HANDLED;
4013 }
4014
4015 #ifdef CONFIG_IGB_DCA
4016 static void igb_update_dca(struct igb_q_vector *q_vector)
4017 {
4018         struct igb_adapter *adapter = q_vector->adapter;
4019         struct e1000_hw *hw = &adapter->hw;
4020         int cpu = get_cpu();
4021
4022         if (q_vector->cpu == cpu)
4023                 goto out_no_update;
4024
4025         if (q_vector->tx_ring) {
4026                 int q = q_vector->tx_ring->reg_idx;
4027                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4028                 if (hw->mac.type == e1000_82575) {
4029                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4030                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4031                 } else {
4032                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4033                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4034                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4035                 }
4036                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4037                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4038         }
4039         if (q_vector->rx_ring) {
4040                 int q = q_vector->rx_ring->reg_idx;
4041                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4042                 if (hw->mac.type == e1000_82575) {
4043                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4044                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4045                 } else {
4046                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4047                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4048                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4049                 }
4050                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4051                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4052                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4053                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4054         }
4055         q_vector->cpu = cpu;
4056 out_no_update:
4057         put_cpu();
4058 }
4059
4060 static void igb_setup_dca(struct igb_adapter *adapter)
4061 {
4062         struct e1000_hw *hw = &adapter->hw;
4063         int i;
4064
4065         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4066                 return;
4067
4068         /* Always use CB2 mode, difference is masked in the CB driver. */
4069         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4070
4071         for (i = 0; i < adapter->num_q_vectors; i++) {
4072                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4073                 q_vector->cpu = -1;
4074                 igb_update_dca(q_vector);
4075         }
4076 }
4077
4078 static int __igb_notify_dca(struct device *dev, void *data)
4079 {
4080         struct net_device *netdev = dev_get_drvdata(dev);
4081         struct igb_adapter *adapter = netdev_priv(netdev);
4082         struct pci_dev *pdev = adapter->pdev;
4083         struct e1000_hw *hw = &adapter->hw;
4084         unsigned long event = *(unsigned long *)data;
4085
4086         switch (event) {
4087         case DCA_PROVIDER_ADD:
4088                 /* if already enabled, don't do it again */
4089                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4090                         break;
4091                 if (dca_add_requester(dev) == 0) {
4092                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4093                         dev_info(&pdev->dev, "DCA enabled\n");
4094                         igb_setup_dca(adapter);
4095                         break;
4096                 }
4097                 /* Fall Through since DCA is disabled. */
4098         case DCA_PROVIDER_REMOVE:
4099                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4100                         /* without this a class_device is left
4101                          * hanging around in the sysfs model */
4102                         dca_remove_requester(dev);
4103                         dev_info(&pdev->dev, "DCA disabled\n");
4104                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4105                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4106                 }
4107                 break;
4108         }
4109
4110         return 0;
4111 }
4112
4113 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4114                           void *p)
4115 {
4116         int ret_val;
4117
4118         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4119                                          __igb_notify_dca);
4120
4121         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4122 }
4123 #endif /* CONFIG_IGB_DCA */
4124
4125 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4126 {
4127         struct e1000_hw *hw = &adapter->hw;
4128         u32 ping;
4129         int i;
4130
4131         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4132                 ping = E1000_PF_CONTROL_MSG;
4133                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4134                         ping |= E1000_VT_MSGTYPE_CTS;
4135                 igb_write_mbx(hw, &ping, 1, i);
4136         }
4137 }
4138
4139 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4140 {
4141         struct e1000_hw *hw = &adapter->hw;
4142         u32 vmolr = rd32(E1000_VMOLR(vf));
4143         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4144
4145         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4146                             IGB_VF_FLAG_MULTI_PROMISC);
4147         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4148
4149         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4150                 vmolr |= E1000_VMOLR_MPME;
4151                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4152         } else {
4153                 /*
4154                  * if we have hashes and we are clearing a multicast promisc
4155                  * flag we need to write the hashes to the MTA as this step
4156                  * was previously skipped
4157                  */
4158                 if (vf_data->num_vf_mc_hashes > 30) {
4159                         vmolr |= E1000_VMOLR_MPME;
4160                 } else if (vf_data->num_vf_mc_hashes) {
4161                         int j;
4162                         vmolr |= E1000_VMOLR_ROMPE;
4163                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4164                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4165                 }
4166         }
4167
4168         wr32(E1000_VMOLR(vf), vmolr);
4169
4170         /* there are flags left unprocessed, likely not supported */
4171         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4172                 return -EINVAL;
4173
4174         return 0;
4175
4176 }
4177
4178 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4179                                   u32 *msgbuf, u32 vf)
4180 {
4181         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4182         u16 *hash_list = (u16 *)&msgbuf[1];
4183         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4184         int i;
4185
4186         /* salt away the number of multicast addresses assigned
4187          * to this VF for later use to restore when the PF multicast
4188          * list changes
4189          */
4190         vf_data->num_vf_mc_hashes = n;
4191
4192         /* only up to 30 hash values supported */
4193         if (n > 30)
4194                 n = 30;
4195
4196         /* store the hashes for later use */
4197         for (i = 0; i < n; i++)
4198                 vf_data->vf_mc_hashes[i] = hash_list[i];
4199
4200         /* Flush and reset the mta with the new values */
4201         igb_set_rx_mode(adapter->netdev);
4202
4203         return 0;
4204 }
4205
4206 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4207 {
4208         struct e1000_hw *hw = &adapter->hw;
4209         struct vf_data_storage *vf_data;
4210         int i, j;
4211
4212         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4213                 u32 vmolr = rd32(E1000_VMOLR(i));
4214                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4215
4216                 vf_data = &adapter->vf_data[i];
4217
4218                 if ((vf_data->num_vf_mc_hashes > 30) ||
4219                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4220                         vmolr |= E1000_VMOLR_MPME;
4221                 } else if (vf_data->num_vf_mc_hashes) {
4222                         vmolr |= E1000_VMOLR_ROMPE;
4223                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4224                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4225                 }
4226                 wr32(E1000_VMOLR(i), vmolr);
4227         }
4228 }
4229
4230 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4231 {
4232         struct e1000_hw *hw = &adapter->hw;
4233         u32 pool_mask, reg, vid;
4234         int i;
4235
4236         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4237
4238         /* Find the vlan filter for this id */
4239         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4240                 reg = rd32(E1000_VLVF(i));
4241
4242                 /* remove the vf from the pool */
4243                 reg &= ~pool_mask;
4244
4245                 /* if pool is empty then remove entry from vfta */
4246                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4247                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4248                         vid = reg & E1000_VLVF_VLANID_MASK;
4249                         reg = 0;
4250                         igb_vfta_set(hw, vid, false);
4251                 }
4252
4253                 wr32(E1000_VLVF(i), reg);
4254         }
4255
4256         adapter->vf_data[vf].vlans_enabled = 0;
4257 }
4258
4259 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4260 {
4261         struct e1000_hw *hw = &adapter->hw;
4262         u32 reg, i;
4263
4264         /* The vlvf table only exists on 82576 hardware and newer */
4265         if (hw->mac.type < e1000_82576)
4266                 return -1;
4267
4268         /* we only need to do this if VMDq is enabled */
4269         if (!adapter->vfs_allocated_count)
4270                 return -1;
4271
4272         /* Find the vlan filter for this id */
4273         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4274                 reg = rd32(E1000_VLVF(i));
4275                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4276                     vid == (reg & E1000_VLVF_VLANID_MASK))
4277                         break;
4278         }
4279
4280         if (add) {
4281                 if (i == E1000_VLVF_ARRAY_SIZE) {
4282                         /* Did not find a matching VLAN ID entry that was
4283                          * enabled.  Search for a free filter entry, i.e.
4284                          * one without the enable bit set
4285                          */
4286                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4287                                 reg = rd32(E1000_VLVF(i));
4288                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4289                                         break;
4290                         }
4291                 }
4292                 if (i < E1000_VLVF_ARRAY_SIZE) {
4293                         /* Found an enabled/available entry */
4294                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4295
4296                         /* if !enabled we need to set this up in vfta */
4297                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4298                                 /* add VID to filter table */
4299                                 igb_vfta_set(hw, vid, true);
4300                                 reg |= E1000_VLVF_VLANID_ENABLE;
4301                         }
4302                         reg &= ~E1000_VLVF_VLANID_MASK;
4303                         reg |= vid;
4304                         wr32(E1000_VLVF(i), reg);
4305
4306                         /* do not modify RLPML for PF devices */
4307                         if (vf >= adapter->vfs_allocated_count)
4308                                 return 0;
4309
4310                         if (!adapter->vf_data[vf].vlans_enabled) {
4311                                 u32 size;
4312                                 reg = rd32(E1000_VMOLR(vf));
4313                                 size = reg & E1000_VMOLR_RLPML_MASK;
4314                                 size += 4;
4315                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4316                                 reg |= size;
4317                                 wr32(E1000_VMOLR(vf), reg);
4318                         }
4319
4320                         adapter->vf_data[vf].vlans_enabled++;
4321                         return 0;
4322                 }
4323         } else {
4324                 if (i < E1000_VLVF_ARRAY_SIZE) {
4325                         /* remove vf from the pool */
4326                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4327                         /* if pool is empty then remove entry from vfta */
4328                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4329                                 reg = 0;
4330                                 igb_vfta_set(hw, vid, false);
4331                         }
4332                         wr32(E1000_VLVF(i), reg);
4333
4334                         /* do not modify RLPML for PF devices */
4335                         if (vf >= adapter->vfs_allocated_count)
4336                                 return 0;
4337
4338                         adapter->vf_data[vf].vlans_enabled--;
4339                         if (!adapter->vf_data[vf].vlans_enabled) {
4340                                 u32 size;
4341                                 reg = rd32(E1000_VMOLR(vf));
4342                                 size = reg & E1000_VMOLR_RLPML_MASK;
4343                                 size -= 4;
4344                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4345                                 reg |= size;
4346                                 wr32(E1000_VMOLR(vf), reg);
4347                         }
4348                         return 0;
4349                 }
4350         }
4351         return -1;
4352 }
4353
4354 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4355 {
4356         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4357         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4358
4359         return igb_vlvf_set(adapter, vid, add, vf);
4360 }
4361
4362 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4363 {
4364         /* clear all flags */
4365         adapter->vf_data[vf].flags = 0;
4366         adapter->vf_data[vf].last_nack = jiffies;
4367
4368         /* reset offloads to defaults */
4369         igb_set_vmolr(adapter, vf);
4370
4371         /* reset vlans for device */
4372         igb_clear_vf_vfta(adapter, vf);
4373
4374         /* reset multicast table array for vf */
4375         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4376
4377         /* Flush and reset the mta with the new values */
4378         igb_set_rx_mode(adapter->netdev);
4379 }
4380
4381 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4382 {
4383         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4384
4385         /* generate a new mac address as we were hotplug removed/added */
4386         random_ether_addr(vf_mac);
4387
4388         /* process remaining reset events */
4389         igb_vf_reset(adapter, vf);
4390 }
4391
4392 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4393 {
4394         struct e1000_hw *hw = &adapter->hw;
4395         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4396         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4397         u32 reg, msgbuf[3];
4398         u8 *addr = (u8 *)(&msgbuf[1]);
4399
4400         /* process all the same items cleared in a function level reset */
4401         igb_vf_reset(adapter, vf);
4402
4403         /* set vf mac address */
4404         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4405
4406         /* enable transmit and receive for vf */
4407         reg = rd32(E1000_VFTE);
4408         wr32(E1000_VFTE, reg | (1 << vf));
4409         reg = rd32(E1000_VFRE);
4410         wr32(E1000_VFRE, reg | (1 << vf));
4411
4412         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4413
4414         /* reply to reset with ack and vf mac address */
4415         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4416         memcpy(addr, vf_mac, 6);
4417         igb_write_mbx(hw, msgbuf, 3, vf);
4418 }
4419
4420 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4421 {
4422         unsigned char *addr = (unsigned char *)&msg[1];
4423         int err = -1;
4424
4425         if (is_valid_ether_addr(addr))
4426                 err = igb_set_vf_mac(adapter, vf, addr);
4427
4428         return err;
4429 }
4430
4431 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4432 {
4433         struct e1000_hw *hw = &adapter->hw;
4434         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4435         u32 msg = E1000_VT_MSGTYPE_NACK;
4436
4437         /* if device isn't clear to send it shouldn't be reading either */
4438         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4439             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4440                 igb_write_mbx(hw, &msg, 1, vf);
4441                 vf_data->last_nack = jiffies;
4442         }
4443 }
4444
4445 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4446 {
4447         struct pci_dev *pdev = adapter->pdev;
4448         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4449         struct e1000_hw *hw = &adapter->hw;
4450         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4451         s32 retval;
4452
4453         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4454
4455         if (retval)
4456                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4457
4458         /* this is a message we already processed, do nothing */
4459         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4460                 return;
4461
4462         /*
4463          * until the vf completes a reset it should not be
4464          * allowed to start any configuration.
4465          */
4466
4467         if (msgbuf[0] == E1000_VF_RESET) {
4468                 igb_vf_reset_msg(adapter, vf);
4469                 return;
4470         }
4471
4472         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4473                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4474                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4475                         igb_write_mbx(hw, msgbuf, 1, vf);
4476                         vf_data->last_nack = jiffies;
4477                 }
4478                 return;
4479         }
4480
4481         switch ((msgbuf[0] & 0xFFFF)) {
4482         case E1000_VF_SET_MAC_ADDR:
4483                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4484                 break;
4485         case E1000_VF_SET_PROMISC:
4486                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4487                 break;
4488         case E1000_VF_SET_MULTICAST:
4489                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4490                 break;
4491         case E1000_VF_SET_LPE:
4492                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4493                 break;
4494         case E1000_VF_SET_VLAN:
4495                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4496                 break;
4497         default:
4498                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4499                 retval = -1;
4500                 break;
4501         }
4502
4503         /* notify the VF of the results of what it sent us */
4504         if (retval)
4505                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4506         else
4507                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4508
4509         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4510
4511         igb_write_mbx(hw, msgbuf, 1, vf);
4512 }
4513
4514 static void igb_msg_task(struct igb_adapter *adapter)
4515 {
4516         struct e1000_hw *hw = &adapter->hw;
4517         u32 vf;
4518
4519         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4520                 /* process any reset requests */
4521                 if (!igb_check_for_rst(hw, vf))
4522                         igb_vf_reset_event(adapter, vf);
4523
4524                 /* process any messages pending */
4525                 if (!igb_check_for_msg(hw, vf))
4526                         igb_rcv_msg_from_vf(adapter, vf);
4527
4528                 /* process any acks */
4529                 if (!igb_check_for_ack(hw, vf))
4530                         igb_rcv_ack_from_vf(adapter, vf);
4531         }
4532 }
4533
4534 /**
4535  *  igb_set_uta - Set unicast filter table address
4536  *  @adapter: board private structure
4537  *
4538  *  The unicast table address is a register array of 32-bit registers.
4539  *  The table is meant to be used in a way similar to how the MTA is used
4540  *  however due to certain limitations in the hardware it is necessary to
4541  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4542  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4543  **/
4544 static void igb_set_uta(struct igb_adapter *adapter)
4545 {
4546         struct e1000_hw *hw = &adapter->hw;
4547         int i;
4548
4549         /* The UTA table only exists on 82576 hardware and newer */
4550         if (hw->mac.type < e1000_82576)
4551                 return;
4552
4553         /* we only need to do this if VMDq is enabled */
4554         if (!adapter->vfs_allocated_count)
4555                 return;
4556
4557         for (i = 0; i < hw->mac.uta_reg_count; i++)
4558                 array_wr32(E1000_UTA, i, ~0);
4559 }
4560
4561 /**
4562  * igb_intr_msi - Interrupt Handler
4563  * @irq: interrupt number
4564  * @data: pointer to a network interface device structure
4565  **/
4566 static irqreturn_t igb_intr_msi(int irq, void *data)
4567 {
4568         struct igb_adapter *adapter = data;
4569         struct igb_q_vector *q_vector = adapter->q_vector[0];
4570         struct e1000_hw *hw = &adapter->hw;
4571         /* read ICR disables interrupts using IAM */
4572         u32 icr = rd32(E1000_ICR);
4573
4574         igb_write_itr(q_vector);
4575
4576         if (icr & E1000_ICR_DOUTSYNC) {
4577                 /* HW is reporting DMA is out of sync */
4578                 adapter->stats.doosync++;
4579         }
4580
4581         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4582                 hw->mac.get_link_status = 1;
4583                 if (!test_bit(__IGB_DOWN, &adapter->state))
4584                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4585         }
4586
4587         napi_schedule(&q_vector->napi);
4588
4589         return IRQ_HANDLED;
4590 }
4591
4592 /**
4593  * igb_intr - Legacy Interrupt Handler
4594  * @irq: interrupt number
4595  * @data: pointer to a network interface device structure
4596  **/
4597 static irqreturn_t igb_intr(int irq, void *data)
4598 {
4599         struct igb_adapter *adapter = data;
4600         struct igb_q_vector *q_vector = adapter->q_vector[0];
4601         struct e1000_hw *hw = &adapter->hw;
4602         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4603          * need for the IMC write */
4604         u32 icr = rd32(E1000_ICR);
4605         if (!icr)
4606                 return IRQ_NONE;  /* Not our interrupt */
4607
4608         igb_write_itr(q_vector);
4609
4610         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4611          * not set, then the adapter didn't send an interrupt */
4612         if (!(icr & E1000_ICR_INT_ASSERTED))
4613                 return IRQ_NONE;
4614
4615         if (icr & E1000_ICR_DOUTSYNC) {
4616                 /* HW is reporting DMA is out of sync */
4617                 adapter->stats.doosync++;
4618         }
4619
4620         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4621                 hw->mac.get_link_status = 1;
4622                 /* guard against interrupt when we're going down */
4623                 if (!test_bit(__IGB_DOWN, &adapter->state))
4624                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4625         }
4626
4627         napi_schedule(&q_vector->napi);
4628
4629         return IRQ_HANDLED;
4630 }
4631
4632 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4633 {
4634         struct igb_adapter *adapter = q_vector->adapter;
4635         struct e1000_hw *hw = &adapter->hw;
4636
4637         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4638             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4639                 if (!adapter->msix_entries)
4640                         igb_set_itr(adapter);
4641                 else
4642                         igb_update_ring_itr(q_vector);
4643         }
4644
4645         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4646                 if (adapter->msix_entries)
4647                         wr32(E1000_EIMS, q_vector->eims_value);
4648                 else
4649                         igb_irq_enable(adapter);
4650         }
4651 }
4652
4653 /**
4654  * igb_poll - NAPI Rx polling callback
4655  * @napi: napi polling structure
4656  * @budget: count of how many packets we should handle
4657  **/
4658 static int igb_poll(struct napi_struct *napi, int budget)
4659 {
4660         struct igb_q_vector *q_vector = container_of(napi,
4661                                                      struct igb_q_vector,
4662                                                      napi);
4663         int tx_clean_complete = 1, work_done = 0;
4664
4665 #ifdef CONFIG_IGB_DCA
4666         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4667                 igb_update_dca(q_vector);
4668 #endif
4669         if (q_vector->tx_ring)
4670                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4671
4672         if (q_vector->rx_ring)
4673                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4674
4675         if (!tx_clean_complete)
4676                 work_done = budget;
4677
4678         /* If not enough Rx work done, exit the polling mode */
4679         if (work_done < budget) {
4680                 napi_complete(napi);
4681                 igb_ring_irq_enable(q_vector);
4682         }
4683
4684         return work_done;
4685 }
4686
4687 /**
4688  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4689  * @adapter: board private structure
4690  * @shhwtstamps: timestamp structure to update
4691  * @regval: unsigned 64bit system time value.
4692  *
4693  * We need to convert the system time value stored in the RX/TXSTMP registers
4694  * into a hwtstamp which can be used by the upper level timestamping functions
4695  */
4696 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4697                                    struct skb_shared_hwtstamps *shhwtstamps,
4698                                    u64 regval)
4699 {
4700         u64 ns;
4701
4702         ns = timecounter_cyc2time(&adapter->clock, regval);
4703         timecompare_update(&adapter->compare, ns);
4704         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4705         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4706         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4707 }
4708
4709 /**
4710  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4711  * @q_vector: pointer to q_vector containing needed info
4712  * @skb: packet that was just sent
4713  *
4714  * If we were asked to do hardware stamping and such a time stamp is
4715  * available, then it must have been for this skb here because we only
4716  * allow one such packet into the queue.
4717  */
4718 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4719 {
4720         struct igb_adapter *adapter = q_vector->adapter;
4721         union skb_shared_tx *shtx = skb_tx(skb);
4722         struct e1000_hw *hw = &adapter->hw;
4723         struct skb_shared_hwtstamps shhwtstamps;
4724         u64 regval;
4725
4726         /* if skb does not support hw timestamp or TX stamp not valid exit */
4727         if (likely(!shtx->hardware) ||
4728             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4729                 return;
4730
4731         regval = rd32(E1000_TXSTMPL);
4732         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4733
4734         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4735         skb_tstamp_tx(skb, &shhwtstamps);
4736 }
4737
4738 /**
4739  * igb_clean_tx_irq - Reclaim resources after transmit completes
4740  * @q_vector: pointer to q_vector containing needed info
4741  * returns true if ring is completely cleaned
4742  **/
4743 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4744 {
4745         struct igb_adapter *adapter = q_vector->adapter;
4746         struct igb_ring *tx_ring = q_vector->tx_ring;
4747         struct net_device *netdev = tx_ring->netdev;
4748         struct e1000_hw *hw = &adapter->hw;
4749         struct igb_buffer *buffer_info;
4750         struct sk_buff *skb;
4751         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4752         unsigned int total_bytes = 0, total_packets = 0;
4753         unsigned int i, eop, count = 0;
4754         bool cleaned = false;
4755
4756         i = tx_ring->next_to_clean;
4757         eop = tx_ring->buffer_info[i].next_to_watch;
4758         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4759
4760         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4761                (count < tx_ring->count)) {
4762                 for (cleaned = false; !cleaned; count++) {
4763                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4764                         buffer_info = &tx_ring->buffer_info[i];
4765                         cleaned = (i == eop);
4766                         skb = buffer_info->skb;
4767
4768                         if (skb) {
4769                                 unsigned int segs, bytecount;
4770                                 /* gso_segs is currently only valid for tcp */
4771                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4772                                 /* multiply data chunks by size of headers */
4773                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4774                                             skb->len;
4775                                 total_packets += segs;
4776                                 total_bytes += bytecount;
4777
4778                                 igb_tx_hwtstamp(q_vector, skb);
4779                         }
4780
4781                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4782                         tx_desc->wb.status = 0;
4783
4784                         i++;
4785                         if (i == tx_ring->count)
4786                                 i = 0;
4787                 }
4788                 eop = tx_ring->buffer_info[i].next_to_watch;
4789                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4790         }
4791
4792         tx_ring->next_to_clean = i;
4793
4794         if (unlikely(count &&
4795                      netif_carrier_ok(netdev) &&
4796                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4797                 /* Make sure that anybody stopping the queue after this
4798                  * sees the new next_to_clean.
4799                  */
4800                 smp_mb();
4801                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4802                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4803                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4804                         tx_ring->tx_stats.restart_queue++;
4805                 }
4806         }
4807
4808         if (tx_ring->detect_tx_hung) {
4809                 /* Detect a transmit hang in hardware; this serializes the
4810                  * check with the clearing of time_stamp and movement of i */
4811                 tx_ring->detect_tx_hung = false;
4812                 if (tx_ring->buffer_info[i].time_stamp &&
4813                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4814                                (adapter->tx_timeout_factor * HZ))
4815                     && !(rd32(E1000_STATUS) &
4816                          E1000_STATUS_TXOFF)) {
4817
4818                         /* detected Tx unit hang */
4819                         dev_err(&tx_ring->pdev->dev,
4820                                 "Detected Tx Unit Hang\n"
4821                                 "  Tx Queue             <%d>\n"
4822                                 "  TDH                  <%x>\n"
4823                                 "  TDT                  <%x>\n"
4824                                 "  next_to_use          <%x>\n"
4825                                 "  next_to_clean        <%x>\n"
4826                                 "buffer_info[next_to_clean]\n"
4827                                 "  time_stamp           <%lx>\n"
4828                                 "  next_to_watch        <%x>\n"
4829                                 "  jiffies              <%lx>\n"
4830                                 "  desc.status          <%x>\n",
4831                                 tx_ring->queue_index,
4832                                 readl(tx_ring->head),
4833                                 readl(tx_ring->tail),
4834                                 tx_ring->next_to_use,
4835                                 tx_ring->next_to_clean,
4836                                 tx_ring->buffer_info[eop].time_stamp,
4837                                 eop,
4838                                 jiffies,
4839                                 eop_desc->wb.status);
4840                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4841                 }
4842         }
4843         tx_ring->total_bytes += total_bytes;
4844         tx_ring->total_packets += total_packets;
4845         tx_ring->tx_stats.bytes += total_bytes;
4846         tx_ring->tx_stats.packets += total_packets;
4847         return (count < tx_ring->count);
4848 }
4849
4850 /**
4851  * igb_receive_skb - helper function to handle rx indications
4852  * @q_vector: structure containing interrupt and ring information
4853  * @skb: packet to send up
4854  * @vlan_tag: vlan tag for packet
4855  **/
4856 static void igb_receive_skb(struct igb_q_vector *q_vector,
4857                             struct sk_buff *skb,
4858                             u16 vlan_tag)
4859 {
4860         struct igb_adapter *adapter = q_vector->adapter;
4861
4862         if (vlan_tag)
4863                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4864                                  vlan_tag, skb);
4865         else
4866                 napi_gro_receive(&q_vector->napi, skb);
4867 }
4868
4869 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4870                                        u32 status_err, struct sk_buff *skb)
4871 {
4872         skb->ip_summed = CHECKSUM_NONE;
4873
4874         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4875         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4876              (status_err & E1000_RXD_STAT_IXSM))
4877                 return;
4878
4879         /* TCP/UDP checksum error bit is set */
4880         if (status_err &
4881             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4882                 /*
4883                  * work around errata with sctp packets where the TCPE aka
4884                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4885                  * packets, (aka let the stack check the crc32c)
4886                  */
4887                 if ((skb->len == 60) &&
4888                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4889                         ring->rx_stats.csum_err++;
4890
4891                 /* let the stack verify checksum errors */
4892                 return;
4893         }
4894         /* It must be a TCP or UDP packet with a valid checksum */
4895         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4896                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4897
4898         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4899 }
4900
4901 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4902                                    struct sk_buff *skb)
4903 {
4904         struct igb_adapter *adapter = q_vector->adapter;
4905         struct e1000_hw *hw = &adapter->hw;
4906         u64 regval;
4907
4908         /*
4909          * If this bit is set, then the RX registers contain the time stamp. No
4910          * other packet will be time stamped until we read these registers, so
4911          * read the registers to make them available again. Because only one
4912          * packet can be time stamped at a time, we know that the register
4913          * values must belong to this one here and therefore we don't need to
4914          * compare any of the additional attributes stored for it.
4915          *
4916          * If nothing went wrong, then it should have a skb_shared_tx that we
4917          * can turn into a skb_shared_hwtstamps.
4918          */
4919         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4920                 return;
4921         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4922                 return;
4923
4924         regval = rd32(E1000_RXSTMPL);
4925         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4926
4927         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4928 }
4929 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4930                                union e1000_adv_rx_desc *rx_desc)
4931 {
4932         /* HW will not DMA in data larger than the given buffer, even if it
4933          * parses the (NFS, of course) header to be larger.  In that case, it
4934          * fills the header buffer and spills the rest into the page.
4935          */
4936         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4937                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4938         if (hlen > rx_ring->rx_buffer_len)
4939                 hlen = rx_ring->rx_buffer_len;
4940         return hlen;
4941 }
4942
4943 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4944                                  int *work_done, int budget)
4945 {
4946         struct igb_ring *rx_ring = q_vector->rx_ring;
4947         struct net_device *netdev = rx_ring->netdev;
4948         struct pci_dev *pdev = rx_ring->pdev;
4949         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4950         struct igb_buffer *buffer_info, *next_buffer;
4951         struct sk_buff *skb;
4952         bool cleaned = false;
4953         int cleaned_count = 0;
4954         unsigned int total_bytes = 0, total_packets = 0;
4955         unsigned int i;
4956         u32 staterr;
4957         u16 length;
4958         u16 vlan_tag;
4959
4960         i = rx_ring->next_to_clean;
4961         buffer_info = &rx_ring->buffer_info[i];
4962         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4963         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4964
4965         while (staterr & E1000_RXD_STAT_DD) {
4966                 if (*work_done >= budget)
4967                         break;
4968                 (*work_done)++;
4969
4970                 skb = buffer_info->skb;
4971                 prefetch(skb->data - NET_IP_ALIGN);
4972                 buffer_info->skb = NULL;
4973
4974                 i++;
4975                 if (i == rx_ring->count)
4976                         i = 0;
4977
4978                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4979                 prefetch(next_rxd);
4980                 next_buffer = &rx_ring->buffer_info[i];
4981
4982                 length = le16_to_cpu(rx_desc->wb.upper.length);
4983                 cleaned = true;
4984                 cleaned_count++;
4985
4986                 if (buffer_info->dma) {
4987                         pci_unmap_single(pdev, buffer_info->dma,
4988                                          rx_ring->rx_buffer_len,
4989                                          PCI_DMA_FROMDEVICE);
4990                         buffer_info->dma = 0;
4991                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4992                                 skb_put(skb, length);
4993                                 goto send_up;
4994                         }
4995                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4996                 }
4997
4998                 if (length) {
4999                         pci_unmap_page(pdev, buffer_info->page_dma,
5000                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5001                         buffer_info->page_dma = 0;
5002
5003                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5004                                                 buffer_info->page,
5005                                                 buffer_info->page_offset,
5006                                                 length);
5007
5008                         if (page_count(buffer_info->page) != 1)
5009                                 buffer_info->page = NULL;
5010                         else
5011                                 get_page(buffer_info->page);
5012
5013                         skb->len += length;
5014                         skb->data_len += length;
5015                         skb->truesize += length;
5016                 }
5017
5018                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5019                         buffer_info->skb = next_buffer->skb;
5020                         buffer_info->dma = next_buffer->dma;
5021                         next_buffer->skb = skb;
5022                         next_buffer->dma = 0;
5023                         goto next_desc;
5024                 }
5025 send_up:
5026                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5027                         dev_kfree_skb_irq(skb);
5028                         goto next_desc;
5029                 }
5030
5031                 igb_rx_hwtstamp(q_vector, staterr, skb);
5032                 total_bytes += skb->len;
5033                 total_packets++;
5034
5035                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5036
5037                 skb->protocol = eth_type_trans(skb, netdev);
5038                 skb_record_rx_queue(skb, rx_ring->queue_index);
5039
5040                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5041                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5042
5043                 igb_receive_skb(q_vector, skb, vlan_tag);
5044
5045 next_desc:
5046                 rx_desc->wb.upper.status_error = 0;
5047
5048                 /* return some buffers to hardware, one at a time is too slow */
5049                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5050                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5051                         cleaned_count = 0;
5052                 }
5053
5054                 /* use prefetched values */
5055                 rx_desc = next_rxd;
5056                 buffer_info = next_buffer;
5057                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5058         }
5059
5060         rx_ring->next_to_clean = i;
5061         cleaned_count = igb_desc_unused(rx_ring);
5062
5063         if (cleaned_count)
5064                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5065
5066         rx_ring->total_packets += total_packets;
5067         rx_ring->total_bytes += total_bytes;
5068         rx_ring->rx_stats.packets += total_packets;
5069         rx_ring->rx_stats.bytes += total_bytes;
5070         return cleaned;
5071 }
5072
5073 /**
5074  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5075  * @rx_ring: rx descriptor ring to repopulate
5076  **/
5077 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5078 {
5079         struct net_device *netdev = rx_ring->netdev;
5080         union e1000_adv_rx_desc *rx_desc;
5081         struct igb_buffer *buffer_info;
5082         struct sk_buff *skb;
5083         unsigned int i;
5084         int bufsz;
5085
5086         i = rx_ring->next_to_use;
5087         buffer_info = &rx_ring->buffer_info[i];
5088
5089         bufsz = rx_ring->rx_buffer_len;
5090
5091         while (cleaned_count--) {
5092                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5093
5094                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5095                         if (!buffer_info->page) {
5096                                 buffer_info->page = netdev_alloc_page(netdev);
5097                                 if (!buffer_info->page) {
5098                                         rx_ring->rx_stats.alloc_failed++;
5099                                         goto no_buffers;
5100                                 }
5101                                 buffer_info->page_offset = 0;
5102                         } else {
5103                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5104                         }
5105                         buffer_info->page_dma =
5106                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5107                                              buffer_info->page_offset,
5108                                              PAGE_SIZE / 2,
5109                                              PCI_DMA_FROMDEVICE);
5110                         if (pci_dma_mapping_error(rx_ring->pdev,
5111                                                   buffer_info->page_dma)) {
5112                                 buffer_info->page_dma = 0;
5113                                 rx_ring->rx_stats.alloc_failed++;
5114                                 goto no_buffers;
5115                         }
5116                 }
5117
5118                 skb = buffer_info->skb;
5119                 if (!skb) {
5120                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5121                         if (!skb) {
5122                                 rx_ring->rx_stats.alloc_failed++;
5123                                 goto no_buffers;
5124                         }
5125
5126                         buffer_info->skb = skb;
5127                 }
5128                 if (!buffer_info->dma) {
5129                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5130                                                           skb->data,
5131                                                           bufsz,
5132                                                           PCI_DMA_FROMDEVICE);
5133                         if (pci_dma_mapping_error(rx_ring->pdev,
5134                                                   buffer_info->dma)) {
5135                                 buffer_info->dma = 0;
5136                                 rx_ring->rx_stats.alloc_failed++;
5137                                 goto no_buffers;
5138                         }
5139                 }
5140                 /* Refresh the desc even if buffer_addrs didn't change because
5141                  * each write-back erases this info. */
5142                 if (bufsz < IGB_RXBUFFER_1024) {
5143                         rx_desc->read.pkt_addr =
5144                              cpu_to_le64(buffer_info->page_dma);
5145                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5146                 } else {
5147                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5148                         rx_desc->read.hdr_addr = 0;
5149                 }
5150
5151                 i++;
5152                 if (i == rx_ring->count)
5153                         i = 0;
5154                 buffer_info = &rx_ring->buffer_info[i];
5155         }
5156
5157 no_buffers:
5158         if (rx_ring->next_to_use != i) {
5159                 rx_ring->next_to_use = i;
5160                 if (i == 0)
5161                         i = (rx_ring->count - 1);
5162                 else
5163                         i--;
5164
5165                 /* Force memory writes to complete before letting h/w
5166                  * know there are new descriptors to fetch.  (Only
5167                  * applicable for weak-ordered memory model archs,
5168                  * such as IA-64). */
5169                 wmb();
5170                 writel(i, rx_ring->tail);
5171         }
5172 }
5173
5174 /**
5175  * igb_mii_ioctl - handle MII register ioctls
5176  * @netdev: network interface device structure
5177  * @ifr: interface request structure holding the MII data
5178  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5179  **/
5180 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5181 {
5182         struct igb_adapter *adapter = netdev_priv(netdev);
5183         struct mii_ioctl_data *data = if_mii(ifr);
5184
5185         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5186                 return -EOPNOTSUPP;
5187
5188         switch (cmd) {
5189         case SIOCGMIIPHY:
5190                 data->phy_id = adapter->hw.phy.addr;
5191                 break;
5192         case SIOCGMIIREG:
5193                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5194                                      &data->val_out))
5195                         return -EIO;
5196                 break;
5197         case SIOCSMIIREG:
5198         default:
5199                 return -EOPNOTSUPP;
5200         }
5201         return 0;
5202 }
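
/*
 * Illustrative user space sketch (not part of this driver): reading a
 * PHY register through the SIOCGMIIPHY/SIOCGMIIREG paths handled by
 * igb_mii_ioctl() above.  The helper name read_phy_reg() is an
 * assumption made for this example only.
 */
#if 0	/* user space example, never built into the driver */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/mii.h>
#include <linux/sockios.h>

static int read_phy_reg(const char *ifname, int reg, unsigned int *val)
{
	struct ifreq ifr;
	/* the MII data is carried inside the ifreq union itself */
	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);

	/* SIOCGMIIPHY returns the PHY address the driver uses */
	if (ioctl(fd, SIOCGMIIPHY, &ifr) < 0)
		goto err;

	mii->reg_num = reg;
	if (ioctl(fd, SIOCGMIIREG, &ifr) < 0)
		goto err;

	*val = mii->val_out;
	close(fd);
	return 0;
err:
	close(fd);
	return -1;
}
#endif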
5203
5204 /**
5205  * igb_hwtstamp_ioctl - control hardware time stamping
5206  * @netdev: network interface device structure
5207  * @ifr: interface request holding the hwtstamp_config from user space
5208  * @cmd: ioctl command (SIOCSHWTSTAMP)
5209  *
5210  * Outgoing time stamping can be enabled and disabled. Play nice and
5211  * disable it when requested, although it shouldn't cause any overhead
5212  * when no packet needs it. At most one packet in the queue may be
5213  * marked for time stamping, otherwise it would be impossible to tell
5214  * for sure to which packet the hardware time stamp belongs.
5215  *
5216  * Incoming time stamping has to be configured via the hardware
5217  * filters. Not all combinations are supported, in particular event
5218  * type has to be specified. Matching the kind of event packet is
5219  * not supported, with the exception of "all V2 events regardless of
5220  * level 2 or 4".
5221  * layer 2 or 4".
5222  **/
5223 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5224                               struct ifreq *ifr, int cmd)
5225 {
5226         struct igb_adapter *adapter = netdev_priv(netdev);
5227         struct e1000_hw *hw = &adapter->hw;
5228         struct hwtstamp_config config;
5229         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5230         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5231         u32 tsync_rx_cfg = 0;
5232         bool is_l4 = false;
5233         bool is_l2 = false;
5234         u32 regval;
5235
5236         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5237                 return -EFAULT;
5238
5239         /* reserved for future extensions */
5240         if (config.flags)
5241                 return -EINVAL;
5242
5243         switch (config.tx_type) {
5244         case HWTSTAMP_TX_OFF:
5245                 tsync_tx_ctl = 0;      /* fall through */
5246         case HWTSTAMP_TX_ON:
5247                 break;
5248         default:
5249                 return -ERANGE;
5250         }
5251
5252         switch (config.rx_filter) {
5253         case HWTSTAMP_FILTER_NONE:
5254                 tsync_rx_ctl = 0;
5255                 break;
5256         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5257         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5258         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5259         case HWTSTAMP_FILTER_ALL:
5260                 /*
5261                  * register TSYNCRXCFG must be set, therefore it is not
5262                  * possible to time stamp both Sync and Delay_Req messages
5263                  * => fall back to time stamping all packets
5264                  */
5265                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5266                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5267                 break;
5268         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5269                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5270                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5271                 is_l4 = true;
5272                 break;
5273         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5274                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5275                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5276                 is_l4 = true;
5277                 break;
5278         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5279         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5280                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5281                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5282                 is_l2 = true;
5283                 is_l4 = true;
5284                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5285                 break;
5286         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5287         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5288                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5289                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5290                 is_l2 = true;
5291                 is_l4 = true;
5292                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5293                 break;
5294         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5295         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5296         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5297                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5298                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5299                 is_l2 = true;
5300                 break;
5301         default:
5302                 return -ERANGE;
5303         }
5304
5305         if (hw->mac.type == e1000_82575) {
5306                 if (tsync_rx_ctl || tsync_tx_ctl)
5307                         return -EINVAL;
5308                 return 0;
5309         }
5310
5311         /* enable/disable TX */
5312         regval = rd32(E1000_TSYNCTXCTL);
5313         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5314         regval |= tsync_tx_ctl;
5315         wr32(E1000_TSYNCTXCTL, regval);
5316
5317         /* enable/disable RX */
5318         regval = rd32(E1000_TSYNCRXCTL);
5319         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5320         regval |= tsync_rx_ctl;
5321         wr32(E1000_TSYNCRXCTL, regval);
5322
5323         /* define which PTP packets are time stamped */
5324         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5325
5326         /* define ethertype filter for timestamped packets */
5327         if (is_l2)
5328                 wr32(E1000_ETQF(3),
5329                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5330                                  E1000_ETQF_1588 | /* enable timestamping */
5331                                  ETH_P_1588));     /* 1588 eth protocol type */
5332         else
5333                 wr32(E1000_ETQF(3), 0);
5334
5335 #define PTP_PORT 319
5336         /* L4 Queue Filter[3]: filter by destination port and protocol */
5337         if (is_l4) {
5338                 u32 ftqf = (IPPROTO_UDP /* UDP */
5339                         | E1000_FTQF_VF_BP /* VF not compared */
5340                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5341                         | E1000_FTQF_MASK); /* mask all inputs */
5342                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5343
5344                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5345                 wr32(E1000_IMIREXT(3),
5346                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5347                 if (hw->mac.type == e1000_82576) {
5348                         /* enable source port check */
5349                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5350                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5351                 }
5352                 wr32(E1000_FTQF(3), ftqf);
5353         } else {
5354                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5355         }
5356         wrfl();
5357
5358         adapter->hwtstamp_config = config;
5359
5360         /* clear TX/RX time stamp registers, just to be sure */
5361         regval = rd32(E1000_TXSTMPH);
5362         regval = rd32(E1000_RXSTMPH);
5363
5364         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5365                 -EFAULT : 0;
5366 }
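
/*
 * Illustrative user space sketch (not part of this driver): requesting
 * hardware time stamping through the SIOCSHWTSTAMP path handled by
 * igb_hwtstamp_ioctl() above.  The helper name and the choice of
 * HWTSTAMP_FILTER_ALL are assumptions for this example; as described
 * above, the driver may rewrite rx_filter to what it actually enabled.
 */
#if 0	/* user space example, never built into the driver */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

static int enable_hw_tstamp(const char *ifname)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd, err;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;		/* stamp outgoing packets */
	cfg.rx_filter = HWTSTAMP_FILTER_ALL;	/* stamp all incoming packets */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
	ifr.ifr_data = (void *)&cfg;

	err = ioctl(fd, SIOCSHWTSTAMP, &ifr);
	close(fd);
	return err;
}
#endif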
5367
5368 /**
5369  * igb_ioctl - dispatch device-specific ioctls
5370  * @netdev: network interface device structure
5371  * @ifr: interface request structure
5372  * @cmd: ioctl command
5373  **/
5374 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5375 {
5376         switch (cmd) {
5377         case SIOCGMIIPHY:
5378         case SIOCGMIIREG:
5379         case SIOCSMIIREG:
5380                 return igb_mii_ioctl(netdev, ifr, cmd);
5381         case SIOCSHWTSTAMP:
5382                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5383         default:
5384                 return -EOPNOTSUPP;
5385         }
5386 }
5387
5388 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5389 {
5390         struct igb_adapter *adapter = hw->back;
5391         u16 cap_offset;
5392
5393         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5394         if (!cap_offset)
5395                 return -E1000_ERR_CONFIG;
5396
5397         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5398
5399         return 0;
5400 }
5401
5402 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5403 {
5404         struct igb_adapter *adapter = hw->back;
5405         u16 cap_offset;
5406
5407         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5408         if (!cap_offset)
5409                 return -E1000_ERR_CONFIG;
5410
5411         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5412
5413         return 0;
5414 }
5415
5416 static void igb_vlan_rx_register(struct net_device *netdev,
5417                                  struct vlan_group *grp)
5418 {
5419         struct igb_adapter *adapter = netdev_priv(netdev);
5420         struct e1000_hw *hw = &adapter->hw;
5421         u32 ctrl, rctl;
5422
5423         igb_irq_disable(adapter);
5424         adapter->vlgrp = grp;
5425
5426         if (grp) {
5427                 /* enable VLAN tag insert/strip */
5428                 ctrl = rd32(E1000_CTRL);
5429                 ctrl |= E1000_CTRL_VME;
5430                 wr32(E1000_CTRL, ctrl);
5431
5432                 /* Disable CFI check */
5433                 rctl = rd32(E1000_RCTL);
5434                 rctl &= ~E1000_RCTL_CFIEN;
5435                 wr32(E1000_RCTL, rctl);
5436         } else {
5437                 /* disable VLAN tag insert/strip */
5438                 ctrl = rd32(E1000_CTRL);
5439                 ctrl &= ~E1000_CTRL_VME;
5440                 wr32(E1000_CTRL, ctrl);
5441         }
5442
5443         igb_rlpml_set(adapter);
5444
5445         if (!test_bit(__IGB_DOWN, &adapter->state))
5446                 igb_irq_enable(adapter);
5447 }
5448
5449 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5450 {
5451         struct igb_adapter *adapter = netdev_priv(netdev);
5452         struct e1000_hw *hw = &adapter->hw;
5453         int pf_id = adapter->vfs_allocated_count;
5454
5455         /* attempt to add filter to vlvf array */
5456         igb_vlvf_set(adapter, vid, true, pf_id);
5457
5458         /* add the filter since PF can receive vlans w/o entry in vlvf */
5459         igb_vfta_set(hw, vid, true);
5460 }
5461
5462 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5463 {
5464         struct igb_adapter *adapter = netdev_priv(netdev);
5465         struct e1000_hw *hw = &adapter->hw;
5466         int pf_id = adapter->vfs_allocated_count;
5467         s32 err;
5468
5469         igb_irq_disable(adapter);
5470         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5471
5472         if (!test_bit(__IGB_DOWN, &adapter->state))
5473                 igb_irq_enable(adapter);
5474
5475         /* remove vlan from VLVF table array */
5476         err = igb_vlvf_set(adapter, vid, false, pf_id);
5477
5478         /* if vid was not present in VLVF just remove it from table */
5479         if (err)
5480                 igb_vfta_set(hw, vid, false);
5481 }
5482
5483 static void igb_restore_vlan(struct igb_adapter *adapter)
5484 {
5485         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5486
5487         if (adapter->vlgrp) {
5488                 u16 vid;
5489                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5490                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5491                                 continue;
5492                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5493                 }
5494         }
5495 }
5496
5497 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5498 {
5499         struct pci_dev *pdev = adapter->pdev;
5500         struct e1000_mac_info *mac = &adapter->hw.mac;
5501
5502         mac->autoneg = 0;
5503
5504         switch (spddplx) {
5505         case SPEED_10 + DUPLEX_HALF:
5506                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5507                 break;
5508         case SPEED_10 + DUPLEX_FULL:
5509                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5510                 break;
5511         case SPEED_100 + DUPLEX_HALF:
5512                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5513                 break;
5514         case SPEED_100 + DUPLEX_FULL:
5515                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5516                 break;
5517         case SPEED_1000 + DUPLEX_FULL:
5518                 mac->autoneg = 1;
5519                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5520                 break;
5521         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5522         default:
5523                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5524                 return -EINVAL;
5525         }
5526         return 0;
5527 }
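
/*
 * Illustrative user space sketch (not part of this driver): the forced
 * speed/duplex handled above is normally requested with the ethtool
 * ioctl (e.g. "ethtool -s eth0 speed 100 duplex full autoneg off") and
 * reaches igb_set_spd_dplx() via the driver's ethtool set_settings
 * hook.  The helper name is an assumption for this example only.
 */
#if 0	/* user space example, never built into the driver */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int force_100_full(const char *ifname)
{
	struct ethtool_cmd ecmd;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
	ifr.ifr_data = (void *)&ecmd;

	/* read current settings, then force speed/duplex with autoneg off */
	memset(&ecmd, 0, sizeof(ecmd));
	ecmd.cmd = ETHTOOL_GSET;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		goto err;

	ecmd.cmd = ETHTOOL_SSET;
	ecmd.autoneg = AUTONEG_DISABLE;
	ecmd.speed = SPEED_100;
	ecmd.duplex = DUPLEX_FULL;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		goto err;

	close(fd);
	return 0;
err:
	close(fd);
	return -1;
}
#endif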
5528
5529 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5530 {
5531         struct net_device *netdev = pci_get_drvdata(pdev);
5532         struct igb_adapter *adapter = netdev_priv(netdev);
5533         struct e1000_hw *hw = &adapter->hw;
5534         u32 ctrl, rctl, status;
5535         u32 wufc = adapter->wol;
5536 #ifdef CONFIG_PM
5537         int retval = 0;
5538 #endif
5539
5540         netif_device_detach(netdev);
5541
5542         if (netif_running(netdev))
5543                 igb_close(netdev);
5544
5545         igb_clear_interrupt_scheme(adapter);
5546
5547 #ifdef CONFIG_PM
5548         retval = pci_save_state(pdev);
5549         if (retval)
5550                 return retval;
5551 #endif
5552
5553         status = rd32(E1000_STATUS);
5554         if (status & E1000_STATUS_LU)
5555                 wufc &= ~E1000_WUFC_LNKC;
5556
5557         if (wufc) {
5558                 igb_setup_rctl(adapter);
5559                 igb_set_rx_mode(netdev);
5560
5561                 /* turn on all-multi mode if wake on multicast is enabled */
5562                 if (wufc & E1000_WUFC_MC) {
5563                         rctl = rd32(E1000_RCTL);
5564                         rctl |= E1000_RCTL_MPE;
5565                         wr32(E1000_RCTL, rctl);
5566                 }
5567
5568                 ctrl = rd32(E1000_CTRL);
5569                 /* advertise wake from D3Cold */
5570                 #define E1000_CTRL_ADVD3WUC 0x00100000
5571                 /* phy power management enable */
5572                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5573                 ctrl |= E1000_CTRL_ADVD3WUC;
5574                 wr32(E1000_CTRL, ctrl);
5575
5576                 /* Allow time for pending master requests to run */
5577                 igb_disable_pcie_master(hw);
5578
5579                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5580                 wr32(E1000_WUFC, wufc);
5581         } else {
5582                 wr32(E1000_WUC, 0);
5583                 wr32(E1000_WUFC, 0);
5584         }
5585
5586         *enable_wake = wufc || adapter->en_mng_pt;
5587         if (!*enable_wake)
5588                 igb_shutdown_serdes_link_82575(hw);
5589
5590         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5591          * would have already happened in close and is redundant. */
5592         igb_release_hw_control(adapter);
5593
5594         pci_disable_device(pdev);
5595
5596         return 0;
5597 }
5598
5599 #ifdef CONFIG_PM
5600 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5601 {
5602         int retval;
5603         bool wake;
5604
5605         retval = __igb_shutdown(pdev, &wake);
5606         if (retval)
5607                 return retval;
5608
5609         if (wake) {
5610                 pci_prepare_to_sleep(pdev);
5611         } else {
5612                 pci_wake_from_d3(pdev, false);
5613                 pci_set_power_state(pdev, PCI_D3hot);
5614         }
5615
5616         return 0;
5617 }
5618
5619 static int igb_resume(struct pci_dev *pdev)
5620 {
5621         struct net_device *netdev = pci_get_drvdata(pdev);
5622         struct igb_adapter *adapter = netdev_priv(netdev);
5623         struct e1000_hw *hw = &adapter->hw;
5624         u32 err;
5625
5626         pci_set_power_state(pdev, PCI_D0);
5627         pci_restore_state(pdev);
5628
5629         err = pci_enable_device_mem(pdev);
5630         if (err) {
5631                 dev_err(&pdev->dev,
5632                         "igb: Cannot enable PCI device from suspend\n");
5633                 return err;
5634         }
5635         pci_set_master(pdev);
5636
5637         pci_enable_wake(pdev, PCI_D3hot, 0);
5638         pci_enable_wake(pdev, PCI_D3cold, 0);
5639
5640         if (igb_init_interrupt_scheme(adapter)) {
5641                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5642                 return -ENOMEM;
5643         }
5644
5645         /* e1000_power_up_phy(adapter); */
5646
5647         igb_reset(adapter);
5648
5649         /* let the f/w know that the h/w is now under the control of the
5650          * driver. */
5651         igb_get_hw_control(adapter);
5652
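        /* clear any wake-up status left over from the low-power state */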
5653         wr32(E1000_WUS, ~0);
5654
5655         if (netif_running(netdev)) {
5656                 err = igb_open(netdev);
5657                 if (err)
5658                         return err;
5659         }
5660
5661         netif_device_attach(netdev);
5662
5663         return 0;
5664 }
5665 #endif
5666
5667 static void igb_shutdown(struct pci_dev *pdev)
5668 {
5669         bool wake;
5670
5671         __igb_shutdown(pdev, &wake);
5672
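        /* when powering off, leave PME armed only if wake was requested */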
5673         if (system_state == SYSTEM_POWER_OFF) {
5674                 pci_wake_from_d3(pdev, wake);
5675                 pci_set_power_state(pdev, PCI_D3hot);
5676         }
5677 }
5678
5679 #ifdef CONFIG_NET_POLL_CONTROLLER
5680 /*
5681  * Polling 'interrupt' - used by things like netconsole to send skbs
5682  * without having to re-enable interrupts. It's not called while
5683  * the interrupt routine is executing.
5684  */
5685 static void igb_netpoll(struct net_device *netdev)
5686 {
5687         struct igb_adapter *adapter = netdev_priv(netdev);
5688         struct e1000_hw *hw = &adapter->hw;
5689         int i;
5690
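        /* legacy/MSI: a single vector services everything, so disable the
         * interrupt and poll the one NAPI context */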
5691         if (!adapter->msix_entries) {
5692                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5693                 igb_irq_disable(adapter);
5694                 napi_schedule(&q_vector->napi);
5695                 return;
5696         }
5697
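        /* MSI-X: mask each vector via EIMC and poll its NAPI context */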
5698         for (i = 0; i < adapter->num_q_vectors; i++) {
5699                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5700                 wr32(E1000_EIMC, q_vector->eims_value);
5701                 napi_schedule(&q_vector->napi);
5702         }
5703 }
5704 #endif /* CONFIG_NET_POLL_CONTROLLER */
5705
5706 /**
5707  * igb_io_error_detected - called when PCI error is detected
5708  * @pdev: Pointer to PCI device
5709  * @state: The current pci connection state
5710  *
5711  * This function is called after a PCI bus error affecting
5712  * this device has been detected.
5713  */
5714 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5715                                               pci_channel_state_t state)
5716 {
5717         struct net_device *netdev = pci_get_drvdata(pdev);
5718         struct igb_adapter *adapter = netdev_priv(netdev);
5719
5720         netif_device_detach(netdev);
5721
5722         if (state == pci_channel_io_perm_failure)
5723                 return PCI_ERS_RESULT_DISCONNECT;
5724
5725         if (netif_running(netdev))
5726                 igb_down(adapter);
5727         pci_disable_device(pdev);
5728
5729         /* Request a slot reset. */
5730         return PCI_ERS_RESULT_NEED_RESET;
5731 }
5732
5733 /**
5734  * igb_io_slot_reset - called after the pci bus has been reset.
5735  * @pdev: Pointer to PCI device
5736  *
5737  * Restart the card from scratch, as if from a cold boot. Implementation
5738  * resembles the first half of the igb_resume routine.
5739  */
5740 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5741 {
5742         struct net_device *netdev = pci_get_drvdata(pdev);
5743         struct igb_adapter *adapter = netdev_priv(netdev);
5744         struct e1000_hw *hw = &adapter->hw;
5745         pci_ers_result_t result;
5746         int err;
5747
5748         if (pci_enable_device_mem(pdev)) {
5749                 dev_err(&pdev->dev,
5750                         "Cannot re-enable PCI device after reset.\n");
5751                 result = PCI_ERS_RESULT_DISCONNECT;
5752         } else {
5753                 pci_set_master(pdev);
5754                 pci_restore_state(pdev);
5755
5756                 pci_enable_wake(pdev, PCI_D3hot, 0);
5757                 pci_enable_wake(pdev, PCI_D3cold, 0);
5758
5759                 igb_reset(adapter);
5760                 wr32(E1000_WUS, ~0);
5761                 result = PCI_ERS_RESULT_RECOVERED;
5762         }
5763
5764         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5765         if (err) {
5766                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5767                         "failed 0x%0x\n", err);
5768                 /* non-fatal, continue */
5769         }
5770
5771         return result;
5772 }
5773
5774 /**
5775  * igb_io_resume - called when traffic can start flowing again.
5776  * @pdev: Pointer to PCI device
5777  *
5778  * This callback is called when the error recovery driver tells us that
5779  * it's OK to resume normal operation. Implementation resembles the
5780  * second half of the igb_resume routine.
5781  */
5782 static void igb_io_resume(struct pci_dev *pdev)
5783 {
5784         struct net_device *netdev = pci_get_drvdata(pdev);
5785         struct igb_adapter *adapter = netdev_priv(netdev);
5786
5787         if (netif_running(netdev)) {
5788                 if (igb_up(adapter)) {
5789                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5790                         return;
5791                 }
5792         }
5793
5794         netif_device_attach(netdev);
5795
5796         /* let the f/w know that the h/w is now under the control of the
5797          * driver. */
5798         igb_get_hw_control(adapter);
5799 }
5800
5801 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5802                              u8 qsel)
5803 {
5804         u32 rar_low, rar_high;
5805         struct e1000_hw *hw = &adapter->hw;
5806
5807         /* HW expects these in little endian so we reverse the byte order
5808          * from network order (big endian) to little endian
5809          */
5810         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5811                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5812         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5813
5814         /* Indicate to hardware the Address is Valid. */
5815         rar_high |= E1000_RAH_AV;
5816
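        /* 82575 encodes the target pool as a multiple of the POOL_1 bit,
         * later MACs use one pool-select bit per queue/VF */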
5817         if (hw->mac.type == e1000_82575)
5818                 rar_high |= E1000_RAH_POOL_1 * qsel;
5819         else
5820                 rar_high |= E1000_RAH_POOL_1 << qsel;
5821
5822         wr32(E1000_RAL(index), rar_low);
5823         wrfl();
5824         wr32(E1000_RAH(index), rar_high);
5825         wrfl();
5826 }
5827
5828 static int igb_set_vf_mac(struct igb_adapter *adapter,
5829                           int vf, unsigned char *mac_addr)
5830 {
5831         struct e1000_hw *hw = &adapter->hw;
5832         /* VF MAC addresses start at the end of the receive addresses and
5833          * move towards the first, so a collision should not be possible */
5834         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5835
5836         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5837
5838         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5839
5840         return 0;
5841 }
5842
5843 static void igb_vmm_control(struct igb_adapter *adapter)
5844 {
5845         struct e1000_hw *hw = &adapter->hw;
5846         u32 reg;
5847
5848         /* replication is not supported for 82575 */
5849         if (hw->mac.type == e1000_82575)
5850                 return;
5851
5852         /* enable replication vlan tag stripping */
5853         reg = rd32(E1000_RPLOLR);
5854         reg |= E1000_RPLOLR_STRVLAN;
5855         wr32(E1000_RPLOLR, reg);
5856
5857         /* notify HW that the MAC is adding vlan tags */
5858         reg = rd32(E1000_DTXCTL);
5859         reg |= E1000_DTXCTL_VLAN_ADDED;
5860         wr32(E1000_DTXCTL, reg);
5861
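        /* with VFs allocated, loop VM-to-VM traffic back in the MAC and
         * replicate broadcast/multicast frames to each pool; otherwise
         * disable both */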
5862         if (adapter->vfs_allocated_count) {
5863                 igb_vmdq_set_loopback_pf(hw, true);
5864                 igb_vmdq_set_replication_pf(hw, true);
5865         } else {
5866                 igb_vmdq_set_loopback_pf(hw, false);
5867                 igb_vmdq_set_replication_pf(hw, false);
5868         }
5869 }
5870
5871 /* igb_main.c */