igb: move timesync init into a separate function
drivers/net/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define DRV_VERSION "1.3.16-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static struct pci_device_id igb_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}
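
/*
 * igb_read_clock() is the read hook for a struct cyclecounter.  As a rough
 * sketch of the separated timesync-init helper the patch title refers to,
 * the setup could look like the following; the helper name, the shift
 * value, and the field choices here are illustrative assumptions, not
 * taken from this listing:
 */
#if 0
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
        /* describe SYSTIM as a free-running 64-bit cycle counter */
        adapter->cycles.read = igb_read_clock;
        adapter->cycles.mask = CLOCKSOURCE_MASK(64);
        adapter->cycles.mult = 1;
        adapter->cycles.shift = 19;     /* assumed SYSTIM scaling */

        /* wrap the cycle counter in a timecounter seeded with wall time */
        timecounter_init(&adapter->clock, &adapter->cycles,
                         ktime_to_ns(ktime_get_real()));
}
#endif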

#ifdef DEBUG
/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev->name;
}

/**
 * igb_get_time_str - format current NIC and system time as string
 */
static char *igb_get_time_str(struct igb_adapter *adapter,
                              char buffer[160])
{
        cycle_t hw = adapter->cycles.read(&adapter->cycles);
        struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
        struct timespec sys;
        struct timespec delta;
        getnstimeofday(&sys);

        delta = timespec_sub(nic, sys);

        sprintf(buffer,
                "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
                hw,
                (long)nic.tv_sec, nic.tv_nsec,
                (long)sys.tv_sec, sys.tv_nsec,
                (long)delta.tv_sec, delta.tv_nsec);

        return buffer;
}
#endif

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->num_rx_queues; i++)
                                adapter->rx_ring[i].reg_idx = rbase_offset +
                                                              Q_IDX_82576(i);
                        for (; j < adapter->num_tx_queues; j++)
                                adapter->tx_ring[j].reg_idx = rbase_offset +
                                                              Q_IDX_82576(j);
                }
        case e1000_82575:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i].reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j].reg_idx = rbase_offset + j;
                break;
        }
}
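
/*
 * To make the interleaving above concrete, Q_IDX_82576(i) expands to
 * ((i & 0x1) << 3) + (i >> 1):
 *
 *      i = 0 -> 0,  i = 1 -> 8,  i = 2 -> 1,  i = 3 -> 9, ...
 *
 * so consecutive PF rings alternate between the low (0..7) and high
 * (8..15) halves of the queue register space, mirroring the VF layout
 * described in the comment, while rbase_offset skips past the entries
 * the VFs already own.
 */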

static void igb_free_queues(struct igb_adapter *adapter)
{
        kfree(adapter->tx_ring);
        kfree(adapter->rx_ring);

        adapter->tx_ring = NULL;
        adapter->rx_ring = NULL;

        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        int i;

        adapter->tx_ring = kcalloc(adapter->num_tx_queues,
                                   sizeof(struct igb_ring), GFP_KERNEL);
        if (!adapter->tx_ring)
                goto err;

        adapter->rx_ring = kcalloc(adapter->num_rx_queues,
                                   sizeof(struct igb_ring), GFP_KERNEL);
        if (!adapter->rx_ring)
                goto err;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igb_ring *ring = &(adapter->tx_ring[i]);
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
        }

        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = &(adapter->rx_ring[i]);
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
        }

        igb_cache_ring_register(adapter);

        return 0;

err:
        igb_free_queues(adapter);

        return -ENOMEM;
}

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        u32 msixbm = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        u32 ivar, index;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;

        if (q_vector->rx_ring)
                rx_queue = q_vector->rx_ring->reg_idx;
        if (q_vector->tx_ring)
                tx_queue = q_vector->tx_ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /* 82576 uses a table-based method for assigning vectors.
                   Each queue has a single entry in the table to which we write
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue < 8) {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        } else {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue < 8) {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        } else {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }
}
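
/*
 * The 82576 IVAR layout handled above is easier to see as a byte map.
 * Each 32-bit IVAR0[n] entry (n = queue & 0x7) carries four vector
 * assignments, one per byte, each tagged with the E1000_IVAR_VALID bit:
 *
 *      bits  7:0   - RX queue n
 *      bits 15:8   - TX queue n
 *      bits 23:16  - RX queue n + 8
 *      bits 31:24  - TX queue n + 8
 *
 * which is why an RX queue >= 8 is shifted into the third byte and a
 * TX queue >= 8 into the high byte.
 */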

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                igb_assign_vector(q_vector, vector++);
                adapter->eims_enable_mask |= q_vector->eims_value;
        }

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          &igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx_ring && q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else if (q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx_ring->queue_index);
                else if (q_vector->rx_ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  &igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
        adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate, add 1 for every tx queue */
        numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced Tx Queue count. */
        adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
        return;
}
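
/*
 * A worked example of the vector budget above: with four online CPUs
 * (and IGB_MAX_RX/TX_QUEUES at least that large), num_rx_queues =
 * num_tx_queues = 4, so numvecs is 4 + 4 = 8 queue vectors plus 1 for
 * link/other = 9 MSI-X entries.  If pci_enable_msix() cannot grant the
 * full set, the driver drops back to a single vector (MSI, or legacy
 * shared) driving one RX and one TX queue.
 */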

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                q_vector->set_itr = 1;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        return 0;

err_out:
        while (v_idx) {
                v_idx--;
                q_vector = adapter->q_vector[v_idx];
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
                adapter->q_vector[v_idx] = NULL;
        }
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector;

        q_vector = adapter->q_vector[v_idx];
        q_vector->rx_ring = &adapter->rx_ring[ring_idx];
        q_vector->rx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector;

        q_vector = adapter->q_vector[v_idx];
        q_vector->tx_ring = &adapter->tx_ring[ring_idx];
        q_vector->tx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->tx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}
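
/*
 * Mapping example: with 4 RX and 4 TX queues and 8 or more q_vectors,
 * every queue gets its own vector.  With only 4 q_vectors the else
 * branch pairs the queues instead, so vector i services both tx_ring[i]
 * and rx_ring[i]; any TX queues beyond the RX count would then claim
 * the remaining vectors.
 */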

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        igb_set_interrupt_capability(adapter);

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(adapter->pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        } else {
                switch (hw->mac.type) {
                case e1000_82575:
                        wr32(E1000_MSIXBM(0),
                             (E1000_EICR_RX_QUEUE0 |
                              E1000_EICR_TX_QUEUE0 |
                              E1000_EIMS_OTHER));
                        break;
                case e1000_82576:
                        wr32(E1000_IVAR0, E1000_IVAR_VALID);
                        break;
                default:
                        break;
                }
        }

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++) {
                        struct igb_q_vector *q_vector = adapter->q_vector[i];
                        free_irq(adapter->msix_entries[vector++].vector,
                                 q_vector);
                }
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
         * we need to be careful when disabling interrupts.  The VFs are also
         * mapped into these registers and so clearing the bits can cause
         * issues on the VF drivers so we only need to clear what we set
         */
        if (adapter->msix_entries) {
                u32 regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
                wr32(E1000_EIMC, adapter->eims_enable_mask);
                regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }

        wr32(E1000_IAM, 0);
        wr32(E1000_IMC, ~0);
        wrfl();
        synchronize_irq(adapter->pdev->irq);
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        if (adapter->msix_entries) {
                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
                u32 regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
                regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count) {
                        wr32(E1000_MBVFIMR, 0xFF);
                        ims |= E1000_IMS_VMMB;
                }
                wr32(E1000_IMS, ims);
        } else {
                wr32(E1000_IMS, IMS_ENABLE_MASK);
                wr32(E1000_IAM, IMS_ENABLE_MASK);
        }
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;

        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
                igb_vfta_set(hw, vid, true);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
        }

        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
            (vid != old_vid) &&
            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
                /* remove VID from filter table */
                igb_vfta_set(hw, old_vid, false);
        }
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int i;

        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);

        igb_restore_vlan(adapter);

        igb_setup_tctl(adapter);
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);

        igb_configure_tx(adapter);
        igb_configure_rx(adapter);

        igb_rx_fifo_flush_82575(&adapter->hw);

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = &adapter->rx_ring[i];
                igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
        }

        adapter->tx_queue_len = netdev->tx_queue_len;
}
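
/*
 * Why leave one descriptor unused: with a 256-entry ring, filling all
 * 256 slots would make a completely full ring indistinguishable from a
 * completely empty one (next_to_use == next_to_clean either way).
 * Capping the fill at 255 keeps the two states distinct.
 */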

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        int i;

        /* hardware has been reset, we need to reload some things */
        igb_configure(adapter);

        clear_bit(__IGB_DOWN, &adapter->state);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_enable(&q_vector->napi);
        }
        if (adapter->msix_entries)
                igb_configure_msix(adapter);

        /* Clear any pending interrupts. */
        rd32(E1000_ICR);
        igb_irq_enable(adapter);

        /* notify VFs that reset has been completed */
        if (adapter->vfs_allocated_count) {
                u32 reg_data = rd32(E1000_CTRL_EXT);
                reg_data |= E1000_CTRL_EXT_PFRSTD;
                wr32(E1000_CTRL_EXT, reg_data);
        }

        netif_tx_start_all_queues(adapter->netdev);

        /* start the watchdog. */
        hw->mac.get_link_status = 1;
        schedule_work(&adapter->watchdog_task);

        return 0;
}

void igb_down(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        u32 tctl, rctl;
        int i;

        /* signal that we're down so the interrupt handler does not
         * reschedule our watchdog timer */
        set_bit(__IGB_DOWN, &adapter->state);

        /* disable receives in the hardware */
        rctl = rd32(E1000_RCTL);
        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
        /* flush and sleep below */

        netif_tx_stop_all_queues(netdev);

        /* disable transmits in the hardware */
        tctl = rd32(E1000_TCTL);
        tctl &= ~E1000_TCTL_EN;
        wr32(E1000_TCTL, tctl);
        /* flush both disables and wait for them to finish */
        wrfl();
        msleep(10);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_disable(&q_vector->napi);
        }

        igb_irq_disable(adapter);

        del_timer_sync(&adapter->watchdog_timer);
        del_timer_sync(&adapter->phy_info_timer);

        netdev->tx_queue_len = adapter->tx_queue_len;
        netif_carrier_off(netdev);

        /* record the stats before reset */
        igb_update_stats(adapter);

        adapter->link_speed = 0;
        adapter->link_duplex = 0;

        if (!pci_channel_offline(adapter->pdev))
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
        /* since we reset the hardware, DCA settings were cleared */
        igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
        WARN_ON(in_interrupt());
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                msleep(1);
        igb_down(adapter);
        igb_up(adapter);
        clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct e1000_fc_info *fc = &hw->fc;
        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
        u16 hwm;

        /* Repartition PBA for greater than 9k MTU.
         * To take effect, CTRL.RST is required.
         */
        switch (mac->type) {
        case e1000_82576:
                pba = rd32(E1000_RXPBS);
                pba &= E1000_RXPBS_SIZE_MASK_82576;
                break;
        case e1000_82575:
        default:
                pba = E1000_PBA_34K;
                break;
        }

        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
            (mac->type < e1000_82576)) {
                /* adjust PBA for jumbo frames */
                wr32(E1000_PBA, pba);

                /* To maintain wire speed transmits, the Tx FIFO should be
                 * large enough to accommodate two full transmit packets,
                 * rounded up to the next 1KB and expressed in KB.  Likewise,
                 * the Rx FIFO should be large enough to accommodate at least
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB. */
                pba = rd32(E1000_PBA);
                /* upper 16 bits has Tx packet buffer allocation size in KB */
                tx_space = pba >> 16;
                /* lower 16 bits has Rx packet buffer allocation size in KB */
                pba &= 0xffff;
                /* the tx fifo also stores 16 bytes of information about the tx
                 * but don't include ethernet FCS because hardware appends it */
                min_tx_space = (adapter->max_frame_size +
                                sizeof(union e1000_adv_tx_desc) -
                                ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
                min_tx_space >>= 10;
                /* software strips receive CRC, so leave room for it */
                min_rx_space = adapter->max_frame_size;
                min_rx_space = ALIGN(min_rx_space, 1024);
                min_rx_space >>= 10;

                /* If current Tx allocation is less than the min Tx FIFO size,
                 * and the min Tx FIFO size is less than the current Rx FIFO
                 * allocation, take space away from current Rx allocation */
                if (tx_space < min_tx_space &&
                    ((min_tx_space - tx_space) < pba)) {
                        pba = pba - (min_tx_space - tx_space);

                        /* if short on rx space, rx wins and must trump tx
                         * adjustment */
                        if (pba < min_rx_space)
                                pba = min_rx_space;
                }
                wr32(E1000_PBA, pba);
        }

        /* flow control settings */
        /* The high water mark must be low enough to fit one full frame
         * (or the size used for early receive) above it in the Rx FIFO.
         * Set it to the lower of:
         * - 90% of the Rx FIFO size, or
         * - the full Rx FIFO size minus one full frame */
        hwm = min(((pba << 10) * 9 / 10),
                        ((pba << 10) - 2 * adapter->max_frame_size));

        if (mac->type < e1000_82576) {
                fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
                fc->low_water = fc->high_water - 8;
        } else {
                fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
                fc->low_water = fc->high_water - 16;
        }
        fc->pause_time = 0xFFFF;
        fc->send_xon = 1;
        fc->current_mode = fc->requested_mode;

        /* disable receive for all VFs and wait one second */
        if (adapter->vfs_allocated_count) {
                int i;
                for (i = 0 ; i < adapter->vfs_allocated_count; i++)
                        adapter->vf_data[i].flags = 0;

                /* ping all the active vfs to let them know we are going down */
                igb_ping_all_vfs(adapter);

                /* disable transmits and receives */
                wr32(E1000_VFRE, 0);
                wr32(E1000_VFTE, 0);
        }

        /* Allow time for pending master requests to run */
        hw->mac.ops.reset_hw(hw);
        wr32(E1000_WUC, 0);

        if (hw->mac.ops.init_hw(hw))
                dev_err(&pdev->dev, "Hardware Error\n");

        igb_update_mng_vlan(adapter);

        /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
        wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

        igb_reset_adaptive(hw);
        igb_get_phy_info(hw);
}
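
/*
 * A worked example of the flow-control math above, assuming an 82575
 * with a 34 KB RX packet buffer (E1000_PBA_34K) and a 1522-byte max
 * frame:
 *
 *      (pba << 10) * 9 / 10              = 34816 * 9 / 10 = 31334
 *      (pba << 10) - 2 * max_frame_size  = 34816 - 3044   = 31772
 *      hwm = min(31334, 31772)           = 31334
 *      high_water = hwm & 0xFFF8         = 31328  (8-byte granularity)
 *      low_water  = high_water - 8       = 31320
 */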
1287
1288 static const struct net_device_ops igb_netdev_ops = {
1289         .ndo_open               = igb_open,
1290         .ndo_stop               = igb_close,
1291         .ndo_start_xmit         = igb_xmit_frame_adv,
1292         .ndo_get_stats          = igb_get_stats,
1293         .ndo_set_rx_mode        = igb_set_rx_mode,
1294         .ndo_set_multicast_list = igb_set_rx_mode,
1295         .ndo_set_mac_address    = igb_set_mac,
1296         .ndo_change_mtu         = igb_change_mtu,
1297         .ndo_do_ioctl           = igb_ioctl,
1298         .ndo_tx_timeout         = igb_tx_timeout,
1299         .ndo_validate_addr      = eth_validate_addr,
1300         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1301         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1302         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1303 #ifdef CONFIG_NET_POLL_CONTROLLER
1304         .ndo_poll_controller    = igb_netpoll,
1305 #endif
1306 };
1307
1308 /**
1309  * igb_probe - Device Initialization Routine
1310  * @pdev: PCI device information struct
1311  * @ent: entry in igb_pci_tbl
1312  *
1313  * Returns 0 on success, negative on failure
1314  *
1315  * igb_probe initializes an adapter identified by a pci_dev structure.
1316  * The OS initialization, configuring of the adapter private structure,
1317  * and a hardware reset occur.
1318  **/
1319 static int __devinit igb_probe(struct pci_dev *pdev,
1320                                const struct pci_device_id *ent)
1321 {
1322         struct net_device *netdev;
1323         struct igb_adapter *adapter;
1324         struct e1000_hw *hw;
1325         u16 eeprom_data = 0;
1326         static int global_quad_port_a; /* global quad port a indication */
1327         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1328         unsigned long mmio_start, mmio_len;
1329         int err, pci_using_dac;
1330         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1331         u32 part_num;
1332
1333         err = pci_enable_device_mem(pdev);
1334         if (err)
1335                 return err;
1336
1337         pci_using_dac = 0;
1338         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1339         if (!err) {
1340                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1341                 if (!err)
1342                         pci_using_dac = 1;
1343         } else {
1344                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1345                 if (err) {
1346                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1347                         if (err) {
1348                                 dev_err(&pdev->dev, "No usable DMA "
1349                                         "configuration, aborting\n");
1350                                 goto err_dma;
1351                         }
1352                 }
1353         }
1354
1355         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1356                                            IORESOURCE_MEM),
1357                                            igb_driver_name);
1358         if (err)
1359                 goto err_pci_reg;
1360
1361         pci_enable_pcie_error_reporting(pdev);
1362
1363         pci_set_master(pdev);
1364         pci_save_state(pdev);
1365
1366         err = -ENOMEM;
1367         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1368                                    IGB_ABS_MAX_TX_QUEUES);
1369         if (!netdev)
1370                 goto err_alloc_etherdev;
1371
1372         SET_NETDEV_DEV(netdev, &pdev->dev);
1373
1374         pci_set_drvdata(pdev, netdev);
1375         adapter = netdev_priv(netdev);
1376         adapter->netdev = netdev;
1377         adapter->pdev = pdev;
1378         hw = &adapter->hw;
1379         hw->back = adapter;
1380         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1381
1382         mmio_start = pci_resource_start(pdev, 0);
1383         mmio_len = pci_resource_len(pdev, 0);
1384
1385         err = -EIO;
1386         hw->hw_addr = ioremap(mmio_start, mmio_len);
1387         if (!hw->hw_addr)
1388                 goto err_ioremap;
1389
1390         netdev->netdev_ops = &igb_netdev_ops;
1391         igb_set_ethtool_ops(netdev);
1392         netdev->watchdog_timeo = 5 * HZ;
1393
1394         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1395
1396         netdev->mem_start = mmio_start;
1397         netdev->mem_end = mmio_start + mmio_len;
1398
1399         /* PCI config space info */
1400         hw->vendor_id = pdev->vendor;
1401         hw->device_id = pdev->device;
1402         hw->revision_id = pdev->revision;
1403         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1404         hw->subsystem_device_id = pdev->subsystem_device;
1405
1406         /* Copy the default MAC, PHY and NVM function pointers */
1407         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1408         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1409         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1410         /* Initialize skew-specific constants */
1411         err = ei->get_invariants(hw);
1412         if (err)
1413                 goto err_sw_init;
1414
1415         /* setup the private structure */
1416         err = igb_sw_init(adapter);
1417         if (err)
1418                 goto err_sw_init;
1419
1420         igb_get_bus_info_pcie(hw);
1421
1422         hw->phy.autoneg_wait_to_complete = false;
1423         hw->mac.adaptive_ifs = true;
1424
1425         /* Copper options */
1426         if (hw->phy.media_type == e1000_media_type_copper) {
1427                 hw->phy.mdix = AUTO_ALL_MODES;
1428                 hw->phy.disable_polarity_correction = false;
1429                 hw->phy.ms_type = e1000_ms_hw_default;
1430         }
1431
1432         if (igb_check_reset_block(hw))
1433                 dev_info(&pdev->dev,
1434                         "PHY reset is blocked due to SOL/IDER session.\n");
1435
1436         netdev->features = NETIF_F_SG |
1437                            NETIF_F_IP_CSUM |
1438                            NETIF_F_HW_VLAN_TX |
1439                            NETIF_F_HW_VLAN_RX |
1440                            NETIF_F_HW_VLAN_FILTER;
1441
1442         netdev->features |= NETIF_F_IPV6_CSUM;
1443         netdev->features |= NETIF_F_TSO;
1444         netdev->features |= NETIF_F_TSO6;
1445         netdev->features |= NETIF_F_GRO;
1446
1447         netdev->vlan_features |= NETIF_F_TSO;
1448         netdev->vlan_features |= NETIF_F_TSO6;
1449         netdev->vlan_features |= NETIF_F_IP_CSUM;
1450         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1451         netdev->vlan_features |= NETIF_F_SG;
1452
1453         if (pci_using_dac)
1454                 netdev->features |= NETIF_F_HIGHDMA;
1455
1456         if (hw->mac.type >= e1000_82576)
1457                 netdev->features |= NETIF_F_SCTP_CSUM;
1458
1459         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1460
1461         /* before reading the NVM, reset the controller to put the device in a
1462          * known good starting state */
1463         hw->mac.ops.reset_hw(hw);
1464
1465         /* make sure the NVM is good */
1466         if (igb_validate_nvm_checksum(hw) < 0) {
1467                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1468                 err = -EIO;
1469                 goto err_eeprom;
1470         }
1471
1472         /* copy the MAC address out of the NVM */
1473         if (hw->mac.ops.read_mac_addr(hw))
1474                 dev_err(&pdev->dev, "NVM Read Error\n");
1475
1476         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1477         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1478
1479         if (!is_valid_ether_addr(netdev->perm_addr)) {
1480                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1481                 err = -EIO;
1482                 goto err_eeprom;
1483         }
1484
1485         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1486                     (unsigned long) adapter);
1487         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1488                     (unsigned long) adapter);
1489
1490         INIT_WORK(&adapter->reset_task, igb_reset_task);
1491         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1492
1493         /* Initialize link properties that are user-changeable */
1494         adapter->fc_autoneg = true;
1495         hw->mac.autoneg = true;
1496         hw->phy.autoneg_advertised = 0x2f;
1497
1498         hw->fc.requested_mode = e1000_fc_default;
1499         hw->fc.current_mode = e1000_fc_default;
1500
1501         igb_validate_mdi_setting(hw);
1502
1503         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1504          * enable the ACPI Magic Packet filter
1505          */
1506
1507         if (hw->bus.func == 0)
1508                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1509         else if (hw->bus.func == 1)
1510                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1511
1512         if (eeprom_data & eeprom_apme_mask)
1513                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1514
1515         /* now that we have the eeprom settings, apply the special cases where
1516          * the eeprom may be wrong or the board simply won't support wake on
1517          * lan on a particular port */
1518         switch (pdev->device) {
1519         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1520                 adapter->eeprom_wol = 0;
1521                 break;
1522         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1523         case E1000_DEV_ID_82576_FIBER:
1524         case E1000_DEV_ID_82576_SERDES:
1525                 /* Wake events only supported on port A for dual fiber
1526                  * regardless of eeprom setting */
1527                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1528                         adapter->eeprom_wol = 0;
1529                 break;
1530         case E1000_DEV_ID_82576_QUAD_COPPER:
1531                 /* if quad port adapter, disable WoL on all but port A */
1532                 if (global_quad_port_a != 0)
1533                         adapter->eeprom_wol = 0;
1534                 else
1535                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1536                 /* Reset for multiple quad port adapters */
1537                 if (++global_quad_port_a == 4)
1538                         global_quad_port_a = 0;
1539                 break;
1540         }
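        /*
         * Illustrative walk-through of the quad-port bookkeeping above: on a
         * single 82576 quad-copper board the four ports probe in sequence, so
         * global_quad_port_a counts 0,1,2,3; only the first port (port A)
         * keeps its WoL setting and IGB_FLAG_QUAD_PORT_A, and the counter
         * then wraps to 0 so the next quad-port adapter is handled the same.
         */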
1541
1542         /* initialize the wol settings based on the eeprom settings */
1543         adapter->wol = adapter->eeprom_wol;
1544         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1545
1546         /* reset the hardware with the new settings */
1547         igb_reset(adapter);
1548
1549         /* let the f/w know that the h/w is now under the control of the
1550          * driver. */
1551         igb_get_hw_control(adapter);
1552
1553         strcpy(netdev->name, "eth%d");
1554         err = register_netdev(netdev);
1555         if (err)
1556                 goto err_register;
1557
1558         /* carrier off reporting is important to ethtool even BEFORE open */
1559         netif_carrier_off(netdev);
1560
1561 #ifdef CONFIG_IGB_DCA
1562         if (dca_add_requester(&pdev->dev) == 0) {
1563                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1564                 dev_info(&pdev->dev, "DCA enabled\n");
1565                 igb_setup_dca(adapter);
1566         }
1567
1568 #endif
1569         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1570         /* print bus type/speed/width info */
1571         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1572                  netdev->name,
1573                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1574                                                             "unknown"),
1575                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1576                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1577                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1578                    "unknown"),
1579                  netdev->dev_addr);
1580
1581         igb_read_part_num(hw, &part_num);
1582         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1583                 (part_num >> 8), (part_num & 0xff));
1584
1585         dev_info(&pdev->dev,
1586                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1587                 adapter->msix_entries ? "MSI-X" :
1588                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1589                 adapter->num_rx_queues, adapter->num_tx_queues);
1590
1591         return 0;
1592
1593 err_register:
1594         igb_release_hw_control(adapter);
1595 err_eeprom:
1596         if (!igb_check_reset_block(hw))
1597                 igb_reset_phy(hw);
1598
1599         if (hw->flash_address)
1600                 iounmap(hw->flash_address);
1601 err_sw_init:
1602         igb_clear_interrupt_scheme(adapter);
1603         iounmap(hw->hw_addr);
1604 err_ioremap:
1605         free_netdev(netdev);
1606 err_alloc_etherdev:
1607         pci_release_selected_regions(pdev,
1608                                      pci_select_bars(pdev, IORESOURCE_MEM));
1609 err_pci_reg:
1610 err_dma:
1611         pci_disable_device(pdev);
1612         return err;
1613 }
1614
1615 /**
1616  * igb_remove - Device Removal Routine
1617  * @pdev: PCI device information struct
1618  *
1619  * igb_remove is called by the PCI subsystem to alert the driver
1620  * that it should release a PCI device.  This could be caused by a
1621  * Hot-Plug event, or because the driver is going to be removed from
1622  * memory.
1623  **/
1624 static void __devexit igb_remove(struct pci_dev *pdev)
1625 {
1626         struct net_device *netdev = pci_get_drvdata(pdev);
1627         struct igb_adapter *adapter = netdev_priv(netdev);
1628         struct e1000_hw *hw = &adapter->hw;
1629
1630         /* flush_scheduled_work() may reschedule our watchdog task, so
1631          * explicitly disable watchdog tasks from being rescheduled  */
1632         set_bit(__IGB_DOWN, &adapter->state);
1633         del_timer_sync(&adapter->watchdog_timer);
1634         del_timer_sync(&adapter->phy_info_timer);
1635
1636         flush_scheduled_work();
1637
1638 #ifdef CONFIG_IGB_DCA
1639         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1640                 dev_info(&pdev->dev, "DCA disabled\n");
1641                 dca_remove_requester(&pdev->dev);
1642                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1643                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1644         }
1645 #endif
1646
1647         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1648          * would have already happened in close and is redundant. */
1649         igb_release_hw_control(adapter);
1650
1651         unregister_netdev(netdev);
1652
1653         if (!igb_check_reset_block(hw))
1654                 igb_reset_phy(hw);
1655
1656         igb_clear_interrupt_scheme(adapter);
1657
1658 #ifdef CONFIG_PCI_IOV
1659         /* reclaim resources allocated to VFs */
1660         if (adapter->vf_data) {
1661                 /* disable iov and allow time for transactions to clear */
1662                 pci_disable_sriov(pdev);
1663                 msleep(500);
1664
1665                 kfree(adapter->vf_data);
1666                 adapter->vf_data = NULL;
1667                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1668                 msleep(100);
1669                 dev_info(&pdev->dev, "IOV Disabled\n");
1670         }
1671 #endif
1672
1673         iounmap(hw->hw_addr);
1674         if (hw->flash_address)
1675                 iounmap(hw->flash_address);
1676         pci_release_selected_regions(pdev,
1677                                      pci_select_bars(pdev, IORESOURCE_MEM));
1678
1679         free_netdev(netdev);
1680
1681         pci_disable_pcie_error_reporting(pdev);
1682
1683         pci_disable_device(pdev);
1684 }
1685
1686 /**
1687  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1688  * @adapter: board private structure to initialize
1689  *
1690  * This function initializes the vf specific data storage and then attempts to
1691  * allocate the VFs.  It is ordered this way because it is much
1692  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1693  * the memory for the VFs.
1694  **/
1695 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1696 {
1697 #ifdef CONFIG_PCI_IOV
1698         struct pci_dev *pdev = adapter->pdev;
1699
1700         if (adapter->vfs_allocated_count > 7)
1701                 adapter->vfs_allocated_count = 7;
1702
1703         if (adapter->vfs_allocated_count) {
1704                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1705                                            sizeof(struct vf_data_storage),
1706                                            GFP_KERNEL);
1707                 /* if allocation failed then we do not support SR-IOV */
1708                 if (!adapter->vf_data) {
1709                         adapter->vfs_allocated_count = 0;
1710                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1711                                 "Data Storage\n");
1712                 }
1713         }
1714
1715         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1716                 kfree(adapter->vf_data);
1717                 adapter->vf_data = NULL;
1718 #endif /* CONFIG_PCI_IOV */
1719                 adapter->vfs_allocated_count = 0;
1720 #ifdef CONFIG_PCI_IOV
1721         } else {
1722                 unsigned char mac_addr[ETH_ALEN];
1723                 int i;
1724                 dev_info(&pdev->dev, "%d vfs allocated\n",
1725                          adapter->vfs_allocated_count);
1726                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1727                         random_ether_addr(mac_addr);
1728                         igb_set_vf_mac(adapter, i, mac_addr);
1729                 }
1730         }
1731 #endif /* CONFIG_PCI_IOV */
1732 }
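/*
 * Usage sketch (not driver code): the VF count consumed above comes from the
 * igb max_vfs module parameter and is capped at 7 per port, so e.g.
 *
 *	# modprobe igb max_vfs=2
 *
 * makes igb_probe_vfs() allocate vf_data for two VFs and hand each one a
 * random MAC address via igb_set_vf_mac().
 */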
1733
1735 /**
1736  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1737  * @adapter: board private structure to initialize
1738  *
1739  * igb_init_hw_timer initializes the function pointer and values for the hw
1740  * timer found in hardware.
1741  **/
1742 static void igb_init_hw_timer(struct igb_adapter *adapter)
1743 {
1744         struct e1000_hw *hw = &adapter->hw;
1745
1746         switch (hw->mac.type) {
1747         case e1000_82576:
1748                 /*
1749                  * Initialize hardware timer: we keep it running just in
1750                  * case some program needs it later on.
1751                  */
1752                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1753                 adapter->cycles.read = igb_read_clock;
1754                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1755                 adapter->cycles.mult = 1;
1756                 /*
1757                  * Scale the NIC clock cycle by a large factor so that
1758                  * relatively small clock corrections can be added or
1759                  * subtracted at each clock tick. The drawbacks of a large
1760                  * factor are a) that the clock register overflows more quickly
1761                  * (not such a big deal) and b) that the increment per tick has
1762                  * to fit into 24 bits.  As a result we need to use a shift of
1763                  * 19 so we can fit a value of 16 into the TIMINCA register.
1764                  */
1765                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1766                 wr32(E1000_TIMINCA,
1767                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1768                                 (16 << IGB_82576_TSYNC_SHIFT));
1769
1770                 /* Set registers so that rollover occurs soon to test this. */
1771                 wr32(E1000_SYSTIML, 0x00000000);
1772                 wr32(E1000_SYSTIMH, 0xFF800000);
1773                 wrfl();
1774
1775                 timecounter_init(&adapter->clock,
1776                                  &adapter->cycles,
1777                                  ktime_to_ns(ktime_get_real()));
1778                 /*
1779                  * Synchronize our NIC clock against system wall clock. NIC
1780                  * time stamp reads take ~3us per sample, and each sample
1781                  * proved stable even under load, so 10 samples per
1782                  * offset comparison are sufficient.
1783                  */
1784                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1785                 adapter->compare.source = &adapter->clock;
1786                 adapter->compare.target = ktime_get_real;
1787                 adapter->compare.num_samples = 10;
1788                 timecompare_update(&adapter->compare, 0);
1789                 break;
1790         case e1000_82575:
1791                 /* 82575 does not support timesync */
1792         default:
1793                 break;
1794         }
1796 }
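/*
 * Worked example of the scaling in igb_init_hw_timer() (a sketch, assuming
 * IGB_82576_TSYNC_SHIFT == 19 and the 16 ns SYSTIM clock period of the
 * 82576): each tick adds (16 << 19) to SYSTIM, and the cyclecounter
 * (mult == 1, shift == 19) turns that back into wall-clock nanoseconds.
 */
#if 0
	u64 cycles = 16ULL << 19;	/* SYSTIM delta after one 16 ns tick */
	u64 ns = (cycles * 1) >> 19;	/* cyclecounter conversion: ns == 16 */
#endif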
1797
1798 /**
1799  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1800  * @adapter: board private structure to initialize
1801  *
1802  * igb_sw_init initializes the Adapter private data structure.
1803  * Fields are initialized based on PCI device information and
1804  * OS network device settings (MTU size).
1805  **/
1806 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1807 {
1808         struct e1000_hw *hw = &adapter->hw;
1809         struct net_device *netdev = adapter->netdev;
1810         struct pci_dev *pdev = adapter->pdev;
1811
1812         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1813
1814         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1815         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1816         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1817         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1818
1819         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1820         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1821
1822 #ifdef CONFIG_PCI_IOV
1823         if (hw->mac.type == e1000_82576)
1824                 adapter->vfs_allocated_count = max_vfs;
1825
1826 #endif /* CONFIG_PCI_IOV */
1827         /* This call may decrease the number of queues */
1828         if (igb_init_interrupt_scheme(adapter)) {
1829                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1830                 return -ENOMEM;
1831         }
1832
1833         igb_init_hw_timer(adapter);
1834         igb_probe_vfs(adapter);
1835
1836         /* Explicitly disable IRQ since the NIC can be in any state. */
1837         igb_irq_disable(adapter);
1838
1839         set_bit(__IGB_DOWN, &adapter->state);
1840         return 0;
1841 }
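/*
 * For reference (assuming the usual ETH_HLEN == 14, ETH_FCS_LEN == 4 and
 * ETH_ZLEN == 60): with the default MTU of 1500, igb_sw_init() arrives at
 * max_frame_size = 1500 + 14 + 4 = 1518 and min_frame_size = 60 + 4 = 64,
 * the classic Ethernet frame limits.
 */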
1842
1843 /**
1844  * igb_open - Called when a network interface is made active
1845  * @netdev: network interface device structure
1846  *
1847  * Returns 0 on success, negative value on failure
1848  *
1849  * The open entry point is called when a network interface is made
1850  * active by the system (IFF_UP).  At this point all resources needed
1851  * for transmit and receive operations are allocated, the interrupt
1852  * handler is registered with the OS, the watchdog timer is started,
1853  * and the stack is notified that the interface is ready.
1854  **/
1855 static int igb_open(struct net_device *netdev)
1856 {
1857         struct igb_adapter *adapter = netdev_priv(netdev);
1858         struct e1000_hw *hw = &adapter->hw;
1859         int err;
1860         int i;
1861
1862         /* disallow open during test */
1863         if (test_bit(__IGB_TESTING, &adapter->state))
1864                 return -EBUSY;
1865
1866         netif_carrier_off(netdev);
1867
1868         /* allocate transmit descriptors */
1869         err = igb_setup_all_tx_resources(adapter);
1870         if (err)
1871                 goto err_setup_tx;
1872
1873         /* allocate receive descriptors */
1874         err = igb_setup_all_rx_resources(adapter);
1875         if (err)
1876                 goto err_setup_rx;
1877
1878         /* e1000_power_up_phy(adapter); */
1879
1880         /* before we allocate an interrupt, we must be ready to handle it.
1881          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1882          * as soon as we call pci_request_irq, so we have to set up our
1883          * clean_rx handler before we do so.  */
1884         igb_configure(adapter);
1885
1886         err = igb_request_irq(adapter);
1887         if (err)
1888                 goto err_req_irq;
1889
1890         /* From here on the code is the same as igb_up() */
1891         clear_bit(__IGB_DOWN, &adapter->state);
1892
1893         for (i = 0; i < adapter->num_q_vectors; i++) {
1894                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1895                 napi_enable(&q_vector->napi);
1896         }
1897
1898         /* Clear any pending interrupts. */
1899         rd32(E1000_ICR);
1900
1901         igb_irq_enable(adapter);
1902
1903         /* notify VFs that reset has been completed */
1904         if (adapter->vfs_allocated_count) {
1905                 u32 reg_data = rd32(E1000_CTRL_EXT);
1906                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1907                 wr32(E1000_CTRL_EXT, reg_data);
1908         }
1909
1910         netif_tx_start_all_queues(netdev);
1911
1912         /* start the watchdog. */
1913         hw->mac.get_link_status = 1;
1914         schedule_work(&adapter->watchdog_task);
1915
1916         return 0;
1917
1918 err_req_irq:
1919         igb_release_hw_control(adapter);
1920         /* e1000_power_down_phy(adapter); */
1921         igb_free_all_rx_resources(adapter);
1922 err_setup_rx:
1923         igb_free_all_tx_resources(adapter);
1924 err_setup_tx:
1925         igb_reset(adapter);
1926
1927         return err;
1928 }
1929
1930 /**
1931  * igb_close - Disables a network interface
1932  * @netdev: network interface device structure
1933  *
1934  * Returns 0, this is not allowed to fail
1935  *
1936  * The close entry point is called when an interface is de-activated
1937  * by the OS.  The hardware is still under the driver's control, but
1938  * needs to be disabled.  A global MAC reset is issued to stop the
1939  * hardware, and all transmit and receive resources are freed.
1940  **/
1941 static int igb_close(struct net_device *netdev)
1942 {
1943         struct igb_adapter *adapter = netdev_priv(netdev);
1944
1945         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1946         igb_down(adapter);
1947
1948         igb_free_irq(adapter);
1949
1950         igb_free_all_tx_resources(adapter);
1951         igb_free_all_rx_resources(adapter);
1952
1953         return 0;
1954 }
1955
1956 /**
1957  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1958  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1959  *
1960  * Return 0 on success, negative on failure
1961  **/
1962 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1963 {
1964         struct pci_dev *pdev = tx_ring->pdev;
1965         int size;
1966
1967         size = sizeof(struct igb_buffer) * tx_ring->count;
1968         tx_ring->buffer_info = vmalloc(size);
1969         if (!tx_ring->buffer_info)
1970                 goto err;
1971         memset(tx_ring->buffer_info, 0, size);
1972
1973         /* round up to nearest 4K */
1974         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1975         tx_ring->size = ALIGN(tx_ring->size, 4096);
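        /*
         * e.g. with the default of 256 descriptors at 16 bytes apiece this
         * is exactly 4096 bytes, so the ALIGN() is a no-op in that case.
         */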
1976
1977         tx_ring->desc = pci_alloc_consistent(pdev,
1978                                              tx_ring->size,
1979                                              &tx_ring->dma);
1980
1981         if (!tx_ring->desc)
1982                 goto err;
1983
1984         tx_ring->next_to_use = 0;
1985         tx_ring->next_to_clean = 0;
1986         return 0;
1987
1988 err:
1989         vfree(tx_ring->buffer_info);
1990         dev_err(&pdev->dev,
1991                 "Unable to allocate memory for the transmit descriptor ring\n");
1992         return -ENOMEM;
1993 }
1994
1995 /**
1996  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
1997  *                                (Descriptors) for all queues
1998  * @adapter: board private structure
1999  *
2000  * Return 0 on success, negative on failure
2001  **/
2002 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2003 {
2004         struct pci_dev *pdev = adapter->pdev;
2005         int i, err = 0;
2006
2007         for (i = 0; i < adapter->num_tx_queues; i++) {
2008                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2009                 if (err) {
2010                         dev_err(&pdev->dev,
2011                                 "Allocation for Tx Queue %u failed\n", i);
2012                         for (i--; i >= 0; i--)
2013                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2014                         break;
2015                 }
2016         }
2017
2018         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2019                 int r_idx = i % adapter->num_tx_queues;
2020                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2021         }
2022         return err;
2023 }
2024
2025 /**
2026  * igb_setup_tctl - configure the transmit control registers
2027  * @adapter: Board private structure
2028  **/
2029 void igb_setup_tctl(struct igb_adapter *adapter)
2030 {
2031         struct e1000_hw *hw = &adapter->hw;
2032         u32 tctl;
2033
2034         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2035         wr32(E1000_TXDCTL(0), 0);
2036
2037         /* Program the Transmit Control Register */
2038         tctl = rd32(E1000_TCTL);
2039         tctl &= ~E1000_TCTL_CT;
2040         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2041                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2042
2043         igb_config_collision_dist(hw);
2044
2045         /* Enable transmits */
2046         tctl |= E1000_TCTL_EN;
2047
2048         wr32(E1000_TCTL, tctl);
2049 }
2050
2051 /**
2052  * igb_configure_tx_ring - Configure transmit ring after Reset
2053  * @adapter: board private structure
2054  * @ring: tx ring to configure
2055  *
2056  * Configure a transmit ring after a reset.
2057  **/
2058 void igb_configure_tx_ring(struct igb_adapter *adapter,
2059                            struct igb_ring *ring)
2060 {
2061         struct e1000_hw *hw = &adapter->hw;
2062         u32 txdctl;
2063         u64 tdba = ring->dma;
2064         int reg_idx = ring->reg_idx;
2065
2066         /* disable the queue */
2067         txdctl = rd32(E1000_TXDCTL(reg_idx));
2068         wr32(E1000_TXDCTL(reg_idx),
2069                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2070         wrfl();
2071         mdelay(10);
2072
2073         wr32(E1000_TDLEN(reg_idx),
2074                         ring->count * sizeof(union e1000_adv_tx_desc));
2075         wr32(E1000_TDBAL(reg_idx),
2076                         tdba & 0x00000000ffffffffULL);
2077         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2078
2079         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2080         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2081         writel(0, ring->head);
2082         writel(0, ring->tail);
2083
2084         txdctl |= IGB_TX_PTHRESH;
2085         txdctl |= IGB_TX_HTHRESH << 8;
2086         txdctl |= IGB_TX_WTHRESH << 16;
2087
2088         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2089         wr32(E1000_TXDCTL(reg_idx), txdctl);
2090 }
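/*
 * Note on the TXDCTL shifts above: on 82575/82576 the prefetch threshold
 * (PTHRESH) sits in bits 5:0, the host threshold (HTHRESH) in bits 13:8 and
 * the write-back threshold (WTHRESH) in bits 21:16 (per the datasheet),
 * hence the shifts of 8 and 16.
 */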
2091
2092 /**
2093  * igb_configure_tx - Configure transmit Unit after Reset
2094  * @adapter: board private structure
2095  *
2096  * Configure the Tx unit of the MAC after a reset.
2097  **/
2098 static void igb_configure_tx(struct igb_adapter *adapter)
2099 {
2100         int i;
2101
2102         for (i = 0; i < adapter->num_tx_queues; i++)
2103                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2104 }
2105
2106 /**
2107  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2108  * @rx_ring: rx descriptor ring (for a specific queue) to setup
2109  *
2110  * Returns 0 on success, negative on failure
2111  **/
2112 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2113 {
2114         struct pci_dev *pdev = rx_ring->pdev;
2115         int size, desc_len;
2116
2117         size = sizeof(struct igb_buffer) * rx_ring->count;
2118         rx_ring->buffer_info = vmalloc(size);
2119         if (!rx_ring->buffer_info)
2120                 goto err;
2121         memset(rx_ring->buffer_info, 0, size);
2122
2123         desc_len = sizeof(union e1000_adv_rx_desc);
2124
2125         /* Round up to nearest 4K */
2126         rx_ring->size = rx_ring->count * desc_len;
2127         rx_ring->size = ALIGN(rx_ring->size, 4096);
2128
2129         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2130                                              &rx_ring->dma);
2131
2132         if (!rx_ring->desc)
2133                 goto err;
2134
2135         rx_ring->next_to_clean = 0;
2136         rx_ring->next_to_use = 0;
2137
2138         return 0;
2139
2140 err:
2141         vfree(rx_ring->buffer_info);
2142         rx_ring->buffer_info = NULL;
2143         dev_err(&pdev->dev, "Unable to allocate memory for "
2144                 "the receive descriptor ring\n");
2145         return -ENOMEM;
2146 }
2147
2148 /**
2149  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2150  *                                (Descriptors) for all queues
2151  * @adapter: board private structure
2152  *
2153  * Return 0 on success, negative on failure
2154  **/
2155 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2156 {
2157         struct pci_dev *pdev = adapter->pdev;
2158         int i, err = 0;
2159
2160         for (i = 0; i < adapter->num_rx_queues; i++) {
2161                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2162                 if (err) {
2163                         dev_err(&pdev->dev,
2164                                 "Allocation for Rx Queue %u failed\n", i);
2165                         for (i--; i >= 0; i--)
2166                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2167                         break;
2168                 }
2169         }
2170
2171         return err;
2172 }
2173
2174 /**
2175  * igb_setup_mrqc - configure the multiple receive queue control registers
2176  * @adapter: Board private structure
2177  **/
2178 static void igb_setup_mrqc(struct igb_adapter *adapter)
2179 {
2180         struct e1000_hw *hw = &adapter->hw;
2181         u32 mrqc, rxcsum;
2182         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2183         union e1000_reta {
2184                 u32 dword;
2185                 u8  bytes[4];
2186         } reta;
2187         static const u8 rsshash[40] = {
2188                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2189                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2190                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2191                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2192
2193         /* Fill out hash function seeds */
2194         for (j = 0; j < 10; j++) {
2195                 u32 rsskey = rsshash[(j * 4)];
2196                 rsskey |= rsshash[(j * 4) + 1] << 8;
2197                 rsskey |= rsshash[(j * 4) + 2] << 16;
2198                 rsskey |= rsshash[(j * 4) + 3] << 24;
2199                 array_wr32(E1000_RSSRK(0), j, rsskey);
2200         }
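        /*
         * Worked example of the packing above: for j == 0 the key bytes
         * 0x6d, 0x5a, 0x56, 0xda assemble little-endian into
         * rsskey == 0xda565a6d before being written to RSSRK(0).
         */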
2201
2202         num_rx_queues = adapter->num_rx_queues;
2203
2204         if (adapter->vfs_allocated_count) {
2205                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2206                 switch (hw->mac.type) {
2207                 case e1000_82576:
2208                         shift = 3;
2209                         num_rx_queues = 2;
2210                         break;
2211                 case e1000_82575:
2212                         shift = 2;
2213                         shift2 = 6;
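                        /* fall through */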
2214                 default:
2215                         break;
2216                 }
2217         } else {
2218                 if (hw->mac.type == e1000_82575)
2219                         shift = 6;
2220         }
2221
2222         for (j = 0; j < (32 * 4); j++) {
2223                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2224                 if (shift2)
2225                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2226                 if ((j & 3) == 3)
2227                         wr32(E1000_RETA(j >> 2), reta.dword);
2228         }
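        /*
         * Illustrative RETA contents: with 4 RSS queues and no VFs (so both
         * shifts are 0 on 82576), the 128 table bytes cycle 0,1,2,3,... and
         * each dword written above reads 0x03020100 on a little-endian host.
         */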
2229
2230         /*
2231          * Disable raw packet checksumming so that RSS hash is placed in
2232          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2233          * offloads as they are enabled by default
2234          */
2235         rxcsum = rd32(E1000_RXCSUM);
2236         rxcsum |= E1000_RXCSUM_PCSD;
2237
2238         if (adapter->hw.mac.type >= e1000_82576)
2239                 /* Enable Receive Checksum Offload for SCTP */
2240                 rxcsum |= E1000_RXCSUM_CRCOFL;
2241
2242         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2243         wr32(E1000_RXCSUM, rxcsum);
2244
2245         /* If VMDq is enabled then we set the appropriate mode for that, else
2246          * we default to RSS so that an RSS hash is calculated per packet even
2247          * if we are only using one queue */
2248         if (adapter->vfs_allocated_count) {
2249                 if (hw->mac.type > e1000_82575) {
2250                         /* Set the default pool for the PF's first queue */
2251                         u32 vtctl = rd32(E1000_VT_CTL);
2252                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2253                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2254                         vtctl |= adapter->vfs_allocated_count <<
2255                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2256                         wr32(E1000_VT_CTL, vtctl);
2257                 }
2258                 if (adapter->num_rx_queues > 1)
2259                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2260                 else
2261                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2262         } else {
2263                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2264         }
2265         igb_vmm_control(adapter);
2266
2267         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2268                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2269         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2270                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2271         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2272                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2273         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2274                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2275
2276         wr32(E1000_MRQC, mrqc);
2277 }
2278
2279 /**
2280  * igb_setup_rctl - configure the receive control registers
2281  * @adapter: Board private structure
2282  **/
2283 void igb_setup_rctl(struct igb_adapter *adapter)
2284 {
2285         struct e1000_hw *hw = &adapter->hw;
2286         u32 rctl;
2287
2288         rctl = rd32(E1000_RCTL);
2289
2290         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2291         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2292
2293         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2294                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2295
2296         /*
2297          * enable stripping of CRC. It's unlikely this will break BMC
2298          * redirection as it did with e1000. Newer features require
2299          * that the HW strips the CRC.
2300          */
2301         rctl |= E1000_RCTL_SECRC;
2302
2303         /* disable store bad packets and clear size bits. */
2304         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2305
2306         /* enable LPE to prevent packets larger than max_frame_size */
2307         rctl |= E1000_RCTL_LPE;
2308
2309         /* disable queue 0 to prevent tail write w/o re-config */
2310         wr32(E1000_RXDCTL(0), 0);
2311
2312         /* Attention!!!  For SR-IOV PF driver operations you must enable
2313          * queue drop for all VF and PF queues to prevent head of line blocking
2314          * if an un-trusted VF does not provide descriptors to hardware.
2315          */
2316         if (adapter->vfs_allocated_count) {
2317                 /* set all queue drop enable bits */
2318                 wr32(E1000_QDE, ALL_QUEUES);
2319         }
2320
2321         wr32(E1000_RCTL, rctl);
2322 }
2323
2324 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2325                                    int vfn)
2326 {
2327         struct e1000_hw *hw = &adapter->hw;
2328         u32 vmolr;
2329
2330         /* if it isn't the PF, check to see if VFs are enabled and
2331          * increase the size to support vlan tags */
2332         if (vfn < adapter->vfs_allocated_count &&
2333             adapter->vf_data[vfn].vlans_enabled)
2334                 size += VLAN_TAG_SIZE;
2335
2336         vmolr = rd32(E1000_VMOLR(vfn));
2337         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2338         vmolr |= size | E1000_VMOLR_LPE;
2339         wr32(E1000_VMOLR(vfn), vmolr);
2340
2341         return 0;
2342 }
2343
2344 /**
2345  * igb_rlpml_set - set maximum receive packet size
2346  * @adapter: board private structure
2347  *
2348  * Configure maximum receivable packet size.
2349  **/
2350 static void igb_rlpml_set(struct igb_adapter *adapter)
2351 {
2352         u32 max_frame_size = adapter->max_frame_size;
2353         struct e1000_hw *hw = &adapter->hw;
2354         u16 pf_id = adapter->vfs_allocated_count;
2355
2356         if (adapter->vlgrp)
2357                 max_frame_size += VLAN_TAG_SIZE;
2358
2359         /* if vfs are enabled we set RLPML to the largest possible request
2360          * size and set the VMOLR RLPML to the size we need */
2361         if (pf_id) {
2362                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2363                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2364         }
2365
2366         wr32(E1000_RLPML, max_frame_size);
2367 }
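/*
 * Sizing example for the above (illustrative): with a 1500 byte MTU,
 * max_frame_size is 1518, or 1522 once a vlan group is registered.  With
 * VFs enabled the global RLPML is opened up to MAX_JUMBO_FRAME_SIZE and the
 * effective per-pool limit is enforced via VMOLR.RLPML instead.
 */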
2368
2369 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2370 {
2371         struct e1000_hw *hw = &adapter->hw;
2372         u32 vmolr;
2373
2374         /*
2375          * This register exists only on 82576 and newer, so if the hardware
2376          * is older we should exit and do nothing
2377          */
2378         if (hw->mac.type < e1000_82576)
2379                 return;
2380
2381         vmolr = rd32(E1000_VMOLR(vfn));
2382         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2383                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2384
2385         /* clear all bits that might not be set */
2386         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2387
2388         if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
2389                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2390         /*
2391          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2392          * multicast packets
2393          */
2394         if (vfn <= adapter->vfs_allocated_count)
2395                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2396
2397         wr32(E1000_VMOLR(vfn), vmolr);
2398 }
2399
2400 /**
2401  * igb_configure_rx_ring - Configure a receive ring after Reset
2402  * @adapter: board private structure
2403  * @ring: receive ring to be configured
2404  *
2405  * Configure the Rx unit of the MAC after a reset.
2406  **/
2407 void igb_configure_rx_ring(struct igb_adapter *adapter,
2408                            struct igb_ring *ring)
2409 {
2410         struct e1000_hw *hw = &adapter->hw;
2411         u64 rdba = ring->dma;
2412         int reg_idx = ring->reg_idx;
2413         u32 srrctl, rxdctl;
2414
2415         /* disable the queue */
2416         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2417         wr32(E1000_RXDCTL(reg_idx),
2418                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2419
2420         /* Set DMA base address registers */
2421         wr32(E1000_RDBAL(reg_idx),
2422              rdba & 0x00000000ffffffffULL);
2423         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2424         wr32(E1000_RDLEN(reg_idx),
2425                        ring->count * sizeof(union e1000_adv_rx_desc));
2426
2427         /* initialize head and tail */
2428         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2429         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2430         writel(0, ring->head);
2431         writel(0, ring->tail);
2432
2433         /* set descriptor configuration */
2434         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2435                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2436                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2437 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2438                 srrctl |= IGB_RXBUFFER_16384 >>
2439                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2440 #else
2441                 srrctl |= (PAGE_SIZE / 2) >>
2442                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2443 #endif
2444                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2445         } else {
2446                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2447                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2448                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2449         }
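        /*
         * e.g. (assuming E1000_SRRCTL_BSIZEPKT_SHIFT == 10, i.e. 1 KB
         * granularity) a 2048 byte rx_buffer_len gives a BSIZEPKT field of 2
         * in the one-buffer case above.
         */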
2450
2451         wr32(E1000_SRRCTL(reg_idx), srrctl);
2452
2453         /* set filtering for VMDQ pools */
2454         igb_set_vmolr(adapter, reg_idx & 0x7);
2455
2456         /* enable receive descriptor fetching */
2457         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2458         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2459         rxdctl &= 0xFFF00000;
2460         rxdctl |= IGB_RX_PTHRESH;
2461         rxdctl |= IGB_RX_HTHRESH << 8;
2462         rxdctl |= IGB_RX_WTHRESH << 16;
2463         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2464 }
2465
2466 /**
2467  * igb_configure_rx - Configure receive Unit after Reset
2468  * @adapter: board private structure
2469  *
2470  * Configure the Rx unit of the MAC after a reset.
2471  **/
2472 static void igb_configure_rx(struct igb_adapter *adapter)
2473 {
2474         int i;
2475
2476         /* set UTA to appropriate mode */
2477         igb_set_uta(adapter);
2478
2479         /* set the correct pool for the PF default MAC address in entry 0 */
2480         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2481                          adapter->vfs_allocated_count);
2482
2483         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2484          * the Base and Length of the Rx Descriptor Ring */
2485         for (i = 0; i < adapter->num_rx_queues; i++)
2486                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2487 }
2488
2489 /**
2490  * igb_free_tx_resources - Free Tx Resources per Queue
2491  * @tx_ring: Tx descriptor ring for a specific queue
2492  *
2493  * Free all transmit software resources
2494  **/
2495 void igb_free_tx_resources(struct igb_ring *tx_ring)
2496 {
2497         igb_clean_tx_ring(tx_ring);
2498
2499         vfree(tx_ring->buffer_info);
2500         tx_ring->buffer_info = NULL;
2501
2502         /* if not set, then don't free */
2503         if (!tx_ring->desc)
2504                 return;
2505
2506         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2507                             tx_ring->desc, tx_ring->dma);
2508
2509         tx_ring->desc = NULL;
2510 }
2511
2512 /**
2513  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2514  * @adapter: board private structure
2515  *
2516  * Free all transmit software resources
2517  **/
2518 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2519 {
2520         int i;
2521
2522         for (i = 0; i < adapter->num_tx_queues; i++)
2523                 igb_free_tx_resources(&adapter->tx_ring[i]);
2524 }
2525
2526 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2527                                     struct igb_buffer *buffer_info)
2528 {
2529         buffer_info->dma = 0;
2530         if (buffer_info->skb) {
2531                 skb_dma_unmap(&tx_ring->pdev->dev,
2532                               buffer_info->skb,
2533                               DMA_TO_DEVICE);
2534                 dev_kfree_skb_any(buffer_info->skb);
2535                 buffer_info->skb = NULL;
2536         }
2537         buffer_info->time_stamp = 0;
2538         /* buffer_info must be completely set up in the transmit path */
2539 }
2540
2541 /**
2542  * igb_clean_tx_ring - Free Tx Buffers
2543  * @tx_ring: ring to be cleaned
2544  **/
2545 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2546 {
2547         struct igb_buffer *buffer_info;
2548         unsigned long size;
2549         unsigned int i;
2550
2551         if (!tx_ring->buffer_info)
2552                 return;
2553         /* Free all the Tx ring sk_buffs */
2554
2555         for (i = 0; i < tx_ring->count; i++) {
2556                 buffer_info = &tx_ring->buffer_info[i];
2557                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2558         }
2559
2560         size = sizeof(struct igb_buffer) * tx_ring->count;
2561         memset(tx_ring->buffer_info, 0, size);
2562
2563         /* Zero out the descriptor ring */
2564         memset(tx_ring->desc, 0, tx_ring->size);
2565
2566         tx_ring->next_to_use = 0;
2567         tx_ring->next_to_clean = 0;
2568 }
2569
2570 /**
2571  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2572  * @adapter: board private structure
2573  **/
2574 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2575 {
2576         int i;
2577
2578         for (i = 0; i < adapter->num_tx_queues; i++)
2579                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2580 }
2581
2582 /**
2583  * igb_free_rx_resources - Free Rx Resources
2584  * @rx_ring: ring to clean the resources from
2585  *
2586  * Free all receive software resources
2587  **/
2588 void igb_free_rx_resources(struct igb_ring *rx_ring)
2589 {
2590         igb_clean_rx_ring(rx_ring);
2591
2592         vfree(rx_ring->buffer_info);
2593         rx_ring->buffer_info = NULL;
2594
2595         /* if not set, then don't free */
2596         if (!rx_ring->desc)
2597                 return;
2598
2599         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2600                             rx_ring->desc, rx_ring->dma);
2601
2602         rx_ring->desc = NULL;
2603 }
2604
2605 /**
2606  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2607  * @adapter: board private structure
2608  *
2609  * Free all receive software resources
2610  **/
2611 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2612 {
2613         int i;
2614
2615         for (i = 0; i < adapter->num_rx_queues; i++)
2616                 igb_free_rx_resources(&adapter->rx_ring[i]);
2617 }
2618
2619 /**
2620  * igb_clean_rx_ring - Free Rx Buffers per Queue
2621  * @rx_ring: ring to free buffers from
2622  **/
2623 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2624 {
2625         struct igb_buffer *buffer_info;
2626         unsigned long size;
2627         unsigned int i;
2628
2629         if (!rx_ring->buffer_info)
2630                 return;
2631
2632         /* Free all the Rx ring sk_buffs */
2633         for (i = 0; i < rx_ring->count; i++) {
2634                 buffer_info = &rx_ring->buffer_info[i];
2635                 if (buffer_info->dma) {
2636                         pci_unmap_single(rx_ring->pdev,
2637                                          buffer_info->dma,
2638                                          rx_ring->rx_buffer_len,
2639                                          PCI_DMA_FROMDEVICE);
2640                         buffer_info->dma = 0;
2641                 }
2642
2643                 if (buffer_info->skb) {
2644                         dev_kfree_skb(buffer_info->skb);
2645                         buffer_info->skb = NULL;
2646                 }
2647                 if (buffer_info->page_dma) {
2648                         pci_unmap_page(rx_ring->pdev,
2649                                        buffer_info->page_dma,
2650                                        PAGE_SIZE / 2,
2651                                        PCI_DMA_FROMDEVICE);
2652                         buffer_info->page_dma = 0;
2653                 }
2654                 if (buffer_info->page) {
2655                         put_page(buffer_info->page);
2656                         buffer_info->page = NULL;
2657                         buffer_info->page_offset = 0;
2658                 }
2659         }
2660
2661         size = sizeof(struct igb_buffer) * rx_ring->count;
2662         memset(rx_ring->buffer_info, 0, size);
2663
2664         /* Zero out the descriptor ring */
2665         memset(rx_ring->desc, 0, rx_ring->size);
2666
2667         rx_ring->next_to_clean = 0;
2668         rx_ring->next_to_use = 0;
2669 }
2670
2671 /**
2672  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2673  * @adapter: board private structure
2674  **/
2675 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2676 {
2677         int i;
2678
2679         for (i = 0; i < adapter->num_rx_queues; i++)
2680                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2681 }
2682
2683 /**
2684  * igb_set_mac - Change the Ethernet Address of the NIC
2685  * @netdev: network interface device structure
2686  * @p: pointer to an address structure
2687  *
2688  * Returns 0 on success, negative on failure
2689  **/
2690 static int igb_set_mac(struct net_device *netdev, void *p)
2691 {
2692         struct igb_adapter *adapter = netdev_priv(netdev);
2693         struct e1000_hw *hw = &adapter->hw;
2694         struct sockaddr *addr = p;
2695
2696         if (!is_valid_ether_addr(addr->sa_data))
2697                 return -EADDRNOTAVAIL;
2698
2699         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2700         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2701
2702         /* set the correct pool for the new PF MAC address in entry 0 */
2703         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2704                          adapter->vfs_allocated_count);
2705
2706         return 0;
2707 }
2708
2709 /**
2710  * igb_write_mc_addr_list - write multicast addresses to MTA
2711  * @netdev: network interface device structure
2712  *
2713  * Writes multicast address list to the MTA hash table.
2714  * Returns: -ENOMEM on failure
2715  *                0 on no addresses written
2716  *                X on writing X addresses to MTA
2717  **/
2718 static int igb_write_mc_addr_list(struct net_device *netdev)
2719 {
2720         struct igb_adapter *adapter = netdev_priv(netdev);
2721         struct e1000_hw *hw = &adapter->hw;
2722         struct dev_mc_list *mc_ptr = netdev->mc_list;
2723         u8  *mta_list;
2724         u32 vmolr = 0;
2725         int i;
2726
2727         if (!netdev->mc_count) {
2728                 /* nothing to program, so clear mc list */
2729                 igb_update_mc_addr_list(hw, NULL, 0);
2730                 igb_restore_vf_multicasts(adapter);
2731                 return 0;
2732         }
2733
2734         mta_list = kzalloc(netdev->mc_count * ETH_ALEN, GFP_ATOMIC);
2735         if (!mta_list)
2736                 return -ENOMEM;
2737
2738         /* set vmolr receive overflow multicast bit */
2739         vmolr |= E1000_VMOLR_ROMPE;
2740
2741         /* The shared function expects a packed array of only addresses. */
2742         mc_ptr = netdev->mc_list;
2743
2744         for (i = 0; i < netdev->mc_count; i++) {
2745                 if (!mc_ptr)
2746                         break;
2747                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2748                 mc_ptr = mc_ptr->next;
2749         }
2750         igb_update_mc_addr_list(hw, mta_list, i);
2751         kfree(mta_list);
2752
2753         return netdev->mc_count;
2754 }
2755
2756 /**
2757  * igb_write_uc_addr_list - write unicast addresses to RAR table
2758  * @netdev: network interface device structure
2759  *
2760  * Writes unicast address list to the RAR table.
2761  * Returns: -ENOMEM on failure/insufficient address space
2762  *                0 on no addresses written
2763  *                X on writing X addresses to the RAR table
2764  **/
2765 static int igb_write_uc_addr_list(struct net_device *netdev)
2766 {
2767         struct igb_adapter *adapter = netdev_priv(netdev);
2768         struct e1000_hw *hw = &adapter->hw;
2769         unsigned int vfn = adapter->vfs_allocated_count;
2770         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
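        /* e.g. with the 24 RAR entries of an 82576 and 7 VFs enabled, entry 0
         * plus one entry per VF are reserved, leaving 16 slots for additional
         * unicast addresses (counts shown for illustration only) */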
2771         int count = 0;
2772
2773         /* return -ENOMEM indicating insufficient memory for addresses */
2774         if (netdev->uc.count > rar_entries)
2775                 return -ENOMEM;
2776
2777         if (netdev->uc.count && rar_entries) {
2778                 struct netdev_hw_addr *ha;
2779                 list_for_each_entry(ha, &netdev->uc.list, list) {
2780                         if (!rar_entries)
2781                                 break;
2782                         igb_rar_set_qsel(adapter, ha->addr,
2783                                          rar_entries--,
2784                                          vfn);
2785                         count++;
2786                 }
2787         }
2788         /* write the addresses in reverse order to avoid write combining */
2789         for (; rar_entries > 0 ; rar_entries--) {
2790                 wr32(E1000_RAH(rar_entries), 0);
2791                 wr32(E1000_RAL(rar_entries), 0);
2792         }
2793         wrfl();
2794
2795         return count;
2796 }
2797
2798 /**
2799  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2800  * @netdev: network interface device structure
2801  *
2802  * The set_rx_mode entry point is called whenever the unicast or multicast
2803  * address lists or the network interface flags are updated.  This routine is
2804  * responsible for configuring the hardware for proper unicast, multicast,
2805  * promiscuous mode, and all-multi behavior.
2806  **/
2807 static void igb_set_rx_mode(struct net_device *netdev)
2808 {
2809         struct igb_adapter *adapter = netdev_priv(netdev);
2810         struct e1000_hw *hw = &adapter->hw;
2811         unsigned int vfn = adapter->vfs_allocated_count;
2812         u32 rctl, vmolr = 0;
2813         int count;
2814
2815         /* Check for Promiscuous and All Multicast modes */
2816         rctl = rd32(E1000_RCTL);
2817
2818         /* clear the affected bits */
2819         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2820
2821         if (netdev->flags & IFF_PROMISC) {
2822                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2823                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2824         } else {
2825                 if (netdev->flags & IFF_ALLMULTI) {
2826                         rctl |= E1000_RCTL_MPE;
2827                         vmolr |= E1000_VMOLR_MPME;
2828                 } else {
2829                         /*
2830                          * Write addresses to the MTA, if the attempt fails
2831                          * then we should just turn on promiscuous mode so
2832                          * that we can at least receive multicast traffic
2833                          */
2834                         count = igb_write_mc_addr_list(netdev);
2835                         if (count < 0) {
2836                                 rctl |= E1000_RCTL_MPE;
2837                                 vmolr |= E1000_VMOLR_MPME;
2838                         } else if (count) {
2839                                 vmolr |= E1000_VMOLR_ROMPE;
2840                         }
2841                 }
2842                 /*
2843                  * Write addresses to available RAR registers, if there is not
2844                  * sufficient space to store all the addresses then enable
2845                  * unicast promiscuous mode
2846                  */
2847                 count = igb_write_uc_addr_list(netdev);
2848                 if (count < 0) {
2849                         rctl |= E1000_RCTL_UPE;
2850                         vmolr |= E1000_VMOLR_ROPE;
2851                 }
2852                 rctl |= E1000_RCTL_VFE;
2853         }
2854         wr32(E1000_RCTL, rctl);
2855
2856         /*
2857          * In order to support SR-IOV and eventually VMDq it is necessary to set
2858          * the VMOLR to enable the appropriate modes.  Without this workaround
2859          * we will have issues with VLAN tag stripping not being done for frames
2860          * that are only arriving because we are the default pool
2861          */
2862         if (hw->mac.type < e1000_82576)
2863                 return;
2864
2865         vmolr |= rd32(E1000_VMOLR(vfn)) &
2866                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2867         wr32(E1000_VMOLR(vfn), vmolr);
2868         igb_restore_vf_multicasts(adapter);
2869 }
2870
2871 /* Need to wait a few seconds after link up to get diagnostic information from
2872  * the phy */
2873 static void igb_update_phy_info(unsigned long data)
2874 {
2875         struct igb_adapter *adapter = (struct igb_adapter *) data;
2876         igb_get_phy_info(&adapter->hw);
2877 }
2878
2879 /**
2880  * igb_has_link - check shared code for link and determine up/down
2881  * @adapter: pointer to driver private info
2882  **/
2883 static bool igb_has_link(struct igb_adapter *adapter)
2884 {
2885         struct e1000_hw *hw = &adapter->hw;
2886         bool link_active = false;
2887         s32 ret_val = 0;
2888
2889         /* get_link_status is set on LSC (link status) interrupt or
2890          * rx sequence error interrupt.  get_link_status will stay
2891          * true until e1000_check_for_link establishes link
2892          * for copper adapters ONLY
2893          */
2894         switch (hw->phy.media_type) {
2895         case e1000_media_type_copper:
2896                 if (hw->mac.get_link_status) {
2897                         ret_val = hw->mac.ops.check_for_link(hw);
2898                         link_active = !hw->mac.get_link_status;
2899                 } else {
2900                         link_active = true;
2901                 }
2902                 break;
2903         case e1000_media_type_internal_serdes:
2904                 ret_val = hw->mac.ops.check_for_link(hw);
2905                 link_active = hw->mac.serdes_has_link;
2906                 break;
2907         default:
2908         case e1000_media_type_unknown:
2909                 break;
2910         }
2911
2912         return link_active;
2913 }
2914
2915 /**
2916  * igb_watchdog - Timer Call-back
2917  * @data: pointer to adapter cast into an unsigned long
2918  **/
2919 static void igb_watchdog(unsigned long data)
2920 {
2921         struct igb_adapter *adapter = (struct igb_adapter *)data;
2922         /* Do the rest outside of interrupt context */
2923         schedule_work(&adapter->watchdog_task);
2924 }
2925
2926 static void igb_watchdog_task(struct work_struct *work)
2927 {
2928         struct igb_adapter *adapter = container_of(work,
2929                                                    struct igb_adapter,
2930                                                    watchdog_task);
2931         struct e1000_hw *hw = &adapter->hw;
2932         struct net_device *netdev = adapter->netdev;
2933         struct igb_ring *tx_ring = adapter->tx_ring;
2934         u32 link;
2935         int i;
2936
2937         link = igb_has_link(adapter);
2938         if (link) {
2939                 if (!netif_carrier_ok(netdev)) {
2940                         u32 ctrl;
2941                         hw->mac.ops.get_speed_and_duplex(hw,
2942                                                          &adapter->link_speed,
2943                                                          &adapter->link_duplex);
2944
2945                         ctrl = rd32(E1000_CTRL);
2946                         /* Link status message must follow this format */
2947                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2948                                  "Flow Control: %s\n",
2949                                netdev->name,
2950                                adapter->link_speed,
2951                                adapter->link_duplex == FULL_DUPLEX ?
2952                                  "Full Duplex" : "Half Duplex",
2953                                ((ctrl & E1000_CTRL_TFCE) &&
2954                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
2955                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
2956                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
2957
2958                         /* tweak tx_queue_len according to speed/duplex and
2959                          * adjust the timeout factor */
2960                         netdev->tx_queue_len = adapter->tx_queue_len;
2961                         adapter->tx_timeout_factor = 1;
2962                         switch (adapter->link_speed) {
2963                         case SPEED_10:
2964                                 netdev->tx_queue_len = 10;
2965                                 adapter->tx_timeout_factor = 14;
2966                                 break;
2967                         case SPEED_100:
2968                                 netdev->tx_queue_len = 100;
2969                                 /* maybe add some timeout factor? */
2970                                 break;
2971                         }
2972
2973                         netif_carrier_on(netdev);
2974
2975                         igb_ping_all_vfs(adapter);
2976
2977                         /* link state has changed, schedule phy info update */
2978                         if (!test_bit(__IGB_DOWN, &adapter->state))
2979                                 mod_timer(&adapter->phy_info_timer,
2980                                           round_jiffies(jiffies + 2 * HZ));
2981                 }
2982         } else {
2983                 if (netif_carrier_ok(netdev)) {
2984                         adapter->link_speed = 0;
2985                         adapter->link_duplex = 0;
2986                         /* Link status message must follow this format */
2987                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2988                                netdev->name);
2989                         netif_carrier_off(netdev);
2990
2991                         igb_ping_all_vfs(adapter);
2992
2993                         /* link state has changed, schedule phy info update */
2994                         if (!test_bit(__IGB_DOWN, &adapter->state))
2995                                 mod_timer(&adapter->phy_info_timer,
2996                                           round_jiffies(jiffies + 2 * HZ));
2997                 }
2998         }
2999
3000         igb_update_stats(adapter);
3001         igb_update_adaptive(hw);
3002
3003         if (!netif_carrier_ok(netdev)) {
3004                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3005                         /* We've lost link, so the controller stops DMA,
3006                          * but we've got queued Tx work that's never going
3007                          * to get done, so reset controller to flush Tx.
3008                          * (Do the reset outside of interrupt context). */
3009                         adapter->tx_timeout_count++;
3010                         schedule_work(&adapter->reset_task);
3011                         /* return immediately since reset is imminent */
3012                         return;
3013                 }
3014         }
3015
3016         /* Force detection of hung controller every watchdog period */
3017         for (i = 0; i < adapter->num_tx_queues; i++)
3018                 adapter->tx_ring[i].detect_tx_hung = true;
3019
3020         /* Cause software interrupt to ensure rx ring is cleaned */
3021         if (adapter->msix_entries) {
3022                 u32 eics = 0;
3023                 for (i = 0; i < adapter->num_q_vectors; i++) {
3024                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3025                         eics |= q_vector->eims_value;
3026                 }
3027                 wr32(E1000_EICS, eics);
3028         } else {
3029                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3030         }
3031
3032         /* Reset the timer */
3033         if (!test_bit(__IGB_DOWN, &adapter->state))
3034                 mod_timer(&adapter->watchdog_timer,
3035                           round_jiffies(jiffies + 2 * HZ));
3036 }
3037
3038 enum latency_range {
3039         lowest_latency = 0,
3040         low_latency = 1,
3041         bulk_latency = 2,
3042         latency_invalid = 255
3043 };
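     /*
      * A note on units (inferred from this file, not stated explicitly):
      * the itr_val numbers used below are hardware ITR ticks.  From the
      * rates quoted in igb_set_itr (196 -> 20,000 ints/sec, 980 -> 4,000
      * ints/sec) one tick works out to roughly 256 ns, so e.g. 196 ticks
      * is about a 50 usec interrupt interval.
      */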
3044
3045 /**
3046  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3047  *
3048  *      Stores a new ITR value based strictly on packet size.  This
3049  *      algorithm is less sophisticated than that used in igb_update_itr,
3050  *      due to the difficulty of synchronizing statistics across multiple
3051  *      receive rings.  The divisors and thresholds used by this function
3052  *      were determined based on theoretical maximum wire speed and testing
3053  *      data, in order to minimize response time while increasing bulk
3054  *      throughput.
3055  *      This functionality is controlled by the InterruptThrottleRate module
3056  *      parameter (see igb_param.c)
3057  *      NOTE:  This function is called only when operating in a multiqueue
3058  *             receive environment.
3059  * @q_vector: pointer to q_vector
3060  **/
3061 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3062 {
3063         int new_val = q_vector->itr_val;
3064         int avg_wire_size = 0;
3065         struct igb_adapter *adapter = q_vector->adapter;
3066
3067         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3068          * ints/sec - ITR timer value of 976 ticks (~250 usecs).
3069          */
3070         if (adapter->link_speed != SPEED_1000) {
3071                 new_val = 976;
3072                 goto set_itr_val;
3073         }
3074
3075         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3076                 struct igb_ring *ring = q_vector->rx_ring;
3077                 avg_wire_size = ring->total_bytes / ring->total_packets;
3078         }
3079
3080         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3081                 struct igb_ring *ring = q_vector->tx_ring;
3082                 avg_wire_size = max_t(u32, avg_wire_size,
3083                                       (ring->total_bytes /
3084                                        ring->total_packets));
3085         }
3086
3087         /* if avg_wire_size isn't set no work was done */
3088         if (!avg_wire_size)
3089                 goto clear_counts;
3090
3091         /* Add 24 bytes to size to account for CRC, preamble, and gap */
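             /* (8 bytes preamble/SFD + 12 bytes inter-frame gap + 4 bytes CRC) */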
3092         avg_wire_size += 24;
3093
3094         /* Don't starve jumbo frames */
3095         avg_wire_size = min(avg_wire_size, 3000);
3096
3097         /* Give a little boost to mid-size frames */
3098         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3099                 new_val = avg_wire_size / 3;
3100         else
3101                 new_val = avg_wire_size / 2;
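             /*
              * For example, an average wire size of 600 bytes lands in the
              * mid-size bucket and yields 600 / 3 = 200 ticks (~20,000
              * ints/sec), while 1524-byte full-size frames yield
              * 1524 / 2 = 762 ticks (~5,100 ints/sec).
              */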
3102
3103 set_itr_val:
3104         if (new_val != q_vector->itr_val) {
3105                 q_vector->itr_val = new_val;
3106                 q_vector->set_itr = 1;
3107         }
3108 clear_counts:
3109         if (q_vector->rx_ring) {
3110                 q_vector->rx_ring->total_bytes = 0;
3111                 q_vector->rx_ring->total_packets = 0;
3112         }
3113         if (q_vector->tx_ring) {
3114                 q_vector->tx_ring->total_bytes = 0;
3115                 q_vector->tx_ring->total_packets = 0;
3116         }
3117 }
3118
3119 /**
3120  * igb_update_itr - update the dynamic ITR value based on statistics
3121  *      Stores a new ITR value based on packets and byte
3122  *      counts during the last interrupt.  The advantage of per interrupt
3123  *      computation is faster updates and more accurate ITR for the current
3124  *      traffic pattern.  Constants in this function were computed
3125  *      based on theoretical maximum wire speed and thresholds were set based
3126  *      on testing data as well as attempting to minimize response time
3127  *      while increasing bulk throughput.
3128  *      This functionality is controlled by the InterruptThrottleRate module
3129  *      parameter (see igb_param.c)
3130  *      NOTE:  These calculations are only valid when operating in a single-
3131  *             queue environment.
3132  * @adapter: pointer to adapter
3133  * @itr_setting: current q_vector->itr_val
3134  * @packets: the number of packets during this measurement interval
3135  * @bytes: the number of bytes during this measurement interval
3136  **/
3137 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3138                                    int packets, int bytes)
3139 {
3140         unsigned int retval = itr_setting;
3141
3142         if (packets == 0)
3143                 goto update_itr_done;
3144
3145         switch (itr_setting) {
3146         case lowest_latency:
3147                 /* handle TSO and jumbo frames */
3148                 if (bytes/packets > 8000)
3149                         retval = bulk_latency;
3150                 else if ((packets < 5) && (bytes > 512))
3151                         retval = low_latency;
3152                 break;
3153         case low_latency:  /* 50 usec aka 20000 ints/s */
3154                 if (bytes > 10000) {
3155                         /* this if handles the TSO accounting */
3156                         if (bytes/packets > 8000) {
3157                                 retval = bulk_latency;
3158                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3159                                 retval = bulk_latency;
3160                         } else if (packets > 35) {
3161                                 retval = lowest_latency;
3162                         }
3163                 } else if (bytes/packets > 2000) {
3164                         retval = bulk_latency;
3165                 } else if (packets <= 2 && bytes < 512) {
3166                         retval = lowest_latency;
3167                 }
3168                 break;
3169         case bulk_latency: /* 250 usec aka 4000 ints/s */
3170                 if (bytes > 25000) {
3171                         if (packets > 35)
3172                                 retval = low_latency;
3173                 } else if (bytes < 1500) {
3174                         retval = low_latency;
3175                 }
3176                 break;
3177         }
3178
3179 update_itr_done:
3180         return retval;
3181 }
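     /*
      * A worked example of the low_latency case above: an interval that
      * moved 60,000 bytes in 50 packets averages exactly 1200 bytes/packet,
      * which escapes both the TSO (> 8000) and the bulk (> 1200
      * bytes/packet, or < 10 packets) demotions, so the (packets > 35)
      * test promotes the state to lowest_latency.
      */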
3182
3183 static void igb_set_itr(struct igb_adapter *adapter)
3184 {
3185         struct igb_q_vector *q_vector = adapter->q_vector[0];
3186         u16 current_itr;
3187         u32 new_itr = q_vector->itr_val;
3188
3189         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3190         if (adapter->link_speed != SPEED_1000) {
3191                 current_itr = 0;
3192                 new_itr = 4000;
3193                 goto set_itr_now;
3194         }
3195
3196         adapter->rx_itr = igb_update_itr(adapter,
3197                                     adapter->rx_itr,
3198                                     adapter->rx_ring->total_packets,
3199                                     adapter->rx_ring->total_bytes);
3200
3201         adapter->tx_itr = igb_update_itr(adapter,
3202                                     adapter->tx_itr,
3203                                     adapter->tx_ring->total_packets,
3204                                     adapter->tx_ring->total_bytes);
3205         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3206
3207         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3208         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3209                 current_itr = low_latency;
3210
3211         switch (current_itr) {
3212         /* counts and packets in update_itr are dependent on these numbers */
3213         case lowest_latency:
3214                 new_itr = 56;  /* aka 70,000 ints/sec */
3215                 break;
3216         case low_latency:
3217                 new_itr = 196; /* aka 20,000 ints/sec */
3218                 break;
3219         case bulk_latency:
3220                 new_itr = 980; /* aka 4,000 ints/sec */
3221                 break;
3222         default:
3223                 break;
3224         }
3225
3226 set_itr_now:
3227         adapter->rx_ring->total_bytes = 0;
3228         adapter->rx_ring->total_packets = 0;
3229         adapter->tx_ring->total_bytes = 0;
3230         adapter->tx_ring->total_packets = 0;
3231
3232         if (new_itr != q_vector->itr_val) {
3233                 /* this attempts to bias the interrupt rate towards Bulk
3234                  * by adding intermediate steps when interrupt rate is
3235                  * increasing */
3236                 new_itr = new_itr > q_vector->itr_val ?
3237                              max((new_itr * q_vector->itr_val) /
3238                                  (new_itr + (q_vector->itr_val >> 2)),
3239                                  new_itr) :
3240                              new_itr;
3241                 /* Don't write the value here; it resets the adapter's
3242                  * internal timer, and causes us to delay far longer than
3243                  * we should between interrupts.  Instead, we write the ITR
3244                  * value at the beginning of the next interrupt so the timing
3245                  * ends up being correct.
3246                  */
3247                 q_vector->itr_val = new_itr;
3248                 q_vector->set_itr = 1;
3249         }
3252 }
3253
3254 #define IGB_TX_FLAGS_CSUM               0x00000001
3255 #define IGB_TX_FLAGS_VLAN               0x00000002
3256 #define IGB_TX_FLAGS_TSO                0x00000004
3257 #define IGB_TX_FLAGS_IPV4               0x00000008
3258 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3259 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3260 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
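     /*
      * The low 16 bits of tx_flags carry the feature bits above; when a
      * VLAN tag is present it occupies the upper 16 bits, i.e.
      * tx_flags |= vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT, as
      * done in igb_xmit_frame_ring_adv below.
      */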
3261
3262 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3263                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3264 {
3265         struct e1000_adv_tx_context_desc *context_desc;
3266         unsigned int i;
3267         int err;
3268         struct igb_buffer *buffer_info;
3269         u32 info = 0, tu_cmd = 0;
3270         u32 mss_l4len_idx, l4len;
3271         *hdr_len = 0;
3272
3273         if (skb_header_cloned(skb)) {
3274                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3275                 if (err)
3276                         return err;
3277         }
3278
3279         l4len = tcp_hdrlen(skb);
3280         *hdr_len += l4len;
3281
3282         if (skb->protocol == htons(ETH_P_IP)) {
3283                 struct iphdr *iph = ip_hdr(skb);
3284                 iph->tot_len = 0;
3285                 iph->check = 0;
3286                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3287                                                          iph->daddr, 0,
3288                                                          IPPROTO_TCP,
3289                                                          0);
3290         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3291                 ipv6_hdr(skb)->payload_len = 0;
3292                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3293                                                        &ipv6_hdr(skb)->daddr,
3294                                                        0, IPPROTO_TCP, 0);
3295         }
3296
3297         i = tx_ring->next_to_use;
3298
3299         buffer_info = &tx_ring->buffer_info[i];
3300         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3301         /* VLAN MACLEN IPLEN */
3302         if (tx_flags & IGB_TX_FLAGS_VLAN)
3303                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3304         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3305         *hdr_len += skb_network_offset(skb);
3306         info |= skb_network_header_len(skb);
3307         *hdr_len += skb_network_header_len(skb);
3308         context_desc->vlan_macip_lens = cpu_to_le32(info);
3309
3310         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3311         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3312
3313         if (skb->protocol == htons(ETH_P_IP))
3314                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3315         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3316
3317         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3318
3319         /* MSS L4LEN IDX */
3320         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3321         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3322
3323         /* For 82575, context index must be unique per ring. */
3324         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3325                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3326
3327         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3328         context_desc->seqnum_seed = 0;
3329
3330         buffer_info->time_stamp = jiffies;
3331         buffer_info->next_to_watch = i;
3332         buffer_info->dma = 0;
3333         i++;
3334         if (i == tx_ring->count)
3335                 i = 0;
3336
3337         tx_ring->next_to_use = i;
3338
3339         return true;
3340 }
3341
3342 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3343                                    struct sk_buff *skb, u32 tx_flags)
3344 {
3345         struct e1000_adv_tx_context_desc *context_desc;
3346         struct pci_dev *pdev = tx_ring->pdev;
3347         struct igb_buffer *buffer_info;
3348         u32 info = 0, tu_cmd = 0;
3349         unsigned int i;
3350
3351         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3352             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3353                 i = tx_ring->next_to_use;
3354                 buffer_info = &tx_ring->buffer_info[i];
3355                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3356
3357                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3358                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3359
3360                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3361                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3362                         info |= skb_network_header_len(skb);
3363
3364                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3365
3366                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3367
3368                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3369                         __be16 protocol;
3370
3371                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3372                                 const struct vlan_ethhdr *vhdr =
3373                                           (const struct vlan_ethhdr*)skb->data;
3374
3375                                 protocol = vhdr->h_vlan_encapsulated_proto;
3376                         } else {
3377                                 protocol = skb->protocol;
3378                         }
3379
3380                         switch (protocol) {
3381                         case cpu_to_be16(ETH_P_IP):
3382                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3383                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3384                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3385                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3386                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3387                                 break;
3388                         case cpu_to_be16(ETH_P_IPV6):
3389                                 /* XXX what about other V6 headers?? */
3390                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3391                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3392                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3393                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3394                                 break;
3395                         default:
3396                                 if (unlikely(net_ratelimit()))
3397                                         dev_warn(&pdev->dev,
3398                                             "partial checksum but proto=%x!\n",
3399                                             skb->protocol);
3400                                 break;
3401                         }
3402                 }
3403
3404                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3405                 context_desc->seqnum_seed = 0;
3406                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3407                         context_desc->mss_l4len_idx =
3408                                 cpu_to_le32(tx_ring->reg_idx << 4);
3409
3410                 buffer_info->time_stamp = jiffies;
3411                 buffer_info->next_to_watch = i;
3412                 buffer_info->dma = 0;
3413
3414                 i++;
3415                 if (i == tx_ring->count)
3416                         i = 0;
3417                 tx_ring->next_to_use = i;
3418
3419                 return true;
3420         }
3421         return false;
3422 }
3423
3424 #define IGB_MAX_TXD_PWR 16
3425 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
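     /* 1 << 16 = 65536 bytes, so a single data descriptor can map at most
      * 64KB - 1 of payload; the BUG_ON checks in igb_tx_map_adv rely on
      * every segment staying below this limit. */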
3426
3427 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3428                                  unsigned int first)
3429 {
3430         struct igb_buffer *buffer_info;
3431         struct pci_dev *pdev = tx_ring->pdev;
3432         unsigned int len = skb_headlen(skb);
3433         unsigned int count = 0, i;
3434         unsigned int f;
3435         dma_addr_t *map;
3436
3437         i = tx_ring->next_to_use;
3438
3439         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3440                 dev_err(&pdev->dev, "TX DMA map failed\n");
3441                 return 0;
3442         }
3443
3444         map = skb_shinfo(skb)->dma_maps;
3445
3446         buffer_info = &tx_ring->buffer_info[i];
3447         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3448         buffer_info->length = len;
3449         /* set time_stamp *before* dma to help avoid a possible race */
3450         buffer_info->time_stamp = jiffies;
3451         buffer_info->next_to_watch = i;
3452         buffer_info->dma = skb_shinfo(skb)->dma_head;
3453
3454         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3455                 struct skb_frag_struct *frag;
3456
3457                 i++;
3458                 if (i == tx_ring->count)
3459                         i = 0;
3460
3461                 frag = &skb_shinfo(skb)->frags[f];
3462                 len = frag->size;
3463
3464                 buffer_info = &tx_ring->buffer_info[i];
3465                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3466                 buffer_info->length = len;
3467                 buffer_info->time_stamp = jiffies;
3468                 buffer_info->next_to_watch = i;
3469                 buffer_info->dma = map[count];
3470                 count++;
3471         }
3472
3473         tx_ring->buffer_info[i].skb = skb;
3474         tx_ring->buffer_info[first].next_to_watch = i;
3475
3476         return ++count;
3477 }
3478
3479 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3480                                     int tx_flags, int count, u32 paylen,
3481                                     u8 hdr_len)
3482 {
3483         union e1000_adv_tx_desc *tx_desc;
3484         struct igb_buffer *buffer_info;
3485         u32 olinfo_status = 0, cmd_type_len;
3486         unsigned int i = tx_ring->next_to_use;
3487
3488         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3489                         E1000_ADVTXD_DCMD_DEXT);
3490
3491         if (tx_flags & IGB_TX_FLAGS_VLAN)
3492                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3493
3494         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3495                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3496
3497         if (tx_flags & IGB_TX_FLAGS_TSO) {
3498                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3499
3500                 /* insert tcp checksum */
3501                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3502
3503                 /* insert ip checksum */
3504                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3505                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3506
3507         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3508                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3509         }
3510
3511         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3512             (tx_flags & (IGB_TX_FLAGS_CSUM |
3513                          IGB_TX_FLAGS_TSO |
3514                          IGB_TX_FLAGS_VLAN)))
3515                 olinfo_status |= tx_ring->reg_idx << 4;
3516
3517         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3518
3519         do {
3520                 buffer_info = &tx_ring->buffer_info[i];
3521                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3522                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3523                 tx_desc->read.cmd_type_len =
3524                         cpu_to_le32(cmd_type_len | buffer_info->length);
3525                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3526                 count--;
3527                 i++;
3528                 if (i == tx_ring->count)
3529                         i = 0;
3530         } while (count > 0);
3531
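             /* mark the final descriptor of the packet; IGB_ADVTXD_DCMD is
              * presumed to carry the end-of-packet/report-status bits */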
3532         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3533         /* Force memory writes to complete before letting h/w
3534          * know there are new descriptors to fetch.  (Only
3535          * applicable for weak-ordered memory model archs,
3536          * such as IA-64). */
3537         wmb();
3538
3539         tx_ring->next_to_use = i;
3540         writel(i, tx_ring->tail);
3541         /* we need this if more than one processor can write to our tail
3542          * at a time; it synchronizes IO on IA64/Altix systems */
3543         mmiowb();
3544 }
3545
3546 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3547 {
3548         struct net_device *netdev = tx_ring->netdev;
3549
3550         netif_stop_subqueue(netdev, tx_ring->queue_index);
3551
3552         /* Herbert's original patch had:
3553          *  smp_mb__after_netif_stop_queue();
3554          * but since that doesn't exist yet, just open code it. */
3555         smp_mb();
3556
3557         /* We need to check again in case another CPU has just
3558          * made room available. */
3559         if (igb_desc_unused(tx_ring) < size)
3560                 return -EBUSY;
3561
3562         /* A reprieve! */
3563         netif_wake_subqueue(netdev, tx_ring->queue_index);
3564         tx_ring->tx_stats.restart_queue++;
3565         return 0;
3566 }
3567
3568 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3569 {
3570         if (igb_desc_unused(tx_ring) >= size)
3571                 return 0;
3572         return __igb_maybe_stop_tx(tx_ring, size);
3573 }
3574
3575 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3576                                     struct igb_ring *tx_ring)
3577 {
3578         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3579         unsigned int first;
3580         unsigned int tx_flags = 0;
3581         u8 hdr_len = 0;
3582         int tso = 0, count;
3583         union skb_shared_tx *shtx = skb_tx(skb);
3584
3585         /* need: 1 descriptor per page,
3586          *       + 2 desc gap to keep tail from touching head,
3587          *       + 1 desc for skb->data,
3588          *       + 1 desc for context descriptor,
3589          * otherwise try next time */
3590         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3591                 /* this is a hard error */
3592                 return NETDEV_TX_BUSY;
3593         }
3594
3595         if (unlikely(shtx->hardware)) {
3596                 shtx->in_progress = 1;
3597                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3598         }
3599
3600         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3601                 tx_flags |= IGB_TX_FLAGS_VLAN;
3602                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3603         }
3604
3605         if (skb->protocol == htons(ETH_P_IP))
3606                 tx_flags |= IGB_TX_FLAGS_IPV4;
3607
3608         first = tx_ring->next_to_use;
3609         if (skb_is_gso(skb)) {
3610                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3611
3612                 if (tso < 0) {
3613                         dev_kfree_skb_any(skb);
3614                         return NETDEV_TX_OK;
3615                 }
3616         }
3617
3618         if (tso)
3619                 tx_flags |= IGB_TX_FLAGS_TSO;
3620         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3621                  (skb->ip_summed == CHECKSUM_PARTIAL))
3622                 tx_flags |= IGB_TX_FLAGS_CSUM;
3623
3624         /*
3625          * count reflects descriptors mapped; if 0 or less, a mapping error
3626          * has occurred and we need to rewind the descriptor queue
3627          */
3628         count = igb_tx_map_adv(tx_ring, skb, first);
3629         if (count <= 0) {
3630                 dev_kfree_skb_any(skb);
3631                 tx_ring->buffer_info[first].time_stamp = 0;
3632                 tx_ring->next_to_use = first;
3633                 return NETDEV_TX_OK;
3634         }
3635
3636         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3637
3638         /* Make sure there is space in the ring for the next send. */
3639         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3640
3641         return NETDEV_TX_OK;
3642 }
3643
3644 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3645                                       struct net_device *netdev)
3646 {
3647         struct igb_adapter *adapter = netdev_priv(netdev);
3648         struct igb_ring *tx_ring;
3649         int r_idx = 0;
3650
3651         if (test_bit(__IGB_DOWN, &adapter->state)) {
3652                 dev_kfree_skb_any(skb);
3653                 return NETDEV_TX_OK;
3654         }
3655
3656         if (skb->len <= 0) {
3657                 dev_kfree_skb_any(skb);
3658                 return NETDEV_TX_OK;
3659         }
3660
3661         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3662         tx_ring = adapter->multi_tx_table[r_idx];
3663
3664         /* This goes back to the question of how to logically map a tx queue
3665          * to a flow.  Right now, performance takes a slight hit when
3666          * using multiple tx queues.  If the stack breaks away from a
3667          * single qdisc implementation, we can look at this again. */
3668         return igb_xmit_frame_ring_adv(skb, tx_ring);
3669 }
3670
3671 /**
3672  * igb_tx_timeout - Respond to a Tx Hang
3673  * @netdev: network interface device structure
3674  **/
3675 static void igb_tx_timeout(struct net_device *netdev)
3676 {
3677         struct igb_adapter *adapter = netdev_priv(netdev);
3678         struct e1000_hw *hw = &adapter->hw;
3679
3680         /* Do the reset outside of interrupt context */
3681         adapter->tx_timeout_count++;
3682
3683         schedule_work(&adapter->reset_task);
3684         wr32(E1000_EICS,
3685              (adapter->eims_enable_mask & ~adapter->eims_other));
3686 }
3687
3688 static void igb_reset_task(struct work_struct *work)
3689 {
3690         struct igb_adapter *adapter;
3691         adapter = container_of(work, struct igb_adapter, reset_task);
3692
3693         igb_reinit_locked(adapter);
3694 }
3695
3696 /**
3697  * igb_get_stats - Get System Network Statistics
3698  * @netdev: network interface device structure
3699  *
3700  * Returns the address of the device statistics structure.
3701  * The statistics are actually updated from the timer callback.
3702  **/
3703 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3704 {
3705         /* only return the current stats */
3706         return &netdev->stats;
3707 }
3708
3709 /**
3710  * igb_change_mtu - Change the Maximum Transfer Unit
3711  * @netdev: network interface device structure
3712  * @new_mtu: new value for maximum frame size
3713  *
3714  * Returns 0 on success, negative on failure
3715  **/
3716 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3717 {
3718         struct igb_adapter *adapter = netdev_priv(netdev);
3719         struct pci_dev *pdev = adapter->pdev;
3720         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3721         u32 rx_buffer_len, i;
3722
3723         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3724                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3725                 return -EINVAL;
3726         }
3727
3728         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3729                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3730                 return -EINVAL;
3731         }
3732
3733         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3734                 msleep(1);
3735
3736         /* igb_down has a dependency on max_frame_size */
3737         adapter->max_frame_size = max_frame;
3738
3739         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3740          * means we reserve 2 more, this pushes us to allocate from the next
3741          * larger slab size.
3742          * i.e. RXBUFFER_2048 --> size-4096 slab
3743          */
3744
3745         if (max_frame <= IGB_RXBUFFER_1024)
3746                 rx_buffer_len = IGB_RXBUFFER_1024;
3747         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3748                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3749         else
3750                 rx_buffer_len = IGB_RXBUFFER_128;
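             /*
              * The 128-byte buffer presumably corresponds to the packet-split
              * receive path: only headers land in the skb data buffer and
              * jumbo payloads are placed in attached pages.
              */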
3751
3752         if (netif_running(netdev))
3753                 igb_down(adapter);
3754
3755         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3756                  netdev->mtu, new_mtu);
3757         netdev->mtu = new_mtu;
3758
3759         for (i = 0; i < adapter->num_rx_queues; i++)
3760                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3761
3762         if (netif_running(netdev))
3763                 igb_up(adapter);
3764         else
3765                 igb_reset(adapter);
3766
3767         clear_bit(__IGB_RESETTING, &adapter->state);
3768
3769         return 0;
3770 }
3771
3772 /**
3773  * igb_update_stats - Update the board statistics counters
3774  * @adapter: board private structure
3775  **/
3776
3777 void igb_update_stats(struct igb_adapter *adapter)
3778 {
3779         struct net_device *netdev = adapter->netdev;
3780         struct e1000_hw *hw = &adapter->hw;
3781         struct pci_dev *pdev = adapter->pdev;
3782         u32 rnbc;
3783         u16 phy_tmp;
3784         int i;
3785         u64 bytes, packets;
3786
3787 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3788
3789         /*
3790          * Prevent stats update while adapter is being reset, or if the pci
3791          * connection is down.
3792          */
3793         if (adapter->link_speed == 0)
3794                 return;
3795         if (pci_channel_offline(pdev))
3796                 return;
3797
3798         bytes = 0;
3799         packets = 0;
3800         for (i = 0; i < adapter->num_rx_queues; i++) {
3801                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3802                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3803                 netdev->stats.rx_fifo_errors += rqdpc_tmp;
3804                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3805                 packets += adapter->rx_ring[i].rx_stats.packets;
3806         }
3807
3808         netdev->stats.rx_bytes = bytes;
3809         netdev->stats.rx_packets = packets;
3810
3811         bytes = 0;
3812         packets = 0;
3813         for (i = 0; i < adapter->num_tx_queues; i++) {
3814                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3815                 packets += adapter->tx_ring[i].tx_stats.packets;
3816         }
3817         netdev->stats.tx_bytes = bytes;
3818         netdev->stats.tx_packets = packets;
3819
3820         /* read stats registers */
3821         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3822         adapter->stats.gprc += rd32(E1000_GPRC);
3823         adapter->stats.gorc += rd32(E1000_GORCL);
3824         rd32(E1000_GORCH); /* clear GORCL */
3825         adapter->stats.bprc += rd32(E1000_BPRC);
3826         adapter->stats.mprc += rd32(E1000_MPRC);
3827         adapter->stats.roc += rd32(E1000_ROC);
3828
3829         adapter->stats.prc64 += rd32(E1000_PRC64);
3830         adapter->stats.prc127 += rd32(E1000_PRC127);
3831         adapter->stats.prc255 += rd32(E1000_PRC255);
3832         adapter->stats.prc511 += rd32(E1000_PRC511);
3833         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3834         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3835         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3836         adapter->stats.sec += rd32(E1000_SEC);
3837
3838         adapter->stats.mpc += rd32(E1000_MPC);
3839         adapter->stats.scc += rd32(E1000_SCC);
3840         adapter->stats.ecol += rd32(E1000_ECOL);
3841         adapter->stats.mcc += rd32(E1000_MCC);
3842         adapter->stats.latecol += rd32(E1000_LATECOL);
3843         adapter->stats.dc += rd32(E1000_DC);
3844         adapter->stats.rlec += rd32(E1000_RLEC);
3845         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3846         adapter->stats.xontxc += rd32(E1000_XONTXC);
3847         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3848         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3849         adapter->stats.fcruc += rd32(E1000_FCRUC);
3850         adapter->stats.gptc += rd32(E1000_GPTC);
3851         adapter->stats.gotc += rd32(E1000_GOTCL);
3852         rd32(E1000_GOTCH); /* clear GOTCL */
3853         rnbc = rd32(E1000_RNBC);
3854         adapter->stats.rnbc += rnbc;
3855         netdev->stats.rx_fifo_errors += rnbc;
3856         adapter->stats.ruc += rd32(E1000_RUC);
3857         adapter->stats.rfc += rd32(E1000_RFC);
3858         adapter->stats.rjc += rd32(E1000_RJC);
3859         adapter->stats.tor += rd32(E1000_TORH);
3860         adapter->stats.tot += rd32(E1000_TOTH);
3861         adapter->stats.tpr += rd32(E1000_TPR);
3862
3863         adapter->stats.ptc64 += rd32(E1000_PTC64);
3864         adapter->stats.ptc127 += rd32(E1000_PTC127);
3865         adapter->stats.ptc255 += rd32(E1000_PTC255);
3866         adapter->stats.ptc511 += rd32(E1000_PTC511);
3867         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3868         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3869
3870         adapter->stats.mptc += rd32(E1000_MPTC);
3871         adapter->stats.bptc += rd32(E1000_BPTC);
3872
3873         /* used for adaptive IFS */
3874         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3875         adapter->stats.tpt += hw->mac.tx_packet_delta;
3876         hw->mac.collision_delta = rd32(E1000_COLC);
3877         adapter->stats.colc += hw->mac.collision_delta;
3878
3879         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3880         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3881         adapter->stats.tncrs += rd32(E1000_TNCRS);
3882         adapter->stats.tsctc += rd32(E1000_TSCTC);
3883         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3884
3885         adapter->stats.iac += rd32(E1000_IAC);
3886         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3887         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3888         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3889         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3890         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3891         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3892         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3893         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3894
3895         /* Fill out the OS statistics structure */
3896         netdev->stats.multicast = adapter->stats.mprc;
3897         netdev->stats.collisions = adapter->stats.colc;
3898
3899         /* Rx Errors */
3900
3901         /* RLEC on some newer hardware can be incorrect so build
3902          * our own version based on RUC and ROC */
3903         netdev->stats.rx_errors = adapter->stats.rxerrc +
3904                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3905                 adapter->stats.ruc + adapter->stats.roc +
3906                 adapter->stats.cexterr;
3907         netdev->stats.rx_length_errors = adapter->stats.ruc +
3908                                               adapter->stats.roc;
3909         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3910         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3911         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3912
3913         /* Tx Errors */
3914         netdev->stats.tx_errors = adapter->stats.ecol +
3915                                        adapter->stats.latecol;
3916         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3917         netdev->stats.tx_window_errors = adapter->stats.latecol;
3918         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3919
3920         /* Tx Dropped needs to be maintained elsewhere */
3921
3922         /* Phy Stats */
3923         if (hw->phy.media_type == e1000_media_type_copper) {
3924                 if ((adapter->link_speed == SPEED_1000) &&
3925                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3926                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3927                         adapter->phy_stats.idle_errors += phy_tmp;
3928                 }
3929         }
3930
3931         /* Management Stats */
3932         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3933         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3934         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3935 }
3936
3937 static irqreturn_t igb_msix_other(int irq, void *data)
3938 {
3939         struct igb_adapter *adapter = data;
3940         struct e1000_hw *hw = &adapter->hw;
3941         u32 icr = rd32(E1000_ICR);
3942         /* reading ICR causes bit 31 of EICR to be cleared */
3943
3944         if (icr & E1000_ICR_DOUTSYNC) {
3945                 /* HW is reporting DMA is out of sync */
3946                 adapter->stats.doosync++;
3947         }
3948
3949         /* Check for a mailbox event */
3950         if (icr & E1000_ICR_VMMB)
3951                 igb_msg_task(adapter);
3952
3953         if (icr & E1000_ICR_LSC) {
3954                 hw->mac.get_link_status = 1;
3955                 /* guard against interrupt when we're going down */
3956                 if (!test_bit(__IGB_DOWN, &adapter->state))
3957                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3958         }
3959
3960         if (adapter->vfs_allocated_count)
3961                 wr32(E1000_IMS, E1000_IMS_LSC |
3962                                 E1000_IMS_VMMB |
3963                                 E1000_IMS_DOUTSYNC);
3964         else
3965                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3966         wr32(E1000_EIMS, adapter->eims_other);
3967
3968         return IRQ_HANDLED;
3969 }
3970
3971 static void igb_write_itr(struct igb_q_vector *q_vector)
3972 {
3973         u32 itr_val = q_vector->itr_val & 0x7FFC;
3974
3975         if (!q_vector->set_itr)
3976                 return;
3977
3978         if (!itr_val)
3979                 itr_val = 0x4;
3980
3981         if (q_vector->itr_shift)
3982                 itr_val |= itr_val << q_vector->itr_shift;
3983         else
3984                 itr_val |= 0x8000000;
3985
3986         writel(itr_val, q_vector->itr_register);
3987         q_vector->set_itr = 0;
3988 }
3989
3990 static irqreturn_t igb_msix_ring(int irq, void *data)
3991 {
3992         struct igb_q_vector *q_vector = data;
3993
3994         /* Write the ITR value calculated from the previous interrupt. */
3995         igb_write_itr(q_vector);
3996
3997         napi_schedule(&q_vector->napi);
3998
3999         return IRQ_HANDLED;
4000 }
4001
4002 #ifdef CONFIG_IGB_DCA
4003 static void igb_update_dca(struct igb_q_vector *q_vector)
4004 {
4005         struct igb_adapter *adapter = q_vector->adapter;
4006         struct e1000_hw *hw = &adapter->hw;
4007         int cpu = get_cpu();
4008
4009         if (q_vector->cpu == cpu)
4010                 goto out_no_update;
4011
4012         if (q_vector->tx_ring) {
4013                 int q = q_vector->tx_ring->reg_idx;
4014                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4015                 if (hw->mac.type == e1000_82575) {
4016                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4017                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4018                 } else {
4019                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4020                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4021                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4022                 }
4023                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4024                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4025         }
4026         if (q_vector->rx_ring) {
4027                 int q = q_vector->rx_ring->reg_idx;
4028                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4029                 if (hw->mac.type == e1000_82575) {
4030                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4031                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4032                 } else {
4033                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4034                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4035                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4036                 }
4037                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4038                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4039                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4040                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4041         }
4042         q_vector->cpu = cpu;
4043 out_no_update:
4044         put_cpu();
4045 }
4046
4047 static void igb_setup_dca(struct igb_adapter *adapter)
4048 {
4049         struct e1000_hw *hw = &adapter->hw;
4050         int i;
4051
4052         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4053                 return;
4054
4055         /* Always use CB2 mode, difference is masked in the CB driver. */
4056         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4057
4058         for (i = 0; i < adapter->num_q_vectors; i++) {
4059                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4060                 q_vector->cpu = -1;
4061                 igb_update_dca(q_vector);
4062         }
4063 }
4064
4065 static int __igb_notify_dca(struct device *dev, void *data)
4066 {
4067         struct net_device *netdev = dev_get_drvdata(dev);
4068         struct igb_adapter *adapter = netdev_priv(netdev);
4069         struct pci_dev *pdev = adapter->pdev;
4070         struct e1000_hw *hw = &adapter->hw;
4071         unsigned long event = *(unsigned long *)data;
4072
4073         switch (event) {
4074         case DCA_PROVIDER_ADD:
4075                 /* if already enabled, don't do it again */
4076                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4077                         break;
4078                 if (dca_add_requester(dev) == 0) {
4079                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4080                         dev_info(&pdev->dev, "DCA enabled\n");
4081                         igb_setup_dca(adapter);
4082                         break;
4083                 }
4084                 /* Fall Through since DCA is disabled. */
4085         case DCA_PROVIDER_REMOVE:
4086                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4087                         /* without this a class_device is left
4088                          * hanging around in the sysfs model */
4089                         dca_remove_requester(dev);
4090                         dev_info(&pdev->dev, "DCA disabled\n");
4091                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4092                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4093                 }
4094                 break;
4095         }
4096
4097         return 0;
4098 }
4099
4100 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4101                           void *p)
4102 {
4103         int ret_val;
4104
4105         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4106                                          __igb_notify_dca);
4107
4108         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4109 }
4110 #endif /* CONFIG_IGB_DCA */
4111
4112 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4113 {
4114         struct e1000_hw *hw = &adapter->hw;
4115         u32 ping;
4116         int i;
4117
4118         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4119                 ping = E1000_PF_CONTROL_MSG;
4120                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4121                         ping |= E1000_VT_MSGTYPE_CTS;
4122                 igb_write_mbx(hw, &ping, 1, i);
4123         }
4124 }
4125
4126 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4127 {
4128         struct e1000_hw *hw = &adapter->hw;
4129         u32 vmolr = rd32(E1000_VMOLR(vf));
4130         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4131
4132         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4133                             IGB_VF_FLAG_MULTI_PROMISC);
4134         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4135
4136         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4137                 vmolr |= E1000_VMOLR_MPME;
4138                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4139         } else {
4140                 /*
4141                  * if we have hashes and we are clearing a multicast promisc
4142                  * flag we need to write the hashes to the MTA as this step
4143                  * was previously skipped
4144                  */
4145                 if (vf_data->num_vf_mc_hashes > 30) {
4146                         vmolr |= E1000_VMOLR_MPME;
4147                 } else if (vf_data->num_vf_mc_hashes) {
4148                         int j;
4149                         vmolr |= E1000_VMOLR_ROMPE;
4150                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4151                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4152                 }
4153         }
4154
4155         wr32(E1000_VMOLR(vf), vmolr);
4156
4157         /* there are flags left unprocessed, likely not supported */
4158         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4159                 return -EINVAL;
4160
4161         return 0;
4163 }
4164
4165 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4166                                   u32 *msgbuf, u32 vf)
4167 {
4168         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4169         u16 *hash_list = (u16 *)&msgbuf[1];
4170         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4171         int i;
4172
4173         /* salt away the number of multicast addresses assigned
4174          * to this VF for later use, to restore them when the PF
4175          * multicast list changes
4176          */
4177         vf_data->num_vf_mc_hashes = n;
4178
4179         /* only up to 30 hash values supported */
4180         if (n > 30)
4181                 n = 30;
4182
4183         /* store the hashes for later use */
4184         for (i = 0; i < n; i++)
4185                 vf_data->vf_mc_hashes[i] = hash_list[i];
4186
4187         /* Flush and reset the mta with the new values */
4188         igb_set_rx_mode(adapter->netdev);
4189
4190         return 0;
4191 }
4192
4193 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4194 {
4195         struct e1000_hw *hw = &adapter->hw;
4196         struct vf_data_storage *vf_data;
4197         int i, j;
4198
4199         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4200                 u32 vmolr = rd32(E1000_VMOLR(i));
4201                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4202
4203                 vf_data = &adapter->vf_data[i];
4204
4205                 if ((vf_data->num_vf_mc_hashes > 30) ||
4206                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4207                         vmolr |= E1000_VMOLR_MPME;
4208                 } else if (vf_data->num_vf_mc_hashes) {
4209                         vmolr |= E1000_VMOLR_ROMPE;
4210                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4211                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4212                 }
4213                 wr32(E1000_VMOLR(i), vmolr);
4214         }
4215 }
4216
4217 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4218 {
4219         struct e1000_hw *hw = &adapter->hw;
4220         u32 pool_mask, reg, vid;
4221         int i;
4222
4223         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
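             /*
              * Each VLVF entry carries a pool-select bitmap above the VLAN id
              * field; bit (E1000_VLVF_POOLSEL_SHIFT + vf) marks this VF as a
              * member of the entry's pool, so clearing it below drops the VF
              * from every VLAN filter.
              */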
4224
4225         /* Find the vlan filter for this id */
4226         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4227                 reg = rd32(E1000_VLVF(i));
4228
4229                 /* remove the vf from the pool */
4230                 reg &= ~pool_mask;
4231
4232                 /* if pool is empty then remove entry from vfta */
4233                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4234                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4235                         vid = reg & E1000_VLVF_VLANID_MASK;
4236                         reg = 0;
4237                         igb_vfta_set(hw, vid, false);
4238                 }
4239
4240                 wr32(E1000_VLVF(i), reg);
4241         }
4242
4243         adapter->vf_data[vf].vlans_enabled = 0;
4244 }
4245
4246 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4247 {
4248         struct e1000_hw *hw = &adapter->hw;
4249         u32 reg, i;
4250
4251         /* The vlvf table only exists on 82576 hardware and newer */
4252         if (hw->mac.type < e1000_82576)
4253                 return -1;
4254
4255         /* we only need to do this if VMDq is enabled */
4256         if (!adapter->vfs_allocated_count)
4257                 return -1;
4258
4259         /* Find the vlan filter for this id */
4260         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4261                 reg = rd32(E1000_VLVF(i));
4262                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4263                     vid == (reg & E1000_VLVF_VLANID_MASK))
4264                         break;
4265         }
4266
4267         if (add) {
4268                 if (i == E1000_VLVF_ARRAY_SIZE) {
4269                         /* Did not find a matching VLAN ID entry that was
4270                          * enabled.  Search for a free filter entry, i.e.
4271                          * one without the enable bit set
4272                          */
4273                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4274                                 reg = rd32(E1000_VLVF(i));
4275                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4276                                         break;
4277                         }
4278                 }
4279                 if (i < E1000_VLVF_ARRAY_SIZE) {
4280                         /* Found an enabled/available entry */
4281                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4282
4283                         /* if !enabled we need to set this up in vfta */
4284                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4285                                 /* add VID to filter table */
4286                                 igb_vfta_set(hw, vid, true);
4287                                 reg |= E1000_VLVF_VLANID_ENABLE;
4288                         }
4289                         reg &= ~E1000_VLVF_VLANID_MASK;
4290                         reg |= vid;
4291                         wr32(E1000_VLVF(i), reg);
4292
4293                         /* do not modify RLPML for PF devices */
4294                         if (vf >= adapter->vfs_allocated_count)
4295                                 return 0;
4296
4297                         if (!adapter->vf_data[vf].vlans_enabled) {
4298                                 u32 size;
4299                                 reg = rd32(E1000_VMOLR(vf));
4300                                 size = reg & E1000_VMOLR_RLPML_MASK;
4301                                 size += 4;
4302                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4303                                 reg |= size;
4304                                 wr32(E1000_VMOLR(vf), reg);
4305                         }
4306
4307                         adapter->vf_data[vf].vlans_enabled++;
4308                         return 0;
4309                 }
4310         } else {
4311                 if (i < E1000_VLVF_ARRAY_SIZE) {
4312                         /* remove vf from the pool */
4313                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4314                         /* if pool is empty then remove entry from vfta */
4315                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4316                                 reg = 0;
4317                                 igb_vfta_set(hw, vid, false);
4318                         }
4319                         wr32(E1000_VLVF(i), reg);
4320
4321                         /* do not modify RLPML for PF devices */
4322                         if (vf >= adapter->vfs_allocated_count)
4323                                 return 0;
4324
4325                         adapter->vf_data[vf].vlans_enabled--;
4326                         if (!adapter->vf_data[vf].vlans_enabled) {
4327                                 u32 size;
4328                                 reg = rd32(E1000_VMOLR(vf));
4329                                 size = reg & E1000_VMOLR_RLPML_MASK;
4330                                 size -= 4;
4331                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4332                                 reg |= size;
4333                                 wr32(E1000_VMOLR(vf), reg);
4334                         }
4335                         return 0;
4336                 }
4337         }
4338         return -1;
4339 }
4340
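/*
 * Editorial sketch, not part of the driver: the VLVF entry layout that
 * igb_vlvf_set() manipulates above.  Bits 11:0 carry the VLAN ID,
 * E1000_VLVF_VLANID_ENABLE marks the entry in use, and one pool-select
 * bit per pool (PF and VFs) above E1000_VLVF_POOLSEL_SHIFT records who
 * belongs to the VLAN.  Composing a fresh entry therefore looks like:
 *
 *	reg  = vid & E1000_VLVF_VLANID_MASK;
 *	reg |= E1000_VLVF_VLANID_ENABLE;
 *	reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + pool);
 */
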
4341 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4342 {
4343         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4344         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4345
4346         return igb_vlvf_set(adapter, vid, add, vf);
4347 }
4348
4349 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4350 {
4351         /* clear all flags */
4352         adapter->vf_data[vf].flags = 0;
4353         adapter->vf_data[vf].last_nack = jiffies;
4354
4355         /* reset offloads to defaults */
4356         igb_set_vmolr(adapter, vf);
4357
4358         /* reset vlans for device */
4359         igb_clear_vf_vfta(adapter, vf);
4360
4361         /* reset multicast table array for vf */
4362         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4363
4364         /* Flush and reset the mta with the new values */
4365         igb_set_rx_mode(adapter->netdev);
4366 }
4367
4368 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4369 {
4370         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4371
4372         /* generate a new mac address as we were hotplug removed/added */
4373         random_ether_addr(vf_mac);
4374
4375         /* process remaining reset events */
4376         igb_vf_reset(adapter, vf);
4377 }
4378
4379 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4380 {
4381         struct e1000_hw *hw = &adapter->hw;
4382         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4383         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4384         u32 reg, msgbuf[3];
4385         u8 *addr = (u8 *)(&msgbuf[1]);
4386
4387         /* process all the same items cleared in a function level reset */
4388         igb_vf_reset(adapter, vf);
4389
4390         /* set vf mac address */
4391         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4392
4393         /* enable transmit and receive for vf */
4394         reg = rd32(E1000_VFTE);
4395         wr32(E1000_VFTE, reg | (1 << vf));
4396         reg = rd32(E1000_VFRE);
4397         wr32(E1000_VFRE, reg | (1 << vf));
4398
4399         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4400
4401         /* reply to reset with ack and vf mac address */
4402         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4403         memcpy(addr, vf_mac, ETH_ALEN);
4404         igb_write_mbx(hw, msgbuf, 3, vf);
4405 }
4406
4407 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4408 {
4409         unsigned char *addr = (unsigned char *)&msg[1];
4410         int err = -1;
4411
4412         if (is_valid_ether_addr(addr))
4413                 err = igb_set_vf_mac(adapter, vf, addr);
4414
4415         return err;
4416 }
4417
4418 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4419 {
4420         struct e1000_hw *hw = &adapter->hw;
4421         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4422         u32 msg = E1000_VT_MSGTYPE_NACK;
4423
4424         /* if device isn't clear to send it shouldn't be reading either */
4425         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4426             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4427                 igb_write_mbx(hw, &msg, 1, vf);
4428                 vf_data->last_nack = jiffies;
4429         }
4430 }
4431
4432 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4433 {
4434         struct pci_dev *pdev = adapter->pdev;
4435         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4436         struct e1000_hw *hw = &adapter->hw;
4437         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4438         s32 retval;
4439
4440         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4441
4442         if (retval)
4443                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4444
4445         /* this is a message we already processed, do nothing */
4446         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4447                 return;
4448
4449         /*
4450          * until the vf completes a reset it should not be
4451          * allowed to start any configuration.
4452          */
4453
4454         if (msgbuf[0] == E1000_VF_RESET) {
4455                 igb_vf_reset_msg(adapter, vf);
4456                 return;
4457         }
4458
4459         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4460                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4461                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4462                         igb_write_mbx(hw, msgbuf, 1, vf);
4463                         vf_data->last_nack = jiffies;
4464                 }
4465                 return;
4466         }
4467
4468         switch ((msgbuf[0] & 0xFFFF)) {
4469         case E1000_VF_SET_MAC_ADDR:
4470                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4471                 break;
4472         case E1000_VF_SET_PROMISC:
4473                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4474                 break;
4475         case E1000_VF_SET_MULTICAST:
4476                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4477                 break;
4478         case E1000_VF_SET_LPE:
4479                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4480                 break;
4481         case E1000_VF_SET_VLAN:
4482                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4483                 break;
4484         default:
4485                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4486                 retval = -1;
4487                 break;
4488         }
4489
4490         /* notify the VF of the results of what it sent us */
4491         if (retval)
4492                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4493         else
4494                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4495
4496         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4497
4498         igb_write_mbx(hw, msgbuf, 1, vf);
4499 }
4500
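/*
 * Editorial sketch, not part of the driver: anatomy of mailbox word 0 as
 * used by the handlers above.  The low 16 bits select the request
 * (E1000_VF_RESET, E1000_VF_SET_MAC_ADDR, ...); the PF ORs in
 * E1000_VT_MSGTYPE_ACK or E1000_VT_MSGTYPE_NACK to report the outcome,
 * plus E1000_VT_MSGTYPE_CTS once the VF has completed a reset and is
 * clear to configure itself:
 *
 *	msgbuf[0] = E1000_VF_SET_LPE;		(VF request)
 *	msgbuf[0] |= E1000_VT_MSGTYPE_ACK |	(PF reply)
 *		     E1000_VT_MSGTYPE_CTS;
 */
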
4501 static void igb_msg_task(struct igb_adapter *adapter)
4502 {
4503         struct e1000_hw *hw = &adapter->hw;
4504         u32 vf;
4505
4506         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4507                 /* process any reset requests */
4508                 if (!igb_check_for_rst(hw, vf))
4509                         igb_vf_reset_event(adapter, vf);
4510
4511                 /* process any messages pending */
4512                 if (!igb_check_for_msg(hw, vf))
4513                         igb_rcv_msg_from_vf(adapter, vf);
4514
4515                 /* process any acks */
4516                 if (!igb_check_for_ack(hw, vf))
4517                         igb_rcv_ack_from_vf(adapter, vf);
4518         }
4519 }
4520
4521 /**
4522  *  igb_set_uta - Set unicast filter table address
4523  *  @adapter: board private structure
4524  *
4525  *  The unicast table address is a register array of 32-bit registers.
4526  *  The table is meant to be used in a way similar to how the MTA is used
4527  *  however due to certain limitations in the hardware it is necessary to
4528  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4529  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4530  **/
4531 static void igb_set_uta(struct igb_adapter *adapter)
4532 {
4533         struct e1000_hw *hw = &adapter->hw;
4534         int i;
4535
4536         /* The UTA table only exists on 82576 hardware and newer */
4537         if (hw->mac.type < e1000_82576)
4538                 return;
4539
4540         /* we only need to do this if VMDq is enabled */
4541         if (!adapter->vfs_allocated_count)
4542                 return;
4543
4544         for (i = 0; i < hw->mac.uta_reg_count; i++)
4545                 array_wr32(E1000_UTA, i, ~0);
4546 }
4547
4548 /**
4549  * igb_intr_msi - Interrupt Handler
4550  * @irq: interrupt number
4551  * @data: pointer to a network interface device structure
4552  **/
4553 static irqreturn_t igb_intr_msi(int irq, void *data)
4554 {
4555         struct igb_adapter *adapter = data;
4556         struct igb_q_vector *q_vector = adapter->q_vector[0];
4557         struct e1000_hw *hw = &adapter->hw;
4558         /* read ICR disables interrupts using IAM */
4559         u32 icr = rd32(E1000_ICR);
4560
4561         igb_write_itr(q_vector);
4562
4563         if (icr & E1000_ICR_DOUTSYNC) {
4564                 /* HW is reporting DMA is out of sync */
4565                 adapter->stats.doosync++;
4566         }
4567
4568         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4569                 hw->mac.get_link_status = 1;
4570                 if (!test_bit(__IGB_DOWN, &adapter->state))
4571                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4572         }
4573
4574         napi_schedule(&q_vector->napi);
4575
4576         return IRQ_HANDLED;
4577 }
4578
4579 /**
4580  * igb_intr - Legacy Interrupt Handler
4581  * @irq: interrupt number
4582  * @data: pointer to a network interface device structure
4583  **/
4584 static irqreturn_t igb_intr(int irq, void *data)
4585 {
4586         struct igb_adapter *adapter = data;
4587         struct igb_q_vector *q_vector = adapter->q_vector[0];
4588         struct e1000_hw *hw = &adapter->hw;
4589         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4590          * need for the IMC write */
4591         u32 icr = rd32(E1000_ICR);
4592         if (!icr)
4593                 return IRQ_NONE;  /* Not our interrupt */
4594
4595         igb_write_itr(q_vector);
4596
4597         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4598          * not set, then the adapter didn't send an interrupt */
4599         if (!(icr & E1000_ICR_INT_ASSERTED))
4600                 return IRQ_NONE;
4601
4602         if (icr & E1000_ICR_DOUTSYNC) {
4603                 /* HW is reporting DMA is out of sync */
4604                 adapter->stats.doosync++;
4605         }
4606
4607         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4608                 hw->mac.get_link_status = 1;
4609                 /* guard against interrupt when we're going down */
4610                 if (!test_bit(__IGB_DOWN, &adapter->state))
4611                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4612         }
4613
4614         napi_schedule(&q_vector->napi);
4615
4616         return IRQ_HANDLED;
4617 }
4618
4619 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4620 {
4621         struct igb_adapter *adapter = q_vector->adapter;
4622         struct e1000_hw *hw = &adapter->hw;
4623
4624         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4625             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4626                 if (!adapter->msix_entries)
4627                         igb_set_itr(adapter);
4628                 else
4629                         igb_update_ring_itr(q_vector);
4630         }
4631
4632         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4633                 if (adapter->msix_entries)
4634                         wr32(E1000_EIMS, q_vector->eims_value);
4635                 else
4636                         igb_irq_enable(adapter);
4637         }
4638 }
4639
4640 /**
4641  * igb_poll - NAPI Rx polling callback
4642  * @napi: napi polling structure
4643  * @budget: count of how many packets we should handle
4644  **/
4645 static int igb_poll(struct napi_struct *napi, int budget)
4646 {
4647         struct igb_q_vector *q_vector = container_of(napi,
4648                                                      struct igb_q_vector,
4649                                                      napi);
4650         int tx_clean_complete = 1, work_done = 0;
4651
4652 #ifdef CONFIG_IGB_DCA
4653         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4654                 igb_update_dca(q_vector);
4655 #endif
4656         if (q_vector->tx_ring)
4657                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4658
4659         if (q_vector->rx_ring)
4660                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4661
4662         if (!tx_clean_complete)
4663                 work_done = budget;
4664
4665         /* If not enough Rx work done, exit the polling mode */
4666         if (work_done < budget) {
4667                 napi_complete(napi);
4668                 igb_ring_irq_enable(q_vector);
4669         }
4670
4671         return work_done;
4672 }
4673
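/*
 * Editorial note, not part of the driver: the NAPI contract igb_poll()
 * implements above.  Returning less than the budget after napi_complete()
 * tells the core this vector is idle so its interrupt can be re-enabled;
 * returning the full budget keeps the vector on the poll list.  Forcing
 * work_done = budget while Tx cleanup is incomplete is how the driver
 * asks to be polled again even when Rx is quiet.
 */
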
4674 /**
4675  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4676  * @adapter: board private structure
4677  * @shhwtstamps: timestamp structure to update
4678  * @regval: unsigned 64bit system time value.
4679  *
4680  * We need to convert the system time value stored in the RX/TXSTMP registers
4681  * into a hwtstamp which can be used by the upper level timestamping functions
4682  */
4683 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4684                                    struct skb_shared_hwtstamps *shhwtstamps,
4685                                    u64 regval)
4686 {
4687         u64 ns;
4688
4689         ns = timecounter_cyc2time(&adapter->clock, regval);
4690         timecompare_update(&adapter->compare, ns);
4691         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4692         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4693         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4694 }
4695
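/*
 * Editorial sketch, not part of the driver: what the conversion above is
 * doing.  SYSTIM is a free-running cycle counter, and
 * timecounter_cyc2time() extends the raw reading into nanoseconds using
 * the cyclecounter registered at probe time, roughly:
 *
 *	delta = (regval - last_reading) & cyclecounter_mask;
 *	ns = nsec_base + cyclecounter_cyc2ns(cc, delta);
 *
 * (last_reading/nsec_base stand in for the timecounter's internal state.)
 * timecompare_update() then refreshes the offset between that hardware
 * clock and the system clock so syststamp can be derived from the same
 * reading.
 */
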
4696 /**
4697  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4698  * @q_vector: pointer to q_vector containing needed info
4699  * @skb: packet that was just sent
4700  *
4701  * If we were asked to do hardware stamping and such a time stamp is
4702  * available, then it must have been for this skb here because we
4703  * allow only one such packet into the queue.
4704  */
4705 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4706 {
4707         struct igb_adapter *adapter = q_vector->adapter;
4708         union skb_shared_tx *shtx = skb_tx(skb);
4709         struct e1000_hw *hw = &adapter->hw;
4710         struct skb_shared_hwtstamps shhwtstamps;
4711         u64 regval;
4712
4713         /* if skb does not support hw timestamp or TX stamp not valid exit */
4714         if (likely(!shtx->hardware) ||
4715             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4716                 return;
4717
4718         regval = rd32(E1000_TXSTMPL);
4719         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4720
4721         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4722         skb_tstamp_tx(skb, &shhwtstamps);
4723 }
4724
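/*
 * Editorial sketch, not part of the driver: what sets shtx->hardware in
 * the first place.  A socket opts in with SO_TIMESTAMPING and the stack
 * then marks outgoing skbs for hardware stamping:
 *
 *	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
 *		    SOF_TIMESTAMPING_RAW_HARDWARE;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
 *		   &flags, sizeof(flags));
 *
 * The stamp captured above is delivered back to that socket's error
 * queue by skb_tstamp_tx().
 */
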
4725 /**
4726  * igb_clean_tx_irq - Reclaim resources after transmit completes
4727  * @q_vector: pointer to q_vector containing needed info
4728  * returns true if ring is completely cleaned
4729  **/
4730 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4731 {
4732         struct igb_adapter *adapter = q_vector->adapter;
4733         struct igb_ring *tx_ring = q_vector->tx_ring;
4734         struct net_device *netdev = tx_ring->netdev;
4735         struct e1000_hw *hw = &adapter->hw;
4736         struct igb_buffer *buffer_info;
4737         struct sk_buff *skb;
4738         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4739         unsigned int total_bytes = 0, total_packets = 0;
4740         unsigned int i, eop, count = 0;
4741         bool cleaned = false;
4742
4743         i = tx_ring->next_to_clean;
4744         eop = tx_ring->buffer_info[i].next_to_watch;
4745         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4746
4747         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4748                (count < tx_ring->count)) {
4749                 for (cleaned = false; !cleaned; count++) {
4750                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4751                         buffer_info = &tx_ring->buffer_info[i];
4752                         cleaned = (i == eop);
4753                         skb = buffer_info->skb;
4754
4755                         if (skb) {
4756                                 unsigned int segs, bytecount;
4757                                 /* gso_segs is currently only valid for tcp */
4758                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4759                                 /* multiply data chunks by size of headers */
4760                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4761                                             skb->len;
4762                                 total_packets += segs;
4763                                 total_bytes += bytecount;
4764
4765                                 igb_tx_hwtstamp(q_vector, skb);
4766                         }
4767
4768                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4769                         tx_desc->wb.status = 0;
4770
4771                         i++;
4772                         if (i == tx_ring->count)
4773                                 i = 0;
4774                 }
4775                 eop = tx_ring->buffer_info[i].next_to_watch;
4776                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4777         }
4778
4779         tx_ring->next_to_clean = i;
4780
4781         if (unlikely(count &&
4782                      netif_carrier_ok(netdev) &&
4783                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4784                 /* Make sure that anybody stopping the queue after this
4785                  * sees the new next_to_clean.
4786                  */
4787                 smp_mb();
4788                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4789                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4790                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4791                         tx_ring->tx_stats.restart_queue++;
4792                 }
4793         }
4794
4795         if (tx_ring->detect_tx_hung) {
4796                 /* Detect a transmit hang in hardware, this serializes the
4797                  * check with the clearing of time_stamp and movement of i */
4798                 tx_ring->detect_tx_hung = false;
4799                 if (tx_ring->buffer_info[i].time_stamp &&
4800                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4801                                (adapter->tx_timeout_factor * HZ))
4802                     && !(rd32(E1000_STATUS) &
4803                          E1000_STATUS_TXOFF)) {
4804
4805                         /* detected Tx unit hang */
4806                         dev_err(&tx_ring->pdev->dev,
4807                                 "Detected Tx Unit Hang\n"
4808                                 "  Tx Queue             <%d>\n"
4809                                 "  TDH                  <%x>\n"
4810                                 "  TDT                  <%x>\n"
4811                                 "  next_to_use          <%x>\n"
4812                                 "  next_to_clean        <%x>\n"
4813                                 "buffer_info[next_to_clean]\n"
4814                                 "  time_stamp           <%lx>\n"
4815                                 "  next_to_watch        <%x>\n"
4816                                 "  jiffies              <%lx>\n"
4817                                 "  desc.status          <%x>\n",
4818                                 tx_ring->queue_index,
4819                                 readl(tx_ring->head),
4820                                 readl(tx_ring->tail),
4821                                 tx_ring->next_to_use,
4822                                 tx_ring->next_to_clean,
4823                                 tx_ring->buffer_info[eop].time_stamp,
4824                                 eop,
4825                                 jiffies,
4826                                 eop_desc->wb.status);
4827                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4828                 }
4829         }
4830         tx_ring->total_bytes += total_bytes;
4831         tx_ring->total_packets += total_packets;
4832         tx_ring->tx_stats.bytes += total_bytes;
4833         tx_ring->tx_stats.packets += total_packets;
4834         return (count < tx_ring->count);
4835 }
4836
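/*
 * Editorial sketch, not part of the driver: the ring bookkeeping behind
 * the wake test above.  next_to_use chases next_to_clean around a
 * fixed-size descriptor array, and igb_desc_unused() is the gap between
 * them, roughly:
 *
 *	unused = (ntc > ntu) ? ntc - ntu - 1
 *			     : ring->count + ntc - ntu - 1;
 *
 * The queue is restarted only once that gap reaches IGB_TX_QUEUE_WAKE.
 */
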
4837 /**
4838  * igb_receive_skb - helper function to handle rx indications
4839  * @q_vector: structure containing interrupt and ring information
4840  * @skb: packet to send up
4841  * @vlan_tag: vlan tag for packet
4842  **/
4843 static void igb_receive_skb(struct igb_q_vector *q_vector,
4844                             struct sk_buff *skb,
4845                             u16 vlan_tag)
4846 {
4847         struct igb_adapter *adapter = q_vector->adapter;
4848
4849         if (vlan_tag)
4850                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4851                                  vlan_tag, skb);
4852         else
4853                 napi_gro_receive(&q_vector->napi, skb);
4854 }
4855
4856 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4857                                        u32 status_err, struct sk_buff *skb)
4858 {
4859         skb->ip_summed = CHECKSUM_NONE;
4860
4861         /* bail if the Ignore Checksum bit is set or checksum was disabled via ethtool */
4862         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4863              (status_err & E1000_RXD_STAT_IXSM))
4864                 return;
4865
4866         /* TCP/UDP checksum error bit is set */
4867         if (status_err &
4868             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4869                 /*
4870                  * work around errata with sctp packets where the TCPE aka
4871                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4872                  * packets, (aka let the stack check the crc32c)
4873                  */
4874                 if (!((skb->len == 60) &&
4875                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)))
4876                         ring->rx_stats.csum_err++;
4877
4878                 /* let the stack verify checksum errors */
4879                 return;
4880         }
4881         /* It must be a TCP or UDP packet with a valid checksum */
4882         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4883                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4884
4885         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4886 }
4887
4888 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4889                                    struct sk_buff *skb)
4890 {
4891         struct igb_adapter *adapter = q_vector->adapter;
4892         struct e1000_hw *hw = &adapter->hw;
4893         u64 regval;
4894
4895         /*
4896          * If this bit is set, then the RX registers contain the time stamp. No
4897          * other packet will be time stamped until we read these registers, so
4898          * read the registers to make them available again. Because only one
4899          * packet can be time stamped at a time, we know that the register
4900          * values must belong to this one here and therefore we don't need to
4901          * compare any of the additional attributes stored for it.
4902          *
4903          * If nothing went wrong, then it should have a skb_shared_tx that we
4904          * can turn into a skb_shared_hwtstamps.
4905          */
4906         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4907                 return;
4908         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4909                 return;
4910
4911         regval = rd32(E1000_RXSTMPL);
4912         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4913
4914         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4915 }
4916 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4917                                union e1000_adv_rx_desc *rx_desc)
4918 {
4919         /* HW will not DMA in data larger than the given buffer, even if it
4920          * parses the (NFS, of course) header to be larger.  In that case, it
4921          * fills the header buffer and spills the rest into the page.
4922          */
4923         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4924                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4925         if (hlen > rx_ring->rx_buffer_len)
4926                 hlen = rx_ring->rx_buffer_len;
4927         return hlen;
4928 }
4929
4930 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4931                                  int *work_done, int budget)
4932 {
4933         struct igb_ring *rx_ring = q_vector->rx_ring;
4934         struct net_device *netdev = rx_ring->netdev;
4935         struct pci_dev *pdev = rx_ring->pdev;
4936         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4937         struct igb_buffer *buffer_info, *next_buffer;
4938         struct sk_buff *skb;
4939         bool cleaned = false;
4940         int cleaned_count = 0;
4941         unsigned int total_bytes = 0, total_packets = 0;
4942         unsigned int i;
4943         u32 staterr;
4944         u16 length;
4945         u16 vlan_tag;
4946
4947         i = rx_ring->next_to_clean;
4948         buffer_info = &rx_ring->buffer_info[i];
4949         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4950         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4951
4952         while (staterr & E1000_RXD_STAT_DD) {
4953                 if (*work_done >= budget)
4954                         break;
4955                 (*work_done)++;
4956
4957                 skb = buffer_info->skb;
4958                 prefetch(skb->data - NET_IP_ALIGN);
4959                 buffer_info->skb = NULL;
4960
4961                 i++;
4962                 if (i == rx_ring->count)
4963                         i = 0;
4964
4965                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4966                 prefetch(next_rxd);
4967                 next_buffer = &rx_ring->buffer_info[i];
4968
4969                 length = le16_to_cpu(rx_desc->wb.upper.length);
4970                 cleaned = true;
4971                 cleaned_count++;
4972
4973                 if (buffer_info->dma) {
4974                         pci_unmap_single(pdev, buffer_info->dma,
4975                                          rx_ring->rx_buffer_len,
4976                                          PCI_DMA_FROMDEVICE);
4977                         buffer_info->dma = 0;
4978                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4979                                 skb_put(skb, length);
4980                                 goto send_up;
4981                         }
4982                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4983                 }
4984
4985                 if (length) {
4986                         pci_unmap_page(pdev, buffer_info->page_dma,
4987                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4988                         buffer_info->page_dma = 0;
4989
4990                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4991                                                 buffer_info->page,
4992                                                 buffer_info->page_offset,
4993                                                 length);
4994
4995                         if (page_count(buffer_info->page) != 1)
4996                                 buffer_info->page = NULL;
4997                         else
4998                                 get_page(buffer_info->page);
4999
5000                         skb->len += length;
5001                         skb->data_len += length;
5002                         skb->truesize += length;
5003                 }
5004
5005                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5006                         buffer_info->skb = next_buffer->skb;
5007                         buffer_info->dma = next_buffer->dma;
5008                         next_buffer->skb = skb;
5009                         next_buffer->dma = 0;
5010                         goto next_desc;
5011                 }
5012 send_up:
5013                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5014                         dev_kfree_skb_irq(skb);
5015                         goto next_desc;
5016                 }
5017
5018                 igb_rx_hwtstamp(q_vector, staterr, skb);
5019                 total_bytes += skb->len;
5020                 total_packets++;
5021
5022                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5023
5024                 skb->protocol = eth_type_trans(skb, netdev);
5025                 skb_record_rx_queue(skb, rx_ring->queue_index);
5026
5027                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5028                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5029
5030                 igb_receive_skb(q_vector, skb, vlan_tag);
5031
5032 next_desc:
5033                 rx_desc->wb.upper.status_error = 0;
5034
5035                 /* return some buffers to hardware, one at a time is too slow */
5036                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5037                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5038                         cleaned_count = 0;
5039                 }
5040
5041                 /* use prefetched values */
5042                 rx_desc = next_rxd;
5043                 buffer_info = next_buffer;
5044                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5045         }
5046
5047         rx_ring->next_to_clean = i;
5048         cleaned_count = igb_desc_unused(rx_ring);
5049
5050         if (cleaned_count)
5051                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5052
5053         rx_ring->total_packets += total_packets;
5054         rx_ring->total_bytes += total_bytes;
5055         rx_ring->rx_stats.packets += total_packets;
5056         rx_ring->rx_stats.bytes += total_bytes;
5057         return cleaned;
5058 }
5059
5060 /**
5061  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5062  * @rx_ring: address of the receive ring to repopulate
5063  **/
5064 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5065 {
5066         struct net_device *netdev = rx_ring->netdev;
5067         union e1000_adv_rx_desc *rx_desc;
5068         struct igb_buffer *buffer_info;
5069         struct sk_buff *skb;
5070         unsigned int i;
5071         int bufsz;
5072
5073         i = rx_ring->next_to_use;
5074         buffer_info = &rx_ring->buffer_info[i];
5075
5076         bufsz = rx_ring->rx_buffer_len;
5077
5078         while (cleaned_count--) {
5079                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5080
5081                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5082                         if (!buffer_info->page) {
5083                                 buffer_info->page = netdev_alloc_page(netdev);
5084                                 if (!buffer_info->page) {
5085                                         rx_ring->rx_stats.alloc_failed++;
5086                                         goto no_buffers;
5087                                 }
5088                                 buffer_info->page_offset = 0;
5089                         } else {
5090                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5091                         }
5092                         buffer_info->page_dma =
5093                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5094                                              buffer_info->page_offset,
5095                                              PAGE_SIZE / 2,
5096                                              PCI_DMA_FROMDEVICE);
5097                         if (pci_dma_mapping_error(rx_ring->pdev,
5098                                                   buffer_info->page_dma)) {
5099                                 buffer_info->page_dma = 0;
5100                                 rx_ring->rx_stats.alloc_failed++;
5101                                 goto no_buffers;
5102                         }
5103                 }
5104
5105                 skb = buffer_info->skb;
5106                 if (!skb) {
5107                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5108                         if (!skb) {
5109                                 rx_ring->rx_stats.alloc_failed++;
5110                                 goto no_buffers;
5111                         }
5112
5113                         buffer_info->skb = skb;
5114                 }
5115                 if (!buffer_info->dma) {
5116                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5117                                                           skb->data,
5118                                                           bufsz,
5119                                                           PCI_DMA_FROMDEVICE);
5120                         if (pci_dma_mapping_error(rx_ring->pdev,
5121                                                   buffer_info->dma)) {
5122                                 buffer_info->dma = 0;
5123                                 rx_ring->rx_stats.alloc_failed++;
5124                                 goto no_buffers;
5125                         }
5126                 }
5127                 /* Refresh the desc even if buffer_addrs didn't change because
5128                  * each write-back erases this info. */
5129                 if (bufsz < IGB_RXBUFFER_1024) {
5130                         rx_desc->read.pkt_addr =
5131                              cpu_to_le64(buffer_info->page_dma);
5132                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5133                 } else {
5134                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5135                         rx_desc->read.hdr_addr = 0;
5136                 }
5137
5138                 i++;
5139                 if (i == rx_ring->count)
5140                         i = 0;
5141                 buffer_info = &rx_ring->buffer_info[i];
5142         }
5143
5144 no_buffers:
5145         if (rx_ring->next_to_use != i) {
5146                 rx_ring->next_to_use = i;
5147                 if (i == 0)
5148                         i = (rx_ring->count - 1);
5149                 else
5150                         i--;
5151
5152                 /* Force memory writes to complete before letting h/w
5153                  * know there are new descriptors to fetch.  (Only
5154                  * applicable for weak-ordered memory model archs,
5155                  * such as IA-64). */
5156                 wmb();
5157                 writel(i, rx_ring->tail);
5158         }
5159 }
5160
5161 /**
5162  * igb_mii_ioctl - handle MII ioctls
5163  * @netdev: network interface device structure
5164  * @ifr: interface request data containing the MII registers
5165  * @cmd: one of SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG
5166  **/
5167 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5168 {
5169         struct igb_adapter *adapter = netdev_priv(netdev);
5170         struct mii_ioctl_data *data = if_mii(ifr);
5171
5172         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5173                 return -EOPNOTSUPP;
5174
5175         switch (cmd) {
5176         case SIOCGMIIPHY:
5177                 data->phy_id = adapter->hw.phy.addr;
5178                 break;
5179         case SIOCGMIIREG:
5180                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5181                                      &data->val_out))
5182                         return -EIO;
5183                 break;
5184         case SIOCSMIIREG:
5185         default:
5186                 return -EOPNOTSUPP;
5187         }
5188         return 0;
5189 }
5190
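/*
 * Editorial sketch, not part of the driver: exercising the MII path above
 * from user space.  Hypothetical fragment; "eth0" is an assumption:
 *
 *	struct ifreq ifr = { .ifr_name = "eth0" };
 *	struct mii_ioctl_data *mii =
 *		(struct mii_ioctl_data *)&ifr.ifr_data;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	ioctl(fd, SIOCGMIIPHY, &ifr);	(resolves the PHY address)
 *	mii->reg_num = MII_BMSR;	(basic mode status register)
 *	ioctl(fd, SIOCGMIIREG, &ifr);	(value lands in mii->val_out)
 */
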
5191 /**
5192  * igb_hwtstamp_ioctl - control hardware time stamping
5193  * @netdev: network interface device structure
5194  * @ifr: interface request carrying a struct hwtstamp_config
5195  * @cmd: ioctl command (SIOCSHWTSTAMP)
5196  *
5197  * Outgoing time stamping can be enabled and disabled. Play nice and
5198  * disable it when requested, although it shouldn't cause any overhead
5199  * when no packet needs it. At most one packet in the queue may be
5200  * marked for time stamping, otherwise it would be impossible to tell
5201  * for sure to which packet the hardware time stamp belongs.
5202  *
5203  * Incoming time stamping has to be configured via the hardware
5204  * filters. Not all combinations are supported, in particular event
5205  * type has to be specified. Matching the kind of event packet is
5206  * not supported, with the exception of "all V2 events regardless of
5207  * layer 2 or 4".
5208  *
5209  **/
5210 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5211                               struct ifreq *ifr, int cmd)
5212 {
5213         struct igb_adapter *adapter = netdev_priv(netdev);
5214         struct e1000_hw *hw = &adapter->hw;
5215         struct hwtstamp_config config;
5216         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5217         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5218         u32 tsync_rx_cfg = 0;
5219         bool is_l4 = false;
5220         bool is_l2 = false;
5221         u32 regval;
5222
5223         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5224                 return -EFAULT;
5225
5226         /* reserved for future extensions */
5227         if (config.flags)
5228                 return -EINVAL;
5229
5230         switch (config.tx_type) {
5231         case HWTSTAMP_TX_OFF:
5232                 tsync_tx_ctl = 0; /* fall through */
5233         case HWTSTAMP_TX_ON:
5234                 break;
5235         default:
5236                 return -ERANGE;
5237         }
5238
5239         switch (config.rx_filter) {
5240         case HWTSTAMP_FILTER_NONE:
5241                 tsync_rx_ctl = 0;
5242                 break;
5243         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5244         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5245         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5246         case HWTSTAMP_FILTER_ALL:
5247                 /*
5248                  * register TSYNCRXCFG must be set, therefore it is not
5249                  * possible to time stamp both Sync and Delay_Req messages
5250                  * => fall back to time stamping all packets
5251                  */
5252                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5253                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5254                 break;
5255         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5256                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5257                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5258                 is_l4 = true;
5259                 break;
5260         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5261                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5262                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5263                 is_l4 = true;
5264                 break;
5265         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5266         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5267                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5268                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5269                 is_l2 = true;
5270                 is_l4 = true;
5271                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5272                 break;
5273         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5274         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5275                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5276                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5277                 is_l2 = true;
5278                 is_l4 = true;
5279                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5280                 break;
5281         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5282         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5283         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5284                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5285                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5286                 is_l2 = true;
5287                 break;
5288         default:
5289                 return -ERANGE;
5290         }
5291
5292         if (hw->mac.type == e1000_82575) {
5293                 if (tsync_rx_ctl || tsync_tx_ctl)
5294                         return -EINVAL;
5295                 return 0;
5296         }
5297
5298         /* enable/disable TX */
5299         regval = rd32(E1000_TSYNCTXCTL);
5300         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5301         regval |= tsync_tx_ctl;
5302         wr32(E1000_TSYNCTXCTL, regval);
5303
5304         /* enable/disable RX */
5305         regval = rd32(E1000_TSYNCRXCTL);
5306         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5307         regval |= tsync_rx_ctl;
5308         wr32(E1000_TSYNCRXCTL, regval);
5309
5310         /* define which PTP packets are time stamped */
5311         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5312
5313         /* define ethertype filter for timestamped packets */
5314         if (is_l2)
5315                 wr32(E1000_ETQF(3),
5316                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5317                                  E1000_ETQF_1588 | /* enable timestamping */
5318                                  ETH_P_1588));     /* 1588 eth protocol type */
5319         else
5320                 wr32(E1000_ETQF(3), 0);
5321
5322 #define PTP_PORT 319
5323         /* L4 Queue Filter[3]: filter by destination port and protocol */
5324         if (is_l4) {
5325                 u32 ftqf = (IPPROTO_UDP /* UDP */
5326                         | E1000_FTQF_VF_BP /* VF not compared */
5327                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5328                         | E1000_FTQF_MASK); /* mask all inputs */
5329                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5330
5331                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5332                 wr32(E1000_IMIREXT(3),
5333                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5334                 if (hw->mac.type == e1000_82576) {
5335                         /* enable source port check */
5336                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5337                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5338                 }
5339                 wr32(E1000_FTQF(3), ftqf);
5340         } else {
5341                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5342         }
5343         wrfl();
5344
5345         adapter->hwtstamp_config = config;
5346
5347         /* clear TX/RX time stamp registers, just to be sure */
5348         regval = rd32(E1000_TXSTMPH);
5349         regval = rd32(E1000_RXSTMPH);
5350
5351         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5352                 -EFAULT : 0;
5353 }
5354
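/*
 * Editorial sketch, not part of the driver: requesting the configuration
 * handled above from user space.  Hypothetical fragment; "eth0" and fd
 * are assumptions:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr = { .ifr_name = "eth0" };
 *
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter reports what the hardware actually selected
 * (e.g. HWTSTAMP_FILTER_ALL when a narrower filter is unsupported).
 */
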
5355 /**
5356  * igb_ioctl - dispatch device ioctls
5357  * @netdev: network interface device structure
5358  * @ifr: interface request data
5359  * @cmd: ioctl command to execute
5360  **/
5361 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5362 {
5363         switch (cmd) {
5364         case SIOCGMIIPHY:
5365         case SIOCGMIIREG:
5366         case SIOCSMIIREG:
5367                 return igb_mii_ioctl(netdev, ifr, cmd);
5368         case SIOCSHWTSTAMP:
5369                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5370         default:
5371                 return -EOPNOTSUPP;
5372         }
5373 }
5374
5375 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5376 {
5377         struct igb_adapter *adapter = hw->back;
5378         u16 cap_offset;
5379
5380         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5381         if (!cap_offset)
5382                 return -E1000_ERR_CONFIG;
5383
5384         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5385
5386         return 0;
5387 }
5388
5389 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5390 {
5391         struct igb_adapter *adapter = hw->back;
5392         u16 cap_offset;
5393
5394         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5395         if (!cap_offset)
5396                 return -E1000_ERR_CONFIG;
5397
5398         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5399
5400         return 0;
5401 }
5402
5403 static void igb_vlan_rx_register(struct net_device *netdev,
5404                                  struct vlan_group *grp)
5405 {
5406         struct igb_adapter *adapter = netdev_priv(netdev);
5407         struct e1000_hw *hw = &adapter->hw;
5408         u32 ctrl, rctl;
5409
5410         igb_irq_disable(adapter);
5411         adapter->vlgrp = grp;
5412
5413         if (grp) {
5414                 /* enable VLAN tag insert/strip */
5415                 ctrl = rd32(E1000_CTRL);
5416                 ctrl |= E1000_CTRL_VME;
5417                 wr32(E1000_CTRL, ctrl);
5418
5419                 /* Disable CFI check */
5420                 rctl = rd32(E1000_RCTL);
5421                 rctl &= ~E1000_RCTL_CFIEN;
5422                 wr32(E1000_RCTL, rctl);
5423         } else {
5424                 /* disable VLAN tag insert/strip */
5425                 ctrl = rd32(E1000_CTRL);
5426                 ctrl &= ~E1000_CTRL_VME;
5427                 wr32(E1000_CTRL, ctrl);
5428         }
5429
5430         igb_rlpml_set(adapter);
5431
5432         if (!test_bit(__IGB_DOWN, &adapter->state))
5433                 igb_irq_enable(adapter);
5434 }
5435
5436 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5437 {
5438         struct igb_adapter *adapter = netdev_priv(netdev);
5439         struct e1000_hw *hw = &adapter->hw;
5440         int pf_id = adapter->vfs_allocated_count;
5441
5442         /* attempt to add filter to vlvf array */
5443         igb_vlvf_set(adapter, vid, true, pf_id);
5444
5445         /* add the filter since PF can receive vlans w/o entry in vlvf */
5446         igb_vfta_set(hw, vid, true);
5447 }
5448
5449 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5450 {
5451         struct igb_adapter *adapter = netdev_priv(netdev);
5452         struct e1000_hw *hw = &adapter->hw;
5453         int pf_id = adapter->vfs_allocated_count;
5454         s32 err;
5455
5456         igb_irq_disable(adapter);
5457         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5458
5459         if (!test_bit(__IGB_DOWN, &adapter->state))
5460                 igb_irq_enable(adapter);
5461
5462         /* remove vlan from VLVF table array */
5463         err = igb_vlvf_set(adapter, vid, false, pf_id);
5464
5465         /* if vid was not present in VLVF just remove it from table */
5466         if (err)
5467                 igb_vfta_set(hw, vid, false);
5468 }
5469
5470 static void igb_restore_vlan(struct igb_adapter *adapter)
5471 {
5472         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5473
5474         if (adapter->vlgrp) {
5475                 u16 vid;
5476                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5477                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5478                                 continue;
5479                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5480                 }
5481         }
5482 }
5483
5484 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5485 {
5486         struct pci_dev *pdev = adapter->pdev;
5487         struct e1000_mac_info *mac = &adapter->hw.mac;
5488
5489         mac->autoneg = 0;
5490
5491         switch (spddplx) {
5492         case SPEED_10 + DUPLEX_HALF:
5493                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5494                 break;
5495         case SPEED_10 + DUPLEX_FULL:
5496                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5497                 break;
5498         case SPEED_100 + DUPLEX_HALF:
5499                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5500                 break;
5501         case SPEED_100 + DUPLEX_FULL:
5502                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5503                 break;
5504         case SPEED_1000 + DUPLEX_FULL:
5505                 mac->autoneg = 1;
5506                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5507                 break;
5508         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5509         default:
5510                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5511                 return -EINVAL;
5512         }
5513         return 0;
5514 }
5515
5516 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5517 {
5518         struct net_device *netdev = pci_get_drvdata(pdev);
5519         struct igb_adapter *adapter = netdev_priv(netdev);
5520         struct e1000_hw *hw = &adapter->hw;
5521         u32 ctrl, rctl, status;
5522         u32 wufc = adapter->wol;
5523 #ifdef CONFIG_PM
5524         int retval = 0;
5525 #endif
5526
5527         netif_device_detach(netdev);
5528
5529         if (netif_running(netdev))
5530                 igb_close(netdev);
5531
5532         igb_clear_interrupt_scheme(adapter);
5533
5534 #ifdef CONFIG_PM
5535         retval = pci_save_state(pdev);
5536         if (retval)
5537                 return retval;
5538 #endif
5539
5540         status = rd32(E1000_STATUS);
5541         if (status & E1000_STATUS_LU)
5542                 wufc &= ~E1000_WUFC_LNKC;
5543
5544         if (wufc) {
5545                 igb_setup_rctl(adapter);
5546                 igb_set_rx_mode(netdev);
5547
5548                 /* turn on all-multi mode if wake on multicast is enabled */
5549                 if (wufc & E1000_WUFC_MC) {
5550                         rctl = rd32(E1000_RCTL);
5551                         rctl |= E1000_RCTL_MPE;
5552                         wr32(E1000_RCTL, rctl);
5553                 }
5554
5555                 ctrl = rd32(E1000_CTRL);
5556                 /* advertise wake from D3Cold */
5557                 #define E1000_CTRL_ADVD3WUC 0x00100000
5558                 /* phy power management enable */
5559                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5560                 ctrl |= E1000_CTRL_ADVD3WUC;
5561                 wr32(E1000_CTRL, ctrl);
5562
5563                 /* Allow time for pending master requests to run */
5564                 igb_disable_pcie_master(hw);
5565
5566                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5567                 wr32(E1000_WUFC, wufc);
5568         } else {
5569                 wr32(E1000_WUC, 0);
5570                 wr32(E1000_WUFC, 0);
5571         }
5572
5573         *enable_wake = wufc || adapter->en_mng_pt;
5574         if (!*enable_wake)
5575                 igb_shutdown_serdes_link_82575(hw);
5576
5577         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5578          * would have already happened in close and is redundant. */
5579         igb_release_hw_control(adapter);
5580
5581         pci_disable_device(pdev);
5582
5583         return 0;
5584 }
5585
5586 #ifdef CONFIG_PM
5587 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5588 {
5589         int retval;
5590         bool wake;
5591
5592         retval = __igb_shutdown(pdev, &wake);
5593         if (retval)
5594                 return retval;
5595
5596         if (wake) {
5597                 pci_prepare_to_sleep(pdev);
5598         } else {
5599                 pci_wake_from_d3(pdev, false);
5600                 pci_set_power_state(pdev, PCI_D3hot);
5601         }
5602
5603         return 0;
5604 }
5605
5606 static int igb_resume(struct pci_dev *pdev)
5607 {
5608         struct net_device *netdev = pci_get_drvdata(pdev);
5609         struct igb_adapter *adapter = netdev_priv(netdev);
5610         struct e1000_hw *hw = &adapter->hw;
5611         u32 err;
5612
5613         pci_set_power_state(pdev, PCI_D0);
5614         pci_restore_state(pdev);
5615
5616         err = pci_enable_device_mem(pdev);
5617         if (err) {
5618                 dev_err(&pdev->dev,
5619                         "igb: Cannot enable PCI device from suspend\n");
5620                 return err;
5621         }
5622         pci_set_master(pdev);
5623
5624         pci_enable_wake(pdev, PCI_D3hot, 0);
5625         pci_enable_wake(pdev, PCI_D3cold, 0);
5626
5627         if (igb_init_interrupt_scheme(adapter)) {
5628                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5629                 return -ENOMEM;
5630         }
5631
5632         /* e1000_power_up_phy(adapter); */
5633
5634         igb_reset(adapter);
5635
5636         /* let the f/w know that the h/w is now under the control of the
5637          * driver. */
5638         igb_get_hw_control(adapter);
5639
5640         wr32(E1000_WUS, ~0);
5641
5642         if (netif_running(netdev)) {
5643                 err = igb_open(netdev);
5644                 if (err)
5645                         return err;
5646         }
5647
5648         netif_device_attach(netdev);
5649
5650         return 0;
5651 }
5652 #endif
5653
5654 static void igb_shutdown(struct pci_dev *pdev)
5655 {
5656         bool wake;
5657
5658         __igb_shutdown(pdev, &wake);
5659
5660         if (system_state == SYSTEM_POWER_OFF) {
5661                 pci_wake_from_d3(pdev, wake);
5662                 pci_set_power_state(pdev, PCI_D3hot);
5663         }
5664 }
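/*
 * For context: the suspend/resume/shutdown entry points above are wired
 * into the driver's struct pci_driver, defined near the top of this file.
 * A rough sketch (field subset only, with the PM hooks guarded the same
 * way as the functions above):
 */
#if 0
static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler,
};
#endif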
5665
5666 #ifdef CONFIG_NET_POLL_CONTROLLER
5667 /*
5668  * Polling 'interrupt' - used by things like netconsole to send skbs
5669  * without having to re-enable interrupts. It's not called while
5670  * the interrupt routine is executing.
5671  */
5672 static void igb_netpoll(struct net_device *netdev)
5673 {
5674         struct igb_adapter *adapter = netdev_priv(netdev);
5675         struct e1000_hw *hw = &adapter->hw;
5676         int i;
5677
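        /* legacy/MSI: a single shared vector -- mask the device interrupt
         * and let the one NAPI context poll all rings */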
5678         if (!adapter->msix_entries) {
5679                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5680                 igb_irq_disable(adapter);
5681                 napi_schedule(&q_vector->napi);
5682                 return;
5683         }
5684
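        /* MSI-X: mask each vector individually via EIMC, then schedule its
         * NAPI handler to poll the associated rings */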
5685         for (i = 0; i < adapter->num_q_vectors; i++) {
5686                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5687                 wr32(E1000_EIMC, q_vector->eims_value);
5688                 napi_schedule(&q_vector->napi);
5689         }
5690 }
5691 #endif /* CONFIG_NET_POLL_CONTROLLER */
5692
5693 /**
5694  * igb_io_error_detected - called when PCI error is detected
5695  * @pdev: Pointer to PCI device
5696  * @state: The current pci connection state
5697  *
5698  * This function is called after a PCI bus error affecting
5699  * this device has been detected.
5700  */
5701 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5702                                               pci_channel_state_t state)
5703 {
5704         struct net_device *netdev = pci_get_drvdata(pdev);
5705         struct igb_adapter *adapter = netdev_priv(netdev);
5706
5707         netif_device_detach(netdev);
5708
5709         if (state == pci_channel_io_perm_failure)
5710                 return PCI_ERS_RESULT_DISCONNECT;
5711
5712         if (netif_running(netdev))
5713                 igb_down(adapter);
5714         pci_disable_device(pdev);
5715
5716         /* Request a slot reset. */
5717         return PCI_ERS_RESULT_NEED_RESET;
5718 }
5719
5720 /**
5721  * igb_io_slot_reset - called after the pci bus has been reset.
5722  * @pdev: Pointer to PCI device
5723  *
5724  * Restart the card from scratch, as if from a cold boot. Implementation
5725  * resembles the first half of the igb_resume routine.
5726  */
5727 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5728 {
5729         struct net_device *netdev = pci_get_drvdata(pdev);
5730         struct igb_adapter *adapter = netdev_priv(netdev);
5731         struct e1000_hw *hw = &adapter->hw;
5732         pci_ers_result_t result;
5733         int err;
5734
5735         if (pci_enable_device_mem(pdev)) {
5736                 dev_err(&pdev->dev,
5737                         "Cannot re-enable PCI device after reset.\n");
5738                 result = PCI_ERS_RESULT_DISCONNECT;
5739         } else {
5740                 pci_set_master(pdev);
5741                 pci_restore_state(pdev);
5742
5743                 pci_enable_wake(pdev, PCI_D3hot, 0);
5744                 pci_enable_wake(pdev, PCI_D3cold, 0);
5745
5746                 igb_reset(adapter);
5747                 wr32(E1000_WUS, ~0);
5748                 result = PCI_ERS_RESULT_RECOVERED;
5749         }
5750
5751         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5752         if (err) {
5753                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5754                         "failed 0x%x\n", err);
5755                 /* non-fatal, continue */
5756         }
5757
5758         return result;
5759 }
5760
5761 /**
5762  * igb_io_resume - called when traffic can start flowing again.
5763  * @pdev: Pointer to PCI device
5764  *
5765  * This callback is called when the error recovery driver tells us that
5766  * it's OK to resume normal operation. Implementation resembles the
5767  * second half of the igb_resume routine.
5768  */
5769 static void igb_io_resume(struct pci_dev *pdev)
5770 {
5771         struct net_device *netdev = pci_get_drvdata(pdev);
5772         struct igb_adapter *adapter = netdev_priv(netdev);
5773
5774         if (netif_running(netdev)) {
5775                 if (igb_up(adapter)) {
5776                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5777                         return;
5778                 }
5779         }
5780
5781         netif_device_attach(netdev);
5782
5783         /* let the f/w know that the h/w is now under the control of the
5784          * driver. */
5785         igb_get_hw_control(adapter);
5786 }
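/*
 * The three callbacks above implement the standard PCI/AER recovery flow:
 * error_detected -> slot_reset -> resume.  They are registered through a
 * struct pci_error_handlers elsewhere in this file, roughly:
 */
#if 0
static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};
#endif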
5787
5788 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5789                              u8 qsel)
5790 {
5791         u32 rar_low, rar_high;
5792         struct e1000_hw *hw = &adapter->hw;
5793
5794         /* HW expects the address in little endian, so reverse the byte
5795          * order from network order (big endian) to little endian
5796          */
5797         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5798                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5799         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5800
5801         /* Indicate to hardware the Address is Valid. */
5802         rar_high |= E1000_RAH_AV;
5803
5804         if (hw->mac.type == e1000_82575)
5805                 rar_high |= E1000_RAH_POOL_1 * qsel;
5806         else
5807                 rar_high |= E1000_RAH_POOL_1 << qsel;
5808
5809         wr32(E1000_RAL(index), rar_low);
5810         wrfl();
5811         wr32(E1000_RAH(index), rar_high);
5812         wrfl();
5813 }
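/*
 * Worked example with a hypothetical address: for addr = 00:1b:21:12:34:56
 * and qsel = 2 on an 82576 (assuming the usual e1000 bit definitions, with
 * E1000_RAH_AV at bit 31 and E1000_RAH_POOL_1 at bit 18):
 *
 *   rar_low  = 0x00 | 0x1b<<8 | 0x21<<16 | 0x12<<24  = 0x12211b00
 *   rar_high = 0x34 | 0x56<<8                        = 0x00005634
 *   rar_high |= E1000_RAH_AV                        -> 0x80005634
 *   rar_high |= E1000_RAH_POOL_1 << 2               -> 0x80105634
 */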
5814
5815 static int igb_set_vf_mac(struct igb_adapter *adapter,
5816                           int vf, unsigned char *mac_addr)
5817 {
5818         struct e1000_hw *hw = &adapter->hw;
5819         /* VF MAC addresses start at the end of the receive address registers
5820          * and move towards the first, so a collision should not be possible */
5821         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5822
5823         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5824
5825         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5826
5827         return 0;
5828 }
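/*
 * Example with hypothetical counts: on an 82576 with rar_entry_count = 24,
 * vf 0 maps to RAR entry 23, vf 1 to entry 22, and so on, while the PF's
 * own address occupies entry 0 -- so the two ranges never collide.
 */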
5829
5830 static void igb_vmm_control(struct igb_adapter *adapter)
5831 {
5832         struct e1000_hw *hw = &adapter->hw;
5833         u32 reg;
5834
5835         /* replication is not supported for 82575 */
5836         if (hw->mac.type == e1000_82575)
5837                 return;
5838
5839         /* enable replication vlan tag stripping */
5840         reg = rd32(E1000_RPLOLR);
5841         reg |= E1000_RPLOLR_STRVLAN;
5842         wr32(E1000_RPLOLR, reg);
5843
5844         /* notify HW that the MAC is adding vlan tags */
5845         reg = rd32(E1000_DTXCTL);
5846         reg |= E1000_DTXCTL_VLAN_ADDED;
5847         wr32(E1000_DTXCTL, reg);
5848
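        /* VMDq loopback and replication are only useful once VFs exist */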
5849         if (adapter->vfs_allocated_count) {
5850                 igb_vmdq_set_loopback_pf(hw, true);
5851                 igb_vmdq_set_replication_pf(hw, true);
5852         } else {
5853                 igb_vmdq_set_loopback_pf(hw, false);
5854                 igb_vmdq_set_replication_pf(hw, false);
5855         }
5856 }
5857
5858 /* igb_main.c */