e1000/e1000e/igb/ixgb: don't txhang after link down
[safe/jmp/linux-2.6] / drivers / net / igb / igb_main.c
index 2cb267f..8784c05 100644 (file)
@@ -62,8 +62,10 @@ static const struct e1000_info *igb_info_tbl[] = {
 
 static struct pci_device_id igb_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
@@ -133,8 +135,8 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int);
 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 
-static int igb_suspend(struct pci_dev *, pm_message_t);
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *, pm_message_t);
 static int igb_resume(struct pci_dev *);
 #endif
 static void igb_shutdown(struct pci_dev *);
@@ -150,14 +152,13 @@ static struct notifier_block dca_notifier = {
 /* for netdump / net console */
 static void igb_netpoll(struct net_device *);
 #endif
-
 #ifdef CONFIG_PCI_IOV
-static ssize_t igb_set_num_vfs(struct device *, struct device_attribute *,
-                               const char *, size_t);
-static ssize_t igb_show_num_vfs(struct device *, struct device_attribute *,
-                               char *);
-DEVICE_ATTR(num_vfs, S_IRUGO | S_IWUSR, igb_show_num_vfs, igb_set_num_vfs);
-#endif
+static unsigned int max_vfs = 0;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
+                 "per physical function");
+#endif /* CONFIG_PCI_IOV */
+
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
@@ -276,6 +277,17 @@ static char *igb_get_time_str(struct igb_adapter *adapter,
 #endif
 
 /**
+ * igb_desc_unused - calculate the number of unused descriptors in a ring
+ **/
+static int igb_desc_unused(struct igb_ring *ring)
+{
+       if (ring->next_to_clean > ring->next_to_use)
+               return ring->next_to_clean - ring->next_to_use - 1;
+
+       return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
+/**
  * igb_init_module - Driver Registration Routine
  *
  * igb_init_module is the first routine called when the driver is
@@ -407,6 +419,9 @@ static void igb_free_queues(struct igb_adapter *adapter)
        for (i = 0; i < adapter->num_rx_queues; i++)
                netif_napi_del(&adapter->rx_ring[i].napi);
 
+       adapter->num_rx_queues = 0;
+       adapter->num_tx_queues = 0;
+
        kfree(adapter->tx_ring);
        kfree(adapter->rx_ring);
 }
@@ -655,6 +670,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
 
        /* If we can't do MSI-X, try MSI */
 msi_only:
+#ifdef CONFIG_PCI_IOV
+       /* disable SR-IOV for non MSI-X configurations */
+       if (adapter->vf_data) {
+               struct e1000_hw *hw = &adapter->hw;
+               /* disable iov and allow time for transactions to clear */
+               pci_disable_sriov(adapter->pdev);
+               msleep(500);
+
+               kfree(adapter->vf_data);
+               adapter->vf_data = NULL;
+               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+               msleep(100);
+               dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+       }
+#endif
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        if (!pci_enable_msi(adapter->pdev))
@@ -871,12 +901,12 @@ static void igb_configure(struct igb_adapter *adapter)
 
        igb_rx_fifo_flush_82575(&adapter->hw);
 
-       /* call IGB_DESC_UNUSED which always leaves
+       /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = &adapter->rx_ring[i];
-               igb_alloc_rx_buffers_adv(ring, IGB_DESC_UNUSED(ring));
+               igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
        }
 
 
@@ -912,6 +942,8 @@ int igb_up(struct igb_adapter *adapter)
        rd32(E1000_ICR);
        igb_irq_enable(adapter);
 
+       netif_tx_start_all_queues(adapter->netdev);
+
        /* Fire a link change interrupt to start the watchdog. */
        wr32(E1000_ICS, E1000_ICS_LSC);
        return 0;
@@ -1126,11 +1158,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        struct net_device *netdev;
        struct igb_adapter *adapter;
        struct e1000_hw *hw;
-       struct pci_dev *us_dev;
        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
        unsigned long mmio_start, mmio_len;
-       int err, pci_using_dac, pos;
-       u16 eeprom_data = 0, state = 0;
+       int err, pci_using_dac;
+       u16 eeprom_data = 0;
        u16 eeprom_apme_mask = IGB_EEPROM_APME;
        u32 part_num;
 
@@ -1139,15 +1170,15 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                return err;
 
        pci_using_dac = 0;
-       err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (!err) {
-               err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (!err)
                        pci_using_dac = 1;
        } else {
-               err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
-                       err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+                       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                        if (err) {
                                dev_err(&pdev->dev, "No usable DMA "
                                        "configuration, aborting\n");
@@ -1156,27 +1187,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                }
        }
 
-       /* 82575 requires that the pci-e link partner disable the L0s state */
-       switch (pdev->device) {
-       case E1000_DEV_ID_82575EB_COPPER:
-       case E1000_DEV_ID_82575EB_FIBER_SERDES:
-       case E1000_DEV_ID_82575GB_QUAD_COPPER:
-               us_dev = pdev->bus->self;
-               pos = pci_find_capability(us_dev, PCI_CAP_ID_EXP);
-               if (pos) {
-                       pci_read_config_word(us_dev, pos + PCI_EXP_LNKCTL,
-                                            &state);
-                       state &= ~PCIE_LINK_STATE_L0S;
-                       pci_write_config_word(us_dev, pos + PCI_EXP_LNKCTL,
-                                             state);
-                       dev_info(&pdev->dev,
-                                "Disabling ASPM L0s upstream switch port %s\n",
-                                pci_name(us_dev));
-               }
-       default:
-               break;
-       }
-
        err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
                                           IORESOURCE_MEM),
                                           igb_driver_name);
@@ -1244,6 +1254,46 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (err)
                goto err_sw_init;
 
+#ifdef CONFIG_PCI_IOV
+       /* since iov functionality isn't critical to base device function we
+        * can accept failure.  If it fails we don't allow iov to be enabled */
+       if (hw->mac.type == e1000_82576) {
+               /* 82576 supports a maximum of 7 VFs in addition to the PF */
+               unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
+               int i;
+               unsigned char mac_addr[ETH_ALEN];
+
+               if (num_vfs) {
+                       adapter->vf_data = kcalloc(num_vfs,
+                                               sizeof(struct vf_data_storage),
+                                               GFP_KERNEL);
+                       if (!adapter->vf_data) {
+                               dev_err(&pdev->dev,
+                                       "Could not allocate VF private data - "
+                                       "IOV enable failed\n");
+                       } else {
+                               err = pci_enable_sriov(pdev, num_vfs);
+                               if (!err) {
+                                       adapter->vfs_allocated_count = num_vfs;
+                                       dev_info(&pdev->dev,
+                                                "%d vfs allocated\n",
+                                                num_vfs);
+                                       for (i = 0;
+                                            i < adapter->vfs_allocated_count;
+                                            i++) {
+                                               random_ether_addr(mac_addr);
+                                               igb_set_vf_mac(adapter, i,
+                                                              mac_addr);
+                                       }
+                               } else {
+                                       kfree(adapter->vf_data);
+                                       adapter->vf_data = NULL;
+                               }
+                       }
+               }
+       }
+
+#endif
        /* setup the private structure */
        err = igb_sw_init(adapter);
        if (err)
@@ -1295,6 +1345,9 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (pci_using_dac)
                netdev->features |= NETIF_F_HIGHDMA;
 
+       if (adapter->hw.mac.type == e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CSUM;
+
        adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
        /* before reading the NVM, reset the controller to put the device in a
@@ -1321,13 +1374,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                goto err_eeprom;
        }
 
-       init_timer(&adapter->watchdog_timer);
-       adapter->watchdog_timer.function = &igb_watchdog;
-       adapter->watchdog_timer.data = (unsigned long) adapter;
-
-       init_timer(&adapter->phy_info_timer);
-       adapter->phy_info_timer.function = &igb_update_phy_info;
-       adapter->phy_info_timer.data = (unsigned long) adapter;
+       setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+                   (unsigned long) adapter);
+       setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+                   (unsigned long) adapter);
 
        INIT_WORK(&adapter->reset_task, igb_reset_task);
        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@ -1345,15 +1395,14 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 
        igb_validate_mdi_setting(hw);
 
-       adapter->rx_csum = 1;
-
        /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
         * enable the ACPI Magic Packet filter
         */
 
-       if (hw->bus.func == 0 ||
-           hw->device_id == E1000_DEV_ID_82575EB_COPPER)
+       if (hw->bus.func == 0)
                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
+       else if (hw->bus.func == 1)
+               hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
 
        if (eeprom_data & eeprom_apme_mask)
                adapter->eeprom_wol |= E1000_WUFC_MAG;
@@ -1373,6 +1422,16 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
                        adapter->eeprom_wol = 0;
                break;
+       case E1000_DEV_ID_82576_QUAD_COPPER:
+               /* if quad port adapter, disable WoL on all but port A */
+               if (global_quad_port_a != 0)
+                       adapter->eeprom_wol = 0;
+               else
+                       adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+               /* Reset for multiple quad port adapters */
+               if (++global_quad_port_a == 4)
+                       global_quad_port_a = 0;
+               break;
        }
 
        /* initialize the wol settings based on the eeprom settings */
@@ -1386,28 +1445,14 @@ static int __devinit igb_probe(struct pci_dev *pdev,
         * driver. */
        igb_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until igb_open() is called */
-       netif_carrier_off(netdev);
-       netif_tx_stop_all_queues(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
-#ifdef CONFIG_PCI_IOV
-       /* since iov functionality isn't critical to base device function we
-        * can accept failure.  If it fails we don't allow iov to be enabled */
-       if (hw->mac.type == e1000_82576) {
-               err = pci_enable_sriov(pdev, 0);
-               if (!err)
-                       err = device_create_file(&netdev->dev,
-                                                &dev_attr_num_vfs);
-               if (err)
-                       dev_err(&pdev->dev, "Failed to initialize IOV\n");
-       }
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
 
-#endif
 #ifdef CONFIG_IGB_DCA
        if (dca_add_requester(&pdev->dev) == 0) {
                adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -1477,9 +1522,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                 netdev->name,
                 ((hw->bus.speed == e1000_bus_speed_2500)
                  ? "2.5Gb/s" : "unknown"),
-                ((hw->bus.width == e1000_bus_width_pcie_x4)
-                 ? "Width x4" : (hw->bus.width == e1000_bus_width_pcie_x1)
-                 ? "Width x1" : "unknown"),
+                ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+                 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+                 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+                  "unknown"),
                 netdev->dev_addr);
 
        igb_read_part_num(hw, &part_num);
@@ -1655,6 +1701,8 @@ static int igb_open(struct net_device *netdev)
        if (test_bit(__IGB_TESTING, &adapter->state))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = igb_setup_all_tx_resources(adapter);
        if (err)
@@ -2189,29 +2237,24 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                         E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
-
                wr32(E1000_MRQC, mrqc);
-
-               /* Multiqueue and raw packet checksumming are mutually
-                * exclusive.  Note that this not the same as TCP/IP
-                * checksumming, which works fine. */
-               rxcsum = rd32(E1000_RXCSUM);
-               rxcsum |= E1000_RXCSUM_PCSD;
-               wr32(E1000_RXCSUM, rxcsum);
-       } else {
+       } else if (adapter->vfs_allocated_count) {
                /* Enable multi-queue for sr-iov */
-               if (adapter->vfs_allocated_count)
-                       wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
-               /* Enable Receive Checksum Offload for TCP and UDP */
-               rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum)
-                       rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE;
-               else
-                       rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE);
-
-               wr32(E1000_RXCSUM, rxcsum);
+               wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
        }
 
+       /* Enable Receive Checksum Offload for TCP and UDP */
+       rxcsum = rd32(E1000_RXCSUM);
+       /* Disable raw packet checksumming */
+       rxcsum |= E1000_RXCSUM_PCSD;
+
+       if (adapter->hw.mac.type == e1000_82576)
+               /* Enable Receive Checksum Offload for SCTP */
+               rxcsum |= E1000_RXCSUM_CRCOFL;
+
+       /* Don't need to set TUOFL or IPOFL, they default to 1 */
+       wr32(E1000_RXCSUM, rxcsum);
+
        /* Set the default pool for the PF's first queue */
        igb_configure_vt_default_pool(adapter);
 
@@ -2258,19 +2301,14 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
 static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter,
                                           struct igb_buffer *buffer_info)
 {
-       if (buffer_info->dma) {
-               pci_unmap_page(adapter->pdev,
-                               buffer_info->dma,
-                               buffer_info->length,
-                               PCI_DMA_TODEVICE);
-               buffer_info->dma = 0;
-       }
+       buffer_info->dma = 0;
        if (buffer_info->skb) {
+               skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb,
+                             DMA_TO_DEVICE);
                dev_kfree_skb_any(buffer_info->skb);
                buffer_info->skb = NULL;
        }
        buffer_info->time_stamp = 0;
-       buffer_info->next_to_watch = 0;
        /* buffer_info must be completely set up in the transmit path */
 }
 
@@ -2465,7 +2503,7 @@ static void igb_set_multi(struct net_device *netdev)
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct dev_mc_list *mc_ptr;
-       u8  *mta_list;
+       u8  *mta_list = NULL;
        u32 rctl;
        int i;
 
@@ -2486,17 +2524,15 @@ static void igb_set_multi(struct net_device *netdev)
        }
        wr32(E1000_RCTL, rctl);
 
-       if (!netdev->mc_count) {
-               /* nothing to program, so clear mc list */
-               igb_update_mc_addr_list(hw, NULL, 0, 1,
-                                       mac->rar_entry_count);
-               return;
+       if (netdev->mc_count) {
+               mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
+               if (!mta_list) {
+                       dev_err(&adapter->pdev->dev,
+                               "failed to allocate multicast filter list\n");
+                       return;
+               }
        }
 
-       mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
-       if (!mta_list)
-               return;
-
        /* The shared function expects a packed array of only addresses. */
        mc_ptr = netdev->mc_list;
 
@@ -2626,7 +2662,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        }
 
                        netif_carrier_on(netdev);
-                       netif_tx_wake_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2643,7 +2678,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_tx_stop_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2670,13 +2704,15 @@ link_up:
        igb_update_adaptive(&adapter->hw);
 
        if (!netif_carrier_ok(netdev)) {
-               if (IGB_DESC_UNUSED(tx_ring) + 1 < tx_ring->count) {
+               if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
                        /* We've lost link, so the controller stops DMA,
                         * but we've got queued Tx work that's never going
                         * to get done, so reset controller to flush Tx.
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2860,13 +2896,13 @@ static void igb_set_itr(struct igb_adapter *adapter)
        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
-               new_itr = 70000;
+               new_itr = 56;  /* aka 70,000 ints/sec */
                break;
        case low_latency:
-               new_itr = 20000; /* aka hwitr = ~200 */
+               new_itr = 196; /* aka 20,000 ints/sec */
                break;
        case bulk_latency:
-               new_itr = 4000;
+               new_itr = 980; /* aka 4,000 ints/sec */
                break;
        default:
                break;
@@ -2885,7 +2921,8 @@ set_itr_now:
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > adapter->itr ?
-                            min(adapter->itr + (new_itr >> 2), new_itr) :
+                            max((new_itr * adapter->itr) /
+                                (new_itr + (adapter->itr >> 2)), new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
@@ -2894,7 +2931,7 @@ set_itr_now:
                 * ends up being correct.
                 */
                adapter->itr = new_itr;
-               adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256);
+               adapter->rx_ring->itr_val = new_itr;
                adapter->rx_ring->set_itr = 1;
        }
 
@@ -3017,16 +3054,31 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
 
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                       switch (skb->protocol) {
+                       __be16 protocol;
+
+                       if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+                               const struct vlan_ethhdr *vhdr =
+                                         (const struct vlan_ethhdr*)skb->data;
+
+                               protocol = vhdr->h_vlan_encapsulated_proto;
+                       } else {
+                               protocol = skb->protocol;
+                       }
+
+                       switch (protocol) {
                        case cpu_to_be16(ETH_P_IP):
                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        case cpu_to_be16(ETH_P_IPV6):
                                /* XXX what about other V6 headers?? */
                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        default:
                                if (unlikely(net_ratelimit()))
@@ -3070,25 +3122,33 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
        unsigned int len = skb_headlen(skb);
        unsigned int count = 0, i;
        unsigned int f;
+       dma_addr_t *map;
 
        i = tx_ring->next_to_use;
 
+       if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) {
+               dev_err(&adapter->pdev->dev, "TX DMA map failed\n");
+               return 0;
+       }
+
+       map = skb_shinfo(skb)->dma_maps;
+
        buffer_info = &tx_ring->buffer_info[i];
        BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
        buffer_info->length = len;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->next_to_watch = i;
-       buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
-                                         PCI_DMA_TODEVICE);
+       buffer_info->dma = map[count];
        count++;
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
 
        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
                struct skb_frag_struct *frag;
 
+               i++;
+               if (i == tx_ring->count)
+                       i = 0;
+
                frag = &skb_shinfo(skb)->frags[f];
                len = frag->size;
 
@@ -3097,19 +3157,10 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
                buffer_info->length = len;
                buffer_info->time_stamp = jiffies;
                buffer_info->next_to_watch = i;
-               buffer_info->dma = pci_map_page(adapter->pdev,
-                                               frag->page,
-                                               frag->page_offset,
-                                               len,
-                                               PCI_DMA_TODEVICE);
-
+               buffer_info->dma = map[count];
                count++;
-               i++;
-               if (i == tx_ring->count)
-                       i = 0;
        }
 
-       i = ((i == 0) ? tx_ring->count - 1 : i - 1);
        tx_ring->buffer_info[i].skb = skb;
        tx_ring->buffer_info[first].next_to_watch = i;
 
@@ -3197,7 +3248,7 @@ static int __igb_maybe_stop_tx(struct net_device *netdev,
 
        /* We need to check again in a case another CPU has just
         * made room available. */
-       if (IGB_DESC_UNUSED(tx_ring) < size)
+       if (igb_desc_unused(tx_ring) < size)
                return -EBUSY;
 
        /* A reprieve! */
@@ -3209,7 +3260,7 @@ static int __igb_maybe_stop_tx(struct net_device *netdev,
 static int igb_maybe_stop_tx(struct net_device *netdev,
                             struct igb_ring *tx_ring, int size)
 {
-       if (IGB_DESC_UNUSED(tx_ring) >= size)
+       if (igb_desc_unused(tx_ring) >= size)
                return 0;
        return __igb_maybe_stop_tx(netdev, tx_ring, size);
 }
@@ -3222,6 +3273,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
        unsigned int first;
        unsigned int tx_flags = 0;
        u8 hdr_len = 0;
+       int count = 0;
        int tso = 0;
        union skb_shared_tx *shtx;
 
@@ -3283,14 +3335,23 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                 (skb->ip_summed == CHECKSUM_PARTIAL))
                tx_flags |= IGB_TX_FLAGS_CSUM;
 
-       igb_tx_queue_adv(adapter, tx_ring, tx_flags,
-                        igb_tx_map_adv(adapter, tx_ring, skb, first),
-                        skb->len, hdr_len);
-
-       netdev->trans_start = jiffies;
-
-       /* Make sure there is space in the ring for the next send. */
-       igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       /*
+        * count reflects descriptors mapped, if 0 then mapping error
+        * has occurred and we need to rewind the descriptor queue
+        */
+       count = igb_tx_map_adv(adapter, tx_ring, skb, first);
+
+       if (count) {
+               igb_tx_queue_adv(adapter, tx_ring, tx_flags, count,
+                                skb->len, hdr_len);
+               netdev->trans_start = jiffies;
+               /* Make sure there is space in the ring for the next send. */
+               igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       } else {
+               dev_kfree_skb_any(skb);
+               tx_ring->buffer_info[first].time_stamp = 0;
+               tx_ring->next_to_use = first;
+       }
 
        return NETDEV_TX_OK;
 }
@@ -3827,7 +3888,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
 
        for (i = 0; i < adapter->vfs_allocated_count; i++) {
                vf_data = &adapter->vf_data[i];
-               for (j = 0; j < vf_data[i].num_vf_mc_hashes; j++)
+               for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
                        igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
        }
 }
@@ -3894,10 +3955,15 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
 
                        /* if !enabled we need to set this up in vfta */
                        if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
-                               /* add VID to filter table */
-                               igb_vfta_set(hw, vid, true);
+                               /* add VID to filter table, if bit already set
+                                * PF must have added it outside of table */
+                               if (igb_vfta_set(hw, vid, true))
+                                       reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
+                                               adapter->vfs_allocated_count);
                                reg |= E1000_VLVF_VLANID_ENABLE;
                        }
+                       reg &= ~E1000_VLVF_VLANID_MASK;
+                       reg |= vid;
 
                        wr32(E1000_VLVF(i), reg);
                        return 0;
@@ -4186,19 +4252,17 @@ static inline void igb_rx_irq_enable(struct igb_ring *rx_ring)
 static int igb_poll(struct napi_struct *napi, int budget)
 {
        struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi);
-       struct igb_adapter *adapter = rx_ring->adapter;
-       struct net_device *netdev = adapter->netdev;
        int work_done = 0;
 
 #ifdef CONFIG_IGB_DCA
-       if (adapter->flags & IGB_FLAG_DCA_ENABLED)
+       if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
                igb_update_rx_dca(rx_ring);
 #endif
        igb_clean_rx_irq_adv(rx_ring, &work_done, budget);
 
        if (rx_ring->buddy) {
 #ifdef CONFIG_IGB_DCA
-               if (adapter->flags & IGB_FLAG_DCA_ENABLED)
+               if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
                        igb_update_tx_dca(rx_ring->buddy);
 #endif
                if (!igb_clean_tx_irq(rx_ring->buddy))
@@ -4206,7 +4270,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
        }
 
        /* If not enough Rx work done, exit the polling mode */
-       if ((work_done < budget) || !netif_running(netdev)) {
+       if (work_done < budget) {
                napi_complete(napi);
                igb_rx_irq_enable(rx_ring);
        }
@@ -4305,7 +4369,7 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
 
        if (unlikely(count &&
                     netif_carrier_ok(netdev) &&
-                    IGB_DESC_UNUSED(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
+                    igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
                /* Make sure that anybody stopping the queue after this
                 * sees the new next_to_clean.
                 */
@@ -4376,20 +4440,12 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
 
        skb_record_rx_queue(skb, ring->queue_index);
-       if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-               if (vlan_extracted)
-                       vlan_gro_receive(&ring->napi, adapter->vlgrp,
-                                        le16_to_cpu(rx_desc->wb.upper.vlan),
-                                        skb);
-               else
-                       napi_gro_receive(&ring->napi, skb);
-       } else {
-               if (vlan_extracted)
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                         le16_to_cpu(rx_desc->wb.upper.vlan));
-               else
-                       netif_receive_skb(skb);
-       }
+       if (vlan_extracted)
+               vlan_gro_receive(&ring->napi, adapter->vlgrp,
+                                le16_to_cpu(rx_desc->wb.upper.vlan),
+                                skb);
+       else
+               napi_gro_receive(&ring->napi, skb);
 }
 
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
@@ -4398,19 +4454,28 @@ static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Ignore Checksum bit is set or checksum is disabled through ethtool */
-       if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum)
+       if ((status_err & E1000_RXD_STAT_IXSM) ||
+           (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
                return;
        /* TCP/UDP checksum error bit is set */
        if (status_err &
            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+               /*
+                * Work around an erratum with SCTP packets where the TCPE
+                * (aka L4E) bit is set incorrectly on 64 byte (60 byte w/o
+                * CRC) packets; let the stack check the crc32c instead.
+                */
+               if (!((adapter->hw.mac.type == e1000_82576) &&
+                     (skb->len == 60)))
+                       adapter->hw_csum_err++;
                /* let the stack verify checksum errors */
-               adapter->hw_csum_err++;
                return;
        }
        /* It must be a TCP or UDP packet with a valid checksum */
        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
        adapter->hw_csum_good++;
 }
 
@@ -4582,7 +4647,7 @@ next_desc:
        }
 
        rx_ring->next_to_clean = i;
-       cleaned_count = IGB_DESC_UNUSED(rx_ring);
+       cleaned_count = igb_desc_unused(rx_ring);
 
        if (cleaned_count)
                igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
@@ -5041,7 +5106,7 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
        return 0;
 }
 
-static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -5100,15 +5165,9 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
                wr32(E1000_WUFC, 0);
        }
 
-       /* make sure adapter isn't asleep if manageability/wol is enabled */
-       if (wufc || adapter->en_mng_pt) {
-               pci_enable_wake(pdev, PCI_D3hot, 1);
-               pci_enable_wake(pdev, PCI_D3cold, 1);
-       } else {
+       *enable_wake = wufc || adapter->en_mng_pt;
+       if (!*enable_wake)
                igb_shutdown_fiber_serdes_link_82575(hw);
-               pci_enable_wake(pdev, PCI_D3hot, 0);
-               pci_enable_wake(pdev, PCI_D3cold, 0);
-       }
 
        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
         * would have already happened in close and is redundant. */
@@ -5116,12 +5175,29 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
 
        pci_disable_device(pdev);
 
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
        return 0;
 }
 
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       int retval;
+       bool wake;
+
+       retval = __igb_shutdown(pdev, &wake);
+       if (retval)
+               return retval;
+
+       if (wake) {
+               pci_prepare_to_sleep(pdev);
+       } else {
+               pci_wake_from_d3(pdev, false);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
+
+       return 0;
+}
+
 static int igb_resume(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
@@ -5174,7 +5250,14 @@ static int igb_resume(struct pci_dev *pdev)
 
 static void igb_shutdown(struct pci_dev *pdev)
 {
-       igb_suspend(pdev, PMSG_SUSPEND);
+       bool wake;
+
+       __igb_shutdown(pdev, &wake);
+
+       if (system_state == SYSTEM_POWER_OFF) {
+               pci_wake_from_d3(pdev, wake);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -5385,89 +5468,4 @@ static void igb_vmm_control(struct igb_adapter *adapter)
        igb_vmdq_set_replication_pf(hw, true);
 }
 
-#ifdef CONFIG_PCI_IOV
-static ssize_t igb_show_num_vfs(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       struct igb_adapter *adapter = netdev_priv(to_net_dev(dev));
-
-       return sprintf(buf, "%d\n", adapter->vfs_allocated_count);
-}
-
-static ssize_t igb_set_num_vfs(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
-{
-       struct net_device *netdev = to_net_dev(dev);
-       struct igb_adapter *adapter = netdev_priv(netdev);
-       struct e1000_hw *hw = &adapter->hw;
-       struct pci_dev *pdev = adapter->pdev;
-       unsigned int num_vfs, i;
-       unsigned char mac_addr[ETH_ALEN];
-       int err;
-
-       sscanf(buf, "%u", &num_vfs);
-
-       if (num_vfs > 7)
-               num_vfs = 7;
-
-       /* value unchanged do nothing */
-       if (num_vfs == adapter->vfs_allocated_count)
-               return count;
-
-       if (netdev->flags & IFF_UP)
-               igb_close(netdev);
-
-       igb_reset_interrupt_capability(adapter);
-       igb_free_queues(adapter);
-       adapter->tx_ring = NULL;
-       adapter->rx_ring = NULL;
-       adapter->vfs_allocated_count = 0;
-
-       /* reclaim resources allocated to VFs since we are changing count */
-       if (adapter->vf_data) {
-               /* disable iov and allow time for transactions to clear */
-               pci_disable_sriov(pdev);
-               msleep(500);
-
-               kfree(adapter->vf_data);
-               adapter->vf_data = NULL;
-               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
-               msleep(100);
-               dev_info(&pdev->dev, "IOV Disabled\n");
-       }
-
-       if (num_vfs) {
-               adapter->vf_data = kcalloc(num_vfs,
-                                          sizeof(struct vf_data_storage),
-                                          GFP_KERNEL);
-               if (!adapter->vf_data) {
-                       dev_err(&pdev->dev, "Could not allocate VF private "
-                               "data - IOV enable failed\n");
-               } else {
-                       err = pci_enable_sriov(pdev, num_vfs);
-                       if (!err) {
-                               adapter->vfs_allocated_count = num_vfs;
-                               dev_info(&pdev->dev, "%d vfs allocated\n", num_vfs);
-                               for (i = 0; i < adapter->vfs_allocated_count; i++) {
-                                       random_ether_addr(mac_addr);
-                                       igb_set_vf_mac(adapter, i, mac_addr);
-                               }
-                       } else {
-                               kfree(adapter->vf_data);
-                               adapter->vf_data = NULL;
-                       }
-               }
-       }
-
-       igb_set_interrupt_capability(adapter);
-       igb_alloc_queues(adapter);
-       igb_reset(adapter);
-
-       if (netdev->flags & IFF_UP)
-               igb_open(netdev);
-
-       return count;
-}
-#endif /* CONFIG_PCI_IOV */
 /* igb_main.c */