igb: add support for 82576NS SerDes adapter
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 4853a74..83c0837 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -63,8 +63,10 @@ static const struct e1000_info *igb_info_tbl[] = {
 static struct pci_device_id igb_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
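
The new IDs slot into the existing match table, so both parts are picked up
by igb_probe() with the stock board_82575 info and no other driver changes.
For reference, a standalone sketch of roughly how a PCI_VDEVICE() entry
expands (simplified struct; the 0x1518 device ID is assumed here for
illustration, check e1000_hw.h for the real E1000_DEV_ID_82576_NS_SERDES):

#include <stdio.h>

#define PCI_ANY_ID          (~0u)
#define PCI_VENDOR_ID_INTEL 0x8086

struct pci_device_id {
        unsigned int vendor, device;
        unsigned int subvendor, subdevice;
};

/* vendor and device are fixed; the subsystem IDs are wildcarded */
#define PCI_VDEVICE(vend, dev) \
        .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
        .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID

int main(void)
{
        struct pci_device_id entry = { PCI_VDEVICE(INTEL, 0x1518) };

        printf("matches %04x:%04x, any subsystem\n",
               entry.vendor, entry.device);
        return 0;
}
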
@@ -93,16 +95,19 @@ static void igb_clean_all_tx_rings(struct igb_adapter *);
 static void igb_clean_all_rx_rings(struct igb_adapter *);
 static void igb_clean_tx_ring(struct igb_ring *);
 static void igb_clean_rx_ring(struct igb_ring *);
-static void igb_set_multi(struct net_device *);
+static void igb_set_rx_mode(struct net_device *);
 static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
-static int igb_xmit_frame_ring_adv(struct sk_buff *, struct net_device *,
-                                 struct igb_ring *);
-static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
+static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *,
+                                          struct net_device *,
+                                          struct igb_ring *);
+static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
+                                     struct net_device *);
 static struct net_device_stats *igb_get_stats(struct net_device *);
 static int igb_change_mtu(struct net_device *, int);
 static int igb_set_mac(struct net_device *, void *);
+static void igb_set_uta(struct igb_adapter *adapter);
 static irqreturn_t igb_intr(int irq, void *);
 static irqreturn_t igb_intr_msi(int irq, void *);
 static irqreturn_t igb_msix_other(int irq, void *);
@@ -124,19 +129,48 @@ static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
 static void igb_vlan_rx_add_vid(struct net_device *, u16);
 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
+static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
 static void igb_ping_all_vfs(struct igb_adapter *);
 static void igb_msg_task(struct igb_adapter *);
 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
-static inline void igb_set_rah_pool(struct e1000_hw *, int , int);
-static void igb_set_mc_list_pools(struct igb_adapter *, int, u16);
 static void igb_vmm_control(struct igb_adapter *);
-static inline void igb_set_vmolr(struct e1000_hw *, int);
-static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int);
 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 
-static int igb_suspend(struct pci_dev *, pm_message_t);
+static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
+{
+       u32 reg_data;
+
+       reg_data = rd32(E1000_VMOLR(vfn));
+       reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
+                   E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
+                   E1000_VMOLR_AUPE |   /* Accept untagged packets */
+                   E1000_VMOLR_STRVLAN; /* Strip vlan tags */
+       wr32(E1000_VMOLR(vfn), reg_data);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+                                 int vfn)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 vmolr;
+
+       /* if it isn't the PF, check to see if VFs are enabled and
+        * increase the size to support vlan tags */
+       if (vfn < adapter->vfs_allocated_count &&
+           adapter->vf_data[vfn].vlans_enabled)
+               size += VLAN_TAG_SIZE;
+
+       vmolr = rd32(E1000_VMOLR(vfn));
+       vmolr &= ~E1000_VMOLR_RLPML_MASK;
+       vmolr |= size | E1000_VMOLR_LPE;
+       wr32(E1000_VMOLR(vfn), vmolr);
+
+       return 0;
+}
+
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *, pm_message_t);
 static int igb_resume(struct pci_dev *);
 #endif
 static void igb_shutdown(struct pci_dev *);
@@ -152,14 +186,13 @@ static struct notifier_block dca_notifier = {
 /* for netdump / net console */
 static void igb_netpoll(struct net_device *);
 #endif
-
 #ifdef CONFIG_PCI_IOV
-static ssize_t igb_set_num_vfs(struct device *, struct device_attribute *,
-                               const char *, size_t);
-static ssize_t igb_show_num_vfs(struct device *, struct device_attribute *,
-                               char *);
-DEVICE_ATTR(num_vfs, S_IRUGO | S_IWUSR, igb_show_num_vfs, igb_set_num_vfs);
-#endif
+static unsigned int max_vfs = 0;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
+                 "per physical function");
+#endif /* CONFIG_PCI_IOV */
+
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
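
With this change SR-IOV provisioning moves from the old num_vfs sysfs
attribute (deleted above) to a load-time parameter, so the VF count is fixed
when the driver binds. Assuming the usual module-parameter plumbing, an
invocation like "modprobe igb max_vfs=2" would request two VFs per port;
igb_probe() later caps the value at 7, the most the 82576 supports alongside
the PF.
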
@@ -420,6 +453,9 @@ static void igb_free_queues(struct igb_adapter *adapter)
        for (i = 0; i < adapter->num_rx_queues; i++)
                netif_napi_del(&adapter->rx_ring[i].napi);
 
+       adapter->num_rx_queues = 0;
+       adapter->num_tx_queues = 0;
+
        kfree(adapter->tx_ring);
        kfree(adapter->rx_ring);
 }
@@ -668,6 +704,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
 
        /* If we can't do MSI-X, try MSI */
 msi_only:
+#ifdef CONFIG_PCI_IOV
+       /* disable SR-IOV for non-MSI-X configurations */
+       if (adapter->vf_data) {
+               struct e1000_hw *hw = &adapter->hw;
+               /* disable iov and allow time for transactions to clear */
+               pci_disable_sriov(adapter->pdev);
+               msleep(500);
+
+               kfree(adapter->vf_data);
+               adapter->vf_data = NULL;
+               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+               msleep(100);
+               dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+       }
+#endif
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        if (!pci_enable_msi(adapter->pdev))
@@ -767,9 +818,11 @@ static void igb_irq_disable(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
 
        if (adapter->msix_entries) {
-               wr32(E1000_EIAM, 0);
-               wr32(E1000_EIMC, ~0);
-               wr32(E1000_EIAC, 0);
+               u32 regval = rd32(E1000_EIAM);
+               wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
+               wr32(E1000_EIMC, adapter->eims_enable_mask);
+               regval = rd32(E1000_EIAC);
+               wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }
 
        wr32(E1000_IAM, 0);
@@ -787,8 +840,10 @@ static void igb_irq_enable(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
 
        if (adapter->msix_entries) {
-               wr32(E1000_EIAC, adapter->eims_enable_mask);
-               wr32(E1000_EIAM, adapter->eims_enable_mask);
+               u32 regval = rd32(E1000_EIAC);
+               wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
+               regval = rd32(E1000_EIAM);
+               wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count)
                        wr32(E1000_MBVFIMR, 0xFF);
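
The two hunks above stop blanket-writing EIAC/EIAM and instead
read-modify-write only the bits in eims_enable_mask, so auto-clear/auto-mask
state belonging to anyone else (e.g. vectors the VFs depend on) survives. A
minimal standalone illustration of the pattern, with a fake register standing
in for the hardware rd32()/wr32() accessors:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_eiac = 0xF0;  /* pretend other bits are already in use */

static uint32_t rd32(void) { return fake_eiac; }
static void wr32(uint32_t v) { fake_eiac = v; }

int main(void)
{
        uint32_t eims_enable_mask = 0x0F;

        /* enable: set only our bits, as the new igb_irq_enable() does */
        wr32(rd32() | eims_enable_mask);
        printf("after enable:  0x%02X\n", (unsigned)fake_eiac);  /* 0xFF */

        /* disable: clear only our bits, as the new igb_irq_disable() does */
        wr32(rd32() & ~eims_enable_mask);
        printf("after disable: 0x%02X\n", (unsigned)fake_eiac);  /* 0xF0 */
        return 0;
}
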
@@ -874,7 +929,7 @@ static void igb_configure(struct igb_adapter *adapter)
        int i;
 
        igb_get_hw_control(adapter);
-       igb_set_multi(netdev);
+       igb_set_rx_mode(netdev);
 
        igb_restore_vlan(adapter);
 
@@ -918,13 +973,14 @@ int igb_up(struct igb_adapter *adapter)
                igb_configure_msix(adapter);
 
        igb_vmm_control(adapter);
-       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
        igb_set_vmolr(hw, adapter->vfs_allocated_count);
 
        /* Clear any pending interrupts. */
        rd32(E1000_ICR);
        igb_irq_enable(adapter);
 
+       netif_tx_start_all_queues(adapter->netdev);
+
        /* Fire a link change interrupt to start the watchdog. */
        wr32(E1000_ICS, E1000_ICS_LSC);
        return 0;
@@ -977,6 +1033,11 @@ void igb_down(struct igb_adapter *adapter)
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
+#ifdef CONFIG_IGB_DCA
+
+       /* since we reset the hardware, DCA settings were cleared */
+       igb_setup_dca(adapter);
+#endif
 }
 
 void igb_reinit_locked(struct igb_adapter *adapter)
@@ -1071,7 +1132,7 @@ void igb_reset(struct igb_adapter *adapter)
        }
        fc->pause_time = 0xFFFF;
        fc->send_xon = 1;
-       fc->type = fc->original_type;
+       fc->current_mode = fc->requested_mode;
 
        /* disable receive for all VFs and wait one second */
        if (adapter->vfs_allocated_count) {
@@ -1108,7 +1169,8 @@ static const struct net_device_ops igb_netdev_ops = {
        .ndo_stop               = igb_close,
        .ndo_start_xmit         = igb_xmit_frame_adv,
        .ndo_get_stats          = igb_get_stats,
-       .ndo_set_multicast_list = igb_set_multi,
+       .ndo_set_rx_mode        = igb_set_rx_mode,
+       .ndo_set_multicast_list = igb_set_rx_mode,
        .ndo_set_mac_address    = igb_set_mac,
        .ndo_change_mtu         = igb_change_mtu,
        .ndo_do_ioctl           = igb_ioctl,
@@ -1151,15 +1213,15 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                return err;
 
        pci_using_dac = 0;
-       err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (!err) {
-               err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (!err)
                        pci_using_dac = 1;
        } else {
-               err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
-                       err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+                       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                        if (err) {
                                dev_err(&pdev->dev, "No usable DMA "
                                        "configuration, aborting\n");
@@ -1174,12 +1236,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (err)
                goto err_pci_reg;
 
-       err = pci_enable_pcie_error_reporting(pdev);
-       if (err) {
-               dev_err(&pdev->dev, "pci_enable_pcie_error_reporting failed "
-                       "0x%x\n", err);
-               /* non-fatal, continue */
-       }
+       pci_enable_pcie_error_reporting(pdev);
 
        pci_set_master(pdev);
        pci_save_state(pdev);
@@ -1235,6 +1292,46 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (err)
                goto err_sw_init;
 
+#ifdef CONFIG_PCI_IOV
+       /* since iov functionality isn't critical to base device function, we
+        * can accept failure.  If it fails we don't allow iov to be enabled */
+       if (hw->mac.type == e1000_82576) {
+               /* 82576 supports a maximum of 7 VFs in addition to the PF */
+               unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
+               int i;
+               unsigned char mac_addr[ETH_ALEN];
+
+               if (num_vfs) {
+                       adapter->vf_data = kcalloc(num_vfs,
+                                               sizeof(struct vf_data_storage),
+                                               GFP_KERNEL);
+                       if (!adapter->vf_data) {
+                               dev_err(&pdev->dev,
+                                       "Could not allocate VF private data - "
+                                       "IOV enable failed\n");
+                       } else {
+                               err = pci_enable_sriov(pdev, num_vfs);
+                               if (!err) {
+                                       adapter->vfs_allocated_count = num_vfs;
+                                       dev_info(&pdev->dev,
+                                                "%d vfs allocated\n",
+                                                num_vfs);
+                                       for (i = 0;
+                                            i < adapter->vfs_allocated_count;
+                                            i++) {
+                                               random_ether_addr(mac_addr);
+                                               igb_set_vf_mac(adapter, i,
+                                                              mac_addr);
+                                       }
+                               } else {
+                                       kfree(adapter->vf_data);
+                                       adapter->vf_data = NULL;
+                               }
+                       }
+               }
+       }
+
+#endif
        /* setup the private structure */
        err = igb_sw_init(adapter);
        if (err)
@@ -1281,11 +1378,15 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        netdev->vlan_features |= NETIF_F_TSO;
        netdev->vlan_features |= NETIF_F_TSO6;
        netdev->vlan_features |= NETIF_F_IP_CSUM;
+       netdev->vlan_features |= NETIF_F_IPV6_CSUM;
        netdev->vlan_features |= NETIF_F_SG;
 
        if (pci_using_dac)
                netdev->features |= NETIF_F_HIGHDMA;
 
+       if (adapter->hw.mac.type == e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CSUM;
+
        adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
        /* before reading the NVM, reset the controller to put the device in a
@@ -1325,16 +1426,14 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        hw->mac.autoneg = true;
        hw->phy.autoneg_advertised = 0x2f;
 
-       hw->fc.original_type = e1000_fc_default;
-       hw->fc.type = e1000_fc_default;
+       hw->fc.requested_mode = e1000_fc_default;
+       hw->fc.current_mode = e1000_fc_default;
 
        adapter->itr_setting = IGB_DEFAULT_ITR;
        adapter->itr = IGB_START_ITR;
 
        igb_validate_mdi_setting(hw);
 
-       adapter->rx_csum = 1;
-
        /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
         * enable the ACPI Magic Packet filter
         */
@@ -1385,35 +1484,18 @@ static int __devinit igb_probe(struct pci_dev *pdev,
         * driver. */
        igb_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until igb_open() is called */
-       netif_carrier_off(netdev);
-       netif_tx_stop_all_queues(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
-#ifdef CONFIG_PCI_IOV
-       /* since iov functionality isn't critical to base device function we
-        * can accept failure.  If it fails we don't allow iov to be enabled */
-       if (hw->mac.type == e1000_82576) {
-               err = pci_enable_sriov(pdev, 0);
-               if (!err)
-                       err = device_create_file(&netdev->dev,
-                                                &dev_attr_num_vfs);
-               if (err)
-                       dev_err(&pdev->dev, "Failed to initialize IOV\n");
-       }
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
 
-#endif
 #ifdef CONFIG_IGB_DCA
        if (dca_add_requester(&pdev->dev) == 0) {
                adapter->flags |= IGB_FLAG_DCA_ENABLED;
                dev_info(&pdev->dev, "DCA enabled\n");
-               /* Always use CB2 mode, difference is masked
-                * in the CB driver. */
-               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                igb_setup_dca(adapter);
        }
 #endif
@@ -1476,9 +1558,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                 netdev->name,
                 ((hw->bus.speed == e1000_bus_speed_2500)
                  ? "2.5Gb/s" : "unknown"),
-                ((hw->bus.width == e1000_bus_width_pcie_x4)
-                 ? "Width x4" : (hw->bus.width == e1000_bus_width_pcie_x1)
-                 ? "Width x1" : "unknown"),
+                ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+                 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+                 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+                  "unknown"),
                 netdev->dev_addr);
 
        igb_read_part_num(hw, &part_num);
@@ -1530,7 +1613,6 @@ static void __devexit igb_remove(struct pci_dev *pdev)
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       int err;
 
        /* flush_scheduled_work() may reschedule our watchdog task, so
         * explicitly disable watchdog tasks from being rescheduled */
@@ -1584,10 +1666,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 
        free_netdev(netdev);
 
-       err = pci_disable_pcie_error_reporting(pdev);
-       if (err)
-               dev_err(&pdev->dev,
-                       "pci_disable_pcie_error_reporting failed 0x%x\n", err);
+       pci_disable_pcie_error_reporting(pdev);
 
        pci_disable_device(pdev);
 }
@@ -1654,6 +1733,8 @@ static int igb_open(struct net_device *netdev)
        if (test_bit(__IGB_TESTING, &adapter->state))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = igb_setup_all_tx_resources(adapter);
        if (err)
@@ -1678,7 +1759,6 @@ static int igb_open(struct net_device *netdev)
        igb_configure(adapter);
 
        igb_vmm_control(adapter);
-       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
        igb_set_vmolr(hw, adapter->vfs_allocated_count);
 
        err = igb_request_irq(adapter);
@@ -1961,7 +2041,7 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl;
        u32 srrctl = 0;
-       int i, j;
+       int i;
 
        rctl = rd32(E1000_RCTL);
 
@@ -2026,8 +2106,6 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        if (adapter->vfs_allocated_count) {
                u32 vmolr;
 
-               j = adapter->rx_ring[0].reg_idx;
-
                /* set all queue drop enable bits */
                wr32(E1000_QDE, ALL_QUEUES);
                srrctl |= E1000_SRRCTL_DROP_EN;
@@ -2035,16 +2113,16 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
                /* disable queue 0 to prevent tail write w/o re-config */
                wr32(E1000_RXDCTL(0), 0);
 
-               vmolr = rd32(E1000_VMOLR(j));
+               vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
                if (rctl & E1000_RCTL_LPE)
                        vmolr |= E1000_VMOLR_LPE;
-               if (adapter->num_rx_queues > 0)
+               if (adapter->num_rx_queues > 1)
                        vmolr |= E1000_VMOLR_RSSE;
-               wr32(E1000_VMOLR(j), vmolr);
+               wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
        }
 
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               j = adapter->rx_ring[i].reg_idx;
+               int j = adapter->rx_ring[i].reg_idx;
                wr32(E1000_SRRCTL(j), srrctl);
        }
 
@@ -2188,32 +2266,34 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                         E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
-
                wr32(E1000_MRQC, mrqc);
-
-               /* Multiqueue and raw packet checksumming are mutually
-                * exclusive.  Note that this not the same as TCP/IP
-                * checksumming, which works fine. */
-               rxcsum = rd32(E1000_RXCSUM);
-               rxcsum |= E1000_RXCSUM_PCSD;
-               wr32(E1000_RXCSUM, rxcsum);
-       } else {
+       } else if (adapter->vfs_allocated_count) {
                /* Enable multi-queue for sr-iov */
-               if (adapter->vfs_allocated_count)
-                       wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
-               /* Enable Receive Checksum Offload for TCP and UDP */
-               rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum)
-                       rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE;
-               else
-                       rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE);
-
-               wr32(E1000_RXCSUM, rxcsum);
+               wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
        }
 
+       /* Enable Receive Checksum Offload for TCP and UDP */
+       rxcsum = rd32(E1000_RXCSUM);
+       /* Disable raw packet checksumming */
+       rxcsum |= E1000_RXCSUM_PCSD;
+
+       if (adapter->hw.mac.type == e1000_82576)
+               /* Enable Receive Checksum Offload for SCTP */
+               rxcsum |= E1000_RXCSUM_CRCOFL;
+
+       /* Don't need to set TUOFL or IPOFL, they default to 1 */
+       wr32(E1000_RXCSUM, rxcsum);
+
        /* Set the default pool for the PF's first queue */
        igb_configure_vt_default_pool(adapter);
 
+       /* set UTA to appropriate mode */
+       igb_set_uta(adapter);
+
+       /* set the correct pool for the PF default MAC address in entry 0 */
+       igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
+                        adapter->vfs_allocated_count);
+
        igb_rlpml_set(adapter);
 
        /* Enable Receives */
@@ -2257,19 +2337,14 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
 static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter,
                                           struct igb_buffer *buffer_info)
 {
-       if (buffer_info->dma) {
-               pci_unmap_page(adapter->pdev,
-                               buffer_info->dma,
-                               buffer_info->length,
-                               PCI_DMA_TODEVICE);
-               buffer_info->dma = 0;
-       }
+       buffer_info->dma = 0;
        if (buffer_info->skb) {
+               skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb,
+                             DMA_TO_DEVICE);
                dev_kfree_skb_any(buffer_info->skb);
                buffer_info->skb = NULL;
        }
        buffer_info->time_stamp = 0;
-       buffer_info->next_to_watch = 0;
        /* buffer_info must be completely set up in the transmit path */
 }
 
@@ -2442,59 +2517,44 @@ static int igb_set_mac(struct net_device *netdev, void *p)
        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 
-       hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
-
-       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
+       /* set the correct pool for the new PF MAC address in entry 0 */
+       igb_rar_set_qsel(adapter, hw->mac.addr, 0,
+                        adapter->vfs_allocated_count);
 
        return 0;
 }
 
 /**
- * igb_set_multi - Multicast and Promiscuous mode set
+ * igb_write_mc_addr_list - write multicast addresses to MTA
  * @netdev: network interface device structure
  *
- * The set_multi entry point is called whenever the multicast address
- * list or the network interface flags are updated.  This routine is
- * responsible for configuring the hardware for proper multicast,
- * promiscuous mode, and all-multi behavior.
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ *                0 on no addresses written
+ *                X on writing X addresses to MTA
  **/
-static void igb_set_multi(struct net_device *netdev)
+static int igb_write_mc_addr_list(struct net_device *netdev)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       struct e1000_mac_info *mac = &hw->mac;
-       struct dev_mc_list *mc_ptr;
+       struct dev_mc_list *mc_ptr = netdev->mc_list;
        u8  *mta_list;
-       u32 rctl;
+       u32 vmolr = 0;
        int i;
 
-       /* Check for Promiscuous and All Multicast modes */
-
-       rctl = rd32(E1000_RCTL);
-
-       if (netdev->flags & IFF_PROMISC) {
-               rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
-               rctl &= ~E1000_RCTL_VFE;
-       } else {
-               if (netdev->flags & IFF_ALLMULTI) {
-                       rctl |= E1000_RCTL_MPE;
-                       rctl &= ~E1000_RCTL_UPE;
-               } else
-                       rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
-               rctl |= E1000_RCTL_VFE;
-       }
-       wr32(E1000_RCTL, rctl);
-
        if (!netdev->mc_count) {
                /* nothing to program, so clear mc list */
-               igb_update_mc_addr_list(hw, NULL, 0, 1,
-                                       mac->rar_entry_count);
-               return;
+               igb_update_mc_addr_list(hw, NULL, 0);
+               igb_restore_vf_multicasts(adapter);
+               return 0;
        }
 
        mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
        if (!mta_list)
-               return;
+               return -ENOMEM;
+
+       /* set vmolr receive overflow multicast bit */
+       vmolr |= E1000_VMOLR_ROMPE;
 
        /* The shared function expects a packed array of only addresses. */
        mc_ptr = netdev->mc_list;
@@ -2505,14 +2565,125 @@ static void igb_set_multi(struct net_device *netdev)
                memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
                mc_ptr = mc_ptr->next;
        }
-       igb_update_mc_addr_list(hw, mta_list, i,
-                               adapter->vfs_allocated_count + 1,
-                               mac->rar_entry_count);
+       igb_update_mc_addr_list(hw, mta_list, i);
+       kfree(mta_list);
 
-       igb_set_mc_list_pools(adapter, i, mac->rar_entry_count);
-       igb_restore_vf_multicasts(adapter);
+       return netdev->mc_count;
+}
 
-       kfree(mta_list);
+/**
+ * igb_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+static int igb_write_uc_addr_list(struct net_device *netdev)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
+       unsigned int vfn = adapter->vfs_allocated_count;
+       unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
+       int count = 0;
+
+       /* return ENOMEM indicating insufficient memory for addresses */
+       if (netdev->uc.count > rar_entries)
+               return -ENOMEM;
+
+       if (netdev->uc.count && rar_entries) {
+               struct netdev_hw_addr *ha;
+               list_for_each_entry(ha, &netdev->uc.list, list) {
+                       if (!rar_entries)
+                               break;
+                       igb_rar_set_qsel(adapter, ha->addr,
+                                        rar_entries--,
+                                        vfn);
+                       count++;
+               }
+       }
+       /* write the addresses in reverse order to avoid write combining */
+       for (; rar_entries > 0 ; rar_entries--) {
+               wr32(E1000_RAH(rar_entries), 0);
+               wr32(E1000_RAL(rar_entries), 0);
+       }
+       wrfl();
+
+       return count;
+}
+
+/**
+ * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void igb_set_rx_mode(struct net_device *netdev)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
+       unsigned int vfn = adapter->vfs_allocated_count;
+       u32 rctl, vmolr = 0;
+       int count;
+
+       /* Check for Promiscuous and All Multicast modes */
+       rctl = rd32(E1000_RCTL);
+
+       /* clear the affected bits */
+       rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
+
+       if (netdev->flags & IFF_PROMISC) {
+               rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+               vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
+       } else {
+               if (netdev->flags & IFF_ALLMULTI) {
+                       rctl |= E1000_RCTL_MPE;
+                       vmolr |= E1000_VMOLR_MPME;
+               } else {
+                       /*
+                        * Write addresses to the MTA; if the attempt fails
+                        * then we should just turn on promiscuous mode so
+                        * that we can at least receive multicast traffic
+                        */
+                       count = igb_write_mc_addr_list(netdev);
+                       if (count < 0) {
+                               rctl |= E1000_RCTL_MPE;
+                               vmolr |= E1000_VMOLR_MPME;
+                       } else if (count) {
+                               vmolr |= E1000_VMOLR_ROMPE;
+                       }
+               }
+               /*
+                * Write addresses to available RAR registers; if there is not
+                * sufficient space to store all the addresses, then enable
+                * unicast promiscuous mode
+                */
+               count = igb_write_uc_addr_list(netdev);
+               if (count < 0) {
+                       rctl |= E1000_RCTL_UPE;
+                       vmolr |= E1000_VMOLR_ROPE;
+               }
+               rctl |= E1000_RCTL_VFE;
+       }
+       wr32(E1000_RCTL, rctl);
+
+       /*
+        * In order to support SR-IOV and eventually VMDq it is necessary to set
+        * the VMOLR to enable the appropriate modes.  Without this workaround
+        * we will have issues with VLAN tag stripping not being done for frames
+        * that are only arriving because we are the default pool
+        */
+       if (hw->mac.type < e1000_82576)
+               return;
+
+       vmolr |= rd32(E1000_VMOLR(vfn)) &
+                ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
+       wr32(E1000_VMOLR(vfn), vmolr);
+       igb_restore_vf_multicasts(adapter);
 }
 
 /* Need to wait a few seconds after link up to get diagnostic information from
@@ -2547,10 +2718,6 @@ static bool igb_has_link(struct igb_adapter *adapter)
                        link_active = true;
                }
                break;
-       case e1000_media_type_fiber:
-               ret_val = hw->mac.ops.check_for_link(hw);
-               link_active = !!(rd32(E1000_STATUS) & E1000_STATUS_LU);
-               break;
        case e1000_media_type_internal_serdes:
                ret_val = hw->mac.ops.check_for_link(hw);
                link_active = hw->mac.serdes_has_link;
@@ -2625,7 +2792,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        }
 
                        netif_carrier_on(netdev);
-                       netif_tx_wake_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2642,7 +2808,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_tx_stop_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2676,6 +2841,8 @@ link_up:
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2859,13 +3026,13 @@ static void igb_set_itr(struct igb_adapter *adapter)
        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
-               new_itr = 70000;
+               new_itr = 56;  /* aka 70,000 ints/sec */
                break;
        case low_latency:
-               new_itr = 20000; /* aka hwitr = ~200 */
+               new_itr = 196; /* aka 20,000 ints/sec */
                break;
        case bulk_latency:
-               new_itr = 4000;
+               new_itr = 980; /* aka 4,000 ints/sec */
                break;
        default:
                break;
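
This hunk changes the units new_itr is carried in: instead of interrupts/sec
converted at register-write time by the soon-to-be-removed
1000000000 / (new_itr * 256) line further down, new_itr now holds the EITR
interval directly, in what that dropped conversion implies are 256 ns ticks.
A standalone check of the arithmetic (the driver's 56/196/980 are these
values, rounded):

#include <stdio.h>

/* the conversion the removed line used to do at register-write time */
static unsigned int rate_to_eitr(unsigned int ints_per_sec)
{
        return 1000000000u / (ints_per_sec * 256u);
}

int main(void)
{
        printf("70000/s -> %u\n", rate_to_eitr(70000)); /* ~55, driver: 56 */
        printf("20000/s -> %u\n", rate_to_eitr(20000)); /* ~195, driver: 196 */
        printf(" 4000/s -> %u\n", rate_to_eitr(4000));  /* ~976, driver: 980 */
        return 0;
}
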
@@ -2884,7 +3051,8 @@ set_itr_now:
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > adapter->itr ?
-                            min(adapter->itr + (new_itr >> 2), new_itr) :
+                            max((new_itr * adapter->itr) /
+                                (new_itr + (adapter->itr >> 2)), new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
@@ -2893,7 +3061,7 @@ set_itr_now:
                 * ends up being correct.
                 */
                adapter->itr = new_itr;
-               adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256);
+               adapter->rx_ring->itr_val = new_itr;
                adapter->rx_ring->set_itr = 1;
        }
 
@@ -3032,11 +3200,15 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        case cpu_to_be16(ETH_P_IPV6):
                                /* XXX what about other V6 headers?? */
                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        default:
                                if (unlikely(net_ratelimit()))
@@ -3080,25 +3252,32 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
        unsigned int len = skb_headlen(skb);
        unsigned int count = 0, i;
        unsigned int f;
+       dma_addr_t *map;
 
        i = tx_ring->next_to_use;
 
+       if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) {
+               dev_err(&adapter->pdev->dev, "TX DMA map failed\n");
+               return 0;
+       }
+
+       map = skb_shinfo(skb)->dma_maps;
+
        buffer_info = &tx_ring->buffer_info[i];
        BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
        buffer_info->length = len;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->next_to_watch = i;
-       buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
-                                         PCI_DMA_TODEVICE);
-       count++;
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
+       buffer_info->dma = skb_shinfo(skb)->dma_head;
 
        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
                struct skb_frag_struct *frag;
 
+               i++;
+               if (i == tx_ring->count)
+                       i = 0;
+
                frag = &skb_shinfo(skb)->frags[f];
                len = frag->size;
 
@@ -3107,23 +3286,14 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
                buffer_info->length = len;
                buffer_info->time_stamp = jiffies;
                buffer_info->next_to_watch = i;
-               buffer_info->dma = pci_map_page(adapter->pdev,
-                                               frag->page,
-                                               frag->page_offset,
-                                               len,
-                                               PCI_DMA_TODEVICE);
-
+               buffer_info->dma = map[count];
                count++;
-               i++;
-               if (i == tx_ring->count)
-                       i = 0;
        }
 
-       i = ((i == 0) ? tx_ring->count - 1 : i - 1);
        tx_ring->buffer_info[i].skb = skb;
        tx_ring->buffer_info[first].next_to_watch = i;
 
-       return count;
+       return count + 1;
 }
 
 static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
@@ -3224,14 +3394,15 @@ static int igb_maybe_stop_tx(struct net_device *netdev,
        return __igb_maybe_stop_tx(netdev, tx_ring, size);
 }
 
-static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct igb_ring *tx_ring)
+static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
+                                          struct net_device *netdev,
+                                          struct igb_ring *tx_ring)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        unsigned int first;
        unsigned int tx_flags = 0;
        u8 hdr_len = 0;
+       int count = 0;
        int tso = 0;
        union skb_shared_tx *shtx;
 
@@ -3293,19 +3464,28 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                 (skb->ip_summed == CHECKSUM_PARTIAL))
                tx_flags |= IGB_TX_FLAGS_CSUM;
 
-       igb_tx_queue_adv(adapter, tx_ring, tx_flags,
-                        igb_tx_map_adv(adapter, tx_ring, skb, first),
-                        skb->len, hdr_len);
-
-       netdev->trans_start = jiffies;
+       /*
+        * count reflects descriptors mapped; if 0, then a mapping error
+        * has occurred and we need to rewind the descriptor queue
+        */
+       count = igb_tx_map_adv(adapter, tx_ring, skb, first);
 
-       /* Make sure there is space in the ring for the next send. */
-       igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       if (count) {
+               igb_tx_queue_adv(adapter, tx_ring, tx_flags, count,
+                                skb->len, hdr_len);
+               /* Make sure there is space in the ring for the next send. */
+               igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       } else {
+               dev_kfree_skb_any(skb);
+               tx_ring->buffer_info[first].time_stamp = 0;
+               tx_ring->next_to_use = first;
+       }
 
        return NETDEV_TX_OK;
 }
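
Note that the mapping-failure path still returns NETDEV_TX_OK: skb_dma_map()
maps the linear part into skb_shinfo(skb)->dma_head and each fragment into
dma_maps[], and once that fails the skb is freed here and next_to_use is
rewound to first, so the packet is consumed as far as the stack is concerned.
Returning NETDEV_TX_BUSY instead would make the core requeue an skb the
driver no longer owns.
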
 
-static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
+                                     struct net_device *netdev)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct igb_ring *tx_ring;
@@ -3318,7 +3498,7 @@ static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev)
         * to a flow.  Right now, performance is impacted slightly negatively
         * if using multiple tx queues.  If the stack breaks away from a
         * single qdisc implementation, we can look at this again. */
-       return (igb_xmit_frame_ring_adv(skb, netdev, tx_ring));
+       return igb_xmit_frame_ring_adv(skb, netdev, tx_ring);
 }
 
 /**
@@ -3537,8 +3717,35 @@ void igb_update_stats(struct igb_adapter *adapter)
 
        /* Rx Errors */
 
+       if (hw->mac.type != e1000_82575) {
+               u32 rqdpc_tmp;
+               u64 rqdpc_total = 0;
+               int i;
+               /* Read out drop stats per RX queue.  Note that RQDPC (Receive
+                * Queue Drop Packet Count) stats only get incremented if the
+                * DROP_EN bit is set (in the SRRCTL register for that queue).
+                * If the DROP_EN bit is NOT set, then a somewhat equivalent
+                * count is stored in RNBC (not on a per-queue basis).
+                * Also note the drop count is due to lack of available
+                * descriptors.
+                */
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
+                       adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
+                       rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
+               }
+               adapter->net_stats.rx_fifo_errors = rqdpc_total;
+       }
+
+       /* Note RNBC (Receive No Buffers Count) is not an exact
+        * drop count, as the hardware FIFO might save the day.  That's
+        * one of the reasons for saving it in rx_fifo_errors, as it's
+        * potentially not a true drop.
+        */
+       adapter->net_stats.rx_fifo_errors += adapter->stats.rnbc;
+
        /* RLEC on some newer hardware can be incorrect so build
-       * our own version based on RUC and ROC */
+        * our own version based on RUC and ROC */
        adapter->net_stats.rx_errors = adapter->stats.rxerrc +
                adapter->stats.crcerrs + adapter->stats.algnerrc +
                adapter->stats.ruc + adapter->stats.roc +
@@ -3722,11 +3929,15 @@ static void igb_update_tx_dca(struct igb_ring *tx_ring)
 
 static void igb_setup_dca(struct igb_adapter *adapter)
 {
+       struct e1000_hw *hw = &adapter->hw;
        int i;
 
        if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
                return;
 
+       /* Always use CB2 mode, difference is masked in the CB driver. */
+       wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
+
        for (i = 0; i < adapter->num_tx_queues; i++) {
                adapter->tx_ring[i].cpu = -1;
                igb_update_tx_dca(&adapter->tx_ring[i]);
@@ -3821,10 +4032,10 @@ static int igb_set_vf_multicasts(struct igb_adapter *adapter,
        /* VFs are limited to using the MTA hash table for their multicast
         * addresses */
        for (i = 0; i < n; i++)
-               vf_data->vf_mc_hashes[i] = hash_list[i];;
+               vf_data->vf_mc_hashes[i] = hash_list[i];
 
        /* Flush and reset the mta with the new values */
-       igb_set_multi(adapter->netdev);
+       igb_set_rx_mode(adapter->netdev);
 
        return 0;
 }
@@ -3867,6 +4078,8 @@ static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
 
                wr32(E1000_VLVF(i), reg);
        }
+
+       adapter->vf_data[vf].vlans_enabled = 0;
 }
 
 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
@@ -3915,6 +4128,22 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
                        reg |= vid;
 
                        wr32(E1000_VLVF(i), reg);
+
+                       /* do not modify RLPML for PF devices */
+                       if (vf >= adapter->vfs_allocated_count)
+                               return 0;
+
+                       if (!adapter->vf_data[vf].vlans_enabled) {
+                               u32 size;
+                               reg = rd32(E1000_VMOLR(vf));
+                               size = reg & E1000_VMOLR_RLPML_MASK;
+                               size += 4;
+                               reg &= ~E1000_VMOLR_RLPML_MASK;
+                               reg |= size;
+                               wr32(E1000_VMOLR(vf), reg);
+                       }
+                       adapter->vf_data[vf].vlans_enabled++;
+
                        return 0;
                }
        } else {
@@ -3927,6 +4156,21 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
                                igb_vfta_set(hw, vid, false);
                        }
                        wr32(E1000_VLVF(i), reg);
+
+                       /* do not modify RLPML for PF devices */
+                       if (vf >= adapter->vfs_allocated_count)
+                               return 0;
+
+                       adapter->vf_data[vf].vlans_enabled--;
+                       if (!adapter->vf_data[vf].vlans_enabled) {
+                               u32 size;
+                               reg = rd32(E1000_VMOLR(vf));
+                               size = reg & E1000_VMOLR_RLPML_MASK;
+                               size -= 4;
+                               reg &= ~E1000_VMOLR_RLPML_MASK;
+                               reg |= size;
+                               wr32(E1000_VMOLR(vf), reg);
+                       }
                        return 0;
                }
        }
@@ -3958,13 +4202,14 @@ static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
        adapter->vf_data[vf].num_vf_mc_hashes = 0;
 
        /* Flush and reset the mta with the new values */
-       igb_set_multi(adapter->netdev);
+       igb_set_rx_mode(adapter->netdev);
 }
 
 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
 {
        struct e1000_hw *hw = &adapter->hw;
        unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+       int rar_entry = hw->mac.rar_entry_count - (vf + 1);
        u32 reg, msgbuf[3];
        u8 *addr = (u8 *)(&msgbuf[1]);
 
@@ -3972,8 +4217,7 @@ static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
        igb_vf_reset_event(adapter, vf);
 
        /* set vf mac address */
-       igb_rar_set(hw, vf_mac, vf + 1);
-       igb_set_rah_pool(hw, vf, vf + 1);
+       igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
 
        /* enable transmit and receive for vf */
        reg = rd32(E1000_VFTE);
@@ -4103,6 +4347,33 @@ static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
 }
 
 /**
+ *  igb_set_uta - Set unicast filter table address
+ *  @adapter: board private structure
+ *
+ *  The unicast table address is a register array of 32-bit registers.
+ *  The table is meant to be used in a way similar to how the MTA is used
+ *  however, due to certain limitations in the hardware it is necessary to
+ *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
+ *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
+ **/
+static void igb_set_uta(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       int i;
+
+       /* The UTA table only exists on 82576 hardware and newer */
+       if (hw->mac.type < e1000_82576)
+               return;
+
+       /* we only need to do this if VMDq is enabled */
+       if (!adapter->vfs_allocated_count)
+               return;
+
+       for (i = 0; i < hw->mac.uta_reg_count; i++)
+               array_wr32(E1000_UTA, i, ~0);
+}
+
+/**
  * igb_intr_msi - Interrupt Handler
  * @irq: interrupt number
  * @data: pointer to a network interface device structure
@@ -4389,20 +4660,12 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
 
        skb_record_rx_queue(skb, ring->queue_index);
-       if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-               if (vlan_extracted)
-                       vlan_gro_receive(&ring->napi, adapter->vlgrp,
-                                        le16_to_cpu(rx_desc->wb.upper.vlan),
-                                        skb);
-               else
-                       napi_gro_receive(&ring->napi, skb);
-       } else {
-               if (vlan_extracted)
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                         le16_to_cpu(rx_desc->wb.upper.vlan));
-               else
-                       netif_receive_skb(skb);
-       }
+       if (vlan_extracted)
+               vlan_gro_receive(&ring->napi, adapter->vlgrp,
+                                le16_to_cpu(rx_desc->wb.upper.vlan),
+                                skb);
+       else
+               napi_gro_receive(&ring->napi, skb);
 }
 
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
@@ -4411,22 +4674,45 @@ static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Ignore Checksum bit is set or checksum is disabled through ethtool */
-       if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum)
+       if ((status_err & E1000_RXD_STAT_IXSM) ||
+           (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
                return;
        /* TCP/UDP checksum error bit is set */
        if (status_err &
            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+               /*
+                * work around errata with sctp packets where the TCPE aka
+                * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+                * packets (aka let the stack check the crc32c)
+                */
+               if (!((adapter->hw.mac.type == e1000_82576) &&
+                     (skb->len == 60)))
+                       adapter->hw_csum_err++;
                /* let the stack verify checksum errors */
-               adapter->hw_csum_err++;
                return;
        }
        /* It must be a TCP or UDP packet with a valid checksum */
        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
        adapter->hw_csum_good++;
 }
 
+static inline u16 igb_get_hlen(struct igb_adapter *adapter,
+                               union e1000_adv_rx_desc *rx_desc)
+{
+       /* HW will not DMA in data larger than the given buffer, even if it
+        * parses the (NFS, of course) header to be larger.  In that case, it
+        * fills the header buffer and spills the rest into the page.
+        */
+       u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
+                  E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
+       if (hlen > adapter->rx_ps_hdr_size)
+               hlen = adapter->rx_ps_hdr_size;
+       return hlen;
+}
+
 static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                                 int *work_done, int budget)
 {
@@ -4441,7 +4727,8 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
        int cleaned_count = 0;
        unsigned int total_bytes = 0, total_packets = 0;
        unsigned int i;
-       u32 length, hlen, staterr;
+       u32 staterr;
+       u16 length;
 
        i = rx_ring->next_to_clean;
        buffer_info = &rx_ring->buffer_info[i];
@@ -4468,29 +4755,22 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                cleaned = true;
                cleaned_count++;
 
+               /* this is the fast path for the non-packet split case */
                if (!adapter->rx_ps_hdr_size) {
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_buffer_len +
-                                          NET_IP_ALIGN,
+                                        adapter->rx_buffer_len,
                                         PCI_DMA_FROMDEVICE);
+                       buffer_info->dma = 0;
                        skb_put(skb, length);
                        goto send_up;
                }
 
-               /* HW will not DMA in data larger than the given buffer, even
-                * if it parses the (NFS, of course) header to be larger.  In
-                * that case, it fills the header buffer and spills the rest
-                * into the page.
-                */
-               hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
-                 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
-               if (hlen > adapter->rx_ps_hdr_size)
-                       hlen = adapter->rx_ps_hdr_size;
-
-               if (!skb_shinfo(skb)->nr_frags) {
+               if (buffer_info->dma) {
+                       u16 hlen = igb_get_hlen(adapter, rx_desc);
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_ps_hdr_size + NET_IP_ALIGN,
+                                        adapter->rx_ps_hdr_size,
                                         PCI_DMA_FROMDEVICE);
+                       buffer_info->dma = 0;
                        skb_put(skb, hlen);
                }
 
@@ -4632,7 +4912,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                bufsz = adapter->rx_ps_hdr_size;
        else
                bufsz = adapter->rx_buffer_len;
-       bufsz += NET_IP_ALIGN;
 
        while (cleaned_count--) {
                rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
@@ -4656,7 +4935,7 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                }
 
                if (!buffer_info->skb) {
-                       skb = netdev_alloc_skb(netdev, bufsz);
+                       skb = netdev_alloc_skb(netdev, bufsz + NET_IP_ALIGN);
                        if (!skb) {
                                adapter->alloc_rx_buff_failed++;
                                goto no_buffers;
@@ -4727,8 +5006,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
                data->phy_id = adapter->hw.phy.addr;
                break;
        case SIOCGMIIREG:
-               if (!capable(CAP_NET_ADMIN))
-                       return -EPERM;
                if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
                                     &data->val_out))
                        return -EIO;
@@ -4917,6 +5194,34 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        }
 }
 
+s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+       struct igb_adapter *adapter = hw->back;
+       u16 cap_offset;
+
+       cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+       if (!cap_offset)
+               return -E1000_ERR_CONFIG;
+
+       pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+       return 0;
+}
+
+s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+       struct igb_adapter *adapter = hw->back;
+       u16 cap_offset;
+
+       cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+       if (!cap_offset)
+               return -E1000_ERR_CONFIG;
+
+       pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+
+       return 0;
+}
+
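
These two helpers give the shared e1000 MAC code access to PCIe capability
registers without it having to know about struct pci_dev: reg is an offset
relative to the PCIe capability located by pci_find_capability(), e.g.
PCI_EXP_DEVCTL (offset 8), which holds the maximum read request size the MAC
code tunes. Errors from the config-space accessors themselves are ignored;
only a missing capability is reported.
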
 static void igb_vlan_rx_register(struct net_device *netdev,
                                 struct vlan_group *grp)
 {
@@ -5020,14 +5325,6 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
 
        mac->autoneg = 0;
 
-       /* Fiber NICs only allow 1000 gbps Full duplex */
-       if ((adapter->hw.phy.media_type == e1000_media_type_fiber) &&
-               spddplx != (SPEED_1000 + DUPLEX_FULL)) {
-               dev_err(&adapter->pdev->dev,
-                       "Unsupported Speed/Duplex configuration\n");
-               return -EINVAL;
-       }
-
        switch (spddplx) {
        case SPEED_10 + DUPLEX_HALF:
                mac->forced_speed_duplex = ADVERTISE_10_HALF;
@@ -5054,7 +5351,7 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
        return 0;
 }
 
-static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -5086,7 +5383,7 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
 
        if (wufc) {
                igb_setup_rctl(adapter);
-               igb_set_multi(netdev);
+               igb_set_rx_mode(netdev);
 
                /* turn on all-multi mode if wake on multicast is enabled */
                if (wufc & E1000_WUFC_MC) {
@@ -5113,15 +5410,9 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
                wr32(E1000_WUFC, 0);
        }
 
-       /* make sure adapter isn't asleep if manageability/wol is enabled */
-       if (wufc || adapter->en_mng_pt) {
-               pci_enable_wake(pdev, PCI_D3hot, 1);
-               pci_enable_wake(pdev, PCI_D3cold, 1);
-       } else {
-               igb_shutdown_fiber_serdes_link_82575(hw);
-               pci_enable_wake(pdev, PCI_D3hot, 0);
-               pci_enable_wake(pdev, PCI_D3cold, 0);
-       }
+       *enable_wake = wufc || adapter->en_mng_pt;
+       if (!*enable_wake)
+               igb_shutdown_serdes_link_82575(hw);
 
        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
         * would have already happened in close and is redundant. */
@@ -5129,12 +5420,29 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
 
        pci_disable_device(pdev);
 
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
        return 0;
 }
 
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       int retval;
+       bool wake;
+
+       retval = __igb_shutdown(pdev, &wake);
+       if (retval)
+               return retval;
+
+       if (wake) {
+               pci_prepare_to_sleep(pdev);
+       } else {
+               pci_wake_from_d3(pdev, false);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
+
+       return 0;
+}
+
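With this split, igb_suspend() keeps only the wake decision and lets the PCI
core do the mechanics: pci_prepare_to_sleep() enables wake signaling and picks
the deepest power state the device can still wake the system from, while the
no-wake path explicitly disables D3 wake before entering D3hot.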
 static int igb_resume(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
@@ -5187,7 +5495,14 @@ static int igb_resume(struct pci_dev *pdev)
 
 static void igb_shutdown(struct pci_dev *pdev)
 {
-       igb_suspend(pdev, PMSG_SUSPEND);
+       bool wake;
+
+       __igb_shutdown(pdev, &wake);
+
+       if (system_state == SYSTEM_POWER_OFF) {
+               pci_wake_from_d3(pdev, wake);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
 }
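The system_state check means wake configuration and D3hot entry now happen only
on a real power-off; on a plain reboot, where power never drops, the adapter is
presumably best left alone for the firmware to find in D0.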
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -5239,6 +5554,9 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
 
        netif_device_detach(netdev);
 
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
        if (netif_running(netdev))
                igb_down(adapter);
        pci_disable_device(pdev);
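For reference, the three pci_channel_state values from <linux/pci.h>:

        /* pci_channel_io_normal       - channel believed healthy
         * pci_channel_io_frozen       - I/O blocked, a reset may recover
         * pci_channel_io_perm_failure - device is permanently lost
         */

Only the last is unrecoverable, so returning PCI_ERS_RESULT_DISCONNECT before
touching the hardware tells the AER core to skip the reset and recovery
callbacks for this device.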
@@ -5315,67 +5633,44 @@ static void igb_io_resume(struct pci_dev *pdev)
        igb_get_hw_control(adapter);
 }
 
-static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
-{
-       u32 reg_data;
-
-       reg_data = rd32(E1000_VMOLR(vfn));
-       reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
-                   E1000_VMOLR_ROPE |   /* Accept packets matched in UTA */
-                   E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
-                   E1000_VMOLR_AUPE |   /* Accept untagged packets */
-                   E1000_VMOLR_STRVLAN; /* Strip vlan tags */
-       wr32(E1000_VMOLR(vfn), reg_data);
-}
-
-static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
-                                 int vfn)
+static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
+                             u8 qsel)
 {
+       u32 rar_low, rar_high;
        struct e1000_hw *hw = &adapter->hw;
-       u32 vmolr;
 
-       vmolr = rd32(E1000_VMOLR(vfn));
-       vmolr &= ~E1000_VMOLR_RLPML_MASK;
-       vmolr |= size | E1000_VMOLR_LPE;
-       wr32(E1000_VMOLR(vfn), vmolr);
-
-       return 0;
-}
-
-static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry)
-{
-       u32 reg_data;
-
-       reg_data = rd32(E1000_RAH(entry));
-       reg_data &= ~E1000_RAH_POOL_MASK;
-       reg_data |= E1000_RAH_POOL_1 << pool;;
-       wr32(E1000_RAH(entry), reg_data);
-}
+       /* HW expects these in little endian so we reverse the byte order
+        * from network order (big endian) to little endian
+        */
+       rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+                 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+       rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
 
-static void igb_set_mc_list_pools(struct igb_adapter *adapter,
-                                 int entry_count, u16 total_rar_filters)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       int i = adapter->vfs_allocated_count + 1;
+       /* Indicate to hardware the Address is Valid. */
+       rar_high |= E1000_RAH_AV;
 
-       if ((i + entry_count) < total_rar_filters)
-               total_rar_filters = i + entry_count;
+       if (hw->mac.type == e1000_82575)
+               rar_high |= E1000_RAH_POOL_1 * qsel;
+       else
+               rar_high |= E1000_RAH_POOL_1 << qsel;
 
-       for (; i < total_rar_filters; i++)
-               igb_set_rah_pool(hw, adapter->vfs_allocated_count, i);
+       wr32(E1000_RAL(index), rar_low);
+       wrfl();
+       wr32(E1000_RAH(index), rar_high);
+       wrfl();
 }
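A worked example of the packing, with an invented address (constants as defined
in e1000_defines.h; E1000_RAH_AV is bit 31):

        /* addr = 02:1b:21:aa:bb:cc, qsel = 2 (invented values)
         *
         * rar_low   = 0xaa211b02          addr[3..0], little endian
         * rar_high  = 0x0000ccbb          addr[5..4]
         * rar_high |= E1000_RAH_AV        bit 31: entry is valid
         *
         * 82576+: rar_high |= E1000_RAH_POOL_1 << 2  pool field is a bitmask,
         *                                            one bit per VMDq pool
         * 82575:  rar_high |= E1000_RAH_POOL_1 * 2   pool field holds a number
         */

The wrfl() between the two writes presumably forces RAL to post before RAH sets
the valid bit, so hardware never matches against a half-written entry.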
 
 static int igb_set_vf_mac(struct igb_adapter *adapter,
                           int vf, unsigned char *mac_addr)
 {
        struct e1000_hw *hw = &adapter->hw;
-       int rar_entry = vf + 1; /* VF MAC addresses start at entry 1 */
-
-       igb_rar_set(hw, mac_addr, rar_entry);
+       /* VF MAC addresses start at the end of the receive address
+        * registers and move towards the first, so a collision should
+        * not be possible */
+       int rar_entry = hw->mac.rar_entry_count - (vf + 1);
 
        memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
 
-       igb_set_rah_pool(hw, vf, rar_entry);
+       igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
 
        return 0;
 }
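Allocating VF entries from the top (vf 0 takes entry rar_entry_count - 1, vf 1
the entry below it, and so on) while the PF's filters fill the table from entry
0 upward means the two can only meet if the table is genuinely exhausted; on
82576, which has 24 receive address registers if memory serves, seven VFs
occupy entries 17 through 23.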
@@ -5398,89 +5693,4 @@ static void igb_vmm_control(struct igb_adapter *adapter)
        igb_vmdq_set_replication_pf(hw, true);
 }
 
-#ifdef CONFIG_PCI_IOV
-static ssize_t igb_show_num_vfs(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       struct igb_adapter *adapter = netdev_priv(to_net_dev(dev));
-
-       return sprintf(buf, "%d\n", adapter->vfs_allocated_count);
-}
-
-static ssize_t igb_set_num_vfs(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
-{
-       struct net_device *netdev = to_net_dev(dev);
-       struct igb_adapter *adapter = netdev_priv(netdev);
-       struct e1000_hw *hw = &adapter->hw;
-       struct pci_dev *pdev = adapter->pdev;
-       unsigned int num_vfs, i;
-       unsigned char mac_addr[ETH_ALEN];
-       int err;
-
-       sscanf(buf, "%u", &num_vfs);
-
-       if (num_vfs > 7)
-               num_vfs = 7;
-
-       /* value unchanged do nothing */
-       if (num_vfs == adapter->vfs_allocated_count)
-               return count;
-
-       if (netdev->flags & IFF_UP)
-               igb_close(netdev);
-
-       igb_reset_interrupt_capability(adapter);
-       igb_free_queues(adapter);
-       adapter->tx_ring = NULL;
-       adapter->rx_ring = NULL;
-       adapter->vfs_allocated_count = 0;
-
-       /* reclaim resources allocated to VFs since we are changing count */
-       if (adapter->vf_data) {
-               /* disable iov and allow time for transactions to clear */
-               pci_disable_sriov(pdev);
-               msleep(500);
-
-               kfree(adapter->vf_data);
-               adapter->vf_data = NULL;
-               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
-               msleep(100);
-               dev_info(&pdev->dev, "IOV Disabled\n");
-       }
-
-       if (num_vfs) {
-               adapter->vf_data = kcalloc(num_vfs,
-                                          sizeof(struct vf_data_storage),
-                                          GFP_KERNEL);
-               if (!adapter->vf_data) {
-                       dev_err(&pdev->dev, "Could not allocate VF private "
-                               "data - IOV enable failed\n");
-               } else {
-                       err = pci_enable_sriov(pdev, num_vfs);
-                       if (!err) {
-                               adapter->vfs_allocated_count = num_vfs;
-                               dev_info(&pdev->dev, "%d vfs allocated\n", num_vfs);
-                               for (i = 0; i < adapter->vfs_allocated_count; i++) {
-                                       random_ether_addr(mac_addr);
-                                       igb_set_vf_mac(adapter, i, mac_addr);
-                               }
-                       } else {
-                               kfree(adapter->vf_data);
-                               adapter->vf_data = NULL;
-                       }
-               }
-       }
-
-       igb_set_interrupt_capability(adapter);
-       igb_alloc_queues(adapter);
-       igb_reset(adapter);
-
-       if (netdev->flags & IFF_UP)
-               igb_open(netdev);
-
-       return count;
-}
-#endif /* CONFIG_PCI_IOV */
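The sysfs num_vfs knob removed above reconfigured SR-IOV at runtime by tearing
the whole device down and rebuilding it; its replacement in igb of this era is,
as far as I can tell, the max_vfs module parameter (modprobe igb max_vfs=7),
which fixes the VF count once at load time.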
 /* igb_main.c */