diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 8c27e0a..8784c05 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007 Intel Corporation. + Copyright(c) 2007-2009 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -48,12 +49,12 @@ #endif #include "igb.h" -#define DRV_VERSION "1.2.45-k2" +#define DRV_VERSION "1.3.16-k2" char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = "Copyright (c) 2008 Intel Corporation."; +static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, @@ -61,8 +62,10 @@ static const struct e1000_info *igb_info_tbl[] = { static struct pci_device_id igb_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, @@ -105,7 +108,6 @@ static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); static irqreturn_t igb_msix_rx(int irq, void *); static irqreturn_t igb_msix_tx(int irq, void *); -static int igb_clean_rx_ring_msix(struct napi_struct *, int); #ifdef CONFIG_IGB_DCA static void igb_update_rx_dca(struct igb_ring *); static void igb_update_tx_dca(struct igb_ring *); @@ -122,9 +124,19 @@ static void igb_vlan_rx_register(struct net_device *, struct vlan_group *); static void igb_vlan_rx_add_vid(struct net_device *, u16); static void igb_vlan_rx_kill_vid(struct net_device *, u16); static void igb_restore_vlan(struct igb_adapter *); +static void igb_ping_all_vfs(struct igb_adapter *); +static void igb_msg_task(struct igb_adapter *); +static int igb_rcv_msg_from_vf(struct igb_adapter *, u32); +static inline void igb_set_rah_pool(struct e1000_hw *, int , int); +static void igb_set_mc_list_pools(struct igb_adapter *, int, u16); +static void igb_vmm_control(struct igb_adapter *); +static inline void igb_set_vmolr(struct e1000_hw *, int); +static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int); +static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *); +static void igb_restore_vf_multicasts(struct igb_adapter *adapter); -static int igb_suspend(struct pci_dev *, pm_message_t); #ifdef CONFIG_PM +static int igb_suspend(struct pci_dev *, pm_message_t); static int igb_resume(struct pci_dev *); #endif static void igb_shutdown(struct
pci_dev *); @@ -136,11 +148,16 @@ static struct notifier_block dca_notifier = { .priority = 0 }; #endif - #ifdef CONFIG_NET_POLL_CONTROLLER /* for netdump / net console */ static void igb_netpoll(struct net_device *); #endif +#ifdef CONFIG_PCI_IOV +static unsigned int max_vfs = 0; +module_param(max_vfs, uint, 0); +MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " + "per physical function"); +#endif /* CONFIG_PCI_IOV */ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, pci_channel_state_t); @@ -175,6 +192,54 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +/** + * Scale the NIC clock cycle by a large factor so that + * relatively small clock corrections can be added or + * subtracted at each clock tick. The drawbacks of a + * large factor are a) that the clock register overflows + * more quickly (not such a big deal) and b) that the + * increment per tick has to fit into 24 bits. + * + * Note that + * TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * + * IGB_TSYNC_SCALE + * TIMINCA += TIMINCA * adjustment [ppm] / 1e9 + * + * The base scale factor is intentionally a power of two + * so that the division in %struct timecounter can be done with + * a shift. + */ +#define IGB_TSYNC_SHIFT (19) +#define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT) +#define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS (16) + +#if (IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE) >= (1<<24) +# error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA +#endif + +/** + * igb_read_clock - read raw cycle counter (to be used by time counter) + */ +static cycle_t igb_read_clock(const struct cyclecounter *tc) +{ + struct igb_adapter *adapter = + container_of(tc, struct igb_adapter, cycles); + struct e1000_hw *hw = &adapter->hw; + u64 stamp; + + stamp = rd32(E1000_SYSTIML); + stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL; + + return stamp; +} + #ifdef DEBUG /** * igb_get_hw_dev_name - return device name string @@ -185,9 +250,44 @@ char *igb_get_hw_dev_name(struct e1000_hw *hw) struct igb_adapter *adapter = hw->back; return adapter->netdev->name; } + +/** + * igb_get_time_str - format current NIC and system time as string + */ +static char *igb_get_time_str(struct igb_adapter *adapter, + char buffer[160]) +{ + cycle_t hw = adapter->cycles.read(&adapter->cycles); + struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock)); + struct timespec sys; + struct timespec delta; + getnstimeofday(&sys); + + delta = timespec_sub(nic, sys); + + sprintf(buffer, + "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns", + hw, + (long)nic.tv_sec, nic.tv_nsec, + (long)sys.tv_sec, sys.tv_nsec, + (long)delta.tv_sec, delta.tv_nsec); + + return buffer; +} #endif /** + * igb_desc_unused - calculate if we have unused descriptors + **/ +static int igb_desc_unused(struct igb_ring *ring) +{ + if (ring->next_to_clean > ring->next_to_use) + return ring->next_to_clean - ring->next_to_use - 1; + + return ring->count + ring->next_to_clean - ring->next_to_use - 1; +} + +/** * igb_init_module - Driver Registration Routine * * igb_init_module is the first routine called when the driver is @@ -240,6 +340,7 @@ module_exit(igb_exit_module); static void igb_cache_ring_register(struct igb_adapter *adapter) { int i; + unsigned int rbase_offset = adapter->vfs_allocated_count; switch (adapter->hw.mac.type) { case e1000_82576: @@ -249,9 +350,11 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) * and continue consuming queues in the same sequence */ for (i = 0; i < adapter->num_rx_queues; i++) -
adapter->rx_ring[i].reg_idx = Q_IDX_82576(i); + adapter->rx_ring[i].reg_idx = rbase_offset + + Q_IDX_82576(i); for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i].reg_idx = Q_IDX_82576(i); + adapter->tx_ring[i].reg_idx = rbase_offset + + Q_IDX_82576(i); break; case e1000_82575: default: @@ -316,6 +419,9 @@ static void igb_free_queues(struct igb_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) netif_napi_del(&adapter->rx_ring[i].napi); + adapter->num_rx_queues = 0; + adapter->num_tx_queues = 0; + kfree(adapter->tx_ring); kfree(adapter->rx_ring); } @@ -351,7 +457,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, a vector number along with a "valid" bit. Sadly, the layout of the table is somewhat counterintuitive. */ if (rx_queue > IGB_N0_QUEUE) { - index = (rx_queue >> 1); + index = (rx_queue >> 1) + adapter->vfs_allocated_count; ivar = array_rd32(E1000_IVAR0, index); if (rx_queue & 0x1) { /* vector goes into third byte of register */ @@ -366,7 +472,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, array_wr32(E1000_IVAR0, index, ivar); } if (tx_queue > IGB_N0_QUEUE) { - index = (tx_queue >> 1); + index = (tx_queue >> 1) + adapter->vfs_allocated_count; ivar = array_rd32(E1000_IVAR0, index); if (tx_queue & 0x1) { /* vector goes into high byte of register */ @@ -404,7 +510,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) /* Turn on MSI-X capability first, or our settings * won't stick. And it will take days to debug. */ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | - E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_PBA | E1000_GPIE_EIAME | E1000_GPIE_NSICR); for (i = 0; i < adapter->num_tx_queues; i++) { @@ -503,9 +609,6 @@ static int igb_request_msix(struct igb_adapter *adapter) goto out; ring->itr_register = E1000_EITR(0) + (vector << 2); ring->itr_val = adapter->itr; - /* overwrite the poll routine for MSIX, we've already done - * netif_napi_add */ - ring->napi.poll = &igb_clean_rx_ring_msix; vector++; } @@ -567,6 +670,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter) /* If we can't do MSI-X, try MSI */ msi_only: +#ifdef CONFIG_PCI_IOV + /* disable SR-IOV for non MSI-X configurations */ + if (adapter->vf_data) { + struct e1000_hw *hw = &adapter->hw; + /* disable iov and allow time for transactions to clear */ + pci_disable_sriov(adapter->pdev); + msleep(500); + + kfree(adapter->vf_data); + adapter->vf_data = NULL; + wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + msleep(100); + dev_info(&adapter->pdev->dev, "IOV Disabled\n"); + } +#endif adapter->num_rx_queues = 1; adapter->num_tx_queues = 1; if (!pci_enable_msi(adapter->pdev)) @@ -689,7 +807,10 @@ static void igb_irq_enable(struct igb_adapter *adapter) wr32(E1000_EIAC, adapter->eims_enable_mask); wr32(E1000_EIAM, adapter->eims_enable_mask); wr32(E1000_EIMS, adapter->eims_enable_mask); - wr32(E1000_IMS, E1000_IMS_LSC); + if (adapter->vfs_allocated_count) + wr32(E1000_MBVFIMR, 0xFF); + wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB | + E1000_IMS_DOUTSYNC)); } else { wr32(E1000_IMS, IMS_ENABLE_MASK); wr32(E1000_IAM, IMS_ENABLE_MASK); @@ -780,12 +901,12 @@ static void igb_configure(struct igb_adapter *adapter) igb_rx_fifo_flush_82575(&adapter->hw); - /* call IGB_DESC_UNUSED which always leaves + /* call igb_desc_unused which always leaves * at least 1 descriptor unused to make sure * next_to_use != next_to_clean */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = &adapter->rx_ring[i]; - 
igb_alloc_rx_buffers_adv(ring, IGB_DESC_UNUSED(ring)); + igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring)); } @@ -813,10 +934,16 @@ int igb_up(struct igb_adapter *adapter) if (adapter->msix_entries) igb_configure_msix(adapter); + igb_vmm_control(adapter); + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); + igb_set_vmolr(hw, adapter->vfs_allocated_count); + /* Clear any pending interrupts. */ rd32(E1000_ICR); igb_irq_enable(adapter); + netif_tx_start_all_queues(adapter->netdev); + /* Fire a link change interrupt to start the watchdog. */ wr32(E1000_ICS, E1000_ICS_LSC); return 0; @@ -858,6 +985,10 @@ void igb_down(struct igb_adapter *adapter) netdev->tx_queue_len = adapter->tx_queue_len; netif_carrier_off(netdev); + + /* record the stats before reset*/ + igb_update_stats(adapter); + adapter->link_speed = 0; adapter->link_duplex = 0; @@ -888,11 +1019,14 @@ void igb_reset(struct igb_adapter *adapter) /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. */ - if (mac->type != e1000_82576) { - pba = E1000_PBA_34K; - } - else { + switch (mac->type) { + case e1000_82576: pba = E1000_PBA_64K; + break; + case e1000_82575: + default: + pba = E1000_PBA_34K; + break; } if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && @@ -914,7 +1048,7 @@ void igb_reset(struct igb_adapter *adapter) /* the tx fifo also stores 16 bytes of information about the tx * but don't include ethernet FCS because hardware appends it */ min_tx_space = (adapter->max_frame_size + - sizeof(struct e1000_tx_desc) - + sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN) * 2; min_tx_space = ALIGN(min_tx_space, 1024); min_tx_space >>= 10; @@ -958,6 +1092,20 @@ void igb_reset(struct igb_adapter *adapter) fc->send_xon = 1; fc->type = fc->original_type; + /* disable receive for all VFs and wait one second */ + if (adapter->vfs_allocated_count) { + int i; + for (i = 0 ; i < adapter->vfs_allocated_count; i++) + adapter->vf_data[i].clear_to_send = false; + + /* ping all the active vfs to let them know we are going down */ + igb_ping_all_vfs(adapter); + + /* disable transmits and receives */ + wr32(E1000_VFRE, 0); + wr32(E1000_VFTE, 0); + } + /* Allow time for pending master requests to run */ adapter->hw.mac.ops.reset_hw(&adapter->hw); wr32(E1000_WUC, 0); @@ -1010,11 +1158,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, struct net_device *netdev; struct igb_adapter *adapter; struct e1000_hw *hw; - struct pci_dev *us_dev; const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; unsigned long mmio_start, mmio_len; - int i, err, pci_using_dac, pos; - u16 eeprom_data = 0, state = 0; + int err, pci_using_dac; + u16 eeprom_data = 0; u16 eeprom_apme_mask = IGB_EEPROM_APME; u32 part_num; @@ -1023,15 +1170,15 @@ static int __devinit igb_probe(struct pci_dev *pdev, return err; pci_using_dac = 0; - err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (!err) { - err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (!err) pci_using_dac = 1; } else { - err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "No usable DMA " "configuration, aborting\n"); @@ -1040,27 +1187,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, } } - /* 82575 requires that the pci-e link partner disable 
the L0s state */ - switch (pdev->device) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - us_dev = pdev->bus->self; - pos = pci_find_capability(us_dev, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(us_dev, pos + PCI_EXP_LNKCTL, - &state); - state &= ~PCIE_LINK_STATE_L0S; - pci_write_config_word(us_dev, pos + PCI_EXP_LNKCTL, - state); - dev_info(&pdev->dev, - "Disabling ASPM L0s upstream switch port %s\n", - pci_name(us_dev)); - } - default: - break; - } - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM), igb_driver_name); @@ -1078,7 +1204,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, pci_save_state(pdev); err = -ENOMEM; - netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), IGB_MAX_TX_QUEUES); + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_ABS_MAX_TX_QUEUES); if (!netdev) goto err_alloc_etherdev; @@ -1096,8 +1223,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, mmio_len = pci_resource_len(pdev, 0); err = -EIO; - adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); - if (!adapter->hw.hw_addr) + hw->hw_addr = ioremap(mmio_start, mmio_len); + if (!hw->hw_addr) goto err_ioremap; netdev->netdev_ops = &igb_netdev_ops; @@ -1125,8 +1252,49 @@ static int __devinit igb_probe(struct pci_dev *pdev, /* Initialize skew-specific constants */ err = ei->get_invariants(hw); if (err) - goto err_hw_init; + goto err_sw_init; + +#ifdef CONFIG_PCI_IOV + /* since iov functionality isn't critical to base device function we + * can accept failure. If it fails we don't allow iov to be enabled */ + if (hw->mac.type == e1000_82576) { + /* 82576 supports a maximum of 7 VFs in addition to the PF */ + unsigned int num_vfs = (max_vfs > 7) ? 
7 : max_vfs; + int i; + unsigned char mac_addr[ETH_ALEN]; + + if (num_vfs) { + adapter->vf_data = kcalloc(num_vfs, + sizeof(struct vf_data_storage), + GFP_KERNEL); + if (!adapter->vf_data) { + dev_err(&pdev->dev, + "Could not allocate VF private data - " + "IOV enable failed\n"); + } else { + err = pci_enable_sriov(pdev, num_vfs); + if (!err) { + adapter->vfs_allocated_count = num_vfs; + dev_info(&pdev->dev, + "%d vfs allocated\n", + num_vfs); + for (i = 0; + i < adapter->vfs_allocated_count; + i++) { + random_ether_addr(mac_addr); + igb_set_vf_mac(adapter, i, + mac_addr); + } + } else { + kfree(adapter->vf_data); + adapter->vf_data = NULL; + } + } + } + } +#endif + /* setup the private structure */ err = igb_sw_init(adapter); if (err) goto err_sw_init; @@ -1167,9 +1335,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; -#ifdef CONFIG_IGB_LRO netdev->features |= NETIF_F_GRO; -#endif netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; @@ -1179,6 +1345,9 @@ static int __devinit igb_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + if (adapter->hw.mac.type == e1000_82576) + netdev->features |= NETIF_F_SCTP_CSUM; + adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw); /* before reading the NVM, reset the controller to put the device in a @@ -1205,25 +1374,15 @@ static int __devinit igb_probe(struct pci_dev *pdev, goto err_eeprom; } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = &igb_watchdog; - adapter->watchdog_timer.data = (unsigned long) adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = &igb_update_phy_info; - adapter->phy_info_timer.data = (unsigned long) adapter; + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); - /* Initialize link & ring properties that are user-changeable */ - adapter->tx_ring->count = 256; - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i].count = adapter->tx_ring->count; - adapter->rx_ring->count = 256; - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i].count = adapter->rx_ring->count; - + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; hw->phy.autoneg_advertised = 0x2f; @@ -1231,20 +1390,19 @@ static int __devinit igb_probe(struct pci_dev *pdev, hw->fc.original_type = e1000_fc_default; hw->fc.type = e1000_fc_default; - adapter->itr_setting = 3; + adapter->itr_setting = IGB_DEFAULT_ITR; adapter->itr = IGB_START_ITR; igb_validate_mdi_setting(hw); - adapter->rx_csum = 1; - /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM, * enable the ACPI Magic Packet filter */ - if (hw->bus.func == 0 || - hw->device_id == E1000_DEV_ID_82575EB_COPPER) + if (hw->bus.func == 0) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + else if (hw->bus.func == 1) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); if (eeprom_data & eeprom_apme_mask) adapter->eeprom_wol |= E1000_WUFC_MAG; @@ -1264,6 +1422,16 @@ static int __devinit igb_probe(struct pci_dev *pdev, if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->eeprom_wol = 0; break; + case E1000_DEV_ID_82576_QUAD_COPPER: + /* if quad port adapter, disable WoL on 
all but port A */ + if (global_quad_port_a != 0) + adapter->eeprom_wol = 0; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; } /* initialize the wol settings based on the eeprom settings */ @@ -1277,35 +1445,87 @@ static int __devinit igb_probe(struct pci_dev *pdev, * driver. */ igb_get_hw_control(adapter); - /* tell the stack to leave us alone until igb_open() is called */ - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); if (err) goto err_register; + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + #ifdef CONFIG_IGB_DCA if (dca_add_requester(&pdev->dev) == 0) { adapter->flags |= IGB_FLAG_DCA_ENABLED; dev_info(&pdev->dev, "DCA enabled\n"); /* Always use CB2 mode, difference is masked * in the CB driver. */ - wr32(E1000_DCA_CTRL, 2); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); igb_setup_dca(adapter); } #endif + /* + * Initialize hardware timer: we keep it running just in case + * that some program needs it later on. + */ + memset(&adapter->cycles, 0, sizeof(adapter->cycles)); + adapter->cycles.read = igb_read_clock; + adapter->cycles.mask = CLOCKSOURCE_MASK(64); + adapter->cycles.mult = 1; + adapter->cycles.shift = IGB_TSYNC_SHIFT; + wr32(E1000_TIMINCA, + (1<<24) | + IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE); +#if 0 + /* + * Avoid rollover while we initialize by resetting the time counter. + */ + wr32(E1000_SYSTIML, 0x00000000); + wr32(E1000_SYSTIMH, 0x00000000); +#else + /* + * Set registers so that rollover occurs soon to test this. + */ + wr32(E1000_SYSTIML, 0x00000000); + wr32(E1000_SYSTIMH, 0xFF800000); +#endif + wrfl(); + timecounter_init(&adapter->clock, + &adapter->cycles, + ktime_to_ns(ktime_get_real())); + + /* + * Synchronize our NIC clock against system wall clock. NIC + * time stamp reading requires ~3us per sample, each sample + * was pretty stable even under load => only require 10 + * samples for each offset comparison. + */ + memset(&adapter->compare, 0, sizeof(adapter->compare)); + adapter->compare.source = &adapter->clock; + adapter->compare.target = ktime_get_real; + adapter->compare.num_samples = 10; + timecompare_update(&adapter->compare, 0); + +#ifdef DEBUG + { + char buffer[160]; + printk(KERN_DEBUG + "igb: %s: hw %p initialized timer\n", + igb_get_time_str(adapter, buffer), + &adapter->hw); + } +#endif + dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", netdev->name, ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : "unknown"), - ((hw->bus.width == e1000_bus_width_pcie_x4) - ? "Width x4" : (hw->bus.width == e1000_bus_width_pcie_x1) - ? "Width x1" : "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? 
"Width x1" : + "unknown"), netdev->dev_addr); igb_read_part_num(hw, &part_num); @@ -1331,7 +1551,6 @@ err_eeprom: igb_free_queues(adapter); err_sw_init: -err_hw_init: iounmap(hw->hw_addr); err_ioremap: free_netdev(netdev); @@ -1357,9 +1576,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_IGB_DCA struct e1000_hw *hw = &adapter->hw; -#endif int err; /* flush_scheduled work may reschedule our watchdog task, so @@ -1375,7 +1592,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) dev_info(&pdev->dev, "DCA disabled\n"); dca_remove_requester(&pdev->dev); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - wr32(E1000_DCA_CTRL, 1); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); } #endif @@ -1392,9 +1609,23 @@ static void __devexit igb_remove(struct pci_dev *pdev) igb_free_queues(adapter); - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); +#ifdef CONFIG_PCI_IOV + /* reclaim resources allocated to VFs */ + if (adapter->vf_data) { + /* disable iov and allow time for transactions to clear */ + pci_disable_sriov(pdev); + msleep(500); + + kfree(adapter->vf_data); + adapter->vf_data = NULL; + wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + msleep(100); + dev_info(&pdev->dev, "IOV Disabled\n"); + } +#endif + iounmap(hw->hw_addr); + if (hw->flash_address) + iounmap(hw->flash_address); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); @@ -1470,6 +1701,8 @@ static int igb_open(struct net_device *netdev) if (test_bit(__IGB_TESTING, &adapter->state)) return -EBUSY; + netif_carrier_off(netdev); + /* allocate transmit descriptors */ err = igb_setup_all_tx_resources(adapter); if (err) @@ -1493,6 +1726,10 @@ static int igb_open(struct net_device *netdev) * clean_rx handler before we do so. 
*/ igb_configure(adapter); + igb_vmm_control(adapter); + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); + igb_set_vmolr(hw, adapter->vfs_allocated_count); + err = igb_request_irq(adapter); if (err) goto err_req_irq; @@ -1568,7 +1805,6 @@ static int igb_close(struct net_device *netdev) * * Return 0 on success, negative on failure **/ - int igb_setup_tx_resources(struct igb_adapter *adapter, struct igb_ring *tx_ring) { @@ -1582,7 +1818,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter, memset(tx_ring->buffer_info, 0, size); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); + tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, @@ -1629,7 +1865,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) for (i = 0; i < IGB_MAX_TX_QUEUES; i++) { r_idx = i % adapter->num_tx_queues; adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx]; - } + } return err; } @@ -1648,13 +1884,13 @@ static void igb_configure_tx(struct igb_adapter *adapter) int i, j; for (i = 0; i < adapter->num_tx_queues; i++) { - struct igb_ring *ring = &(adapter->tx_ring[i]); + struct igb_ring *ring = &adapter->tx_ring[i]; j = ring->reg_idx; wr32(E1000_TDLEN(j), - ring->count * sizeof(struct e1000_tx_desc)); + ring->count * sizeof(union e1000_adv_tx_desc)); tdba = ring->dma; wr32(E1000_TDBAL(j), - tdba & 0x00000000ffffffffULL); + tdba & 0x00000000ffffffffULL); wr32(E1000_TDBAH(j), tdba >> 32); ring->head = E1000_TDH(j); @@ -1674,12 +1910,11 @@ static void igb_configure_tx(struct igb_adapter *adapter) wr32(E1000_DCA_TXCTRL(j), txctrl); } - - - /* Use the default values for the Tx Inter Packet Gap (IPG) timer */ + /* disable queue 0 to prevent tail bump w/o re-configuration */ + if (adapter->vfs_allocated_count) + wr32(E1000_TXDCTL(0), 0); /* Program the Transmit Control Register */ - tctl = rd32(E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | @@ -1703,7 +1938,6 @@ static void igb_configure_tx(struct igb_adapter *adapter) * * Returns 0 on success, negative on failure **/ - int igb_setup_rx_resources(struct igb_adapter *adapter, struct igb_ring *rx_ring) { @@ -1784,13 +2018,13 @@ static void igb_setup_rctl(struct igb_adapter *adapter) rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* * enable stripping of CRC. It's unlikely this will break BMC * redirection as it did with e1000. Newer features require * that the HW strips the CRC. - */ + */ rctl |= E1000_RCTL_SECRC; /* @@ -1834,6 +2068,30 @@ static void igb_setup_rctl(struct igb_adapter *adapter) srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; } + /* Attention!!! For SR-IOV PF driver operations you must enable + * queue drop for all VF and PF queues to prevent head of line blocking + * if an un-trusted VF does not provide descriptors to hardware. 
+ */ + if (adapter->vfs_allocated_count) { + u32 vmolr; + + j = adapter->rx_ring[0].reg_idx; + + /* set all queue drop enable bits */ + wr32(E1000_QDE, ALL_QUEUES); + srrctl |= E1000_SRRCTL_DROP_EN; + + /* disable queue 0 to prevent tail write w/o re-config */ + wr32(E1000_RXDCTL(0), 0); + + vmolr = rd32(E1000_VMOLR(j)); + if (rctl & E1000_RCTL_LPE) + vmolr |= E1000_VMOLR_LPE; + if (adapter->num_rx_queues > 0) + vmolr |= E1000_VMOLR_RSSE; + wr32(E1000_VMOLR(j), vmolr); + } + for (i = 0; i < adapter->num_rx_queues; i++) { j = adapter->rx_ring[i].reg_idx; wr32(E1000_SRRCTL(j), srrctl); @@ -1843,6 +2101,54 @@ static void igb_setup_rctl(struct igb_adapter *adapter) } /** + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure + * + * Configure maximum receivable packet size. + **/ +static void igb_rlpml_set(struct igb_adapter *adapter) +{ + u32 max_frame_size = adapter->max_frame_size; + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + + if (adapter->vlgrp) + max_frame_size += VLAN_TAG_SIZE; + + /* if vfs are enabled we set RLPML to the largest possible request + * size and set the VMOLR RLPML to the size we need */ + if (pf_id) { + igb_set_vf_rlpml(adapter, max_frame_size, pf_id); + max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE; + } + + wr32(E1000_RLPML, max_frame_size); +} + +/** + * igb_configure_vt_default_pool - Configure VT default pool + * @adapter: board private structure + * + * Configure the default pool + **/ +static void igb_configure_vt_default_pool(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + u32 vtctl; + + /* not in sr-iov mode - do nothing */ + if (!pf_id) + return; + + vtctl = rd32(E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | + E1000_VT_CTL_DISABLE_DEF_POOL); + vtctl |= pf_id << E1000_VT_CTL_DEFAULT_POOL_SHIFT; + wr32(E1000_VT_CTL, vtctl); +} + +/** * igb_configure_rx - Configure receive Unit after Reset * @adapter: board private structure * @@ -1854,7 +2160,7 @@ static void igb_configure_rx(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum; u32 rxdctl; - int i, j; + int i; /* disable receives while setting up the descriptors */ rctl = rd32(E1000_RCTL); @@ -1868,14 +2174,14 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *ring = &(adapter->rx_ring[i]); - j = ring->reg_idx; + struct igb_ring *ring = &adapter->rx_ring[i]; + int j = ring->reg_idx; rdba = ring->dma; wr32(E1000_RDBAL(j), - rdba & 0x00000000ffffffffULL); + rdba & 0x00000000ffffffffULL); wr32(E1000_RDBAH(j), rdba >> 32); wr32(E1000_RDLEN(j), - ring->count * sizeof(union e1000_adv_rx_desc)); + ring->count * sizeof(union e1000_adv_rx_desc)); ring->head = E1000_RDH(j); ring->tail = E1000_RDT(j); @@ -1913,7 +2219,10 @@ static void igb_configure_rx(struct igb_adapter *adapter) writel(reta.dword, hw->hw_addr + E1000_RETA(0) + (j & ~3)); } - mrqc = E1000_MRQC_ENABLE_RSS_4Q; + if (adapter->vfs_allocated_count) + mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + else + mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* Fill out hash function seeds */ for (j = 0; j < 10; j++) @@ -1928,37 +2237,28 @@ static void igb_configure_rx(struct igb_adapter *adapter) mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); - wr32(E1000_MRQC, mrqc); + } else if 
(adapter->vfs_allocated_count) { + /* Enable multi-queue for sr-iov */ + wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ); + } - /* Multiqueue and raw packet checksumming are mutually - * exclusive. Note that this not the same as TCP/IP - * checksumming, which works fine. */ - rxcsum = rd32(E1000_RXCSUM); - rxcsum |= E1000_RXCSUM_PCSD; - wr32(E1000_RXCSUM, rxcsum); - } else { - /* Enable Receive Checksum Offload for TCP and UDP */ - rxcsum = rd32(E1000_RXCSUM); - if (adapter->rx_csum) { - rxcsum |= E1000_RXCSUM_TUOFL; + /* Enable Receive Checksum Offload for TCP and UDP */ + rxcsum = rd32(E1000_RXCSUM); + /* Disable raw packet checksumming */ + rxcsum |= E1000_RXCSUM_PCSD; - /* Enable IPv4 payload checksum for UDP fragments - * Must be used in conjunction with packet-split. */ - if (adapter->rx_ps_hdr_size) - rxcsum |= E1000_RXCSUM_IPPCSE; - } else { - rxcsum &= ~E1000_RXCSUM_TUOFL; - /* don't need to clear IPPCSE as it defaults to 0 */ - } - wr32(E1000_RXCSUM, rxcsum); - } + if (adapter->hw.mac.type == e1000_82576) + /* Enable Receive Checksum Offload for SCTP */ + rxcsum |= E1000_RXCSUM_CRCOFL; - if (adapter->vlgrp) - wr32(E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); - else - wr32(E1000_RLPML, adapter->max_frame_size); + /* Don't need to set TUOFL or IPOFL, they default to 1 */ + wr32(E1000_RXCSUM, rxcsum); + + /* Set the default pool for the PF's first queue */ + igb_configure_vt_default_pool(adapter); + + igb_rlpml_set(adapter); /* Enable Receives */ wr32(E1000_RCTL, rctl); @@ -2001,14 +2301,10 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter, struct igb_buffer *buffer_info) { - if (buffer_info->dma) { - pci_unmap_page(adapter->pdev, - buffer_info->dma, - buffer_info->length, - PCI_DMA_TODEVICE); - buffer_info->dma = 0; - } + buffer_info->dma = 0; if (buffer_info->skb) { + skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb, + DMA_TO_DEVICE); dev_kfree_skb_any(buffer_info->skb); buffer_info->skb = NULL; } @@ -2176,15 +2472,18 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) static int igb_set_mac(struct net_device *netdev, void *p) { struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - adapter->hw.mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + hw->mac.ops.rar_set(hw, hw->mac.addr, 0); + + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); return 0; } @@ -2204,7 +2503,7 @@ static void igb_set_multi(struct net_device *netdev) struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct dev_mc_list *mc_ptr; - u8 *mta_list; + u8 *mta_list = NULL; u32 rctl; int i; @@ -2225,17 +2524,15 @@ static void igb_set_multi(struct net_device *netdev) } wr32(E1000_RCTL, rctl); - if (!netdev->mc_count) { - /* nothing to program, so clear mc list */ - igb_update_mc_addr_list_82575(hw, NULL, 0, 1, - mac->rar_entry_count); - return; + if (netdev->mc_count) { + mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC); + if (!mta_list) { + dev_err(&adapter->pdev->dev, + "failed to allocate multicast filter list\n"); + return; + } } - mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC); - if (!mta_list) - return; - /* The shared function expects a packed array 
of only addresses. */ mc_ptr = netdev->mc_list; @@ -2245,8 +2542,13 @@ static void igb_set_multi(struct net_device *netdev) memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN); mc_ptr = mc_ptr->next; } - igb_update_mc_addr_list_82575(hw, mta_list, i, 1, - mac->rar_entry_count); + igb_update_mc_addr_list(hw, mta_list, i, + adapter->vfs_allocated_count + 1, + mac->rar_entry_count); + + igb_set_mc_list_pools(adapter, i, mac->rar_entry_count); + igb_restore_vf_multicasts(adapter); + kfree(mta_list); } @@ -2314,13 +2616,10 @@ static void igb_watchdog_task(struct work_struct *work) struct igb_adapter *adapter = container_of(work, struct igb_adapter, watchdog_task); struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; struct igb_ring *tx_ring = adapter->tx_ring; - struct e1000_mac_info *mac = &adapter->hw.mac; u32 link; u32 eics = 0; - s32 ret_val; int i; link = igb_has_link(adapter); @@ -2363,8 +2662,10 @@ static void igb_watchdog_task(struct work_struct *work) } netif_carrier_on(netdev); - netif_tx_wake_all_queues(netdev); + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -2377,7 +2678,10 @@ static void igb_watchdog_task(struct work_struct *work) printk(KERN_INFO "igb: %s NIC Link is Down\n", netdev->name); netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); + + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -2387,9 +2691,9 @@ static void igb_watchdog_task(struct work_struct *work) link_up: igb_update_stats(adapter); - mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; + hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; adapter->tpt_old = adapter->stats.tpt; - mac->collision_delta = adapter->stats.colc - adapter->colc_old; + hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old; adapter->colc_old = adapter->stats.colc; adapter->gorc = adapter->stats.gorc - adapter->gorc_old; @@ -2400,13 +2704,15 @@ link_up: igb_update_adaptive(&adapter->hw); if (!netif_carrier_ok(netdev)) { - if (IGB_DESC_UNUSED(tx_ring) + 1 < tx_ring->count) { + if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). 
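
The hang check just above leans on igb_desc_unused(), added near the top of this patch: the ring is circular, and one slot is deliberately kept empty so that next_to_use == next_to_clean always means "empty" rather than "full". A standalone sketch of the same arithmetic, with invented index values:

#include <stdio.h>

static int desc_unused(int count, int next_to_clean, int next_to_use)
{
	/* mirror of igb_desc_unused(): one slot is always sacrificed */
	if (next_to_clean > next_to_use)
		return next_to_clean - next_to_use - 1;
	return count + next_to_clean - next_to_use - 1;
}

int main(void)
{
	/* 256-entry ring: consumer at 250, producer at 10 -> 239 free */
	printf("%d\n", desc_unused(256, 250, 10));
	/* wrapped case: consumer at 10, producer at 250 -> 15 free */
	printf("%d\n", desc_unused(256, 10, 250));
	return 0;
}
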
*/ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); + /* return immediately since reset is imminent */ + return; } } @@ -2546,7 +2852,7 @@ static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting, if (bytes > 25000) { if (packets > 35) retval = low_latency; - } else if (bytes < 6000) { + } else if (bytes < 1500) { retval = low_latency; } break; @@ -2578,27 +2884,25 @@ static void igb_set_itr(struct igb_adapter *adapter) adapter->tx_itr, adapter->tx_ring->total_packets, adapter->tx_ring->total_bytes); - current_itr = max(adapter->rx_itr, adapter->tx_itr); } else { current_itr = adapter->rx_itr; } /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (adapter->itr_setting == 3 && - current_itr == lowest_latency) + if (adapter->itr_setting == 3 && current_itr == lowest_latency) current_itr = low_latency; switch (current_itr) { /* counts and packets in update_itr are dependent on these numbers */ case lowest_latency: - new_itr = 70000; + new_itr = 56; /* aka 70,000 ints/sec */ break; case low_latency: - new_itr = 20000; /* aka hwitr = ~200 */ + new_itr = 196; /* aka 20,000 ints/sec */ break; case bulk_latency: - new_itr = 4000; + new_itr = 980; /* aka 4,000 ints/sec */ break; default: break; @@ -2617,7 +2921,8 @@ set_itr_now: * by adding intermediate steps when interrupt rate is * increasing */ new_itr = new_itr > adapter->itr ? - min(adapter->itr + (new_itr >> 2), new_itr) : + max((new_itr * adapter->itr) / + (new_itr + (adapter->itr >> 2)), new_itr) : new_itr; /* Don't write the value here; it resets the adapter's * internal timer, and causes us to delay far longer than @@ -2626,7 +2931,7 @@ set_itr_now: * ends up being correct. */ adapter->itr = new_itr; - adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256); + adapter->rx_ring->itr_val = new_itr; adapter->rx_ring->set_itr = 1; } @@ -2638,6 +2943,7 @@ set_itr_now: #define IGB_TX_FLAGS_VLAN 0x00000002 #define IGB_TX_FLAGS_TSO 0x00000004 #define IGB_TX_FLAGS_IPV4 0x00000008 +#define IGB_TX_FLAGS_TSTAMP 0x00000010 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 @@ -2703,7 +3009,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter, mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT); - /* Context index must be unique per ring. */ + /* For 82575, context index must be unique per ring. */ if (adapter->flags & IGB_FLAG_NEED_CTX_IDX) mss_l4len_idx |= tx_ring->queue_index << 4; @@ -2748,16 +3054,31 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (skb->protocol) { + __be16 protocol; + + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + const struct vlan_ethhdr *vhdr = + (const struct vlan_ethhdr*)skb->data; + + protocol = vhdr->h_vlan_encapsulated_proto; + } else { + protocol = skb->protocol; + } + + switch (protocol) { case cpu_to_be16(ETH_P_IP): tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; if (ip_hdr(skb)->protocol == IPPROTO_TCP) tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; + else if (ip_hdr(skb)->protocol == IPPROTO_SCTP) + tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; case cpu_to_be16(ETH_P_IPV6): /* XXX what about other V6 headers?? 
*/ if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; + else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP) + tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; default: if (unlikely(net_ratelimit())) @@ -2773,6 +3094,8 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, if (adapter->flags & IGB_FLAG_NEED_CTX_IDX) context_desc->mss_l4len_idx = cpu_to_le32(tx_ring->queue_index << 4); + else + context_desc->mss_l4len_idx = 0; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; @@ -2785,8 +3108,6 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, return true; } - - return false; } @@ -2801,25 +3122,33 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, unsigned int len = skb_headlen(skb); unsigned int count = 0, i; unsigned int f; + dma_addr_t *map; i = tx_ring->next_to_use; + if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) { + dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); + return 0; + } + + map = skb_shinfo(skb)->dma_maps; + buffer_info = &tx_ring->buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len, - PCI_DMA_TODEVICE); + buffer_info->dma = map[count]; count++; - i++; - if (i == tx_ring->count) - i = 0; for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { struct skb_frag_struct *frag; + i++; + if (i == tx_ring->count) + i = 0; + frag = &skb_shinfo(skb)->frags[f]; len = frag->size; @@ -2828,19 +3157,10 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, buffer_info->length = len; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = pci_map_page(adapter->pdev, - frag->page, - frag->page_offset, - len, - PCI_DMA_TODEVICE); - + buffer_info->dma = map[count]; count++; - i++; - if (i == tx_ring->count) - i = 0; } - i = ((i == 0) ? tx_ring->count - 1 : i - 1); tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[first].next_to_watch = i; @@ -2863,6 +3183,9 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter, if (tx_flags & IGB_TX_FLAGS_VLAN) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + if (tx_flags & IGB_TX_FLAGS_TSTAMP) + cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP; + if (tx_flags & IGB_TX_FLAGS_TSO) { cmd_type_len |= E1000_ADVTXD_DCMD_TSE; @@ -2925,7 +3248,7 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, /* We need to check again in a case another CPU has just * made room available. */ - if (IGB_DESC_UNUSED(tx_ring) < size) + if (igb_desc_unused(tx_ring) < size) return -EBUSY; /* A reprieve! 
*/ @@ -2937,13 +3260,11 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, static int igb_maybe_stop_tx(struct net_device *netdev, struct igb_ring *tx_ring, int size) { - if (IGB_DESC_UNUSED(tx_ring) >= size) + if (igb_desc_unused(tx_ring) >= size) return 0; return __igb_maybe_stop_tx(netdev, tx_ring, size); } -#define TXD_USE_COUNT(S) (((S) >> (IGB_MAX_TXD_PWR)) + 1) - static int igb_xmit_frame_ring_adv(struct sk_buff *skb, struct net_device *netdev, struct igb_ring *tx_ring) @@ -2951,11 +3272,10 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, struct igb_adapter *adapter = netdev_priv(netdev); unsigned int first; unsigned int tx_flags = 0; - unsigned int len; u8 hdr_len = 0; + int count = 0; int tso = 0; - - len = skb_headlen(skb); + union skb_shared_tx *shtx; if (test_bit(__IGB_DOWN, &adapter->state)) { dev_kfree_skb_any(skb); @@ -2976,7 +3296,21 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, /* this is a hard error */ return NETDEV_TX_BUSY; } - skb_orphan(skb); + + /* + * TODO: check that there currently is no other packet with + * time stamping in the queue + * + * When doing time stamping, keep the connection to the socket + * a while longer: it is still needed by skb_hwtstamp_tx(), + * called either in igb_tx_hwtstamp() or by our caller when + * doing software time stamping. + */ + shtx = skb_tx(skb); + if (unlikely(shtx->hardware)) { + shtx->in_progress = 1; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + } if (adapter->vlgrp && vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; @@ -2987,7 +3321,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, tx_flags |= IGB_TX_FLAGS_IPV4; first = tx_ring->next_to_use; - tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags, &hdr_len) : 0; @@ -2998,18 +3331,27 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, if (tso) tx_flags |= IGB_TX_FLAGS_TSO; - else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags)) - if (skb->ip_summed == CHECKSUM_PARTIAL) - tx_flags |= IGB_TX_FLAGS_CSUM; - - igb_tx_queue_adv(adapter, tx_ring, tx_flags, - igb_tx_map_adv(adapter, tx_ring, skb, first), - skb->len, hdr_len); - - netdev->trans_start = jiffies; + else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags) && + (skb->ip_summed == CHECKSUM_PARTIAL)) + tx_flags |= IGB_TX_FLAGS_CSUM; - /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4); + /* + * count reflects descriptors mapped, if 0 then mapping error + * has occured and we need to rewind the descriptor queue + */ + count = igb_tx_map_adv(adapter, tx_ring, skb, first); + + if (count) { + igb_tx_queue_adv(adapter, tx_ring, tx_flags, count, + skb->len, hdr_len); + netdev->trans_start = jiffies; + /* Make sure there is space in the ring for the next send. 
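
The new_itr values picked in igb_set_itr() above (56, 196, 980) are raw EITR interval counts rather than interrupts-per-second, which is why the old conversion 1000000000 / (new_itr * 256) disappears from set_itr_now. Assuming the 82575/82576 EITR interval ticks in 256 ns units (an assumption stated here, not spelled out in the patch), the advertised rates check out:

#include <stdio.h>

int main(void)
{
	unsigned int itr_vals[] = { 56, 196, 980 };  /* from igb_set_itr() */

	for (int i = 0; i < 3; i++) {
		double interval_ns = itr_vals[i] * 256.0; /* assumed 256 ns step */
		printf("EITR=%u -> %.1f us -> ~%.0f ints/sec\n",
		       itr_vals[i], interval_ns / 1000.0, 1e9 / interval_ns);
	}
	return 0;
}

This prints roughly 70,000, 20,000 and 4,000 interrupts per second, matching the comments in the switch statement.
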
*/ + igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4); + } else { + dev_kfree_skb_any(skb); + tx_ring->buffer_info[first].time_stamp = 0; + tx_ring->next_to_use = first; + } return NETDEV_TX_OK; } @@ -3020,7 +3362,7 @@ static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev) struct igb_ring *tx_ring; int r_idx = 0; - r_idx = skb->queue_mapping & (IGB_MAX_TX_QUEUES - 1); + r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1); tx_ring = adapter->multi_tx_table[r_idx]; /* This goes back to the question of how to logically map a tx queue @@ -3042,8 +3384,8 @@ static void igb_tx_timeout(struct net_device *netdev) /* Do the reset outside of interrupt context */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); - wr32(E1000_EICS, adapter->eims_enable_mask & - ~(E1000_EIMS_TCP_TIMER | E1000_EIMS_OTHER)); + wr32(E1000_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); } static void igb_reset_task(struct work_struct *work) @@ -3061,8 +3403,7 @@ static void igb_reset_task(struct work_struct *work) * Returns the address of the device statistics structure. * The statistics are actually updated from the timer callback. **/ -static struct net_device_stats * -igb_get_stats(struct net_device *netdev) +static struct net_device_stats *igb_get_stats(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -3088,7 +3429,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } -#define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n"); return -EINVAL; @@ -3096,6 +3436,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) msleep(1); + /* igb_down has a dependency on max_frame_size */ adapter->max_frame_size = max_frame; if (netif_running(netdev)) @@ -3121,6 +3462,12 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) #else adapter->rx_buffer_len = PAGE_SIZE / 2; #endif + + /* if sr-iov is enabled we need to force buffer size to 1K or larger */ + if (adapter->vfs_allocated_count && + (adapter->rx_buffer_len < IGB_RXBUFFER_1024)) + adapter->rx_buffer_len = IGB_RXBUFFER_1024; + /* adjust allocation if LPE protects us, and we aren't using SBP */ if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE)) @@ -3265,8 +3612,7 @@ void igb_update_stats(struct igb_adapter *adapter) /* Phy Stats */ if (hw->phy.media_type == e1000_media_type_copper) { if ((adapter->link_speed == SPEED_1000) && - (!igb_read_phy_reg(hw, PHY_1000T_STATUS, - &phy_tmp))) { + (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; adapter->phy_stats.idle_errors += phy_tmp; } @@ -3278,7 +3624,6 @@ void igb_update_stats(struct igb_adapter *adapter) adapter->stats.mgpdc += rd32(E1000_MGTPDC); } - static irqreturn_t igb_msix_other(int irq, void *data) { struct net_device *netdev = data; @@ -3287,15 +3632,24 @@ static irqreturn_t igb_msix_other(int irq, void *data) u32 icr = rd32(E1000_ICR); /* reading ICR causes bit 31 of EICR to be cleared */ - if (!(icr & E1000_ICR_LSC)) - goto no_link_interrupt; - hw->mac.get_link_status = 1; - /* guard against interrupt when we're going down */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - -no_link_interrupt: - wr32(E1000_IMS, E1000_IMS_LSC); + + if(icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is 
out of sync */ + adapter->stats.doosync++; + } + + /* Check for a mailbox event */ + if (icr & E1000_ICR_VMMB) + igb_msg_task(adapter); + + if (icr & E1000_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB); wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; @@ -3311,6 +3665,7 @@ static irqreturn_t igb_msix_tx(int irq, void *data) if (adapter->flags & IGB_FLAG_DCA_ENABLED) igb_update_tx_dca(tx_ring); #endif + tx_ring->total_bytes = 0; tx_ring->total_packets = 0; @@ -3331,13 +3686,11 @@ static void igb_write_itr(struct igb_ring *ring) if ((ring->adapter->itr_setting & 3) && ring->set_itr) { switch (hw->mac.type) { case e1000_82576: - wr32(ring->itr_register, - ring->itr_val | + wr32(ring->itr_register, ring->itr_val | 0x80000000); break; default: - wr32(ring->itr_register, - ring->itr_val | + wr32(ring->itr_register, ring->itr_val | (ring->itr_val << 16)); break; } @@ -3378,11 +3731,11 @@ static void igb_update_rx_dca(struct igb_ring *rx_ring) dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); if (hw->mac.type == e1000_82576) { dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; - dca_rxctrl |= dca_get_tag(cpu) << + dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << E1000_DCA_RXCTRL_CPUID_SHIFT; } else { dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; - dca_rxctrl |= dca_get_tag(cpu); + dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); } dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; @@ -3405,11 +3758,11 @@ static void igb_update_tx_dca(struct igb_ring *tx_ring) dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); if (hw->mac.type == e1000_82576) { dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; - dca_txctrl |= dca_get_tag(cpu) << + dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << E1000_DCA_TXCTRL_CPUID_SHIFT; } else { dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; - dca_txctrl |= dca_get_tag(cpu); + dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); } dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; wr32(E1000_DCA_TXCTRL(q), dca_txctrl); @@ -3449,7 +3802,7 @@ static int __igb_notify_dca(struct device *dev, void *data) break; /* Always use CB2 mode, difference is masked * in the CB driver. 
*/ - wr32(E1000_DCA_CTRL, 2); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); if (dca_add_requester(dev) == 0) { adapter->flags |= IGB_FLAG_DCA_ENABLED; dev_info(&adapter->pdev->dev, "DCA enabled\n"); @@ -3464,7 +3817,7 @@ static int __igb_notify_dca(struct device *dev, void *data) dca_remove_requester(dev); dev_info(&adapter->pdev->dev, "DCA disabled\n"); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - wr32(E1000_DCA_CTRL, 1); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); } break; } @@ -3484,20 +3837,341 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, } #endif /* CONFIG_IGB_DCA */ -/** - * igb_intr_msi - Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure - **/ -static irqreturn_t igb_intr_msi(int irq, void *data) +static void igb_ping_all_vfs(struct igb_adapter *adapter) { - struct net_device *netdev = data; - struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - /* read ICR disables interrupts using IAM */ - u32 icr = rd32(E1000_ICR); + u32 ping; + int i; - igb_write_itr(adapter->rx_ring); + for (i = 0 ; i < adapter->vfs_allocated_count; i++) { + ping = E1000_PF_CONTROL_MSG; + if (adapter->vf_data[i].clear_to_send) + ping |= E1000_VT_MSGTYPE_CTS; + igb_write_mbx(hw, &ping, 1, i); + } +} + +static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; + + /* only up to 30 hash values supported */ + if (n > 30) + n = 30; + + /* salt away the number of multi cast addresses assigned + * to this VF for later use to restore when the PF multi cast + * list changes + */ + vf_data->num_vf_mc_hashes = n; + + /* VFs are limited to using the MTA hash table for their multicast + * addresses */ + for (i = 0; i < n; i++) + vf_data->vf_mc_hashes[i] = hash_list[i];; + + /* Flush and reset the mta with the new values */ + igb_set_multi(adapter->netdev); + + return 0; +} + +static void igb_restore_vf_multicasts(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data; + int i, j; + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + vf_data = &adapter->vf_data[i]; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(hw, vf_data->vf_mc_hashes[j]); + } +} + +static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pool_mask, reg, vid; + int i; + + pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + + /* remove the vf from the pool */ + reg &= ~pool_mask; + + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK) && + (reg & E1000_VLVF_VLANID_ENABLE)) { + reg = 0; + vid = reg & E1000_VLVF_VLANID_MASK; + igb_vfta_set(hw, vid, false); + } + + wr32(E1000_VLVF(i), reg); + } +} + +static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg, i; + + /* It is an error to call this function when VFs are not enabled */ + if (!adapter->vfs_allocated_count) + return -1; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & 
E1000_VLVF_VLANID_MASK)) + break; + } + + if (add) { + if (i == E1000_VLVF_ARRAY_SIZE) { + /* Did not find a matching VLAN ID entry that was + * enabled. Search for a free filter entry, i.e. + * one without the enable bit set + */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + if (!(reg & E1000_VLVF_VLANID_ENABLE)) + break; + } + } + if (i < E1000_VLVF_ARRAY_SIZE) { + /* Found an enabled/available entry */ + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* if !enabled we need to set this up in vfta */ + if (!(reg & E1000_VLVF_VLANID_ENABLE)) { + /* add VID to filter table, if bit already set + * PF must have added it outside of table */ + if (igb_vfta_set(hw, vid, true)) + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); + reg |= E1000_VLVF_VLANID_ENABLE; + } + reg &= ~E1000_VLVF_VLANID_MASK; + reg |= vid; + + wr32(E1000_VLVF(i), reg); + return 0; + } + } else { + if (i < E1000_VLVF_ARRAY_SIZE) { + /* remove vf from the pool */ + reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK)) { + reg = 0; + igb_vfta_set(hw, vid, false); + } + wr32(E1000_VLVF(i), reg); + return 0; + } + } + return -1; +} + +static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + + return igb_vlvf_set(adapter, vid, add, vf); +} + +static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + /* disable mailbox functionality for vf */ + adapter->vf_data[vf].clear_to_send = false; + + /* reset offloads to defaults */ + igb_set_vmolr(hw, vf); + + /* reset vlans for device */ + igb_clear_vf_vfta(adapter, vf); + + /* reset multicast table array for vf */ + adapter->vf_data[vf].num_vf_mc_hashes = 0; + + /* Flush and reset the mta with the new values */ + igb_set_multi(adapter->netdev); +} + +static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + u32 reg, msgbuf[3]; + u8 *addr = (u8 *)(&msgbuf[1]); + + /* process all the same items cleared in a function level reset */ + igb_vf_reset_event(adapter, vf); + + /* set vf mac address */ + igb_rar_set(hw, vf_mac, vf + 1); + igb_set_rah_pool(hw, vf, vf + 1); + + /* enable transmit and receive for vf */ + reg = rd32(E1000_VFTE); + wr32(E1000_VFTE, reg | (1 << vf)); + reg = rd32(E1000_VFRE); + wr32(E1000_VFRE, reg | (1 << vf)); + + /* enable mailbox functionality for vf */ + adapter->vf_data[vf].clear_to_send = true; + + /* reply to reset with ack and vf mac address */ + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, 6); + igb_write_mbx(hw, msgbuf, 3, vf); +} + +static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) +{ + unsigned char *addr = (char *)&msg[1]; + int err = -1; + + if (is_valid_ether_addr(addr)) + err = igb_set_vf_mac(adapter, vf, addr); + + return err; + +} + +static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 msg = E1000_VT_MSGTYPE_NACK; + + /* if device isn't clear to send it shouldn't be reading either */ + if (!adapter->vf_data[vf].clear_to_send) + igb_write_mbx(hw, &msg, 1, vf); +} + + +static void igb_msg_task(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; 
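
igb_msg_task() below, together with igb_rcv_msg_from_vf(), treats each 32-bit mailbox word as a packed message: the low 16 bits carry the message ID, the byte above them (MSGINFO) carries a count such as the number of multicast hashes, and the top bits are ACK/NACK/CTS status flags. A hypothetical userspace sketch of that layout; the constants mirror igb's mailbox definitions but are assumptions here, not part of this patch:

#include <stdio.h>
#include <stdint.h>

#define VT_MSGTYPE_ACK   0x80000000u  /* assumed flag values */
#define VT_MSGTYPE_NACK  0x40000000u
#define VT_MSGINFO_SHIFT 16
#define VT_MSGINFO_MASK  (0xFFu << VT_MSGINFO_SHIFT)
#define VF_SET_MULTICAST 0x03u        /* assumed message ID */

int main(void)
{
	/* VF -> PF: "set 5 multicast hash entries" */
	uint32_t msg = VF_SET_MULTICAST | (5u << VT_MSGINFO_SHIFT);

	printf("id=0x%02x count=%u\n", (unsigned)(msg & 0xFFFFu),
	       (unsigned)((msg & VT_MSGINFO_MASK) >> VT_MSGINFO_SHIFT));

	/* PF -> VF reply: the same word with the ACK bit set */
	msg |= VT_MSGTYPE_ACK;
	printf("acked=%d nacked=%d\n",
	       !!(msg & VT_MSGTYPE_ACK), !!(msg & VT_MSGTYPE_NACK));
	return 0;
}
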
+ u32 vf; + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + /* process any reset requests */ + if (!igb_check_for_rst(hw, vf)) { + adapter->vf_data[vf].clear_to_send = false; + igb_vf_reset_event(adapter, vf); + } + + /* process any messages pending */ + if (!igb_check_for_msg(hw, vf)) + igb_rcv_msg_from_vf(adapter, vf); + + /* process any acks */ + if (!igb_check_for_ack(hw, vf)) + igb_rcv_ack_from_vf(adapter, vf); + } +} + +static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) +{ + u32 mbx_size = E1000_VFMAILBOX_SIZE; + u32 msgbuf[mbx_size]; + struct e1000_hw *hw = &adapter->hw; + s32 retval; + + retval = igb_read_mbx(hw, msgbuf, mbx_size, vf); + + if (retval) + dev_err(&adapter->pdev->dev, + "Error receiving message from VF\n"); + + /* this is a message we already processed, do nothing */ + if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) + return retval; + + /* + * until the vf completes a reset it should not be + * allowed to start any configuration. + */ + if (msgbuf[0] == E1000_VF_RESET) { + igb_vf_reset_msg(adapter, vf); + return retval; + } + + if (!adapter->vf_data[vf].clear_to_send) { + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + igb_write_mbx(hw, msgbuf, 1, vf); + return retval; + } + + switch (msgbuf[0] & 0xFFFF) { + case E1000_VF_SET_MAC_ADDR: + retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); + break; + case E1000_VF_SET_MULTICAST: + retval = igb_set_vf_multicasts(adapter, msgbuf, vf); + break; + case E1000_VF_SET_LPE: + retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); + break; + case E1000_VF_SET_VLAN: + retval = igb_set_vf_vlan(adapter, msgbuf, vf); + break; + default: + dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); + retval = -1; + break; + } + + /* notify the VF of the results of what it sent us */ + if (retval) + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + else + msgbuf[0] |= E1000_VT_MSGTYPE_ACK; + + msgbuf[0] |= E1000_VT_MSGTYPE_CTS; + + igb_write_mbx(hw, msgbuf, 1, vf); + + return retval; +} + +/** + * igb_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ +static irqreturn_t igb_intr_msi(int irq, void *data) +{ + struct net_device *netdev = data; + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + /* read ICR disables interrupts using IAM */ + u32 icr = rd32(E1000_ICR); + + igb_write_itr(adapter->rx_ring); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1;
No * need for the IMC write */ u32 icr = rd32(E1000_ICR); - u32 eicr = 0; if (!icr) return IRQ_NONE; /* Not our interrupt */ @@ -3534,7 +4207,10 @@ static irqreturn_t igb_intr(int irq, void *data) if (!(icr & E1000_ICR_INT_ASSERTED)) return IRQ_NONE; - eicr = rd32(E1000_EICR); + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; @@ -3548,6 +4224,26 @@ static irqreturn_t igb_intr(int irq, void *data) return IRQ_HANDLED; } +static inline void igb_rx_irq_enable(struct igb_ring *rx_ring) +{ + struct igb_adapter *adapter = rx_ring->adapter; + struct e1000_hw *hw = &adapter->hw; + + if (adapter->itr_setting & 3) { + if (adapter->num_rx_queues == 1) + igb_set_itr(adapter); + else + igb_update_ring_itr(rx_ring); + } + + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(E1000_EIMS, rx_ring->eims_value); + else + igb_irq_enable(adapter); + } +} + /** * igb_poll - NAPI Rx polling callback * @napi: napi polling structure @@ -3556,70 +4252,64 @@ static irqreturn_t igb_intr(int irq, void *data) static int igb_poll(struct napi_struct *napi, int budget) { struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi); - struct igb_adapter *adapter = rx_ring->adapter; - struct net_device *netdev = adapter->netdev; - int tx_clean_complete, work_done = 0; + int work_done = 0; - /* this poll routine only supports one tx and one rx queue */ #ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_tx_dca(&adapter->tx_ring[0]); + if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_rx_dca(rx_ring); #endif - tx_clean_complete = igb_clean_tx_irq(&adapter->tx_ring[0]); + igb_clean_rx_irq_adv(rx_ring, &work_done, budget); + if (rx_ring->buddy) { #ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_rx_dca(&adapter->rx_ring[0]); + if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_tx_dca(rx_ring->buddy); #endif - igb_clean_rx_irq_adv(&adapter->rx_ring[0], &work_done, budget); + if (!igb_clean_tx_irq(rx_ring->buddy)) + work_done = budget; + } - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((tx_clean_complete && (work_done < budget)) || - !netif_running(netdev)) { - if (adapter->itr_setting & 3) - igb_set_itr(adapter); + /* If not enough Rx work done, exit the polling mode */ + if (work_done < budget) { napi_complete(napi); - if (!test_bit(__IGB_DOWN, &adapter->state)) - igb_irq_enable(adapter); - return 0; + igb_rx_irq_enable(rx_ring); } - return 1; + return work_done; } -static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget) +/** + * igb_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: board private structure + * @skb: packet that was just sent + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we + * allow only one such packet into the queue.
+ */ +static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb) { - struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi); - struct igb_adapter *adapter = rx_ring->adapter; + union skb_shared_tx *shtx = skb_tx(skb); struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int work_done = 0; - -#ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_rx_dca(rx_ring); -#endif - igb_clean_rx_irq_adv(rx_ring, &work_done, budget); - - - /* If not enough Rx work done, exit the polling mode */ - if ((work_done == 0) || !netif_running(netdev)) { - napi_complete(napi); - if (adapter->itr_setting & 3) { - if (adapter->num_rx_queues == 1) - igb_set_itr(adapter); - else - igb_update_ring_itr(rx_ring); + if (unlikely(shtx->hardware)) { + u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID; + if (valid) { + u64 regval = rd32(E1000_TXSTMPL); + u64 ns; + struct skb_shared_hwtstamps shhwtstamps; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + regval |= (u64)rd32(E1000_TXSTMPH) << 32; + ns = timecounter_cyc2time(&adapter->clock, + regval); + timecompare_update(&adapter->compare, ns); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + shhwtstamps.syststamp = + timecompare_transform(&adapter->compare, ns); + skb_tstamp_tx(skb, &shhwtstamps); } - - if (!test_bit(__IGB_DOWN, &adapter->state)) - wr32(E1000_EIMS, rx_ring->eims_value); - - return 0; } - - return 1; } /** @@ -3660,6 +4350,8 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) skb->len; total_packets += segs; total_bytes += bytecount; + + igb_tx_hwtstamp(adapter, skb); } igb_unmap_and_free_tx_resource(adapter, buffer_info); @@ -3669,7 +4361,6 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) if (i == tx_ring->count) i = 0; } - eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop); } @@ -3678,7 +4369,7 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) if (unlikely(count && netif_carrier_ok(netdev) && - IGB_DESC_UNUSED(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -3736,9 +4427,9 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) /** * igb_receive_skb - helper function to handle rx indications - * @ring: pointer to receive ring receving this packet + * @ring: pointer to receive ring receiving this packet * @status: descriptor status field as written by hardware - * @vlan: descriptor vlan field as written by hardware (no le/be conversion) + * @rx_desc: receive descriptor containing vlan and type information.
* @skb: pointer to sk_buff to be indicated to stack **/ static void igb_receive_skb(struct igb_ring *ring, u8 status, @@ -3749,42 +4440,42 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status, bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP)); skb_record_rx_queue(skb, ring->queue_index); - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (vlan_extracted) - vlan_gro_receive(&ring->napi, adapter->vlgrp, - le16_to_cpu(rx_desc->wb.upper.vlan), - skb); - else - napi_gro_receive(&ring->napi, skb); - } else { - if (vlan_extracted) - vlan_hwaccel_receive_skb(skb, adapter->vlgrp, - le16_to_cpu(rx_desc->wb.upper.vlan)); - else - netif_receive_skb(skb); - } + if (vlan_extracted) + vlan_gro_receive(&ring->napi, adapter->vlgrp, + le16_to_cpu(rx_desc->wb.upper.vlan), + skb); + else + napi_gro_receive(&ring->napi, skb); } - static inline void igb_rx_checksum_adv(struct igb_adapter *adapter, u32 status_err, struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; /* Ignore Checksum bit is set or checksum is disabled through ethtool */ - if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum) + if ((status_err & E1000_RXD_STAT_IXSM) || + (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED)) return; /* TCP/UDP checksum error bit is set */ if (status_err & (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) { + /* + * work around errata with sctp packets where the TCPE aka + * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) + * packets (aka let the stack check the crc32c) + */ + if (!((adapter->hw.mac.type == e1000_82576) && + (skb->len == 60))) + adapter->hw_csum_err++; /* let the stack verify checksum errors */ - adapter->hw_csum_err++; return; } /* It must be a TCP or UDP packet with a valid checksum */ if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) skb->ip_summed = CHECKSUM_UNNECESSARY; + dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err); adapter->hw_csum_good++; } @@ -3793,15 +4484,16 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, { struct igb_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc, *next_rxd; struct igb_buffer *buffer_info, *next_buffer; struct sk_buff *skb; - unsigned int i; - u32 length, hlen, staterr; bool cleaned = false; int cleaned_count = 0; unsigned int total_bytes = 0, total_packets = 0; + unsigned int i; + u32 length, hlen, staterr; i = rx_ring->next_to_clean; buffer_info = &rx_ring->buffer_info[i]; @@ -3849,8 +4541,7 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, if (!skb_shinfo(skb)->nr_frags) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_ps_hdr_size + - NET_IP_ALIGN, + adapter->rx_ps_hdr_size + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); skb_put(skb, hlen); } @@ -3885,6 +4576,47 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, goto next_desc; } send_up: + /* + * If this bit is set, then the RX registers contain + * the time stamp. No other packet will be time + * stamped until we read these registers, so read the + * registers to make them available again. Because + * only one packet can be time stamped at a time, we + * know that the register values must belong to this + * one here and therefore we don't need to compare + * any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a + * skb_shared_tx that we can turn into a + * skb_shared_hwtstamps.
+ * + * TODO: can time stamping be triggered (thus locking + * the registers) without the packet reaching this point + * here? In that case RX time stamping would get stuck. + * + * TODO: in "time stamp all packets" mode this bit is + * not set. Need a global flag for this mode and then + * always read the registers. Cannot be done without + * a race condition. + */ + if (unlikely(staterr & E1000_RXD_STAT_TS)) { + u64 regval; + u64 ns; + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + + WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID), + "igb: no RX time stamp available for time stamped packet"); + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; + ns = timecounter_cyc2time(&adapter->clock, regval); + timecompare_update(&adapter->compare, ns); + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ns_to_ktime(ns); + shhwtstamps->syststamp = + timecompare_transform(&adapter->compare, ns); + } + if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { dev_kfree_skb_irq(skb); goto next_desc; @@ -3915,7 +4647,7 @@ next_desc: } rx_ring->next_to_clean = i; - cleaned_count = IGB_DESC_UNUSED(rx_ring); + cleaned_count = igb_desc_unused(rx_ring); if (cleaned_count) igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); @@ -3929,7 +4661,6 @@ next_desc: return cleaned; } - /** * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split * @adapter: address of board private structure @@ -4062,6 +4793,163 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** + * igb_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't cause any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * layer 2 or 4".
+ * + **/ +static int igb_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_ctl_type = 0; + u32 tsync_rx_cfg = 0; + int is_l4 = 0; + int is_l2 = 0; + short port = 319; /* PTP */ + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl_bit = 0; + break; + case HWTSTAMP_TX_ON: + tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl_bit = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * register TSYNCRXCFG must be set, therefore it is not + * possible to time stamp both Sync and Delay_Req messages + * => fall back to time stamping all packets + */ + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = 1; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = 1; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; + is_l2 = 1; + is_l4 = 1; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = 1; + is_l4 = 1; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = 1; + break; + default: + return -ERANGE; + } + + /* enable/disable TX */ + regval = rd32(E1000_TSYNCTXCTL); + regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit; + wr32(E1000_TSYNCTXCTL, regval); + + /* enable/disable RX, define which PTP packets are time stamped */ + regval = rd32(E1000_TSYNCRXCTL); + regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit; + regval = (regval & ~0xE) | tsync_rx_ctl_type; + wr32(E1000_TSYNCRXCTL, regval); + wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* + * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 + * (Ethertype to filter on) + * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter) + * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping) + */ + wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0); + + /* L4 Queue Filter[0]: only filter by source and destination port */ + wr32(E1000_SPQF0, htons(port)); + wr32(E1000_IMIREXT(0), is_l4 ? + ((1<<12) | (1<<19) /* bypass size and control flags */) : 0); + wr32(E1000_IMIR(0), is_l4 ? 
+ (htons(port) + | (0<<16) /* immediate interrupt disabled */ + | 0 /* (1<<17) bit cleared: do not bypass + destination port check */) + : 0); + wr32(E1000_FTQF0, is_l4 ? + (0x11 /* UDP */ + | (1<<15) /* VF not compared */ + | (1<<27) /* Enable Timestamping */ + | (7<<28) /* only source port filter enabled, + source/target address and protocol + masked */) + : ((1<<15) | (15<<28) /* all mask bits set = filter not + enabled */)); + + wrfl(); + + adapter->hwtstamp_config = config; + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(E1000_TXSTMPH); + regval = rd32(E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +/** * igb_ioctl - * @netdev: * @ifreq: @@ -4074,6 +4962,8 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); + case SIOCSHWTSTAMP: + return igb_hwtstamp_ioctl(netdev, ifr, cmd); default: return -EOPNOTSUPP; } @@ -4100,8 +4990,6 @@ static void igb_vlan_rx_register(struct net_device *netdev, rctl &= ~E1000_RCTL_CFIEN; wr32(E1000_RCTL, rctl); igb_update_mng_vlan(adapter); - wr32(E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); } else { /* disable VLAN tag insert/strip */ ctrl = rd32(E1000_CTRL); @@ -4112,10 +5000,10 @@ static void igb_vlan_rx_register(struct net_device *netdev, igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; } - wr32(E1000_RLPML, - adapter->max_frame_size); } + igb_rlpml_set(adapter); + if (!test_bit(__IGB_DOWN, &adapter->state)) igb_irq_enable(adapter); } @@ -4124,24 +5012,25 @@ static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 vfta, index; + int pf_id = adapter->vfs_allocated_count; - if ((adapter->hw.mng_cookie.status & + if ((hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && (vid == adapter->mng_vlan_id)) return; - /* add VID to filter table */ - index = (vid >> 5) & 0x7F; - vfta = array_rd32(E1000_VFTA, index); - vfta |= (1 << (vid & 0x1F)); - igb_write_vfta(&adapter->hw, index, vfta); + + /* add vid to vlvf if sr-iov is enabled, + * if that fails add directly to filter table */ + if (igb_vlvf_set(adapter, vid, true, pf_id)) + igb_vfta_set(hw, vid, true); + } static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 vfta, index; + int pf_id = adapter->vfs_allocated_count; igb_irq_disable(adapter); vlan_group_set_device(adapter->vlgrp, vid, NULL); @@ -4157,11 +5046,10 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) return; } - /* remove VID from filter table */ - index = (vid >> 5) & 0x7F; - vfta = array_rd32(E1000_VFTA, index); - vfta &= ~(1 << (vid & 0x1F)); - igb_write_vfta(&adapter->hw, index, vfta); + /* remove vid from vlvf if sr-iov is enabled, + * if not in vlvf remove from vfta */ + if (igb_vlvf_set(adapter, vid, false, pf_id)) + igb_vfta_set(hw, vid, false); } static void igb_restore_vlan(struct igb_adapter *adapter) @@ -4218,8 +5106,7 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx) return 0; } - -static int igb_suspend(struct pci_dev *pdev, pm_message_t state) +static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -4278,15 +5165,9 @@ 
static int igb_suspend(struct pci_dev *pdev, pm_message_t state) wr32(E1000_WUFC, 0); } - /* make sure adapter isn't asleep if manageability/wol is enabled */ - if (wufc || adapter->en_mng_pt) { - pci_enable_wake(pdev, PCI_D3hot, 1); - pci_enable_wake(pdev, PCI_D3cold, 1); - } else { + *enable_wake = wufc || adapter->en_mng_pt; + if (!*enable_wake) igb_shutdown_fiber_serdes_link_82575(hw); - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_enable_wake(pdev, PCI_D3cold, 0); - } /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -4294,12 +5175,29 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state) pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; } #ifdef CONFIG_PM +static int igb_suspend(struct pci_dev *pdev, pm_message_t state) +{ + int retval; + bool wake; + + retval = __igb_shutdown(pdev, &wake); + if (retval) + return retval; + + if (wake) { + pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + static int igb_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -4331,6 +5229,11 @@ static int igb_resume(struct pci_dev *pdev) /* e1000_power_up_phy(adapter); */ igb_reset(adapter); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + wr32(E1000_WUS, ~0); if (netif_running(netdev)) { @@ -4341,17 +5244,20 @@ static int igb_resume(struct pci_dev *pdev) netif_device_attach(netdev); - /* let the f/w know that the h/w is now under the control of the - * driver. */ - igb_get_hw_control(adapter); - return 0; } #endif static void igb_shutdown(struct pci_dev *pdev) { - igb_suspend(pdev, PMSG_SUSPEND); + bool wake; + + __igb_shutdown(pdev, &wake); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -4363,22 +5269,27 @@ static void igb_shutdown(struct pci_dev *pdev) static void igb_netpoll(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; int i; - int work_done = 0; - - igb_irq_disable(adapter); - adapter->flags |= IGB_FLAG_IN_NETPOLL; - for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_irq(&adapter->tx_ring[i]); + if (!adapter->msix_entries) { + igb_irq_disable(adapter); + napi_schedule(&adapter->rx_ring[0].napi); + return; + } - for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_irq_adv(&adapter->rx_ring[i], - &work_done, - adapter->rx_ring[i].napi.weight); + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *tx_ring = &adapter->tx_ring[i]; + wr32(E1000_EIMC, tx_ring->eims_value); + igb_clean_tx_irq(tx_ring); + wr32(E1000_EIMS, tx_ring->eims_value); + } - adapter->flags &= ~IGB_FLAG_IN_NETPOLL; - igb_irq_enable(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *rx_ring = &adapter->rx_ring[i]; + wr32(E1000_EIMC, rx_ring->eims_value); + napi_schedule(&rx_ring->napi); + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -4474,4 +5385,87 @@ static void igb_io_resume(struct pci_dev *pdev) igb_get_hw_control(adapter); } +static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn) +{ + u32 reg_data; + + reg_data = rd32(E1000_VMOLR(vfn)); + reg_data |= E1000_VMOLR_BAM | /* Accept broadcast */ + E1000_VMOLR_ROPE | /* Accept packets matched in UTA */ + E1000_VMOLR_ROMPE | /* 
Accept packets matched in MTA */ + E1000_VMOLR_AUPE | /* Accept untagged packets */ + E1000_VMOLR_STRVLAN; /* Strip vlan tags */ + wr32(E1000_VMOLR(vfn), reg_data); +} + +static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, + int vfn) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + vmolr = rd32(E1000_VMOLR(vfn)); + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= size | E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + + return 0; +} + +static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry) +{ + u32 reg_data; + + reg_data = rd32(E1000_RAH(entry)); + reg_data &= ~E1000_RAH_POOL_MASK; + reg_data |= E1000_RAH_POOL_1 << pool; + wr32(E1000_RAH(entry), reg_data); +} + +static void igb_set_mc_list_pools(struct igb_adapter *adapter, + int entry_count, u16 total_rar_filters) +{ + struct e1000_hw *hw = &adapter->hw; + int i = adapter->vfs_allocated_count + 1; + + if ((i + entry_count) < total_rar_filters) + total_rar_filters = i + entry_count; + + for (; i < total_rar_filters; i++) + igb_set_rah_pool(hw, adapter->vfs_allocated_count, i); } + +static int igb_set_vf_mac(struct igb_adapter *adapter, + int vf, unsigned char *mac_addr) +{ + struct e1000_hw *hw = &adapter->hw; + int rar_entry = vf + 1; /* VF MAC addresses start at entry 1 */ + + igb_rar_set(hw, mac_addr, rar_entry); + + memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); + + igb_set_rah_pool(hw, vf, rar_entry); + + return 0; +} + +static void igb_vmm_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg_data; + + if (!adapter->vfs_allocated_count) + return; + + /* VFs need a PF reset indication before they + * can send/receive mail */ + reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + wr32(E1000_CTRL_EXT, reg_data); + + igb_vmdq_set_loopback_pf(hw, true); + igb_vmdq_set_replication_pf(hw, true); +} + /* igb_main.c */
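
A note on the shared-VLAN bookkeeping in igb_vlvf_set above: each VLVF entry packs a VLAN ID, an enable bit, and one pool-select bit per pool (the PF plus each VF) into a single 32-bit register, so an entry is claimed on first join and recycled once its pool set empties. The standalone sketch below mirrors that layout; the mask values and the vlvf_join/vlvf_leave helpers are illustrative stand-ins, not the driver's E1000_VLVF_* definitions.

#include <stdint.h>
#include <stdio.h>

/* illustrative values; the driver's register headers are authoritative */
#define VLVF_VLANID_MASK   0x00000FFFu
#define VLVF_POOLSEL_SHIFT 12
#define VLVF_POOLSEL_MASK  (0xFFu << VLVF_POOLSEL_SHIFT)
#define VLVF_VLANID_ENABLE 0x80000000u

/* add 'pool' to the membership of a VLVF entry filtering 'vid' */
static uint32_t vlvf_join(uint32_t reg, uint16_t vid, unsigned int pool)
{
	if (!(reg & VLVF_VLANID_ENABLE))
		/* first user claims the entry and programs the VLAN ID */
		reg = (reg & ~VLVF_VLANID_MASK) | vid | VLVF_VLANID_ENABLE;
	return reg | (1u << (VLVF_POOLSEL_SHIFT + pool));
}

/* drop 'pool'; the entry is freed once no pool references it */
static uint32_t vlvf_leave(uint32_t reg, unsigned int pool)
{
	reg &= ~(1u << (VLVF_POOLSEL_SHIFT + pool));
	return (reg & VLVF_POOLSEL_MASK) ? reg : 0;
}

int main(void)
{
	uint32_t reg = 0;

	reg = vlvf_join(reg, 100, 0);   /* PF (pool 0) joins VLAN 100 */
	reg = vlvf_join(reg, 100, 3);   /* a VF in pool 3 joins too */
	reg = vlvf_leave(reg, 0);
	printf("entry now 0x%08x\n", vlvf_leave(reg, 3)); /* 0: recycled */
	return 0;
}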
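
The PF/VF mailbox handled by igb_rcv_msg_from_vf uses the first 32-bit word as both command and status: the low 16 bits select the operation, and the PF replies by OR-ing an ACK or NACK plus CTS into that same word, which is also how it recognizes and skips its own replies. A sketch of the reply-side encoding, with placeholder flag values (the real E1000_VT_MSGTYPE_* constants live in the driver's mailbox header and may differ):

#include <stdint.h>
#include <stdio.h>

/* placeholder bit values, assumed for illustration only */
#define VT_MSGTYPE_ACK  0x80000000u
#define VT_MSGTYPE_NACK 0x40000000u
#define VT_MSGTYPE_CTS  0x20000000u

/* build the PF's one-word reply to a VF request */
static uint32_t pf_reply(uint32_t request, int failed)
{
	uint32_t reply = request & 0xFFFFu;   /* echo the command */

	reply |= failed ? VT_MSGTYPE_NACK : VT_MSGTYPE_ACK;
	reply |= VT_MSGTYPE_CTS;              /* reset handshake complete */
	return reply;
}

int main(void)
{
	/* e.g. a SET_MAC request word; the command value is illustrative */
	printf("reply: 0x%08x\n", pf_reply(0x0002u, 0));
	return 0;
}

Because ACK and NACK double as reply markers, any inbound word already carrying one of them is dropped early in igb_rcv_msg_from_vf.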
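
The reworked igb_poll follows the standard NAPI contract: report the RX work actually done, let an unfinished TX buddy ring claim the full budget so polling continues, and only complete NAPI and re-enable the interrupt when work_done is strictly below budget. A compilable toy model of that control flow, with stand-in types and helpers in place of the driver's structures:

#include <stdbool.h>
#include <stdio.h>

struct ring { int pending; };            /* stand-in for struct igb_ring */

static int clean_rx(struct ring *r, int budget)
{
	int done = r->pending < budget ? r->pending : budget;
	r->pending -= done;
	return done;
}

static bool clean_tx(struct ring *r) { return r->pending == 0; }

static void complete_and_unmask(struct ring *r)
{
	(void)r;                         /* driver: napi_complete() + EIMS write */
	puts("polling done, irq re-enabled");
}

static int poll(struct ring *rx, struct ring *tx, int budget)
{
	int work_done = clean_rx(rx, budget);

	/* unfinished TX claims the whole budget so NAPI keeps polling */
	if (tx && !clean_tx(tx))
		work_done = budget;

	if (work_done < budget)
		complete_and_unmask(rx);
	return work_done;
}

int main(void)
{
	struct ring rx = { .pending = 5 }, tx = { .pending = 0 };
	printf("work_done = %d\n", poll(&rx, &tx, 64));
	return 0;
}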
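
Both igb_tx_hwtstamp and the RX path above assemble one 64-bit cycle count from a LOW/HIGH register pair (reading the pair releases the hardware latch for the next packet) before handing it to timecounter_cyc2time() for conversion to nanoseconds. The widening step in isolation, with the register reads mocked out since this is only a sketch:

#include <stdint.h>
#include <stdio.h>

/* mock 32-bit register reads; in the driver these are rd32(E1000_TXSTMPL)
 * and rd32(E1000_TXSTMPH), and consuming the pair re-arms the latch */
static uint32_t rd_stmp_lo(void) { return 0x89ABCDEFu; }
static uint32_t rd_stmp_hi(void) { return 0x01234567u; }

int main(void)
{
	uint64_t regval = rd_stmp_lo();          /* LOW half first */

	regval |= (uint64_t)rd_stmp_hi() << 32;  /* HIGH half completes it */
	printf("raw cycle count: 0x%016llx\n",
	       (unsigned long long)regval);
	return 0;
}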
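
igb_hwtstamp_ioctl implements the generic SIOCSHWTSTAMP interface, so it is exercised from userspace with a struct hwtstamp_config passed through an ifreq; note the driver may substitute a broader rx_filter than requested and copies the effective configuration back. A minimal caller might look like the following (error handling trimmed; "eth0" is a placeholder interface name):

#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;                 /* stamp outgoing packets */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
		perror("SIOCSHWTSTAMP");
	else  /* the driver may have widened the filter; check what we got */
		printf("rx_filter in effect: %d\n", cfg.rx_filter);

	close(fd);
	return 0;
}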
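
Finally, the __igb_shutdown split separates quiescing the device from choosing a power state: the helper reports whether a wake source (WoL or manageability) is armed, and the CONFIG_PM suspend path then picks between pci_prepare_to_sleep() and an explicit D3hot entry. The shape of that decision as a kernel-context sketch; my_quiesce stands in for __igb_shutdown, and this mirrors the patch above rather than extending it:

/* kernel-context sketch only; builds against the PCI PM API of this era */
#include <linux/pci.h>

extern int my_quiesce(struct pci_dev *pdev, bool *wake); /* __igb_shutdown stand-in */

static int my_suspend(struct pci_dev *pdev, pm_message_t state)
{
	bool wake;
	int err = my_quiesce(pdev, &wake);

	if (err)
		return err;

	if (wake) {
		/* arm PME and enter the deepest state that can still wake */
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);   /* no wake sources armed */
		pci_set_power_state(pdev, PCI_D3hot);
	}
	return 0;
}

Keeping the quiesce step common lets igb_shutdown reuse it verbatim while applying the wake decision only when the system is actually powering off.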