e1000: fix flow control thresholds
[safe/jmp/linux-2.6] / drivers / net / e1000 / e1000_main.c
index 6a46cee..d7df00c 100644 (file)
@@ -137,9 +137,15 @@ static int e1000_clean(struct napi_struct *napi, int budget);
 static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
                               struct e1000_rx_ring *rx_ring,
                               int *work_done, int work_to_do);
+static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
+                                    struct e1000_rx_ring *rx_ring,
+                                    int *work_done, int work_to_do);
 static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
-                                   struct e1000_rx_ring *rx_ring,
+                                  struct e1000_rx_ring *rx_ring,
                                   int cleaned_count);
+static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
+                                        struct e1000_rx_ring *rx_ring,
+                                        int cleaned_count);
 static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
                           int cmd);
@@ -498,6 +504,8 @@ int e1000_up(struct e1000_adapter *adapter)
 
        e1000_irq_enable(adapter);
 
+       netif_wake_queue(adapter->netdev);
+
        /* fire a link change interrupt to start the watchdog */
        ew32(ICS, E1000_ICS_LSC);
        return 0;
@@ -633,8 +641,8 @@ void e1000_reset(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
-       u16 fc_high_water_mark = E1000_FC_HIGH_DIFF;
        bool legacy_pba_adjust = false;
+       u16 hwm;
 
        /* Repartition Pba for greater than 9k mtu
         * To take effect CTRL.RST is required.
@@ -678,7 +686,7 @@ void e1000_reset(struct e1000_adapter *adapter)
        }
 
        if (legacy_pba_adjust) {
-               if (adapter->netdev->mtu > E1000_RXBUFFER_8192)
+               if (hw->max_frame_size > E1000_RXBUFFER_8192)
                        pba -= 8; /* allocate more FIFO for Tx */
 
                if (hw->mac_type == e1000_82547) {
@@ -688,14 +696,14 @@ void e1000_reset(struct e1000_adapter *adapter)
                                (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT;
                        atomic_set(&adapter->tx_fifo_stall, 0);
                }
-       } else if (hw->max_frame_size > MAXIMUM_ETHERNET_FRAME_SIZE) {
+       } else if (hw->max_frame_size >  ETH_FRAME_LEN + ETH_FCS_LEN) {
                /* adjust PBA for jumbo frames */
                ew32(PBA, pba);
 
                /* To maintain wire speed transmits, the Tx FIFO should be
-                * large enough to accomodate two full transmit packets,
+                * large enough to accommodate two full transmit packets,
                 * rounded up to the next 1KB and expressed in KB.  Likewise,
-                * the Rx FIFO should be large enough to accomodate at least
+                * the Rx FIFO should be large enough to accommodate at least
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB. */
                pba = er32(PBA);
@@ -703,13 +711,17 @@ void e1000_reset(struct e1000_adapter *adapter)
                tx_space = pba >> 16;
                /* lower 16 bits has Rx packet buffer allocation size in KB */
                pba &= 0xffff;
-               /* don't include ethernet FCS because hardware appends/strips */
-               min_rx_space = adapter->netdev->mtu + ENET_HEADER_SIZE +
-                              VLAN_TAG_SIZE;
-               min_tx_space = min_rx_space;
-               min_tx_space *= 2;
+               /*
+                * the tx fifo also stores 16 bytes of information about the tx
+                * but don't include ethernet FCS because hardware appends it
+                */
+               min_tx_space = (hw->max_frame_size +
+                               sizeof(struct e1000_tx_desc) -
+                               ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
                min_tx_space >>= 10;
+               /* software strips receive CRC, so leave room for it */
+               min_rx_space = hw->max_frame_size;
                min_rx_space = ALIGN(min_rx_space, 1024);
                min_rx_space >>= 10;
 
@@ -746,23 +758,22 @@ void e1000_reset(struct e1000_adapter *adapter)
 
        ew32(PBA, pba);
 
-       /* flow control settings */
-       /* Set the FC high water mark to 90% of the FIFO size.
-        * Required to clear last 3 LSB */
-       fc_high_water_mark = ((pba * 9216)/10) & 0xFFF8;
-       /* We can't use 90% on small FIFOs because the remainder
-        * would be less than 1 full frame.  In this case, we size
-        * it to allow at least a full frame above the high water
-        *  mark. */
-       if (pba < E1000_PBA_16K)
-               fc_high_water_mark = (pba * 1024) - 1600;
-
-       hw->fc_high_water = fc_high_water_mark;
-       hw->fc_low_water = fc_high_water_mark - 8;
-       if (hw->mac_type == e1000_80003es2lan)
-               hw->fc_pause_time = 0xFFFF;
-       else
-               hw->fc_pause_time = E1000_FC_PAUSE_TIME;
+       /*
+        * flow control settings:
+        * The high water mark must be low enough to fit one full frame
+        * (or the size used for early receive) above it in the Rx FIFO.
+        * Set it to the lower of:
+        * - 90% of the Rx FIFO size, and
+        * - the full Rx FIFO size minus the early receive size (for parts
+        *   with ERT support assuming ERT set to E1000_ERT_2048), or
+        * - the full Rx FIFO size minus one full frame
+        */
+       hwm = min(((pba << 10) * 9 / 10),
+                 ((pba << 10) - hw->max_frame_size));
+
+       hw->fc_high_water = hwm & 0xFFF8;       /* 8-byte granularity */
+       hw->fc_low_water = hw->fc_high_water - 8;
+       hw->fc_pause_time = E1000_FC_PAUSE_TIME;
        hw->fc_send_xon = 1;
        hw->fc = hw->original_fc;
 
@@ -1234,15 +1245,14 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
            !e1000_check_mng_mode(hw))
                e1000_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until e1000_open() is called */
-       netif_carrier_off(netdev);
-       netif_stop_queue(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
+
        DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n");
 
        cards_found++;
@@ -1441,6 +1451,8 @@ static int e1000_open(struct net_device *netdev)
        if (test_bit(__E1000_TESTING, &adapter->flags))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = e1000_setup_all_tx_resources(adapter);
        if (err)
@@ -1859,6 +1871,7 @@ setup_rx_desc_die:
 
        rxdr->next_to_clean = 0;
        rxdr->next_to_use = 0;
+       rxdr->rx_skb_top = NULL;
 
        return 0;
 }
@@ -1965,10 +1978,17 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rdlen, rctl, rxcsum, ctrl_ext;
 
-       rdlen = adapter->rx_ring[0].count *
-               sizeof(struct e1000_rx_desc);
-       adapter->clean_rx = e1000_clean_rx_irq;
-       adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
+       if (adapter->netdev->mtu > ETH_DATA_LEN) {
+               rdlen = adapter->rx_ring[0].count *
+                       sizeof(struct e1000_rx_desc);
+               adapter->clean_rx = e1000_clean_jumbo_rx_irq;
+               adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
+       } else {
+               rdlen = adapter->rx_ring[0].count *
+                       sizeof(struct e1000_rx_desc);
+               adapter->clean_rx = e1000_clean_rx_irq;
+               adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
+       }
 
        /* disable receives while setting up the descriptors */
        rctl = er32(RCTL);
@@ -2182,22 +2202,39 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
        /* Free all the Rx ring sk_buffs */
        for (i = 0; i < rx_ring->count; i++) {
                buffer_info = &rx_ring->buffer_info[i];
-               if (buffer_info->skb) {
-                       pci_unmap_single(pdev,
-                                        buffer_info->dma,
-                                        buffer_info->length,
-                                        PCI_DMA_FROMDEVICE);
+               if (buffer_info->dma &&
+                   adapter->clean_rx == e1000_clean_rx_irq) {
+                       pci_unmap_single(pdev, buffer_info->dma,
+                                        buffer_info->length,
+                                        PCI_DMA_FROMDEVICE);
+               } else if (buffer_info->dma &&
+                          adapter->clean_rx == e1000_clean_jumbo_rx_irq) {
+                       pci_unmap_page(pdev, buffer_info->dma,
+                                      buffer_info->length,
+                                      PCI_DMA_FROMDEVICE);
+               }
 
+               buffer_info->dma = 0;
+               if (buffer_info->page) {
+                       put_page(buffer_info->page);
+                       buffer_info->page = NULL;
+               }
+               if (buffer_info->skb) {
                        dev_kfree_skb(buffer_info->skb);
                        buffer_info->skb = NULL;
                }
        }
 
+       /* there also may be some cached data from a chained receive */
+       if (rx_ring->rx_skb_top) {
+               dev_kfree_skb(rx_ring->rx_skb_top);
+               rx_ring->rx_skb_top = NULL;
+       }
+
        size = sizeof(struct e1000_buffer) * rx_ring->count;
        memset(rx_ring->buffer_info, 0, size);
 
        /* Zero out the descriptor ring */
-
        memset(rx_ring->desc, 0, rx_ring->size);
 
        rx_ring->next_to_clean = 0;
@@ -2327,7 +2364,8 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       struct dev_addr_list *uc_ptr;
+       struct netdev_hw_addr *ha;
+       bool use_uc = false;
        struct dev_addr_list *mc_ptr;
        u32 rctl;
        u32 hash_value;
@@ -2366,12 +2404,11 @@ static void e1000_set_rx_mode(struct net_device *netdev)
                        rctl |= E1000_RCTL_VFE;
        }
 
-       uc_ptr = NULL;
-       if (netdev->uc_count > rar_entries - 1) {
+       if (netdev->uc.count > rar_entries - 1) {
                rctl |= E1000_RCTL_UPE;
        } else if (!(netdev->flags & IFF_PROMISC)) {
                rctl &= ~E1000_RCTL_UPE;
-               uc_ptr = netdev->uc_list;
+               use_uc = true;
        }
 
        ew32(RCTL, rctl);
@@ -2389,13 +2426,20 @@ static void e1000_set_rx_mode(struct net_device *netdev)
         * if there are not 14 addresses, go ahead and clear the filters
         * -- with 82571 controllers only 0-13 entries are filled here
         */
+       i = 1;
+       if (use_uc)
+               list_for_each_entry(ha, &netdev->uc.list, list) {
+                       if (i == rar_entries)
+                               break;
+                       e1000_rar_set(hw, ha->addr, i++);
+               }
+
+       WARN_ON(i == rar_entries);
+
        mc_ptr = netdev->mc_list;
 
-       for (i = 1; i < rar_entries; i++) {
-               if (uc_ptr) {
-                       e1000_rar_set(hw, uc_ptr->da_addr, i);
-                       uc_ptr = uc_ptr->next;
-               } else if (mc_ptr) {
+       for (; i < rar_entries; i++) {
+               if (mc_ptr) {
                        e1000_rar_set(hw, mc_ptr->da_addr, i);
                        mc_ptr = mc_ptr->next;
                } else {
@@ -2405,7 +2449,6 @@ static void e1000_set_rx_mode(struct net_device *netdev)
                        E1000_WRITE_FLUSH();
                }
        }
-       WARN_ON(uc_ptr != NULL);
 
        /* load any remaining addresses into the hash table */
 
@@ -2590,7 +2633,6 @@ static void e1000_watchdog(unsigned long data)
                        ew32(TCTL, tctl);
 
                        netif_carrier_on(netdev);
-                       netif_wake_queue(netdev);
                        mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ));
                        adapter->smartspeed = 0;
                } else {
@@ -2607,7 +2649,6 @@ static void e1000_watchdog(unsigned long data)
                        printk(KERN_INFO "e1000: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_stop_queue(netdev);
                        mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ));
 
                        /* 80003ES2LAN workaround--
@@ -2645,6 +2686,8 @@ static void e1000_watchdog(unsigned long data)
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2989,7 +3032,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
                        size -= 4;
 
                buffer_info->length = size;
-               buffer_info->dma = map[0] + offset;
+               buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
                buffer_info->time_stamp = jiffies;
                buffer_info->next_to_watch = i;
 
@@ -3030,7 +3073,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
                                size -= 4;
 
                        buffer_info->length = size;
-                       buffer_info->dma = map[f + 1] + offset;
+                       buffer_info->dma = map[f] + offset;
                        buffer_info->time_stamp = jiffies;
                        buffer_info->next_to_watch = i;
 
@@ -3362,7 +3405,6 @@ static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
        if (count) {
                e1000_tx_queue(adapter, tx_ring, tx_flags, count);
-               netdev->trans_start = jiffies;
                /* Make sure there is space in the ring for the next send. */
                e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
 
@@ -3438,7 +3480,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
        switch (hw->mac_type) {
        case e1000_undefined ... e1000_82542_rev2_1:
        case e1000_ich8lan:
-               if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) {
+               if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) {
                        DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n");
                        return -EINVAL;
                }
@@ -3451,7 +3493,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
                                  &eeprom_data);
                if ((hw->device_id != E1000_DEV_ID_82573L) ||
                    (eeprom_data & EEPROM_WORD1A_ASPM_MASK)) {
-                       if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) {
+                       if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) {
                                DPRINTK(PROBE, ERR,
                                        "Jumbo Frames not supported.\n");
                                return -EINVAL;
@@ -3477,8 +3519,10 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 
        /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
         * means we reserve 2 more, this pushes us to allocate from the next
-        * larger slab size
-        * i.e. RXBUFFER_2048 --> size-4096 slab */
+        * larger slab size.
+        * i.e. RXBUFFER_2048 --> size-4096 slab
+        *  however with the new *_jumbo_rx* routines, jumbo receives will use
+        *  fragmented skbs */
 
        if (max_frame <= E1000_RXBUFFER_256)
                adapter->rx_buffer_len = E1000_RXBUFFER_256;
@@ -3488,16 +3532,16 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
                adapter->rx_buffer_len = E1000_RXBUFFER_1024;
        else if (max_frame <= E1000_RXBUFFER_2048)
                adapter->rx_buffer_len = E1000_RXBUFFER_2048;
-       else if (max_frame <= E1000_RXBUFFER_4096)
-               adapter->rx_buffer_len = E1000_RXBUFFER_4096;
-       else if (max_frame <= E1000_RXBUFFER_8192)
-               adapter->rx_buffer_len = E1000_RXBUFFER_8192;
-       else if (max_frame <= E1000_RXBUFFER_16384)
+       else
+#if (PAGE_SIZE >= E1000_RXBUFFER_16384)
                adapter->rx_buffer_len = E1000_RXBUFFER_16384;
+#elif (PAGE_SIZE >= E1000_RXBUFFER_4096)
+               adapter->rx_buffer_len = PAGE_SIZE;
+#endif
 
        /* adjust allocation if LPE protects us, and we aren't using SBP */
        if (!hw->tbi_compatibility_on &&
-           ((max_frame == MAXIMUM_ETHERNET_FRAME_SIZE) ||
+           ((max_frame == (ETH_FRAME_LEN + ETH_FCS_LEN)) ||
             (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE)))
                adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
 
@@ -3738,7 +3782,7 @@ static irqreturn_t e1000_intr(int irq, void *data)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl, icr = er32(ICR);
 
-       if (unlikely((!icr) || test_bit(__E1000_RESETTING, &adapter->flags)))
+       if (unlikely((!icr) || test_bit(__E1000_DOWN, &adapter->flags)))
                return IRQ_NONE;  /* Not our interrupt */
 
        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
@@ -3975,9 +4019,227 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
 }
 
 /**
+ * e1000_consume_page - helper function
+ **/
+static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb,
+                               u16 length)
+{
+       bi->page = NULL;
+       skb->len += length;
+       skb->data_len += length;
+       skb->truesize += length;
+}
+
+/**
+ * e1000_receive_skb - helper function to handle rx indications
+ * @adapter: board private structure
+ * @status: descriptor status field as written by hardware
+ * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @skb: pointer to sk_buff to be indicated to stack
+ */
+static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status,
+                             __le16 vlan, struct sk_buff *skb)
+{
+       if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) {
+               vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
+                                        le16_to_cpu(vlan) &
+                                        E1000_RXD_SPC_VLAN_MASK);
+       } else {
+               netif_receive_skb(skb);
+       }
+}
+
+/**
+ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ * @rx_ring: ring to clean
+ * @work_done: amount of napi work completed this call
+ * @work_to_do: max amount of work allowed for this call to do
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ */
+static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
+                                    struct e1000_rx_ring *rx_ring,
+                                    int *work_done, int work_to_do)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       struct net_device *netdev = adapter->netdev;
+       struct pci_dev *pdev = adapter->pdev;
+       struct e1000_rx_desc *rx_desc, *next_rxd;
+       struct e1000_buffer *buffer_info, *next_buffer;
+       unsigned long irq_flags;
+       u32 length;
+       unsigned int i;
+       int cleaned_count = 0;
+       bool cleaned = false;
+       unsigned int total_rx_bytes=0, total_rx_packets=0;
+
+       i = rx_ring->next_to_clean;
+       rx_desc = E1000_RX_DESC(*rx_ring, i);
+       buffer_info = &rx_ring->buffer_info[i];
+
+       while (rx_desc->status & E1000_RXD_STAT_DD) {
+               struct sk_buff *skb;
+               u8 status;
+
+               if (*work_done >= work_to_do)
+                       break;
+               (*work_done)++;
+
+               status = rx_desc->status;
+               skb = buffer_info->skb;
+               buffer_info->skb = NULL;
+
+               if (++i == rx_ring->count) i = 0;
+               next_rxd = E1000_RX_DESC(*rx_ring, i);
+               prefetch(next_rxd);
+
+               next_buffer = &rx_ring->buffer_info[i];
+
+               cleaned = true;
+               cleaned_count++;
+               pci_unmap_page(pdev, buffer_info->dma, buffer_info->length,
+                              PCI_DMA_FROMDEVICE);
+               buffer_info->dma = 0;
+
+               length = le16_to_cpu(rx_desc->length);
+
+               /* errors is only valid for DD + EOP descriptors */
+               if (unlikely((status & E1000_RXD_STAT_EOP) &&
+                   (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) {
+                       u8 last_byte = *(skb->data + length - 1);
+                       if (TBI_ACCEPT(hw, status, rx_desc->errors, length,
+                                      last_byte)) {
+                               spin_lock_irqsave(&adapter->stats_lock,
+                                                 irq_flags);
+                               e1000_tbi_adjust_stats(hw, &adapter->stats,
+                                                      length, skb->data);
+                               spin_unlock_irqrestore(&adapter->stats_lock,
+                                                      irq_flags);
+                               length--;
+                       } else {
+                               /* recycle both page and skb */
+                               buffer_info->skb = skb;
+                               /* an error means any chain goes out the window
+                                * too */
+                               if (rx_ring->rx_skb_top)
+                                       dev_kfree_skb(rx_ring->rx_skb_top);
+                               rx_ring->rx_skb_top = NULL;
+                               goto next_desc;
+                       }
+               }
+
+#define rxtop rx_ring->rx_skb_top
+               if (!(status & E1000_RXD_STAT_EOP)) {
+                       /* this descriptor is only the beginning (or middle) */
+                       if (!rxtop) {
+                               /* this is the beginning of a chain */
+                               rxtop = skb;
+                               skb_fill_page_desc(rxtop, 0, buffer_info->page,
+                                                  0, length);
+                       } else {
+                               /* this is the middle of a chain */
+                               skb_fill_page_desc(rxtop,
+                                   skb_shinfo(rxtop)->nr_frags,
+                                   buffer_info->page, 0, length);
+                               /* re-use the skb, only consumed the page */
+                               buffer_info->skb = skb;
+                       }
+                       e1000_consume_page(buffer_info, rxtop, length);
+                       goto next_desc;
+               } else {
+                       if (rxtop) {
+                               /* end of the chain */
+                               skb_fill_page_desc(rxtop,
+                                   skb_shinfo(rxtop)->nr_frags,
+                                   buffer_info->page, 0, length);
+                               /* re-use the current skb, we only consumed the
+                                * page */
+                               buffer_info->skb = skb;
+                               skb = rxtop;
+                               rxtop = NULL;
+                               e1000_consume_page(buffer_info, skb, length);
+                       } else {
+                               /* no chain, got EOP, this buf is the packet
+                                * copybreak to save the put_page/alloc_page */
+                               if (length <= copybreak &&
+                                   skb_tailroom(skb) >= length) {
+                                       u8 *vaddr;
+                                       vaddr = kmap_atomic(buffer_info->page,
+                                                           KM_SKB_DATA_SOFTIRQ);
+                                       memcpy(skb_tail_pointer(skb), vaddr, length);
+                                       kunmap_atomic(vaddr,
+                                                     KM_SKB_DATA_SOFTIRQ);
+                                       /* re-use the page, so don't erase
+                                        * buffer_info->page */
+                                       skb_put(skb, length);
+                               } else {
+                                       skb_fill_page_desc(skb, 0,
+                                                          buffer_info->page, 0,
+                                                          length);
+                                       e1000_consume_page(buffer_info, skb,
+                                                          length);
+                               }
+                       }
+               }
+
+               /* Receive Checksum Offload XXX recompute due to CRC strip? */
+               e1000_rx_checksum(adapter,
+                                 (u32)(status) |
+                                 ((u32)(rx_desc->errors) << 24),
+                                 le16_to_cpu(rx_desc->csum), skb);
+
+               pskb_trim(skb, skb->len - 4);
+
+               /* probably a little skewed due to removing CRC */
+               total_rx_bytes += skb->len;
+               total_rx_packets++;
+
+               /* eth type trans needs skb->data to point to something */
+               if (!pskb_may_pull(skb, ETH_HLEN)) {
+                       DPRINTK(DRV, ERR, "pskb_may_pull failed.\n");
+                       dev_kfree_skb(skb);
+                       goto next_desc;
+               }
+
+               skb->protocol = eth_type_trans(skb, netdev);
+
+               e1000_receive_skb(adapter, status, rx_desc->special, skb);
+
+next_desc:
+               rx_desc->status = 0;
+
+               /* return some buffers to hardware, one at a time is too slow */
+               if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+                       adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               /* use prefetched values */
+               rx_desc = next_rxd;
+               buffer_info = next_buffer;
+       }
+       rx_ring->next_to_clean = i;
+
+       cleaned_count = E1000_DESC_UNUSED(rx_ring);
+       if (cleaned_count)
+               adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+
+       adapter->total_rx_packets += total_rx_packets;
+       adapter->total_rx_bytes += total_rx_bytes;
+       adapter->net_stats.rx_bytes += total_rx_bytes;
+       adapter->net_stats.rx_packets += total_rx_packets;
+       return cleaned;
+}
+
+/**
  * e1000_clean_rx_irq - Send received data up the network stack; legacy
  * @adapter: board private structure
- **/
+ * @rx_ring: ring to clean
+ * @work_done: amount of napi work completed this call
+ * @work_to_do: max amount of work allowed for this call to do
+ */
 static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
                               struct e1000_rx_ring *rx_ring,
                               int *work_done, int work_to_do)
@@ -3989,7 +4251,6 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
        struct e1000_buffer *buffer_info, *next_buffer;
        unsigned long flags;
        u32 length;
-       u8 last_byte;
        unsigned int i;
        int cleaned_count = 0;
        bool cleaned = false;
@@ -4021,14 +4282,14 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 
                cleaned = true;
                cleaned_count++;
-               pci_unmap_single(pdev,
-                                buffer_info->dma,
-                                buffer_info->length,
+               pci_unmap_single(pdev, buffer_info->dma, buffer_info->length,
                                 PCI_DMA_FROMDEVICE);
+               buffer_info->dma = 0;
 
                length = le16_to_cpu(rx_desc->length);
-
-               if (unlikely(!(status & E1000_RXD_STAT_EOP))) {
+               /* !EOP means multiple descriptors were used to store a single
+                * packet, also make sure the frame isn't just CRC only */
+               if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) {
                        /* All receives must fit into a single buffer */
                        E1000_DBG("%s: Receive packet consumed multiple"
                                  " buffers\n", netdev->name);
@@ -4038,7 +4299,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
                }
 
                if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
-                       last_byte = *(skb->data + length - 1);
+                       u8 last_byte = *(skb->data + length - 1);
                        if (TBI_ACCEPT(hw, status, rx_desc->errors, length,
                                       last_byte)) {
                                spin_lock_irqsave(&adapter->stats_lock, flags);
@@ -4093,13 +4354,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 
                skb->protocol = eth_type_trans(skb, netdev);
 
-               if (unlikely(adapter->vlgrp &&
-                           (status & E1000_RXD_STAT_VP))) {
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                                le16_to_cpu(rx_desc->special));
-               } else {
-                       netif_receive_skb(skb);
-               }
+               e1000_receive_skb(adapter, status, rx_desc->special, skb);
 
 next_desc:
                rx_desc->status = 0;
@@ -4128,6 +4383,114 @@ next_desc:
 }
 
 /**
+ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers
+ * @adapter: address of board private structure
+ * @rx_ring: pointer to receive ring structure
+ * @cleaned_count: number of buffers to allocate this pass
+ **/
+
+static void
+e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
+                             struct e1000_rx_ring *rx_ring, int cleaned_count)
+{
+       struct net_device *netdev = adapter->netdev;
+       struct pci_dev *pdev = adapter->pdev;
+       struct e1000_rx_desc *rx_desc;
+       struct e1000_buffer *buffer_info;
+       struct sk_buff *skb;
+       unsigned int i;
+       unsigned int bufsz = 256 -
+                            16 /*for skb_reserve */ -
+                            NET_IP_ALIGN;
+
+       i = rx_ring->next_to_use;
+       buffer_info = &rx_ring->buffer_info[i];
+
+       while (cleaned_count--) {
+               skb = buffer_info->skb;
+               if (skb) {
+                       skb_trim(skb, 0);
+                       goto check_page;
+               }
+
+               skb = netdev_alloc_skb(netdev, bufsz);
+               if (unlikely(!skb)) {
+                       /* Better luck next round */
+                       adapter->alloc_rx_buff_failed++;
+                       break;
+               }
+
+               /* Fix for errata 23, can't cross 64kB boundary */
+               if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) {
+                       struct sk_buff *oldskb = skb;
+                       DPRINTK(PROBE, ERR, "skb align check failed: %u bytes "
+                                            "at %p\n", bufsz, skb->data);
+                       /* Try again, without freeing the previous */
+                       skb = netdev_alloc_skb(netdev, bufsz);
+                       /* Failed allocation, critical failure */
+                       if (!skb) {
+                               dev_kfree_skb(oldskb);
+                               adapter->alloc_rx_buff_failed++;
+                               break;
+                       }
+
+                       if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) {
+                               /* give up */
+                               dev_kfree_skb(skb);
+                               dev_kfree_skb(oldskb);
+                               break; /* while (cleaned_count--) */
+                       }
+
+                       /* Use new allocation */
+                       dev_kfree_skb(oldskb);
+               }
+               /* Make buffer alignment 2 beyond a 16 byte boundary
+                * this will result in a 16 byte aligned IP header after
+                * the 14 byte MAC header is removed
+                */
+               skb_reserve(skb, NET_IP_ALIGN);
+
+               buffer_info->skb = skb;
+               buffer_info->length = adapter->rx_buffer_len;
+check_page:
+               /* allocate a new page if necessary */
+               if (!buffer_info->page) {
+                       buffer_info->page = alloc_page(GFP_ATOMIC);
+                       if (unlikely(!buffer_info->page)) {
+                               adapter->alloc_rx_buff_failed++;
+                               break;
+                       }
+               }
+
+               if (!buffer_info->dma)
+                       buffer_info->dma = pci_map_page(pdev,
+                                                       buffer_info->page, 0,
+                                                       buffer_info->length,
+                                                       PCI_DMA_FROMDEVICE);
+
+               rx_desc = E1000_RX_DESC(*rx_ring, i);
+               rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+
+               if (unlikely(++i == rx_ring->count))
+                       i = 0;
+               buffer_info = &rx_ring->buffer_info[i];
+       }
+
+       if (likely(rx_ring->next_to_use != i)) {
+               rx_ring->next_to_use = i;
+               if (unlikely(i-- == 0))
+                       i = (rx_ring->count - 1);
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.  (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64). */
+               wmb();
+               writel(i, adapter->hw.hw_addr + rx_ring->rdt);
+       }
+}
+
+/**
  * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
  * @adapter: address of board private structure
  **/
@@ -4172,6 +4535,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
                        /* Failed allocation, critical failure */
                        if (!skb) {
                                dev_kfree_skb(oldskb);
+                               adapter->alloc_rx_buff_failed++;
                                break;
                        }
 
@@ -4179,6 +4543,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
                                /* give up */
                                dev_kfree_skb(skb);
                                dev_kfree_skb(oldskb);
+                               adapter->alloc_rx_buff_failed++;
                                break; /* while !buffer_info->skb */
                        }
 
@@ -4196,9 +4561,14 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
 map_skb:
                buffer_info->dma = pci_map_single(pdev,
                                                  skb->data,
-                                                 adapter->rx_buffer_len,
+                                                 buffer_info->length,
                                                  PCI_DMA_FROMDEVICE);
 
+               /*
+                * XXX if it was allocated cleanly it will never map to a
+                * boundary crossing
+                */
+
                /* Fix for errata 23, can't cross 64kB boundary */
                if (!e1000_check_64k_bound(adapter,
                                        (void *)(unsigned long)buffer_info->dma,
@@ -4213,7 +4583,9 @@ map_skb:
                        pci_unmap_single(pdev, buffer_info->dma,
                                         adapter->rx_buffer_len,
                                         PCI_DMA_FROMDEVICE);
+                       buffer_info->dma = 0;
 
+                       adapter->alloc_rx_buff_failed++;
                        break; /* while !buffer_info->skb */
                }
                rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -4808,6 +5180,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 
        netif_device_detach(netdev);
 
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
        if (netif_running(netdev))
                e1000_down(adapter);
        pci_disable_device(pdev);