cnic: Convert cnic_local_flags to atomic ops.
[safe/jmp/linux-2.6] / drivers / net / forcedeth.c
index 037d870..268ea4d 100644 (file)
@@ -3,8 +3,7 @@
  *
  * Note: This driver is a cleanroom reimplementation based on reverse
  *      engineered documentation written by Carl-Daniel Hailfinger
- *      and Andrew de Quincey. It's neither supported nor endorsed
- *      by NVIDIA Corp. Use at your own risk.
+ *      and Andrew de Quincey.
  *
  * NVIDIA, nForce and other NVIDIA marks are trademarks or registered
  * trademarks of NVIDIA Corporation in the United States and other
@@ -14,7 +13,7 @@
  * Copyright (C) 2004 Andrew de Quincey (wol support)
  * Copyright (C) 2004 Carl-Daniel Hailfinger (invalid MAC handling, insane
  *             IRQ rate fixes, bigendian fixes, cleanups, verification)
- * Copyright (c) 2004 NVIDIA Corporation
+ * Copyright (c) 2004,2005,2006,2007,2008,2009 NVIDIA Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
- * Changelog:
- *     0.01: 05 Oct 2003: First release that compiles without warnings.
- *     0.02: 05 Oct 2003: Fix bug for nv_drain_tx: do not try to free NULL skbs.
- *                        Check all PCI BARs for the register window.
- *                        udelay added to mii_rw.
- *     0.03: 06 Oct 2003: Initialize dev->irq.
- *     0.04: 07 Oct 2003: Initialize np->lock, reduce handled irqs, add printks.
- *     0.05: 09 Oct 2003: printk removed again, irq status print tx_timeout.
- *     0.06: 10 Oct 2003: MAC Address read updated, pff flag generation updated,
- *                        irq mask updated
- *     0.07: 14 Oct 2003: Further irq mask updates.
- *     0.08: 20 Oct 2003: rx_desc.Length initialization added, nv_alloc_rx refill
- *                        added into irq handler, NULL check for drain_ring.
- *     0.09: 20 Oct 2003: Basic link speed irq implementation. Only handle the
- *                        requested interrupt sources.
- *     0.10: 20 Oct 2003: First cleanup for release.
- *     0.11: 21 Oct 2003: hexdump for tx added, rx buffer sizes increased.
- *                        MAC Address init fix, set_multicast cleanup.
- *     0.12: 23 Oct 2003: Cleanups for release.
- *     0.13: 25 Oct 2003: Limit for concurrent tx packets increased to 10.
- *                        Set link speed correctly. start rx before starting
- *                        tx (nv_start_rx sets the link speed).
- *     0.14: 25 Oct 2003: Nic dependant irq mask.
- *     0.15: 08 Nov 2003: fix smp deadlock with set_multicast_list during
- *                        open.
- *     0.16: 15 Nov 2003: include file cleanup for ppc64, rx buffer size
- *                        increased to 1628 bytes.
- *     0.17: 16 Nov 2003: undo rx buffer size increase. Substract 1 from
- *                        the tx length.
- *     0.18: 17 Nov 2003: fix oops due to late initialization of dev_stats
- *     0.19: 29 Nov 2003: Handle RxNoBuf, detect & handle invalid mac
- *                        addresses, really stop rx if already running
- *                        in nv_start_rx, clean up a bit.
- *     0.20: 07 Dec 2003: alloc fixes
- *     0.21: 12 Jan 2004: additional alloc fix, nic polling fix.
- *     0.22: 19 Jan 2004: reprogram timer to a sane rate, avoid lockup
- *                        on close.
- *     0.23: 26 Jan 2004: various small cleanups
- *     0.24: 27 Feb 2004: make driver even less anonymous in backtraces
- *     0.25: 09 Mar 2004: wol support
- *     0.26: 03 Jun 2004: netdriver specific annotation, sparse-related fixes
- *     0.27: 19 Jun 2004: Gigabit support, new descriptor rings,
- *                        added CK804/MCP04 device IDs, code fixes
- *                        for registers, link status and other minor fixes.
- *     0.28: 21 Jun 2004: Big cleanup, making driver mostly endian safe
- *     0.29: 31 Aug 2004: Add backup timer for link change notification.
- *     0.30: 25 Sep 2004: rx checksum support for nf 250 Gb. Add rx reset
- *                        into nv_close, otherwise reenabling for wol can
- *                        cause DMA to kfree'd memory.
- *     0.31: 14 Nov 2004: ethtool support for getting/setting link
- *                        capabilities.
- *     0.32: 16 Apr 2005: RX_ERROR4 handling added.
- *     0.33: 16 May 2005: Support for MCP51 added.
- *     0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics.
- *     0.35: 26 Jun 2005: Support for MCP55 added.
- *     0.36: 28 Jun 2005: Add jumbo frame support.
- *     0.37: 10 Jul 2005: Additional ethtool support, cleanup of pci id list
- *     0.38: 16 Jul 2005: tx irq rewrite: Use global flags instead of
- *                        per-packet flags.
- *     0.39: 18 Jul 2005: Add 64bit descriptor support.
- *     0.40: 19 Jul 2005: Add support for mac address change.
- *     0.41: 30 Jul 2005: Write back original MAC in nv_close instead
- *                        of nv_remove
- *     0.42: 06 Aug 2005: Fix lack of link speed initialization
- *                        in the second (and later) nv_open call
- *     0.43: 10 Aug 2005: Add support for tx checksum.
- *     0.44: 20 Aug 2005: Add support for scatter gather and segmentation.
- *     0.45: 18 Sep 2005: Remove nv_stop/start_rx from every link check
- *     0.46: 20 Oct 2005: Add irq optimization modes.
- *     0.47: 26 Oct 2005: Add phyaddr 0 in phy scan.
- *     0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single
- *     0.49: 10 Dec 2005: Fix tso for large buffers.
- *     0.50: 20 Jan 2006: Add 8021pq tagging support.
- *     0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings.
- *     0.52: 20 Jan 2006: Add MSI/MSIX support.
- *     0.53: 19 Mar 2006: Fix init from low power mode and add hw reset.
- *     0.54: 21 Mar 2006: Fix spin locks for multi irqs and cleanup.
- *     0.55: 22 Mar 2006: Add flow control (pause frame).
- *     0.56: 22 Mar 2006: Additional ethtool config and moduleparam support.
- *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
  * This means recovery from netif_stop_queue only happens if the hw timer
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION              "0.56"
+#define FORCEDETH_VERSION              "0.64"
 #define DRV_NAME                       "forcedeth"
 
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/delay.h>
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/if_vlan.h>
 #include <linux/dma-mapping.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/io.h>
 #define dprintk(x...)          do { } while (0)
 #endif
 
+#define TX_WORK_PER_LOOP  64
+#define RX_WORK_PER_LOOP  64
 
 /*
  * Hardware access:
  */
 
-#define DEV_NEED_TIMERIRQ      0x0001  /* set the timer irq flag in the irq mask */
-#define DEV_NEED_LINKTIMER     0x0002  /* poll link settings. Relies on the timer irq */
-#define DEV_HAS_LARGEDESC      0x0004  /* device supports jumbo frames and needs packet format 2 */
-#define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
-#define DEV_HAS_CHECKSUM        0x0010  /* device supports tx and rx checksum offloads */
-#define DEV_HAS_VLAN            0x0020  /* device supports vlan tagging and striping */
-#define DEV_HAS_MSI             0x0040  /* device supports MSI */
-#define DEV_HAS_MSI_X           0x0080  /* device supports MSI-X */
-#define DEV_HAS_POWER_CNTRL     0x0100  /* device supports power savings */
-#define DEV_HAS_PAUSEFRAME_TX   0x0200  /* device supports tx pause frames */
-#define DEV_HAS_STATISTICS      0x0400  /* device supports hw statistics */
-#define DEV_HAS_TEST_EXTENDED   0x0800  /* device supports extended diagnostic test */
+#define DEV_NEED_TIMERIRQ          0x0000001  /* set the timer irq flag in the irq mask */
+#define DEV_NEED_LINKTIMER         0x0000002  /* poll link settings. Relies on the timer irq */
+#define DEV_HAS_LARGEDESC          0x0000004  /* device supports jumbo frames and needs packet format 2 */
+#define DEV_HAS_HIGH_DMA           0x0000008  /* device supports 64bit dma */
+#define DEV_HAS_CHECKSUM           0x0000010  /* device supports tx and rx checksum offloads */
+#define DEV_HAS_VLAN               0x0000020  /* device supports vlan tagging and striping */
+#define DEV_HAS_MSI                0x0000040  /* device supports MSI */
+#define DEV_HAS_MSI_X              0x0000080  /* device supports MSI-X */
+#define DEV_HAS_POWER_CNTRL        0x0000100  /* device supports power savings */
+#define DEV_HAS_STATISTICS_V1      0x0000200  /* device supports hw statistics version 1 */
+#define DEV_HAS_STATISTICS_V2      0x0000600  /* device supports hw statistics version 2 */
+#define DEV_HAS_STATISTICS_V3      0x0000e00  /* device supports hw statistics version 3 */
+#define DEV_HAS_TEST_EXTENDED      0x0001000  /* device supports extended diagnostic test */
+#define DEV_HAS_MGMT_UNIT          0x0002000  /* device supports management unit */
+#define DEV_HAS_CORRECT_MACADDR    0x0004000  /* device supports correct mac address order */
+#define DEV_HAS_COLLISION_FIX      0x0008000  /* device supports tx collision fix */
+#define DEV_HAS_PAUSEFRAME_TX_V1   0x0010000  /* device supports tx pause frames version 1 */
+#define DEV_HAS_PAUSEFRAME_TX_V2   0x0020000  /* device supports tx pause frames version 2 */
+#define DEV_HAS_PAUSEFRAME_TX_V3   0x0040000  /* device supports tx pause frames version 3 */
+#define DEV_NEED_TX_LIMIT          0x0080000  /* device needs to limit tx */
+#define DEV_NEED_TX_LIMIT2         0x0180000  /* device needs to limit tx, except for some revs */
+#define DEV_HAS_GEAR_MODE          0x0200000  /* device supports gear mode */
+#define DEV_NEED_PHY_INIT_FIX      0x0400000  /* device needs specific phy workaround */
+#define DEV_NEED_LOW_POWER_FIX     0x0800000  /* device needs special power up workaround */
+#define DEV_NEED_MSI_FIX           0x1000000  /* device needs msi workaround */
 
 enum {
        NvRegIrqStatus = 0x000,
 #define NVREG_IRQSTAT_MIIEVENT 0x040
-#define NVREG_IRQSTAT_MASK             0x1ff
+#define NVREG_IRQSTAT_MASK             0x83ff
        NvRegIrqMask = 0x004,
 #define NVREG_IRQ_RX_ERROR             0x0001
 #define NVREG_IRQ_RX                   0x0002
@@ -183,15 +119,12 @@ enum {
 #define NVREG_IRQ_LINK                 0x0040
 #define NVREG_IRQ_RX_FORCED            0x0080
 #define NVREG_IRQ_TX_FORCED            0x0100
+#define NVREG_IRQ_RECOVER_ERROR                0x8200
 #define NVREG_IRQMASK_THROUGHPUT       0x00df
-#define NVREG_IRQMASK_CPU              0x0040
+#define NVREG_IRQMASK_CPU              0x0060
 #define NVREG_IRQ_TX_ALL               (NVREG_IRQ_TX_ERR|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_FORCED)
 #define NVREG_IRQ_RX_ALL               (NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_RX_FORCED)
-#define NVREG_IRQ_OTHER                        (NVREG_IRQ_TIMER|NVREG_IRQ_LINK)
-
-#define NVREG_IRQ_UNKNOWN      (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \
-                                       NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_RX_FORCED| \
-                                       NVREG_IRQ_TX_FORCED))
+#define NVREG_IRQ_OTHER                        (NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_RECOVER_ERROR)
 
        NvRegUnknownSetupReg6 = 0x008,
 #define NVREG_UNKSETUP6_VAL            3
@@ -201,7 +134,7 @@ enum {
  * NVREG_POLL_DEFAULT=97 would result in an interval length of 1 ms
  */
        NvRegPollingInterval = 0x00c,
-#define NVREG_POLL_DEFAULT_THROUGHPUT  970
+#define NVREG_POLL_DEFAULT_THROUGHPUT  65535 /* backup tx cleanup if loop max reached */
 #define NVREG_POLL_DEFAULT_CPU 13
        NvRegMSIMap0 = 0x020,
        NvRegMSIMap1 = 0x024,
@@ -212,10 +145,23 @@ enum {
 #define NVREG_MISC1_HD         0x02
 #define NVREG_MISC1_FORCE      0x3b0f3c
 
-       NvRegMacReset = 0x3c,
+       NvRegMacReset = 0x34,
 #define NVREG_MAC_RESET_ASSERT 0x0F3
        NvRegTransmitterControl = 0x084,
 #define NVREG_XMITCTL_START    0x01
+#define NVREG_XMITCTL_MGMT_ST  0x40000000
+#define NVREG_XMITCTL_SYNC_MASK                0x000f0000
+#define NVREG_XMITCTL_SYNC_NOT_READY   0x0
+#define NVREG_XMITCTL_SYNC_PHY_INIT    0x00040000
+#define NVREG_XMITCTL_MGMT_SEMA_MASK   0x00000f00
+#define NVREG_XMITCTL_MGMT_SEMA_FREE   0x0
+#define NVREG_XMITCTL_HOST_SEMA_MASK   0x0000f000
+#define NVREG_XMITCTL_HOST_SEMA_ACQ    0x0000f000
+#define NVREG_XMITCTL_HOST_LOADED      0x00004000
+#define NVREG_XMITCTL_TX_PATH_EN       0x01000000
+#define NVREG_XMITCTL_DATA_START       0x00100000
+#define NVREG_XMITCTL_DATA_READY       0x00010000
+#define NVREG_XMITCTL_DATA_ERROR       0x00020000
        NvRegTransmitterStatus = 0x088,
 #define NVREG_XMITSTAT_BUSY    0x01
 
@@ -231,36 +177,52 @@ enum {
 #define NVREG_OFFLOAD_NORMAL   RX_NIC_BUFSIZE
        NvRegReceiverControl = 0x094,
 #define NVREG_RCVCTL_START     0x01
+#define NVREG_RCVCTL_RX_PATH_EN        0x01000000
        NvRegReceiverStatus = 0x98,
 #define NVREG_RCVSTAT_BUSY     0x01
 
-       NvRegRandomSeed = 0x9c,
-#define NVREG_RNDSEED_MASK     0x00ff
-#define NVREG_RNDSEED_FORCE    0x7f00
-#define NVREG_RNDSEED_FORCE2   0x2d00
-#define NVREG_RNDSEED_FORCE3   0x7400
-
-       NvRegUnknownSetupReg1 = 0xA0,
-#define NVREG_UNKSETUP1_VAL    0x16070f
-       NvRegUnknownSetupReg2 = 0xA4,
-#define NVREG_UNKSETUP2_VAL    0x16
+       NvRegSlotTime = 0x9c,
+#define NVREG_SLOTTIME_LEGBF_ENABLED   0x80000000
+#define NVREG_SLOTTIME_10_100_FULL     0x00007f00
+#define NVREG_SLOTTIME_1000_FULL       0x0003ff00
+#define NVREG_SLOTTIME_HALF            0x0000ff00
+#define NVREG_SLOTTIME_DEFAULT         0x00007f00
+#define NVREG_SLOTTIME_MASK            0x000000ff
+
+       NvRegTxDeferral = 0xA0,
+#define NVREG_TX_DEFERRAL_DEFAULT              0x15050f
+#define NVREG_TX_DEFERRAL_RGMII_10_100         0x16070f
+#define NVREG_TX_DEFERRAL_RGMII_1000           0x14050f
+#define NVREG_TX_DEFERRAL_RGMII_STRETCH_10     0x16190f
+#define NVREG_TX_DEFERRAL_RGMII_STRETCH_100    0x16300f
+#define NVREG_TX_DEFERRAL_MII_STRETCH          0x152000
+       NvRegRxDeferral = 0xA4,
+#define NVREG_RX_DEFERRAL_DEFAULT      0x16
        NvRegMacAddrA = 0xA8,
        NvRegMacAddrB = 0xAC,
        NvRegMulticastAddrA = 0xB0,
 #define NVREG_MCASTADDRA_FORCE 0x01
        NvRegMulticastAddrB = 0xB4,
        NvRegMulticastMaskA = 0xB8,
+#define NVREG_MCASTMASKA_NONE          0xffffffff
        NvRegMulticastMaskB = 0xBC,
+#define NVREG_MCASTMASKB_NONE          0xffff
 
        NvRegPhyInterface = 0xC0,
 #define PHY_RGMII              0x10000000
+       NvRegBackOffControl = 0xC4,
+#define NVREG_BKOFFCTRL_DEFAULT                        0x70000000
+#define NVREG_BKOFFCTRL_SEED_MASK              0x000003ff
+#define NVREG_BKOFFCTRL_SELECT                 24
+#define NVREG_BKOFFCTRL_GEAR                   12
 
        NvRegTxRingPhysAddr = 0x100,
        NvRegRxRingPhysAddr = 0x104,
        NvRegRingSizes = 0x108,
 #define NVREG_RINGSZ_TXSHIFT 0
 #define NVREG_RINGSZ_RXSHIFT 16
-       NvRegUnknownTransmitterReg = 0x10c,
+       NvRegTransmitPoll = 0x10c,
+#define NVREG_TRANSMITPOLL_MAC_ADDR_REV        0x00008000
        NvRegLinkSpeed = 0x110,
 #define NVREG_LINKSPEED_FORCE 0x10000
 #define NVREG_LINKSPEED_10     1000
@@ -269,8 +231,10 @@ enum {
 #define NVREG_LINKSPEED_MASK   (0xFFF)
        NvRegUnknownSetupReg5 = 0x130,
 #define NVREG_UNKSETUP5_BIT31  (1<<31)
-       NvRegUnknownSetupReg3 = 0x13c,
-#define NVREG_UNKSETUP3_VAL1   0x200010
+       NvRegTxWatermark = 0x13c,
+#define NVREG_TX_WM_DESC1_DEFAULT      0x0200010
+#define NVREG_TX_WM_DESC2_3_DEFAULT    0x1e08000
+#define NVREG_TX_WM_DESC2_3_1000       0xfe08000
        NvRegTxRxControl = 0x144,
 #define NVREG_TXRXCTL_KICK     0x0001
 #define NVREG_TXRXCTL_BIT1     0x0002
@@ -279,22 +243,26 @@ enum {
 #define NVREG_TXRXCTL_RESET    0x0010
 #define NVREG_TXRXCTL_RXCHECK  0x0400
 #define NVREG_TXRXCTL_DESC_1   0
-#define NVREG_TXRXCTL_DESC_2   0x02100
-#define NVREG_TXRXCTL_DESC_3   0x02200
+#define NVREG_TXRXCTL_DESC_2   0x002100
+#define NVREG_TXRXCTL_DESC_3   0xc02200
 #define NVREG_TXRXCTL_VLANSTRIP 0x00040
 #define NVREG_TXRXCTL_VLANINS  0x00080
        NvRegTxRingPhysAddrHigh = 0x148,
        NvRegRxRingPhysAddrHigh = 0x14C,
        NvRegTxPauseFrame = 0x170,
-#define NVREG_TX_PAUSEFRAME_DISABLE    0x1ff0080
-#define NVREG_TX_PAUSEFRAME_ENABLE     0x0c00030
+#define NVREG_TX_PAUSEFRAME_DISABLE    0x0fff0080
+#define NVREG_TX_PAUSEFRAME_ENABLE_V1  0x01800010
+#define NVREG_TX_PAUSEFRAME_ENABLE_V2  0x056003f0
+#define NVREG_TX_PAUSEFRAME_ENABLE_V3  0x09f00880
+       NvRegTxPauseFrameLimit = 0x174,
+#define NVREG_TX_PAUSEFRAMELIMIT_ENABLE        0x00010000
        NvRegMIIStatus = 0x180,
 #define NVREG_MIISTAT_ERROR            0x0001
 #define NVREG_MIISTAT_LINKCHANGE       0x0008
-#define NVREG_MIISTAT_MASK             0x000f
-#define NVREG_MIISTAT_MASK2            0x000f
-       NvRegUnknownSetupReg4 = 0x184,
-#define NVREG_UNKSETUP4_VAL    8
+#define NVREG_MIISTAT_MASK_RW          0x0007
+#define NVREG_MIISTAT_MASK_ALL         0x000f
+       NvRegMIIMask = 0x184,
+#define NVREG_MII_LINKCHANGE           0x0008
 
        NvRegAdapterControl = 0x188,
 #define NVREG_ADAPTCTL_START   0x02
@@ -310,6 +278,9 @@ enum {
 #define NVREG_MIICTL_WRITE     0x00400
 #define NVREG_MIICTL_ADDRSHIFT 5
        NvRegMIIData = 0x194,
+       NvRegTxUnicast = 0x1a0,
+       NvRegTxMulticast = 0x1a4,
+       NvRegTxBroadcast = 0x1a8,
        NvRegWakeUpFlags = 0x200,
 #define NVREG_WAKEUPFLAGS_VAL          0x7770
 #define NVREG_WAKEUPFLAGS_BUSYSHIFT    24
@@ -323,8 +294,10 @@ enum {
 #define NVREG_WAKEUPFLAGS_ACCEPT_LINKCHANGE    0x04
 #define NVREG_WAKEUPFLAGS_ENABLE       0x1111
 
-       NvRegPatternCRC = 0x204,
-       NvRegPatternMask = 0x208,
+       NvRegMgmtUnitGetVersion = 0x204,
+#define NVREG_MGMTUNITGETVERSION       0x01
+       NvRegMgmtUnitVersion = 0x208,
+#define NVREG_MGMTUNITVERSION          0x08
        NvRegPowerCap = 0x268,
 #define NVREG_POWERCAP_D3SUPP  (1<<30)
 #define NVREG_POWERCAP_D2SUPP  (1<<26)
@@ -337,6 +310,8 @@ enum {
 #define NVREG_POWERSTATE_D1            0x0001
 #define NVREG_POWERSTATE_D2            0x0002
 #define NVREG_POWERSTATE_D3            0x0003
+       NvRegMgmtUnitControl = 0x278,
+#define NVREG_MGMTUNITCONTROL_INUSE    0x20000
        NvRegTxCnt = 0x280,
        NvRegTxZeroReXmt = 0x284,
        NvRegTxOneReXmt = 0x288,
@@ -371,27 +346,29 @@ enum {
        NvRegMSIXIrqStatus = 0x3f0,
 
        NvRegPowerState2 = 0x600,
-#define NVREG_POWERSTATE2_POWERUP_MASK         0x0F11
+#define NVREG_POWERSTATE2_POWERUP_MASK         0x0F15
 #define NVREG_POWERSTATE2_POWERUP_REV_A3       0x0001
+#define NVREG_POWERSTATE2_PHY_RESET            0x0004
+#define NVREG_POWERSTATE2_GATE_CLOCKS          0x0F00
 };
 
 /* Big endian: should work, but is untested */
 struct ring_desc {
-       u32 PacketBuffer;
-       u32 FlagLen;
+       __le32 buf;
+       __le32 flaglen;
 };
 
 struct ring_desc_ex {
-       u32 PacketBufferHigh;
-       u32 PacketBufferLow;
-       u32 TxVlan;
-       u32 FlagLen;
+       __le32 bufhigh;
+       __le32 buflow;
+       __le32 txvlan;
+       __le32 flaglen;
 };
 
-typedef union _ring_type {
+union ring_type {
        struct ring_desc* orig;
        struct ring_desc_ex* ex;
-} ring_type;
+};
 
 #define FLAG_MASK_V1 0xffff0000
 #define FLAG_MASK_V2 0xffffc000
@@ -400,6 +377,7 @@ typedef union _ring_type {
 
 #define NV_TX_LASTPACKET       (1<<16)
 #define NV_TX_RETRYERROR       (1<<19)
+#define NV_TX_RETRYCOUNT_MASK  (0xF<<20)
 #define NV_TX_FORCED_INTERRUPT (1<<24)
 #define NV_TX_DEFERRED         (1<<26)
 #define NV_TX_CARRIERLOST      (1<<27)
@@ -410,6 +388,7 @@ typedef union _ring_type {
 
 #define NV_TX2_LASTPACKET      (1<<29)
 #define NV_TX2_RETRYERROR      (1<<18)
+#define NV_TX2_RETRYCOUNT_MASK (0xF<<19)
 #define NV_TX2_FORCED_INTERRUPT        (1<<30)
 #define NV_TX2_DEFERRED                (1<<25)
 #define NV_TX2_CARRIERLOST     (1<<26)
@@ -439,11 +418,12 @@ typedef union _ring_type {
 #define NV_RX_FRAMINGERR       (1<<29)
 #define NV_RX_ERROR            (1<<30)
 #define NV_RX_AVAIL            (1<<31)
+#define NV_RX_ERROR_MASK       (NV_RX_ERROR1|NV_RX_ERROR2|NV_RX_ERROR3|NV_RX_ERROR4|NV_RX_CRCERR|NV_RX_OVERFLOW|NV_RX_FRAMINGERR)
 
 #define NV_RX2_CHECKSUMMASK    (0x1C000000)
-#define NV_RX2_CHECKSUMOK1     (0x10000000)
-#define NV_RX2_CHECKSUMOK2     (0x14000000)
-#define NV_RX2_CHECKSUMOK3     (0x18000000)
+#define NV_RX2_CHECKSUM_IP     (0x10000000)
+#define NV_RX2_CHECKSUM_IP_TCP (0x14000000)
+#define NV_RX2_CHECKSUM_IP_UDP (0x18000000)
 #define NV_RX2_DESCRIPTORVALID (1<<29)
 #define NV_RX2_SUBSTRACT1      (1<<25)
 #define NV_RX2_ERROR1          (1<<18)
@@ -456,13 +436,16 @@ typedef union _ring_type {
 /* error and avail are the same for both */
 #define NV_RX2_ERROR           (1<<30)
 #define NV_RX2_AVAIL           (1<<31)
+#define NV_RX2_ERROR_MASK      (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3|NV_RX2_ERROR4|NV_RX2_CRCERR|NV_RX2_OVERFLOW|NV_RX2_FRAMINGERR)
 
 #define NV_RX3_VLAN_TAG_PRESENT (1<<16)
 #define NV_RX3_VLAN_TAG_MASK   (0x0000FFFF)
 
 /* Miscelaneous hardware related defines: */
 #define NV_PCI_REGSZ_VER1              0x270
-#define NV_PCI_REGSZ_VER2              0x604
+#define NV_PCI_REGSZ_VER2              0x2d4
+#define NV_PCI_REGSZ_VER3              0x604
+#define NV_PCI_REGSZ_MAX               0x604
 
 /* various timeout delays: all in usec */
 #define NV_TXRX_RESET_DELAY    4
@@ -487,18 +470,12 @@ typedef union _ring_type {
 /* General driver defaults */
 #define NV_WATCHDOG_TIMEO      (5*HZ)
 
-#define RX_RING_DEFAULT                128
+#define RX_RING_DEFAULT                512
 #define TX_RING_DEFAULT                256
 #define RX_RING_MIN            128
 #define TX_RING_MIN            64
 #define RING_MAX_DESC_VER_1    1024
 #define RING_MAX_DESC_VER_2_3  16384
-/*
- * Difference between the get and put pointers for the tx ring.
- * This is used to throttle the amount of data outstanding in the
- * tx ring.
- */
-#define TX_LIMIT_DIFFERENCE    1
 
 /* rx/tx mac addr + type + vlan + align + slack*/
 #define NV_RX_HEADERS          (64)
@@ -526,18 +503,65 @@ typedef union _ring_type {
 #define DESC_VER_3     3
 
 /* PHY defines */
-#define PHY_OUI_MARVELL        0x5043
-#define PHY_OUI_CICADA 0x03f1
+#define PHY_OUI_MARVELL                0x5043
+#define PHY_OUI_CICADA         0x03f1
+#define PHY_OUI_VITESSE                0x01c1
+#define PHY_OUI_REALTEK                0x0732
+#define PHY_OUI_REALTEK2       0x0020
 #define PHYID1_OUI_MASK        0x03ff
 #define PHYID1_OUI_SHFT        6
 #define PHYID2_OUI_MASK        0xfc00
 #define PHYID2_OUI_SHFT        10
-#define PHY_INIT1      0x0f000
-#define PHY_INIT2      0x0e00
-#define PHY_INIT3      0x01000
-#define PHY_INIT4      0x0200
-#define PHY_INIT5      0x0004
-#define PHY_INIT6      0x02000
+#define PHYID2_MODEL_MASK              0x03f0
+#define PHY_MODEL_REALTEK_8211         0x0110
+#define PHY_REV_MASK                   0x0001
+#define PHY_REV_REALTEK_8211B          0x0000
+#define PHY_REV_REALTEK_8211C          0x0001
+#define PHY_MODEL_REALTEK_8201         0x0200
+#define PHY_MODEL_MARVELL_E3016                0x0220
+#define PHY_MARVELL_E3016_INITMASK     0x0300
+#define PHY_CICADA_INIT1       0x0f000
+#define PHY_CICADA_INIT2       0x0e00
+#define PHY_CICADA_INIT3       0x01000
+#define PHY_CICADA_INIT4       0x0200
+#define PHY_CICADA_INIT5       0x0004
+#define PHY_CICADA_INIT6       0x02000
+#define PHY_VITESSE_INIT_REG1  0x1f
+#define PHY_VITESSE_INIT_REG2  0x10
+#define PHY_VITESSE_INIT_REG3  0x11
+#define PHY_VITESSE_INIT_REG4  0x12
+#define PHY_VITESSE_INIT_MSK1  0xc
+#define PHY_VITESSE_INIT_MSK2  0x0180
+#define PHY_VITESSE_INIT1      0x52b5
+#define PHY_VITESSE_INIT2      0xaf8a
+#define PHY_VITESSE_INIT3      0x8
+#define PHY_VITESSE_INIT4      0x8f8a
+#define PHY_VITESSE_INIT5      0xaf86
+#define PHY_VITESSE_INIT6      0x8f86
+#define PHY_VITESSE_INIT7      0xaf82
+#define PHY_VITESSE_INIT8      0x0100
+#define PHY_VITESSE_INIT9      0x8f82
+#define PHY_VITESSE_INIT10     0x0
+#define PHY_REALTEK_INIT_REG1  0x1f
+#define PHY_REALTEK_INIT_REG2  0x19
+#define PHY_REALTEK_INIT_REG3  0x13
+#define PHY_REALTEK_INIT_REG4  0x14
+#define PHY_REALTEK_INIT_REG5  0x18
+#define PHY_REALTEK_INIT_REG6  0x11
+#define PHY_REALTEK_INIT_REG7  0x01
+#define PHY_REALTEK_INIT1      0x0000
+#define PHY_REALTEK_INIT2      0x8e00
+#define PHY_REALTEK_INIT3      0x0001
+#define PHY_REALTEK_INIT4      0xad17
+#define PHY_REALTEK_INIT5      0xfb54
+#define PHY_REALTEK_INIT6      0xf5c7
+#define PHY_REALTEK_INIT7      0x1000
+#define PHY_REALTEK_INIT8      0x0003
+#define PHY_REALTEK_INIT9      0x0008
+#define PHY_REALTEK_INIT10     0x0005
+#define PHY_REALTEK_INIT11     0x0200
+#define PHY_REALTEK_INIT_MSK1  0x0003
+
 #define PHY_GIGABIT    0x0100
 
 #define PHY_TIMEOUT    0x1
@@ -568,6 +592,17 @@ typedef union _ring_type {
 #define NV_MSI_X_VECTOR_TX    0x1
 #define NV_MSI_X_VECTOR_OTHER 0x2
 
+#define NV_MSI_PRIV_OFFSET 0x68
+#define NV_MSI_PRIV_VALUE  0xffffffff
+
+#define NV_RESTART_TX         0x1
+#define NV_RESTART_RX         0x2
+
+#define NV_TX_LIMIT_COUNT     16
+
+#define NV_DYNAMIC_THRESHOLD        4
+#define NV_DYNAMIC_MAX_QUIET_COUNT  2048
+
 /* statistics */
 struct nv_ethtool_str {
        char name[ETH_GSTRING_LEN];
@@ -583,9 +618,6 @@ static const struct nv_ethtool_str nv_estats_str[] = {
        { "tx_carrier_errors" },
        { "tx_excess_deferral" },
        { "tx_retry_error" },
-       { "tx_deferral" },
-       { "tx_packets" },
-       { "tx_pause" },
        { "rx_frame_error" },
        { "rx_extra_byte" },
        { "rx_late_collision" },
@@ -598,11 +630,22 @@ static const struct nv_ethtool_str nv_estats_str[] = {
        { "rx_unicast" },
        { "rx_multicast" },
        { "rx_broadcast" },
+       { "rx_packets" },
+       { "rx_errors_total" },
+       { "tx_errors_total" },
+
+       /* version 2 stats */
+       { "tx_deferral" },
+       { "tx_packets" },
        { "rx_bytes" },
+       { "tx_pause" },
        { "rx_pause" },
        { "rx_drop_frame" },
-       { "rx_packets" },
-       { "rx_errors_total" }
+
+       /* version 3 stats */
+       { "tx_unicast" },
+       { "tx_multicast" },
+       { "tx_broadcast" }
 };
 
 struct nv_ethtool_stats {
@@ -615,9 +658,6 @@ struct nv_ethtool_stats {
        u64 tx_carrier_errors;
        u64 tx_excess_deferral;
        u64 tx_retry_error;
-       u64 tx_deferral;
-       u64 tx_packets;
-       u64 tx_pause;
        u64 rx_frame_error;
        u64 rx_extra_byte;
        u64 rx_late_collision;
@@ -630,13 +670,28 @@ struct nv_ethtool_stats {
        u64 rx_unicast;
        u64 rx_multicast;
        u64 rx_broadcast;
+       u64 rx_packets;
+       u64 rx_errors_total;
+       u64 tx_errors_total;
+
+       /* version 2 stats */
+       u64 tx_deferral;
+       u64 tx_packets;
        u64 rx_bytes;
+       u64 tx_pause;
        u64 rx_pause;
        u64 rx_drop_frame;
-       u64 rx_packets;
-       u64 rx_errors_total;
+
+       /* version 3 stats */
+       u64 tx_unicast;
+       u64 tx_multicast;
+       u64 tx_broadcast;
 };
 
+#define NV_DEV_STATISTICS_V3_COUNT (sizeof(struct nv_ethtool_stats)/sizeof(u64))
+#define NV_DEV_STATISTICS_V2_COUNT (NV_DEV_STATISTICS_V3_COUNT - 3)
+#define NV_DEV_STATISTICS_V1_COUNT (NV_DEV_STATISTICS_V2_COUNT - 6)
+
 /* diagnostics */
 #define NV_TEST_COUNT_BASE 3
 #define NV_TEST_COUNT_EXTENDED 4
@@ -649,8 +704,8 @@ static const struct nv_ethtool_str nv_etests_str[] = {
 };
 
 struct register_test {
-       u32 reg;
-       u32 mask;
+       __u32 reg;
+       __u32 mask;
 };
 
 static const struct register_test nv_registers_test[] = {
@@ -658,19 +713,28 @@ static const struct register_test nv_registers_test[] = {
        { NvRegMisc1, 0x03c },
        { NvRegOffloadConfig, 0x03ff },
        { NvRegMulticastAddrA, 0xffffffff },
-       { NvRegUnknownSetupReg3, 0x0ff },
+       { NvRegTxWatermark, 0x0ff },
        { NvRegWakeUpFlags, 0x07777 },
        { 0,0 }
 };
 
+struct nv_skb_map {
+       struct sk_buff *skb;
+       dma_addr_t dma;
+       unsigned int dma_len:31;
+       unsigned int dma_single:1;
+       struct ring_desc_ex *first_tx_desc;
+       struct nv_skb_map *next_tx_ctx;
+};
+
 /*
  * SMP locking:
- * All hardware access under dev->priv->lock, except the performance
+ * All hardware access under netdev_priv(dev)->lock, except the performance
  * critical parts:
  * - rx is (pseudo-) lockless: it relies on the single-threading provided
  *     by the arch code for interrupts.
  * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
- *     needs dev->priv->lock :-(
+ *     needs netdev_priv(dev)->lock :-(
  * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
  */
 
@@ -678,9 +742,11 @@ static const struct register_test nv_registers_test[] = {
 struct fe_priv {
        spinlock_t lock;
 
+       struct net_device *dev;
+       struct napi_struct napi;
+
        /* General data:
         * Locking: spin_lock(&np->lock); */
-       struct net_device_stats stats;
        struct nv_ethtool_stats estats;
        int in_shutdown;
        u32 linkspeed;
@@ -690,29 +756,41 @@ struct fe_priv {
        int phyaddr;
        int wolenabled;
        unsigned int phy_oui;
+       unsigned int phy_model;
+       unsigned int phy_rev;
        u16 gigabit;
        int intr_test;
+       int recover_error;
+       int quiet_count;
 
        /* General data: RO fields */
        dma_addr_t ring_addr;
        struct pci_dev *pci_dev;
        u32 orig_mac[2];
+       u32 events;
        u32 irqmask;
        u32 desc_ver;
        u32 txrxctl_bits;
        u32 vlanctl_bits;
        u32 driver_data;
+       u32 device_id;
        u32 register_size;
+       int rx_csum;
+       u32 mac_in_use;
+       int mgmt_version;
+       int mgmt_sema;
 
        void __iomem *base;
 
        /* rx specific fields.
         * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
         */
-       ring_type rx_ring;
-       unsigned int cur_rx, refill_rx;
-       struct sk_buff **rx_skbuff;
-       dma_addr_t *rx_dma;
+       union ring_type get_rx, put_rx, first_rx, last_rx;
+       struct nv_skb_map *get_rx_ctx, *put_rx_ctx;
+       struct nv_skb_map *first_rx_ctx, *last_rx_ctx;
+       struct nv_skb_map *rx_skb;
+
+       union ring_type rx_ring;
        unsigned int rx_buf_sz;
        unsigned int pkt_limit;
        struct timer_list oom_kick;
@@ -729,15 +807,19 @@ struct fe_priv {
        /*
         * tx specific fields.
         */
-       ring_type tx_ring;
-       unsigned int next_tx, nic_tx;
-       struct sk_buff **tx_skbuff;
-       dma_addr_t *tx_dma;
-       unsigned int *tx_dma_len;
+       union ring_type get_tx, put_tx, first_tx, last_tx;
+       struct nv_skb_map *get_tx_ctx, *put_tx_ctx;
+       struct nv_skb_map *first_tx_ctx, *last_tx_ctx;
+       struct nv_skb_map *tx_skb;
+
+       union ring_type tx_ring;
        u32 tx_flags;
        int tx_ring_size;
-       int tx_limit_start;
-       int tx_limit_stop;
+       int tx_limit;
+       u32 tx_pkts_in_progress;
+       struct nv_skb_map *tx_change_owner;
+       struct nv_skb_map *tx_end_flip;
+       int tx_stop;
 
        /* vlan fields */
        struct vlan_group *vlangrp;
@@ -748,13 +830,21 @@ struct fe_priv {
 
        /* flow control */
        u32 pause_flags;
+
+       /* power saved state */
+       u32 saved_config_space[NV_PCI_REGSZ_MAX/4];
+
+       /* for different msi-x irq type */
+       char name_rx[IFNAMSIZ + 3];       /* -rx    */
+       char name_tx[IFNAMSIZ + 3];       /* -tx    */
+       char name_other[IFNAMSIZ + 6];    /* -other */
 };
 
 /*
  * Maximum number of loops until we assume that a bit in the irq mask
  * is stuck. Overridable with module param.
  */
-static int max_interrupt_work = 5;
+static int max_interrupt_work = 4;       /* built-in default, used unless the module param overrides it */
 
 /*
  * Optimization can be either throuput mode or cpu mode
@@ -764,9 +854,10 @@ static int max_interrupt_work = 5;
  */
 enum {
        NV_OPTIMIZATION_MODE_THROUGHPUT,
-       NV_OPTIMIZATION_MODE_CPU
+       NV_OPTIMIZATION_MODE_CPU,
+       NV_OPTIMIZATION_MODE_DYNAMIC      /* NOTE(review): the visible part of the comment above names only throughput and cpu mode - confirm it also describes this one */
 };
-static int optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT;
+static int optimization_mode = NV_OPTIMIZATION_MODE_DYNAMIC;     /* default mode */
 
 /*
  * Poll interval for timer irq
@@ -804,6 +895,22 @@ enum {
 };
 static int dma_64bit = NV_DMA_64BIT_ENABLED;
 
+/*
+ * Crossover detection switch, tested in the Realtek 8201 branch of phy_init().
+ * Realtek 8201 phy + some OEM boards do not work properly.
+ */
+enum {
+       NV_CROSSOVER_DETECTION_DISABLED,
+       NV_CROSSOVER_DETECTION_ENABLED
+};
+static int phy_cross = NV_CROSSOVER_DETECTION_DISABLED;  /* disabled by default */
+
+/*
+ * Power down phy when interface is down (persists through reboot;
+ * older Linux and other OSes may not power it up again)
+ */
+static int phy_power_down = 0;   /* 0 = off (default); non-zero makes phy_init() OR BMCR_PDOWN into MII_BMCR */
+
 static inline struct fe_priv *get_nvpriv(struct net_device *dev)
 {
        return netdev_priv(dev);
@@ -822,13 +929,20 @@ static inline void pci_push(u8 __iomem *base)
 
 static inline u32 nv_descr_getlength(struct ring_desc *prd, u32 v)
 {
-       return le32_to_cpu(prd->FlagLen)
+       return le32_to_cpu(prd->flaglen)  /* flaglen converted from little endian, then masked by descriptor version v */
                & ((v == DESC_VER_1) ? LEN_MASK_V1 : LEN_MASK_V2);
 }
 
 static inline u32 nv_descr_getlength_ex(struct ring_desc_ex *prd, u32 v)
 {
-       return le32_to_cpu(prd->FlagLen) & LEN_MASK_V2;
+       return le32_to_cpu(prd->flaglen) & LEN_MASK_V2;   /* always the V2 mask; parameter v is not used */
+}
+
+static bool nv_optimized(struct fe_priv *np)
+{
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               return false;     /* descriptor versions 1 and 2: callers use struct ring_desc */
+       return true;      /* any other desc_ver: callers use struct ring_desc_ex */
 }
 
 static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target,
@@ -842,7 +956,7 @@ static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target,
                delaymax -= delay;
                if (delaymax < 0) {
                        if (msg)
-                               printk(msg);
+                               printk("%s", msg);
                        return 1;
                }
        } while ((readl(base + offset) & mask) != target);
@@ -852,26 +966,36 @@ static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target,
 #define NV_SETUP_RX_RING 0x01
 #define NV_SETUP_TX_RING 0x02
 
+static inline u32 dma_low(dma_addr_t addr)
+{
+       return addr;      /* conversion to the u32 return type keeps the low 32 bits */
+}
+
+static inline u32 dma_high(dma_addr_t addr)
+{
+       return addr>>31>>1;     /* bits 32..63; the split shift gives 0 for a 32bit dma_addr_t, where a single >>32 would be undefined */
+}
+
 static void setup_hw_rings(struct net_device *dev, int rxtx_flags)
 {
        struct fe_priv *np = get_nvpriv(dev);
        u8 __iomem *base = get_hwbase(dev);
 
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+       if (!nv_optimized(np)) {  /* DESC_VER_1/2: struct ring_desc layout, only the low address registers are written */
                if (rxtx_flags & NV_SETUP_RX_RING) {
-                       writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr);
+                       writel(dma_low(np->ring_addr), base + NvRegRxRingPhysAddr);       /* rx ring sits at the start of the allocation */
                }
                if (rxtx_flags & NV_SETUP_TX_RING) {
-                       writel((u32) cpu_to_le64(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+                       writel(dma_low(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);   /* tx ring follows the rx descriptors */
                }
        } else {
                if (rxtx_flags & NV_SETUP_RX_RING) {
-                       writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr);
-                       writel((u32) (cpu_to_le64(np->ring_addr) >> 32), base + NvRegRxRingPhysAddrHigh);
+                       writel(dma_low(np->ring_addr), base + NvRegRxRingPhysAddr);       /* low 32 bits of the rx ring address */
+                       writel(dma_high(np->ring_addr), base + NvRegRxRingPhysAddrHigh);  /* high 32 bits of the rx ring address */
                }
                if (rxtx_flags & NV_SETUP_TX_RING) {
-                       writel((u32) cpu_to_le64(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
-                       writel((u32) (cpu_to_le64(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc_ex)) >> 32), base + NvRegTxRingPhysAddrHigh);
+                       writel(dma_low(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);        /* tx ring follows the rx descriptors (ring_desc_ex sized) */
+                       writel(dma_high(np->ring_addr + np->rx_ring_size*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddrHigh);   /* high 32 bits of the same tx ring address */
                }
        }
 }
@@ -880,8 +1004,8 @@ static void free_rings(struct net_device *dev)
 {
        struct fe_priv *np = get_nvpriv(dev);
 
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               if(np->rx_ring.orig)
+       if (!nv_optimized(np)) {
+               if (np->rx_ring.orig)
                        pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size),
                                            np->rx_ring.orig, np->ring_addr);
        } else {
@@ -889,16 +1013,10 @@ static void free_rings(struct net_device *dev)
                        pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size),
                                            np->rx_ring.ex, np->ring_addr);
        }
-       if (np->rx_skbuff)
-               kfree(np->rx_skbuff);
-       if (np->rx_dma)
-               kfree(np->rx_dma);
-       if (np->tx_skbuff)
-               kfree(np->tx_skbuff);
-       if (np->tx_dma)
-               kfree(np->tx_dma);
-       if (np->tx_dma_len)
-               kfree(np->tx_dma_len);
+       if (np->rx_skb)
+               kfree(np->rx_skb);
+       if (np->tx_skb)
+               kfree(np->tx_skb);
 }
 
 static int using_multi_irqs(struct net_device *dev)
@@ -913,6 +1031,23 @@ static int using_multi_irqs(struct net_device *dev)
                return 1;
 }
 
+static void nv_txrx_gate(struct net_device *dev, bool gate)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       u32 powerstate;
+
+       if (!np->mac_in_use &&
+           (np->driver_data & DEV_HAS_POWER_CNTRL)) {    /* in every other case the register is left untouched */
+               powerstate = readl(base + NvRegPowerState2);      /* read-modify-write: only GATE_CLOCKS changes */
+               if (gate)
+                       powerstate |= NVREG_POWERSTATE2_GATE_CLOCKS;      /* gate == true sets the bit */
+               else
+                       powerstate &= ~NVREG_POWERSTATE2_GATE_CLOCKS;     /* gate == false clears the bit */
+               writel(powerstate, base + NvRegPowerState2);
+       }
+}
+
 static void nv_enable_irq(struct net_device *dev)
 {
        struct fe_priv *np = get_nvpriv(dev);
@@ -921,7 +1056,7 @@ static void nv_enable_irq(struct net_device *dev)
                if (np->msi_flags & NV_MSI_X_ENABLED)
                        enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
                else
-                       enable_irq(dev->irq);
+                       enable_irq(np->pci_dev->irq);
        } else {
                enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
                enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
@@ -937,7 +1072,7 @@ static void nv_disable_irq(struct net_device *dev)
                if (np->msi_flags & NV_MSI_X_ENABLED)
                        disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
                else
-                       disable_irq(dev->irq);
+                       disable_irq(np->pci_dev->irq);
        } else {
                disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
                disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
@@ -967,6 +1102,20 @@ static void nv_disable_hw_interrupts(struct net_device *dev, u32 mask)
        }
 }
 
+static void nv_napi_enable(struct net_device *dev)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+
+       napi_enable(&np->napi);   /* thin wrapper: forwards to napi_enable() on np->napi */
+}
+
+static void nv_napi_disable(struct net_device *dev)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+
+       napi_disable(&np->napi);  /* thin wrapper: forwards to napi_disable() on np->napi */
+}
+
 #define MII_READ       (-1)
 /* mii_rw: read/write a register on the PHY.
  *
@@ -978,7 +1127,7 @@ static int mii_rw(struct net_device *dev, int addr, int miireg, int value)
        u32 reg;
        int retval;
 
-       writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
+       writel(NVREG_MIISTAT_MASK_RW, base + NvRegMIIStatus);
 
        reg = readl(base + NvRegMIIControl);
        if (reg & NVREG_MIICTL_INUSE) {
@@ -1016,14 +1165,13 @@ static int mii_rw(struct net_device *dev, int addr, int miireg, int value)
        return retval;
 }
 
-static int phy_reset(struct net_device *dev)
+static int phy_reset(struct net_device *dev, u32 bmcr_setup)
 {
        struct fe_priv *np = netdev_priv(dev);
        u32 miicontrol;
        unsigned int tries = 0;
 
-       miicontrol = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
-       miicontrol |= BMCR_RESET;
+       miicontrol = BMCR_RESET | bmcr_setup;
        if (mii_rw(dev, np->phyaddr, MII_BMCR, miicontrol)) {
                return -1;
        }
@@ -1048,6 +1196,95 @@ static int phy_init(struct net_device *dev)
        u8 __iomem *base = get_hwbase(dev);
        u32 phyinterface, phy_reserved, mii_status, mii_control, mii_control_1000,reg;
 
+       /* phy errata for E3016 phy */
+       if (np->phy_model == PHY_MODEL_MARVELL_E3016) {
+               reg = mii_rw(dev, np->phyaddr, MII_NCONFIG, MII_READ);
+               reg &= ~PHY_MARVELL_E3016_INITMASK;
+               if (mii_rw(dev, np->phyaddr, MII_NCONFIG, reg)) {
+                       printk(KERN_INFO "%s: phy write to errata reg failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+       }
+       if (np->phy_oui == PHY_OUI_REALTEK) {
+               if (np->phy_model == PHY_MODEL_REALTEK_8211 &&
+                   np->phy_rev == PHY_REV_REALTEK_8211B) {
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, PHY_REALTEK_INIT2)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT3)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG3, PHY_REALTEK_INIT4)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG4, PHY_REALTEK_INIT5)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG5, PHY_REALTEK_INIT6)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+               }
+               if (np->phy_model == PHY_MODEL_REALTEK_8211 &&
+                   np->phy_rev == PHY_REV_REALTEK_8211C) {
+                       u32 powerstate = readl(base + NvRegPowerState2);
+
+                       /* need to perform hw phy reset */
+                       powerstate |= NVREG_POWERSTATE2_PHY_RESET;
+                       writel(powerstate, base + NvRegPowerState2);
+                       msleep(25);
+
+                       powerstate &= ~NVREG_POWERSTATE2_PHY_RESET;
+                       writel(powerstate, base + NvRegPowerState2);
+                       msleep(25);
+
+                       reg = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, MII_READ);
+                       reg |= PHY_REALTEK_INIT9;
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, reg)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT10)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       reg = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG7, MII_READ);
+                       if (!(reg & PHY_REALTEK_INIT11)) {
+                               reg |= PHY_REALTEK_INIT11;
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG7, reg)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+               }
+               if (np->phy_model == PHY_MODEL_REALTEK_8201) {
+                       if (np->driver_data & DEV_NEED_PHY_INIT_FIX) {
+                               phy_reserved = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, MII_READ);
+                               phy_reserved |= PHY_REALTEK_INIT7;
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, phy_reserved)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                       }
+               }
+       }
+
        /* set advertise register */
        reg = mii_rw(dev, np->phyaddr, MII_ADVERTISE, MII_READ);
        reg |= (ADVERTISE_10HALF|ADVERTISE_10FULL|ADVERTISE_100HALF|ADVERTISE_100FULL|ADVERTISE_PAUSE_ASYM|ADVERTISE_PAUSE_CAP);
@@ -1078,23 +1315,39 @@ static int phy_init(struct net_device *dev)
        else
                np->gigabit = 0;
 
-       /* reset the phy */
-       if (phy_reset(dev)) {
-               printk(KERN_INFO "%s: phy reset failed\n", pci_name(np->pci_dev));
-               return PHY_ERROR;
+       mii_control = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
+       mii_control |= BMCR_ANENABLE;
+
+       if (np->phy_oui == PHY_OUI_REALTEK &&
+           np->phy_model == PHY_MODEL_REALTEK_8211 &&
+           np->phy_rev == PHY_REV_REALTEK_8211C) {
+               /* start autoneg since we already performed hw reset above */
+               mii_control |= BMCR_ANRESTART;
+               if (mii_rw(dev, np->phyaddr, MII_BMCR, mii_control)) {
+                       printk(KERN_INFO "%s: phy init failed\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+       } else {
+               /* reset the phy
+                * (certain phys need bmcr to be setup with reset)
+                */
+               if (phy_reset(dev, mii_control)) {
+                       printk(KERN_INFO "%s: phy reset failed\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
        }
 
        /* phy vendor specific configuration */
        if ((np->phy_oui == PHY_OUI_CICADA) && (phyinterface & PHY_RGMII) ) {
                phy_reserved = mii_rw(dev, np->phyaddr, MII_RESV1, MII_READ);
-               phy_reserved &= ~(PHY_INIT1 | PHY_INIT2);
-               phy_reserved |= (PHY_INIT3 | PHY_INIT4);
+               phy_reserved &= ~(PHY_CICADA_INIT1 | PHY_CICADA_INIT2);
+               phy_reserved |= (PHY_CICADA_INIT3 | PHY_CICADA_INIT4);
                if (mii_rw(dev, np->phyaddr, MII_RESV1, phy_reserved)) {
                        printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
                        return PHY_ERROR;
                }
                phy_reserved = mii_rw(dev, np->phyaddr, MII_NCONFIG, MII_READ);
-               phy_reserved |= PHY_INIT5;
+               phy_reserved |= PHY_CICADA_INIT5;
                if (mii_rw(dev, np->phyaddr, MII_NCONFIG, phy_reserved)) {
                        printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
                        return PHY_ERROR;
@@ -1102,18 +1355,153 @@ static int phy_init(struct net_device *dev)
        }
        if (np->phy_oui == PHY_OUI_CICADA) {
                phy_reserved = mii_rw(dev, np->phyaddr, MII_SREVISION, MII_READ);
-               phy_reserved |= PHY_INIT6;
+               phy_reserved |= PHY_CICADA_INIT6;
                if (mii_rw(dev, np->phyaddr, MII_SREVISION, phy_reserved)) {
                        printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
                        return PHY_ERROR;
                }
        }
+       if (np->phy_oui == PHY_OUI_VITESSE) {
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG1, PHY_VITESSE_INIT1)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT2)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, MII_READ);
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, MII_READ);
+               phy_reserved &= ~PHY_VITESSE_INIT_MSK1;
+               phy_reserved |= PHY_VITESSE_INIT3;
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT4)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT5)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, MII_READ);
+               phy_reserved &= ~PHY_VITESSE_INIT_MSK1;
+               phy_reserved |= PHY_VITESSE_INIT3;
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, MII_READ);
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT6)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT7)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, MII_READ);
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG4, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               phy_reserved = mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, MII_READ);
+               phy_reserved &= ~PHY_VITESSE_INIT_MSK2;
+               phy_reserved |= PHY_VITESSE_INIT8;
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG3, phy_reserved)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG2, PHY_VITESSE_INIT9)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+               if (mii_rw(dev, np->phyaddr, PHY_VITESSE_INIT_REG1, PHY_VITESSE_INIT10)) {
+                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                       return PHY_ERROR;
+               }
+       }
+       if (np->phy_oui == PHY_OUI_REALTEK) {
+               if (np->phy_model == PHY_MODEL_REALTEK_8211 &&
+                   np->phy_rev == PHY_REV_REALTEK_8211B) {
+                       /* reset could have cleared these out, set them back */
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, PHY_REALTEK_INIT2)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT3)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG3, PHY_REALTEK_INIT4)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG4, PHY_REALTEK_INIT5)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG5, PHY_REALTEK_INIT6)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+                       if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                               printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                               return PHY_ERROR;
+                       }
+               }
+               if (np->phy_model == PHY_MODEL_REALTEK_8201) {
+                       if (np->driver_data & DEV_NEED_PHY_INIT_FIX) {
+                               phy_reserved = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, MII_READ);
+                               phy_reserved |= PHY_REALTEK_INIT7;
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG6, phy_reserved)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                       }
+                       if (phy_cross == NV_CROSSOVER_DETECTION_DISABLED) {
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT3)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                               phy_reserved = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, MII_READ);
+                               phy_reserved &= ~PHY_REALTEK_INIT_MSK1;
+                               phy_reserved |= PHY_REALTEK_INIT3;
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, phy_reserved)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                               if (mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1)) {
+                                       printk(KERN_INFO "%s: phy init failed.\n", pci_name(np->pci_dev));
+                                       return PHY_ERROR;
+                               }
+                       }
+               }
+       }
+
        /* some phys clear out pause advertisment on reset, set it back */
        mii_rw(dev, np->phyaddr, MII_ADVERTISE, reg);
 
-       /* restart auto negotiation */
+       /* restart auto negotiation, power down phy */
        mii_control = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
        mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE);
+       if (phy_power_down) {
+               mii_control |= BMCR_PDOWN;
+       }
        if (mii_rw(dev, np->phyaddr, MII_BMCR, mii_control)) {
                return PHY_ERROR;
        }
@@ -1125,16 +1513,21 @@ static void nv_start_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 rx_ctrl = readl(base + NvRegReceiverControl);
 
        dprintk(KERN_DEBUG "%s: nv_start_rx\n", dev->name);
        /* Already running? Stop it. */
-       if (readl(base + NvRegReceiverControl) & NVREG_RCVCTL_START) {
-               writel(0, base + NvRegReceiverControl);
+       if ((readl(base + NvRegReceiverControl) & NVREG_RCVCTL_START) && !np->mac_in_use) {
+               rx_ctrl &= ~NVREG_RCVCTL_START;
+               writel(rx_ctrl, base + NvRegReceiverControl);
                pci_push(base);
        }
        writel(np->linkspeed, base + NvRegLinkSpeed);
        pci_push(base);
-       writel(NVREG_RCVCTL_START, base + NvRegReceiverControl);
+        rx_ctrl |= NVREG_RCVCTL_START;
+        if (np->mac_in_use)
+               rx_ctrl &= ~NVREG_RCVCTL_RX_PATH_EN;
+       writel(rx_ctrl, base + NvRegReceiverControl);
        dprintk(KERN_DEBUG "%s: nv_start_rx to duplex %d, speed 0x%08x.\n",
                                dev->name, np->duplex, np->linkspeed);
        pci_push(base);
@@ -1142,39 +1535,71 @@ static void nv_start_rx(struct net_device *dev)
 
 static void nv_stop_rx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 rx_ctrl = readl(base + NvRegReceiverControl); /* start from the current value so the other bits are preserved */
 
        dprintk(KERN_DEBUG "%s: nv_stop_rx\n", dev->name);
-       writel(0, base + NvRegReceiverControl);
+       if (!np->mac_in_use)
+               rx_ctrl &= ~NVREG_RCVCTL_START;   /* mac_in_use == 0: clear the start bit */
+       else
+               rx_ctrl |= NVREG_RCVCTL_RX_PATH_EN;       /* mac_in_use set: START is left alone, RX_PATH_EN is set instead */
+       writel(rx_ctrl, base + NvRegReceiverControl);
        reg_delay(dev, NvRegReceiverStatus, NVREG_RCVSTAT_BUSY, 0,
                        NV_RXSTOP_DELAY1, NV_RXSTOP_DELAY1MAX,
                        KERN_INFO "nv_stop_rx: ReceiverStatus remained busy");
 
        udelay(NV_RXSTOP_DELAY2);
-       writel(0, base + NvRegLinkSpeed);
+       if (!np->mac_in_use)
+               writel(0, base + NvRegLinkSpeed); /* link speed register is cleared only when mac_in_use is 0 */
 }
 
 static void nv_start_tx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 tx_ctrl = readl(base + NvRegTransmitterControl);      /* start from the current value so the other bits are preserved */
 
        dprintk(KERN_DEBUG "%s: nv_start_tx\n", dev->name);
-       writel(NVREG_XMITCTL_START, base + NvRegTransmitterControl);
+       tx_ctrl |= NVREG_XMITCTL_START;
+       if (np->mac_in_use)
+               tx_ctrl &= ~NVREG_XMITCTL_TX_PATH_EN;     /* counterpart of nv_stop_tx(), which sets this bit when mac_in_use */
+       writel(tx_ctrl, base + NvRegTransmitterControl);
        pci_push(base);
 }
 
 static void nv_stop_tx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 tx_ctrl = readl(base + NvRegTransmitterControl);      /* start from the current value so the other bits are preserved */
 
        dprintk(KERN_DEBUG "%s: nv_stop_tx\n", dev->name);
-       writel(0, base + NvRegTransmitterControl);
+       if (!np->mac_in_use)
+               tx_ctrl &= ~NVREG_XMITCTL_START;  /* mac_in_use == 0: clear the start bit */
+       else
+               tx_ctrl |= NVREG_XMITCTL_TX_PATH_EN;      /* mac_in_use set: START is left alone, TX_PATH_EN is set instead */
+       writel(tx_ctrl, base + NvRegTransmitterControl);
        reg_delay(dev, NvRegTransmitterStatus, NVREG_XMITSTAT_BUSY, 0,
                        NV_TXSTOP_DELAY1, NV_TXSTOP_DELAY1MAX,
                        KERN_INFO "nv_stop_tx: TransmitterStatus remained busy");
 
        udelay(NV_TXSTOP_DELAY2);
-       writel(0, base + NvRegUnknownTransmitterReg);
+       if (!np->mac_in_use)
+               writel(readl(base + NvRegTransmitPoll) & NVREG_TRANSMITPOLL_MAC_ADDR_REV, /* keep only the MAC_ADDR_REV bit, clear the rest */
+                      base + NvRegTransmitPoll);
+}
+
+static void nv_start_rxtx(struct net_device *dev)
+{
+       nv_start_rx(dev); /* convenience pair: receiver first ... */
+       nv_start_tx(dev); /* ... then transmitter */
+}
+
+static void nv_stop_rxtx(struct net_device *dev)
+{
+       nv_stop_rx(dev);  /* convenience pair: receiver first ... */
+       nv_stop_tx(dev);  /* ... then transmitter (same order as nv_start_rxtx) */
 }
 
 static void nv_txrx_reset(struct net_device *dev)
@@ -1194,20 +1619,95 @@ static void nv_mac_reset(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 temp1, temp2, temp3;
 
        dprintk(KERN_DEBUG "%s: nv_mac_reset\n", dev->name);
+
        writel(NVREG_TXRXCTL_BIT2 | NVREG_TXRXCTL_RESET | np->txrxctl_bits, base + NvRegTxRxControl);
        pci_push(base);
+
+       /* save registers since they will be cleared on reset */
+       temp1 = readl(base + NvRegMacAddrA);
+       temp2 = readl(base + NvRegMacAddrB);
+       temp3 = readl(base + NvRegTransmitPoll);
+
        writel(NVREG_MAC_RESET_ASSERT, base + NvRegMacReset);
        pci_push(base);
        udelay(NV_MAC_RESET_DELAY);
        writel(0, base + NvRegMacReset);
        pci_push(base);
        udelay(NV_MAC_RESET_DELAY);
+
+       /* restore saved registers */
+       writel(temp1, base + NvRegMacAddrA);
+       writel(temp2, base + NvRegMacAddrB);
+       writel(temp3, base + NvRegTransmitPoll);
+
        writel(NVREG_TXRXCTL_BIT2 | np->txrxctl_bits, base + NvRegTxRxControl);
        pci_push(base);
 }
 
+static void nv_get_hw_stats(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+
+       np->estats.tx_bytes += readl(base + NvRegTxCnt);
+       np->estats.tx_zero_rexmt += readl(base + NvRegTxZeroReXmt);
+       np->estats.tx_one_rexmt += readl(base + NvRegTxOneReXmt);
+       np->estats.tx_many_rexmt += readl(base + NvRegTxManyReXmt);
+       np->estats.tx_late_collision += readl(base + NvRegTxLateCol);
+       np->estats.tx_fifo_errors += readl(base + NvRegTxUnderflow);
+       np->estats.tx_carrier_errors += readl(base + NvRegTxLossCarrier);
+       np->estats.tx_excess_deferral += readl(base + NvRegTxExcessDef);
+       np->estats.tx_retry_error += readl(base + NvRegTxRetryErr);
+       np->estats.rx_frame_error += readl(base + NvRegRxFrameErr);
+       np->estats.rx_extra_byte += readl(base + NvRegRxExtraByte);
+       np->estats.rx_late_collision += readl(base + NvRegRxLateCol);
+       np->estats.rx_runt += readl(base + NvRegRxRunt);
+       np->estats.rx_frame_too_long += readl(base + NvRegRxFrameTooLong);
+       np->estats.rx_over_errors += readl(base + NvRegRxOverflow);
+       np->estats.rx_crc_errors += readl(base + NvRegRxFCSErr);
+       np->estats.rx_frame_align_error += readl(base + NvRegRxFrameAlignErr);
+       np->estats.rx_length_error += readl(base + NvRegRxLenErr);
+       np->estats.rx_unicast += readl(base + NvRegRxUnicast);
+       np->estats.rx_multicast += readl(base + NvRegRxMulticast);
+       np->estats.rx_broadcast += readl(base + NvRegRxBroadcast);
+       np->estats.rx_packets =
+               np->estats.rx_unicast +
+               np->estats.rx_multicast +
+               np->estats.rx_broadcast;
+       np->estats.rx_errors_total =
+               np->estats.rx_crc_errors +
+               np->estats.rx_over_errors +
+               np->estats.rx_frame_error +
+               (np->estats.rx_frame_align_error - np->estats.rx_extra_byte) +
+               np->estats.rx_late_collision +
+               np->estats.rx_runt +
+               np->estats.rx_frame_too_long;
+       np->estats.tx_errors_total =
+               np->estats.tx_late_collision +
+               np->estats.tx_fifo_errors +
+               np->estats.tx_carrier_errors +
+               np->estats.tx_excess_deferral +
+               np->estats.tx_retry_error;
+
+       if (np->driver_data & DEV_HAS_STATISTICS_V2) {
+               np->estats.tx_deferral += readl(base + NvRegTxDef);
+               np->estats.tx_packets += readl(base + NvRegTxFrame);
+               np->estats.rx_bytes += readl(base + NvRegRxCnt);
+               np->estats.tx_pause += readl(base + NvRegTxPause);
+               np->estats.rx_pause += readl(base + NvRegRxPause);
+               np->estats.rx_drop_frame += readl(base + NvRegRxDropFrame);
+       }
+
+       if (np->driver_data & DEV_HAS_STATISTICS_V3) {
+               np->estats.tx_unicast += readl(base + NvRegTxUnicast);
+               np->estats.tx_multicast += readl(base + NvRegTxMulticast);
+               np->estats.tx_broadcast += readl(base + NvRegTxBroadcast);
+       }
+}
+
 /*
  * nv_get_stats: dev->get_stats function
  * Get latest stats value from the nic.
@@ -1218,11 +1718,21 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
 
-       /* It seems that the nic always generates interrupts and doesn't
-        * accumulate errors internally. Thus the current values in np->stats
-        * are already up to date.
-        */
-       return &np->stats;
+       /* If the nic supports hw counters then retrieve latest values */
+       if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
+               nv_get_hw_stats(dev);
+
+               /* copy to net_device stats */
+               dev->stats.tx_bytes = np->estats.tx_bytes;
+               dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
+               dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
+               dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
+               dev->stats.rx_over_errors = np->estats.rx_over_errors;
+               dev->stats.rx_errors = np->estats.rx_errors_total;
+               dev->stats.tx_errors = np->estats.tx_errors_total;
+       }
+
+       return &dev->stats;
 }
 
 /*
@@ -1233,73 +1743,76 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
 static int nv_alloc_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
-       unsigned int refill_rx = np->refill_rx;
-       int nr;
-
-       while (np->cur_rx != refill_rx) {
-               struct sk_buff *skb;
-
-               nr = refill_rx % np->rx_ring_size;
-               if (np->rx_skbuff[nr] == NULL) {
-
-                       skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
-                       if (!skb)
-                               break;
-
-                       skb->dev = dev;
-                       np->rx_skbuff[nr] = skb;
-               } else {
-                       skb = np->rx_skbuff[nr];
-               }
-               np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data,
-                                       skb->end-skb->data, PCI_DMA_FROMDEVICE);
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       np->rx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]);
+       struct ring_desc* less_rx;
+
+       less_rx = np->get_rx.orig;
+       if (less_rx-- == np->first_rx.orig)
+               less_rx = np->last_rx.orig;
+
+       while (np->put_rx.orig != less_rx) {
+               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
+               if (skb) {
+                       np->put_rx_ctx->skb = skb;
+                       np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+                                                            skb->data,
+                                                            skb_tailroom(skb),
+                                                            PCI_DMA_FROMDEVICE);
+                       np->put_rx_ctx->dma_len = skb_tailroom(skb);
+                       np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
                        wmb();
-                       np->rx_ring.orig[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
+                       np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
+                       if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
+                               np->put_rx.orig = np->first_rx.orig;
+                       if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
+                               np->put_rx_ctx = np->first_rx_ctx;
                } else {
-                       np->rx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->rx_dma[nr]) >> 32;
-                       np->rx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->rx_dma[nr]) & 0x0FFFFFFFF;
-                       wmb();
-                       np->rx_ring.ex[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
+                       return 1;
                }
-               dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n",
-                                       dev->name, refill_rx);
-               refill_rx++;
        }
-       np->refill_rx = refill_rx;
-       if (np->cur_rx - refill_rx == np->rx_ring_size)
-               return 1;
        return 0;
 }
 
-static void nv_do_rx_refill(unsigned long data)
+static int nv_alloc_rx_optimized(struct net_device *dev)
 {
-       struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
-
-       if (!using_multi_irqs(dev)) {
-               if (np->msi_flags & NV_MSI_X_ENABLED)
-                       disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
-               else
-                       disable_irq(dev->irq);
-       } else {
-               disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
-       }
-       if (nv_alloc_rx(dev)) {
-               spin_lock_irq(&np->lock);
-               if (!np->in_shutdown)
-                       mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-               spin_unlock_irq(&np->lock);
-       }
-       if (!using_multi_irqs(dev)) {
-               if (np->msi_flags & NV_MSI_X_ENABLED)
-                       enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
-               else
-                       enable_irq(dev->irq);
-       } else {
-               enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+       struct ring_desc_ex* less_rx;
+
+       less_rx = np->get_rx.ex;
+       if (less_rx-- == np->first_rx.ex)
+               less_rx = np->last_rx.ex;
+
+       while (np->put_rx.ex != less_rx) {
+               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
+               if (skb) {
+                       np->put_rx_ctx->skb = skb;
+                       np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+                                                            skb->data,
+                                                            skb_tailroom(skb),
+                                                            PCI_DMA_FROMDEVICE);
+                       np->put_rx_ctx->dma_len = skb_tailroom(skb);
+                       np->put_rx.ex->bufhigh = cpu_to_le32(dma_high(np->put_rx_ctx->dma));
+                       np->put_rx.ex->buflow = cpu_to_le32(dma_low(np->put_rx_ctx->dma));
+                       wmb();
+                       np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
+                       if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
+                               np->put_rx.ex = np->first_rx.ex;
+                       if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
+                               np->put_rx_ctx = np->first_rx_ctx;
+               } else {
+                       return 1;
+               }
        }
+       return 0;
+}
+
+/* Called after 50ms if the rx bufs are exhausted, to attempt a refill */
+static void nv_do_rx_refill(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *) data;
+       struct fe_priv *np = netdev_priv(dev);
+
+       /* Just reschedule NAPI rx processing */
+       napi_schedule(&np->napi);
 }
 
 static void nv_init_rx(struct net_device *dev)
@@ -1307,13 +1820,28 @@ static void nv_init_rx(struct net_device *dev)
        struct fe_priv *np = netdev_priv(dev);
        int i;
 
-       np->cur_rx = np->rx_ring_size;
-       np->refill_rx = 0;
-       for (i = 0; i < np->rx_ring_size; i++)
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       np->rx_ring.orig[i].FlagLen = 0;
-               else
-                       np->rx_ring.ex[i].FlagLen = 0;
+       np->get_rx = np->put_rx = np->first_rx = np->rx_ring;
+
+       if (!nv_optimized(np))
+               np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1];
+       else
+               np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1];
+       np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb;
+       np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1];
+
+       for (i = 0; i < np->rx_ring_size; i++) {
+               if (!nv_optimized(np)) {
+                       np->rx_ring.orig[i].flaglen = 0;
+                       np->rx_ring.orig[i].buf = 0;
+               } else {
+                       np->rx_ring.ex[i].flaglen = 0;
+                       np->rx_ring.ex[i].txvlan = 0;
+                       np->rx_ring.ex[i].bufhigh = 0;
+                       np->rx_ring.ex[i].buflow = 0;
+               }
+               np->rx_skb[i].skb = NULL;
+               np->rx_skb[i].dma = 0;
+       }
 }
 
 static void nv_init_tx(struct net_device *dev)
@@ -1321,45 +1849,75 @@ static void nv_init_tx(struct net_device *dev)
        struct fe_priv *np = netdev_priv(dev);
        int i;
 
-       np->next_tx = np->nic_tx = 0;
+       np->get_tx = np->put_tx = np->first_tx = np->tx_ring;
+
+       if (!nv_optimized(np))
+               np->last_tx.orig = &np->tx_ring.orig[np->tx_ring_size-1];
+       else
+               np->last_tx.ex = &np->tx_ring.ex[np->tx_ring_size-1];
+       np->get_tx_ctx = np->put_tx_ctx = np->first_tx_ctx = np->tx_skb;
+       np->last_tx_ctx = &np->tx_skb[np->tx_ring_size-1];
+       np->tx_pkts_in_progress = 0;
+       np->tx_change_owner = NULL;
+       np->tx_end_flip = NULL;
+       np->tx_stop = 0;
+
        for (i = 0; i < np->tx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       np->tx_ring.orig[i].FlagLen = 0;
-               else
-                       np->tx_ring.ex[i].FlagLen = 0;
-               np->tx_skbuff[i] = NULL;
-               np->tx_dma[i] = 0;
+               if (!nv_optimized(np)) {
+                       np->tx_ring.orig[i].flaglen = 0;
+                       np->tx_ring.orig[i].buf = 0;
+               } else {
+                       np->tx_ring.ex[i].flaglen = 0;
+                       np->tx_ring.ex[i].txvlan = 0;
+                       np->tx_ring.ex[i].bufhigh = 0;
+                       np->tx_ring.ex[i].buflow = 0;
+               }
+               np->tx_skb[i].skb = NULL;
+               np->tx_skb[i].dma = 0;
+               np->tx_skb[i].dma_len = 0;
+               np->tx_skb[i].dma_single = 0;
+               np->tx_skb[i].first_tx_desc = NULL;
+               np->tx_skb[i].next_tx_ctx = NULL;
        }
 }
 
 static int nv_init_ring(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
+
        nv_init_tx(dev);
        nv_init_rx(dev);
-       return nv_alloc_rx(dev);
+
+       if (!nv_optimized(np))
+               return nv_alloc_rx(dev);
+       else
+               return nv_alloc_rx_optimized(dev);
 }
 
-static int nv_release_txskb(struct net_device *dev, unsigned int skbnr)
+static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb)
 {
-       struct fe_priv *np = netdev_priv(dev);
-
-       dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d\n",
-               dev->name, skbnr);
-
-       if (np->tx_dma[skbnr]) {
-               pci_unmap_page(np->pci_dev, np->tx_dma[skbnr],
-                              np->tx_dma_len[skbnr],
-                              PCI_DMA_TODEVICE);
-               np->tx_dma[skbnr] = 0;
+       if (tx_skb->dma) {
+               if (tx_skb->dma_single)
+                       pci_unmap_single(np->pci_dev, tx_skb->dma,
+                                        tx_skb->dma_len,
+                                        PCI_DMA_TODEVICE);
+               else
+                       pci_unmap_page(np->pci_dev, tx_skb->dma,
+                                      tx_skb->dma_len,
+                                      PCI_DMA_TODEVICE);
+               tx_skb->dma = 0;
        }
+}
 
-       if (np->tx_skbuff[skbnr]) {
-               dev_kfree_skb_any(np->tx_skbuff[skbnr]);
-               np->tx_skbuff[skbnr] = NULL;
+static int nv_release_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb)
+{
+       nv_unmap_txskb(np, tx_skb);
+       if (tx_skb->skb) {
+               dev_kfree_skb_any(tx_skb->skb);
+               tx_skb->skb = NULL;
                return 1;
-       } else {
-               return 0;
        }
+       return 0;
 }
 
 static void nv_drain_tx(struct net_device *dev)
@@ -1368,59 +1926,196 @@ static void nv_drain_tx(struct net_device *dev)
        unsigned int i;
 
        for (i = 0; i < np->tx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       np->tx_ring.orig[i].FlagLen = 0;
-               else
-                       np->tx_ring.ex[i].FlagLen = 0;
-               if (nv_release_txskb(dev, i))
-                       np->stats.tx_dropped++;
+               if (!nv_optimized(np)) {
+                       np->tx_ring.orig[i].flaglen = 0;
+                       np->tx_ring.orig[i].buf = 0;
+               } else {
+                       np->tx_ring.ex[i].flaglen = 0;
+                       np->tx_ring.ex[i].txvlan = 0;
+                       np->tx_ring.ex[i].bufhigh = 0;
+                       np->tx_ring.ex[i].buflow = 0;
+               }
+               if (nv_release_txskb(np, &np->tx_skb[i]))
+                       dev->stats.tx_dropped++;
+               np->tx_skb[i].dma = 0;
+               np->tx_skb[i].dma_len = 0;
+               np->tx_skb[i].dma_single = 0;
+               np->tx_skb[i].first_tx_desc = NULL;
+               np->tx_skb[i].next_tx_ctx = NULL;
        }
+       np->tx_pkts_in_progress = 0;
+       np->tx_change_owner = NULL;
+       np->tx_end_flip = NULL;
 }
 
 static void nv_drain_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        int i;
+
        for (i = 0; i < np->rx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       np->rx_ring.orig[i].FlagLen = 0;
-               else
-                       np->rx_ring.ex[i].FlagLen = 0;
+               if (!nv_optimized(np)) {
+                       np->rx_ring.orig[i].flaglen = 0;
+                       np->rx_ring.orig[i].buf = 0;
+               } else {
+                       np->rx_ring.ex[i].flaglen = 0;
+                       np->rx_ring.ex[i].txvlan = 0;
+                       np->rx_ring.ex[i].bufhigh = 0;
+                       np->rx_ring.ex[i].buflow = 0;
+               }
                wmb();
-               if (np->rx_skbuff[i]) {
-                       pci_unmap_single(np->pci_dev, np->rx_dma[i],
-                                               np->rx_skbuff[i]->end-np->rx_skbuff[i]->data,
-                                               PCI_DMA_FROMDEVICE);
-                       dev_kfree_skb(np->rx_skbuff[i]);
-                       np->rx_skbuff[i] = NULL;
+               if (np->rx_skb[i].skb) {
+                       pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
+                                        (skb_end_pointer(np->rx_skb[i].skb) -
+                                         np->rx_skb[i].skb->data),
+                                        PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb(np->rx_skb[i].skb);
+                       np->rx_skb[i].skb = NULL;
                }
        }
 }
 
-static void drain_ring(struct net_device *dev)
+static void nv_drain_rxtx(struct net_device *dev)
 {
        nv_drain_tx(dev);
        nv_drain_rx(dev);
 }
 
+static inline u32 nv_get_empty_tx_slots(struct fe_priv *np)
+{
+       return (u32)(np->tx_ring_size - ((np->tx_ring_size + (np->put_tx_ctx - np->get_tx_ctx)) % np->tx_ring_size));
+}
+
+static void nv_legacybackoff_reseed(struct net_device *dev)
+{
+       u8 __iomem *base = get_hwbase(dev);
+       u32 reg;
+       u32 low;
+       int tx_status = 0;
+
+       reg = readl(base + NvRegSlotTime) & ~NVREG_SLOTTIME_MASK;
+       get_random_bytes(&low, sizeof(low));
+       reg |= low & NVREG_SLOTTIME_MASK;
+
+       /* Need to stop tx before change takes effect.
+        * Caller has already gained np->lock.
+        */
+       tx_status = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_START;
+       if (tx_status)
+               nv_stop_tx(dev);
+       nv_stop_rx(dev);
+       writel(reg, base + NvRegSlotTime);
+       if (tx_status)
+               nv_start_tx(dev);
+       nv_start_rx(dev);
+}
+
+/* Gear Backoff Seeds */
+#define BACKOFF_SEEDSET_ROWS   8
+#define BACKOFF_SEEDSET_LFSRS  15
+
+/* Known Good seed sets */
+static const u32 main_seedset[BACKOFF_SEEDSET_ROWS][BACKOFF_SEEDSET_LFSRS] = {
+    {145, 155, 165, 175, 185, 196, 235, 245, 255, 265, 275, 285, 660, 690, 874},
+    {245, 255, 265, 575, 385, 298, 335, 345, 355, 366, 375, 385, 761, 790, 974},
+    {145, 155, 165, 175, 185, 196, 235, 245, 255, 265, 275, 285, 660, 690, 874},
+    {245, 255, 265, 575, 385, 298, 335, 345, 355, 366, 375, 386, 761, 790, 974},
+    {266, 265, 276, 585, 397, 208, 345, 355, 365, 376, 385, 396, 771, 700, 984},
+    {266, 265, 276, 586, 397, 208, 346, 355, 365, 376, 285, 396, 771, 700, 984},
+    {366, 365, 376, 686, 497, 308, 447, 455, 466, 476, 485, 496, 871, 800,  84},
+    {466, 465, 476, 786, 597, 408, 547, 555, 566, 576, 585, 597, 971, 900, 184}};
+
+static const u32 gear_seedset[BACKOFF_SEEDSET_ROWS][BACKOFF_SEEDSET_LFSRS] = {
+    {251, 262, 273, 324, 319, 508, 375, 364, 341, 371, 398, 193, 375,  30, 295},
+    {351, 375, 373, 469, 551, 639, 477, 464, 441, 472, 498, 293, 476, 130, 395},
+    {351, 375, 373, 469, 551, 639, 477, 464, 441, 472, 498, 293, 476, 130, 397},
+    {251, 262, 273, 324, 319, 508, 375, 364, 341, 371, 398, 193, 375,  30, 295},
+    {251, 262, 273, 324, 319, 508, 375, 364, 341, 371, 398, 193, 375,  30, 295},
+    {351, 375, 373, 469, 551, 639, 477, 464, 441, 472, 498, 293, 476, 130, 395},
+    {351, 375, 373, 469, 551, 639, 477, 464, 441, 472, 498, 293, 476, 130, 395},
+    {351, 375, 373, 469, 551, 639, 477, 464, 441, 472, 498, 293, 476, 130, 395}};
+
+static void nv_gear_backoff_reseed(struct net_device *dev)
+{
+       u8 __iomem *base = get_hwbase(dev);
+       u32 miniseed1, miniseed2, miniseed2_reversed, miniseed3, miniseed3_reversed;
+       u32 temp, seedset, combinedSeed;
+       int i;
+
+       /* Setup seed for free running LFSR */
+       /* We are going to draw 3 random values with get_random_bytes()
+          and swizzle bits around to increase randomness */
+       get_random_bytes(&miniseed1, sizeof(miniseed1));
+       miniseed1 &= 0x0fff; /* keep the low 12 bits only */
+       if (miniseed1 == 0)
+               miniseed1 = 0xabc; /* fixed non-zero fallback */
+
+       get_random_bytes(&miniseed2, sizeof(miniseed2));
+       miniseed2 &= 0x0fff;
+       if (miniseed2 == 0)
+               miniseed2 = 0xabc;
+       miniseed2_reversed = /* swap the high and low nibbles of the 12-bit value */
+               ((miniseed2 & 0xF00) >> 8) |
+                (miniseed2 & 0x0F0) |
+                ((miniseed2 & 0x00F) << 8);
+
+       get_random_bytes(&miniseed3, sizeof(miniseed3));
+       miniseed3 &= 0x0fff;
+       if (miniseed3 == 0)
+               miniseed3 = 0xabc;
+       miniseed3_reversed = /* same nibble swap as for miniseed2 */
+               ((miniseed3 & 0xF00) >> 8) |
+                (miniseed3 & 0x0F0) |
+                ((miniseed3 & 0x00F) << 8);
+
+       combinedSeed = ((miniseed1 ^ miniseed2_reversed) << 12) |
+                      (miniseed2 ^ miniseed3_reversed);
+
+       /* Seeds can not be zero */
+       if ((combinedSeed & NVREG_BKOFFCTRL_SEED_MASK) == 0)
+               combinedSeed |= 0x08;
+       if ((combinedSeed & (NVREG_BKOFFCTRL_SEED_MASK << NVREG_BKOFFCTRL_GEAR)) == 0)
+               combinedSeed |= 0x8000;
+
+       /* No need to disable tx here */
+       temp = NVREG_BKOFFCTRL_DEFAULT | (0 << NVREG_BKOFFCTRL_SELECT);
+       temp |= combinedSeed & NVREG_BKOFFCTRL_SEED_MASK;
+       temp |= combinedSeed >> NVREG_BKOFFCTRL_GEAR;
+       writel(temp,base + NvRegBackOffControl);
+
+       /* Setup seeds for all gear LFSRs from one randomly picked row of the seed tables. */
+       get_random_bytes(&seedset, sizeof(seedset));
+       seedset = seedset % BACKOFF_SEEDSET_ROWS;
+       for (i = 1; i <= BACKOFF_SEEDSET_LFSRS; i++)
+       {
+               temp = NVREG_BKOFFCTRL_DEFAULT | (i << NVREG_BKOFFCTRL_SELECT);
+               temp |= main_seedset[seedset][i-1] & 0x3ff;
+               temp |= ((gear_seedset[seedset][i-1] & 0x3ff) << NVREG_BKOFFCTRL_GEAR);
+               writel(temp, base + NvRegBackOffControl);
+       }
+}
+
 /*
  * nv_start_xmit: dev->hard_start_xmit function
  * Called with netif_tx_lock held.
  */
-static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        u32 tx_flags = 0;
        u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
        unsigned int fragments = skb_shinfo(skb)->nr_frags;
-       unsigned int nr = (np->next_tx - 1) % np->tx_ring_size;
-       unsigned int start_nr = np->next_tx % np->tx_ring_size;
        unsigned int i;
        u32 offset = 0;
        u32 bcnt;
-       u32 size = skb->len-skb->data_len;
+       u32 size = skb_headlen(skb);
        u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
-       u32 tx_flags_vlan = 0;
+       u32 empty_slots;
+       struct ring_desc* put_tx;
+       struct ring_desc* start_tx;
+       struct ring_desc* prev_tx;
+       struct nv_skb_map* prev_tx_ctx;
+       unsigned long flags;
 
        /* add fragments to entries count */
        for (i = 0; i < fragments; i++) {
@@ -1428,35 +2123,38 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                           ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
        }
 
-       spin_lock_irq(&np->lock);
-
-       if ((np->next_tx - np->nic_tx + entries - 1) > np->tx_limit_stop) {
-               spin_unlock_irq(&np->lock);
+       spin_lock_irqsave(&np->lock, flags);
+       empty_slots = nv_get_empty_tx_slots(np);
+       if (unlikely(empty_slots <= entries)) {
                netif_stop_queue(dev);
+               np->tx_stop = 1;
+               spin_unlock_irqrestore(&np->lock, flags);
                return NETDEV_TX_BUSY;
        }
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       start_tx = put_tx = np->put_tx.orig;
 
        /* setup the header buffer */
        do {
+               prev_tx = put_tx;
+               prev_tx_ctx = np->put_tx_ctx;
                bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-               nr = (nr + 1) % np->tx_ring_size;
-
-               np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
+               np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
                                                PCI_DMA_TODEVICE);
-               np->tx_dma_len[nr] = bcnt;
+               np->put_tx_ctx->dma_len = bcnt;
+               np->put_tx_ctx->dma_single = 1;
+               put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma);
+               put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
 
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
-                       np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
-               } else {
-                       np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
-                       np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-                       np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
-               }
                tx_flags = np->tx_flags;
                offset += bcnt;
                size -= bcnt;
-       } while(size);
+               if (unlikely(put_tx++ == np->last_tx.orig))
+                       put_tx = np->first_tx.orig;
+               if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+                       np->put_tx_ctx = np->first_tx_ctx;
+       } while (size);
 
        /* setup the fragments */
        for (i = 0; i < fragments; i++) {
@@ -1465,57 +2163,201 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                offset = 0;
 
                do {
+                       prev_tx = put_tx;
+                       prev_tx_ctx = np->put_tx_ctx;
                        bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-                       nr = (nr + 1) % np->tx_ring_size;
-
-                       np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
-                                                     PCI_DMA_TODEVICE);
-                       np->tx_dma_len[nr] = bcnt;
+                       np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
+                                                          PCI_DMA_TODEVICE);
+                       np->put_tx_ctx->dma_len = bcnt;
+                       np->put_tx_ctx->dma_single = 0;
+                       put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma);
+                       put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
 
-                       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                               np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
-                               np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
-                       } else {
-                               np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
-                               np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-                               np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
-                       }
                        offset += bcnt;
                        size -= bcnt;
+                       if (unlikely(put_tx++ == np->last_tx.orig))
+                               put_tx = np->first_tx.orig;
+                       if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+                               np->put_tx_ctx = np->first_tx_ctx;
                } while (size);
        }
 
        /* set last fragment flag  */
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               np->tx_ring.orig[nr].FlagLen |= cpu_to_le32(tx_flags_extra);
-       } else {
-               np->tx_ring.ex[nr].FlagLen |= cpu_to_le32(tx_flags_extra);
+       prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);
+
+       /* save skb in this slot's context area */
+       prev_tx_ctx->skb = skb;
+
+       if (skb_is_gso(skb))
+               tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
+       else
+               tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+                        NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
+
+       spin_lock_irqsave(&np->lock, flags);
+
+       /* set tx flags */
+       start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+       np->put_tx.orig = put_tx;
+
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       dprintk(KERN_DEBUG "%s: nv_start_xmit: entries %d queued for transmission. tx_flags_extra: %x\n",
+               dev->name, entries, tx_flags_extra);
+       {
+               int j;
+               for (j=0; j<64; j++) {
+                       if ((j%16) == 0)
+                               dprintk("\n%03x:", j);
+                       dprintk(" %02x", ((unsigned char*)skb->data)[j]);
+               }
+               dprintk("\n");
+       }
+
+       writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+       return NETDEV_TX_OK;
+}
+
+static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
+                                          struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u32 tx_flags = 0;
+       u32 tx_flags_extra;
+       unsigned int fragments = skb_shinfo(skb)->nr_frags;
+       unsigned int i;
+       u32 offset = 0;
+       u32 bcnt;
+       u32 size = skb_headlen(skb);
+       u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+       u32 empty_slots;
+       struct ring_desc_ex* put_tx;
+       struct ring_desc_ex* start_tx;
+       struct ring_desc_ex* prev_tx;
+       struct nv_skb_map* prev_tx_ctx;
+       struct nv_skb_map* start_tx_ctx;
+       unsigned long flags;
+
+       /* add fragments to entries count */
+       for (i = 0; i < fragments; i++) {
+               entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) +
+                          ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+       }
+
+       spin_lock_irqsave(&np->lock, flags);
+       empty_slots = nv_get_empty_tx_slots(np);
+       if (unlikely(empty_slots <= entries)) {
+               netif_stop_queue(dev);
+               np->tx_stop = 1;
+               spin_unlock_irqrestore(&np->lock, flags);
+               return NETDEV_TX_BUSY;
        }
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       start_tx = put_tx = np->put_tx.ex;
+       start_tx_ctx = np->put_tx_ctx;
+
+       /* setup the header buffer */
+       do {
+               prev_tx = put_tx;
+               prev_tx_ctx = np->put_tx_ctx;
+               bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+               np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
+                                               PCI_DMA_TODEVICE);
+               np->put_tx_ctx->dma_len = bcnt;
+               np->put_tx_ctx->dma_single = 1;
+               put_tx->bufhigh = cpu_to_le32(dma_high(np->put_tx_ctx->dma));
+               put_tx->buflow = cpu_to_le32(dma_low(np->put_tx_ctx->dma));
+               put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
 
-       np->tx_skbuff[nr] = skb;
+               tx_flags = NV_TX2_VALID;
+               offset += bcnt;
+               size -= bcnt;
+               if (unlikely(put_tx++ == np->last_tx.ex))
+                       put_tx = np->first_tx.ex;
+               if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+                       np->put_tx_ctx = np->first_tx_ctx;
+       } while (size);
 
-#ifdef NETIF_F_TSO
-       if (skb_shinfo(skb)->gso_size)
+       /* setup the fragments */
+       for (i = 0; i < fragments; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+               u32 size = frag->size;
+               offset = 0;
+
+               do {
+                       prev_tx = put_tx;
+                       prev_tx_ctx = np->put_tx_ctx;
+                       bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+                       np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
+                                                          PCI_DMA_TODEVICE);
+                       np->put_tx_ctx->dma_len = bcnt;
+                       np->put_tx_ctx->dma_single = 0;
+                       put_tx->bufhigh = cpu_to_le32(dma_high(np->put_tx_ctx->dma));
+                       put_tx->buflow = cpu_to_le32(dma_low(np->put_tx_ctx->dma));
+                       put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+
+                       offset += bcnt;
+                       size -= bcnt;
+                       if (unlikely(put_tx++ == np->last_tx.ex))
+                               put_tx = np->first_tx.ex;
+                       if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+                               np->put_tx_ctx = np->first_tx_ctx;
+               } while (size);
+       }
+
+       /* set last fragment flag  */
+       prev_tx->flaglen |= cpu_to_le32(NV_TX2_LASTPACKET);
+
+       /* save skb in this slot's context area */
+       prev_tx_ctx->skb = skb;
+
+       if (skb_is_gso(skb))
                tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
        else
-#endif
-       tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+               tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+                        NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
        /* vlan tag */
-       if (np->vlangrp && vlan_tx_tag_present(skb)) {
-               tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb);
+       if (likely(!np->vlangrp)) {
+               start_tx->txvlan = 0;
+       } else {
+               if (vlan_tx_tag_present(skb))
+                       start_tx->txvlan = cpu_to_le32(NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb));
+               else
+                       start_tx->txvlan = 0;
        }
 
-       /* set tx flags */
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
-       } else {
-               np->tx_ring.ex[start_nr].TxVlan = cpu_to_le32(tx_flags_vlan);
-               np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
+       spin_lock_irqsave(&np->lock, flags);
+
+       if (np->tx_limit) {
+               /* Limit the number of outstanding tx. Setup all fragments, but
+                * do not set the VALID bit on the first descriptor. Save a pointer
+                * to that descriptor and also for next skb_map element.
+                */
+
+               if (np->tx_pkts_in_progress == NV_TX_LIMIT_COUNT) {
+                       if (!np->tx_change_owner)
+                               np->tx_change_owner = start_tx_ctx;
+
+                       /* remove VALID bit */
+                       tx_flags &= ~NV_TX2_VALID;
+                       start_tx_ctx->first_tx_desc = start_tx;
+                       start_tx_ctx->next_tx_ctx = np->put_tx_ctx;
+                       np->tx_end_flip = np->put_tx_ctx;
+               } else {
+                       np->tx_pkts_in_progress++;
+               }
        }
 
-       dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d (entries %d) queued for transmission. tx_flags_extra: %x\n",
-               dev->name, np->next_tx, entries, tx_flags_extra);
+       /* set tx flags */
+       start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+       np->put_tx.ex = put_tx;
+
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       dprintk(KERN_DEBUG "%s: nv_start_xmit_optimized: entries %d queued for transmission. tx_flags_extra: %x\n",
+               dev->name, entries, tx_flags_extra);
        {
                int j;
                for (j=0; j<64; j++) {
@@ -1526,75 +2368,144 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                dprintk("\n");
        }
 
-       np->next_tx += entries;
-
-       dev->trans_start = jiffies;
-       spin_unlock_irq(&np->lock);
        writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
-       pci_push(get_hwbase(dev));
        return NETDEV_TX_OK;
 }
 
+static inline void nv_tx_flip_ownership(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       /* Called when a tx packet has completed: drop the in-progress count, then release one held-back packet if any is queued. */
+       np->tx_pkts_in_progress--;
+       if (np->tx_change_owner) { /* a packet was queued by nv_start_xmit_optimized without NV_TX2_VALID on its first descriptor */
+               np->tx_change_owner->first_tx_desc->flaglen |=
+                       cpu_to_le32(NV_TX2_VALID);
+               np->tx_pkts_in_progress++; /* the released packet now counts as in progress */
+               /* move on to the next held-back packet; reaching tx_end_flip means none are left */
+               np->tx_change_owner = np->tx_change_owner->next_tx_ctx;
+               if (np->tx_change_owner == np->tx_end_flip)
+                       np->tx_change_owner = NULL;
+               /* write the KICK bit to the tx/rx control register (the same write the xmit path issues after queuing) */
+               writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+       }
+}
+
 /*
  * nv_tx_done: check for completed packets, release the skbs.
  *
  * Caller must own np->lock.
  */
-static void nv_tx_done(struct net_device *dev)
+static int nv_tx_done(struct net_device *dev, int limit) /* limit: max packets to complete; returns how many were completed */
 {
        struct fe_priv *np = netdev_priv(dev);
-       u32 Flags;
-       unsigned int i;
-       struct sk_buff *skb;
+       u32 flags;
+       int tx_work = 0; /* packets (LASTPACKET descriptors) completed in this call */
+       struct ring_desc* orig_get_tx = np->get_tx.orig; /* start position, used below to tell whether anything was reclaimed */
 
-       while (np->nic_tx != np->next_tx) {
-               i = np->nic_tx % np->tx_ring_size;
+       while ((np->get_tx.orig != np->put_tx.orig) &&
+              !((flags = le32_to_cpu(np->get_tx.orig->flaglen)) & NV_TX_VALID) &&
+              (tx_work < limit)) { /* stop at the put pointer, a descriptor with NV_TX_VALID still set, or the limit */
 
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       Flags = le32_to_cpu(np->tx_ring.orig[i].FlagLen);
-               else
-                       Flags = le32_to_cpu(np->tx_ring.ex[i].FlagLen);
+               dprintk(KERN_DEBUG "%s: nv_tx_done: flags 0x%x.\n",
+                                       dev->name, flags);
+               /* NOTE(review): nv_unmap_txskb presumably undoes the pci_map_* done by the xmit path for this slot - confirm */
+               nv_unmap_txskb(np, np->get_tx_ctx);
 
-               dprintk(KERN_DEBUG "%s: nv_tx_done: looking at packet %d, Flags 0x%x.\n",
-                                       dev->name, np->nic_tx, Flags);
-               if (Flags & NV_TX_VALID)
-                       break;
                if (np->desc_ver == DESC_VER_1) {
-                       if (Flags & NV_TX_LASTPACKET) {
-                               skb = np->tx_skbuff[i];
-                               if (Flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
-                                            NV_TX_UNDERFLOW|NV_TX_ERROR)) {
-                                       if (Flags & NV_TX_UNDERFLOW)
-                                               np->stats.tx_fifo_errors++;
-                                       if (Flags & NV_TX_CARRIERLOST)
-                                               np->stats.tx_carrier_errors++;
-                                       np->stats.tx_errors++;
+                       if (flags & NV_TX_LASTPACKET) {
+                               if (flags & NV_TX_ERROR) {
+                                       if (flags & NV_TX_UNDERFLOW)
+                                               dev->stats.tx_fifo_errors++;
+                                       if (flags & NV_TX_CARRIERLOST)
+                                               dev->stats.tx_carrier_errors++;
+                                       if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
+                                               nv_legacybackoff_reseed(dev);
+                                       dev->stats.tx_errors++;
                                } else {
-                                       np->stats.tx_packets++;
-                                       np->stats.tx_bytes += skb->len;
+                                       dev->stats.tx_packets++;
+                                       dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
                                }
+                               dev_kfree_skb_any(np->get_tx_ctx->skb); /* xmit saved the skb in the context slot of the packet's last descriptor */
+                               np->get_tx_ctx->skb = NULL;
+                               tx_work++;
                        }
                } else {
-                       if (Flags & NV_TX2_LASTPACKET) {
-                               skb = np->tx_skbuff[i];
-                               if (Flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
-                                            NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
-                                       if (Flags & NV_TX2_UNDERFLOW)
-                                               np->stats.tx_fifo_errors++;
-                                       if (Flags & NV_TX2_CARRIERLOST)
-                                               np->stats.tx_carrier_errors++;
-                                       np->stats.tx_errors++;
+                       if (flags & NV_TX2_LASTPACKET) {
+                               if (flags & NV_TX2_ERROR) {
+                                       if (flags & NV_TX2_UNDERFLOW)
+                                               dev->stats.tx_fifo_errors++;
+                                       if (flags & NV_TX2_CARRIERLOST)
+                                               dev->stats.tx_carrier_errors++;
+                                       if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
+                                               nv_legacybackoff_reseed(dev);
+                                       dev->stats.tx_errors++;
                                } else {
-                                       np->stats.tx_packets++;
-                                       np->stats.tx_bytes += skb->len;
+                                       dev->stats.tx_packets++;
+                                       dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+                               }
+                               dev_kfree_skb_any(np->get_tx_ctx->skb); /* same as the DESC_VER_1 branch: skb lives on the last descriptor's slot */
+                               np->get_tx_ctx->skb = NULL;
+                               tx_work++;
+                       }
+               }
+               if (unlikely(np->get_tx.orig++ == np->last_tx.orig)) /* advance descriptor and context pointers, wrapping at the ring end */
+                       np->get_tx.orig = np->first_tx.orig;
+               if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
+                       np->get_tx_ctx = np->first_tx_ctx;
+       }
+       if (unlikely((np->tx_stop == 1) && (np->get_tx.orig != orig_get_tx))) { /* wake the queue only if it was marked stopped and at least one descriptor was reclaimed */
+               np->tx_stop = 0;
+               netif_wake_queue(dev);
+       }
+       return tx_work;
+}
+
+static int nv_tx_done_optimized(struct net_device *dev, int limit)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u32 flags;
+       int tx_work = 0; /* packets (LASTPACKET descriptors) completed in this call */
+       struct ring_desc_ex* orig_get_tx = np->get_tx.ex; /* start position, used below to tell whether anything was reclaimed */
+       /* Reclaim finished descriptors from the ring_desc_ex tx ring; completes up to limit packets and returns that count. */
+       while ((np->get_tx.ex != np->put_tx.ex) &&
+              !((flags = le32_to_cpu(np->get_tx.ex->flaglen)) & NV_TX_VALID) &&
+              (tx_work < limit)) {
+
+               dprintk(KERN_DEBUG "%s: nv_tx_done_optimized: flags 0x%x.\n",
+                                       dev->name, flags);
+
+               nv_unmap_txskb(np, np->get_tx_ctx);
+               /* status is evaluated and the skb freed only on a packet's last descriptor */
+               if (flags & NV_TX2_LASTPACKET) {
+                       if (!(flags & NV_TX2_ERROR))
+                               dev->stats.tx_packets++;
+                       else {
+                               if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
+                                       if (np->driver_data & DEV_HAS_GEAR_MODE)
+                                               nv_gear_backoff_reseed(dev);
+                                       else
+                                               nv_legacybackoff_reseed(dev);
                                }
                        }
+                       /* NOTE(review): unlike nv_tx_done, no tx_bytes/tx_errors/fifo/carrier counters are updated here - confirm they are accounted elsewhere */
+                       dev_kfree_skb_any(np->get_tx_ctx->skb);
+                       np->get_tx_ctx->skb = NULL;
+                       tx_work++;
+                       /* with tx limiting enabled, a finished packet lets a held-back packet be handed to the hardware */
+                       if (np->tx_limit) {
+                               nv_tx_flip_ownership(dev);
+                       }
                }
-               nv_release_txskb(dev, i);
-               np->nic_tx++;
+               if (unlikely(np->get_tx.ex++ == np->last_tx.ex)) /* advance descriptor and context pointers, wrapping at the ring end */
+                       np->get_tx.ex = np->first_tx.ex;
+               if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
+                       np->get_tx_ctx = np->first_tx_ctx;
        }
-       if (np->next_tx - np->nic_tx < np->tx_limit_start)
+       if (unlikely((np->tx_stop == 1) && (np->get_tx.ex != orig_get_tx))) { /* wake the queue only if it was marked stopped and at least one descriptor was reclaimed */
+               np->tx_stop = 0;
                netif_wake_queue(dev);
+       }
+       return tx_work;
 }
 
 /*
@@ -1606,6 +2517,8 @@ static void nv_tx_timeout(struct net_device *dev)
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
        u32 status;
+       union ring_type put_tx;
+       int saved_tx_limit;
 
        if (np->msi_flags & NV_MSI_X_ENABLED)
                status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
@@ -1617,9 +2530,8 @@ static void nv_tx_timeout(struct net_device *dev)
        {
                int i;
 
-               printk(KERN_INFO "%s: Ring at %lx: next %d nic %d\n",
-                               dev->name, (unsigned long)np->ring_addr,
-                               np->next_tx, np->nic_tx);
+               printk(KERN_INFO "%s: Ring at %lx\n",
+                      dev->name, (unsigned long)np->ring_addr);
                printk(KERN_INFO "%s: Dumping tx registers\n", dev->name);
                for (i=0;i<=np->register_size;i+= 32) {
                        printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n",
@@ -1631,32 +2543,32 @@ static void nv_tx_timeout(struct net_device *dev)
                }
                printk(KERN_INFO "%s: Dumping tx ring\n", dev->name);
                for (i=0;i<np->tx_ring_size;i+= 4) {
-                       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (!nv_optimized(np)) {
                                printk(KERN_INFO "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n",
                                       i,
-                                      le32_to_cpu(np->tx_ring.orig[i].PacketBuffer),
-                                      le32_to_cpu(np->tx_ring.orig[i].FlagLen),
-                                      le32_to_cpu(np->tx_ring.orig[i+1].PacketBuffer),
-                                      le32_to_cpu(np->tx_ring.orig[i+1].FlagLen),
-                                      le32_to_cpu(np->tx_ring.orig[i+2].PacketBuffer),
-                                      le32_to_cpu(np->tx_ring.orig[i+2].FlagLen),
-                                      le32_to_cpu(np->tx_ring.orig[i+3].PacketBuffer),
-                                      le32_to_cpu(np->tx_ring.orig[i+3].FlagLen));
+                                      le32_to_cpu(np->tx_ring.orig[i].buf),
+                                      le32_to_cpu(np->tx_ring.orig[i].flaglen),
+                                      le32_to_cpu(np->tx_ring.orig[i+1].buf),
+                                      le32_to_cpu(np->tx_ring.orig[i+1].flaglen),
+                                      le32_to_cpu(np->tx_ring.orig[i+2].buf),
+                                      le32_to_cpu(np->tx_ring.orig[i+2].flaglen),
+                                      le32_to_cpu(np->tx_ring.orig[i+3].buf),
+                                      le32_to_cpu(np->tx_ring.orig[i+3].flaglen));
                        } else {
                                printk(KERN_INFO "%03x: %08x %08x %08x // %08x %08x %08x // %08x %08x %08x // %08x %08x %08x\n",
                                       i,
-                                      le32_to_cpu(np->tx_ring.ex[i].PacketBufferHigh),
-                                      le32_to_cpu(np->tx_ring.ex[i].PacketBufferLow),
-                                      le32_to_cpu(np->tx_ring.ex[i].FlagLen),
-                                      le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferHigh),
-                                      le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferLow),
-                                      le32_to_cpu(np->tx_ring.ex[i+1].FlagLen),
-                                      le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferHigh),
-                                      le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferLow),
-                                      le32_to_cpu(np->tx_ring.ex[i+2].FlagLen),
-                                      le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferHigh),
-                                      le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferLow),
-                                      le32_to_cpu(np->tx_ring.ex[i+3].FlagLen));
+                                      le32_to_cpu(np->tx_ring.ex[i].bufhigh),
+                                      le32_to_cpu(np->tx_ring.ex[i].buflow),
+                                      le32_to_cpu(np->tx_ring.ex[i].flaglen),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].bufhigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].buflow),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].flaglen),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].bufhigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].buflow),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].flaglen),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].bufhigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].buflow),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].flaglen));
                        }
                }
        }
@@ -1666,20 +2578,32 @@ static void nv_tx_timeout(struct net_device *dev)
        /* 1) stop tx engine */
        nv_stop_tx(dev);
 
-       /* 2) check that the packets were not sent already: */
-       nv_tx_done(dev);
+       /* 2) complete any outstanding tx and do not give HW any limited tx pkts */
+       saved_tx_limit = np->tx_limit;
+       np->tx_limit = 0; /* prevent giving HW any limited pkts */
+       np->tx_stop = 0;  /* prevent waking tx queue */
+       if (!nv_optimized(np))
+               nv_tx_done(dev, np->tx_ring_size);
+       else
+               nv_tx_done_optimized(dev, np->tx_ring_size);
 
-       /* 3) if there are dead entries: clear everything */
-       if (np->next_tx != np->nic_tx) {
-               printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name);
-               nv_drain_tx(dev);
-               np->next_tx = np->nic_tx = 0;
-               setup_hw_rings(dev, NV_SETUP_TX_RING);
-               netif_wake_queue(dev);
-       }
+       /* save current HW position */
+       if (np->tx_change_owner)
+               put_tx.ex = np->tx_change_owner->first_tx_desc;
+       else
+               put_tx = np->put_tx;
 
-       /* 4) restart tx engine */
+       /* 3) clear all tx state */
+       nv_drain_tx(dev);
+       nv_init_tx(dev);
+
+       /* 4) restore state to current HW position */
+       np->get_tx = np->put_tx = put_tx;
+       np->tx_limit = saved_tx_limit;
+
+       /* 5) restart tx engine */
        nv_start_tx(dev);
+       netif_wake_queue(dev);
        spin_unlock_irq(&np->lock);
 }
 
@@ -1693,7 +2617,7 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
        int protolen;   /* length as stored in the proto field */
 
        /* 1) calculate len according to header */
-       if ( ((struct vlan_ethhdr *)packet)->h_vlan_proto == __constant_htons(ETH_P_8021Q)) {
+       if ( ((struct vlan_ethhdr *)packet)->h_vlan_proto == htons(ETH_P_8021Q)) {
                protolen = ntohs( ((struct vlan_ethhdr *)packet)->h_vlan_encapsulated_proto );
                hdrlen = VLAN_HLEN;
        } else {
@@ -1736,157 +2660,235 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
        }
 }
 
-static void nv_rx_process(struct net_device *dev)
+static int nv_rx_process(struct net_device *dev, int limit)
 {
        struct fe_priv *np = netdev_priv(dev);
-       u32 Flags;
-       u32 vlanflags = 0;
-
-       for (;;) {
-               struct sk_buff *skb;
-               int len;
-               int i;
-               if (np->cur_rx - np->refill_rx >= np->rx_ring_size)
-                       break;  /* we scanned the whole ring - do not continue */
-
-               i = np->cur_rx % np->rx_ring_size;
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       Flags = le32_to_cpu(np->rx_ring.orig[i].FlagLen);
-                       len = nv_descr_getlength(&np->rx_ring.orig[i], np->desc_ver);
-               } else {
-                       Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen);
-                       len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver);
-                       vlanflags = le32_to_cpu(np->rx_ring.ex[i].PacketBufferLow);
-               }
+       u32 flags;
+       int rx_work = 0;
+       struct sk_buff *skb;
+       int len;
 
-               dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n",
-                                       dev->name, np->cur_rx, Flags);
+       while((np->get_rx.orig != np->put_rx.orig) &&
+             !((flags = le32_to_cpu(np->get_rx.orig->flaglen)) & NV_RX_AVAIL) &&
+               (rx_work < limit)) {
 
-               if (Flags & NV_RX_AVAIL)
-                       break;  /* still owned by hardware, */
+               dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
+                                       dev->name, flags);
 
                /*
                 * the packet is for us - immediately tear down the pci mapping.
                 * TODO: check if a prefetch of the first cacheline improves
                 * the performance.
                 */
-               pci_unmap_single(np->pci_dev, np->rx_dma[i],
-                               np->rx_skbuff[i]->end-np->rx_skbuff[i]->data,
+               pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
+                               np->get_rx_ctx->dma_len,
                                PCI_DMA_FROMDEVICE);
+               skb = np->get_rx_ctx->skb;
+               np->get_rx_ctx->skb = NULL;
 
                {
                        int j;
-                       dprintk(KERN_DEBUG "Dumping packet (flags 0x%x).",Flags);
+                       dprintk(KERN_DEBUG "Dumping packet (flags 0x%x).",flags);
                        for (j=0; j<64; j++) {
                                if ((j%16) == 0)
                                        dprintk("\n%03x:", j);
-                               dprintk(" %02x", ((unsigned char*)np->rx_skbuff[i]->data)[j]);
+                               dprintk(" %02x", ((unsigned char*)skb->data)[j]);
                        }
                        dprintk("\n");
                }
                /* look at what we actually got: */
                if (np->desc_ver == DESC_VER_1) {
-                       if (!(Flags & NV_RX_DESCRIPTORVALID))
-                               goto next_pkt;
-
-                       if (Flags & NV_RX_ERROR) {
-                               if (Flags & NV_RX_MISSEDFRAME) {
-                                       np->stats.rx_missed_errors++;
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & (NV_RX_ERROR1|NV_RX_ERROR2|NV_RX_ERROR3)) {
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX_CRCERR) {
-                                       np->stats.rx_crc_errors++;
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX_OVERFLOW) {
-                                       np->stats.rx_over_errors++;
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX_ERROR4) {
-                                       len = nv_getlen(dev, np->rx_skbuff[i]->data, len);
-                                       if (len < 0) {
-                                               np->stats.rx_errors++;
-                                               goto next_pkt;
+                       if (likely(flags & NV_RX_DESCRIPTORVALID)) {
+                               len = flags & LEN_MASK_V1;
+                               if (unlikely(flags & NV_RX_ERROR)) {
+                                       if ((flags & NV_RX_ERROR_MASK) == NV_RX_ERROR4) {
+                                               len = nv_getlen(dev, skb->data, len);
+                                               if (len < 0) {
+                                                       dev->stats.rx_errors++;
+                                                       dev_kfree_skb(skb);
+                                                       goto next_pkt;
+                                               }
                                        }
-                               }
-                               /* framing errors are soft errors. */
-                               if (Flags & NV_RX_FRAMINGERR) {
-                                       if (Flags & NV_RX_SUBSTRACT1) {
-                                               len--;
+                                       /* framing errors are soft errors */
+                                       else if ((flags & NV_RX_ERROR_MASK) == NV_RX_FRAMINGERR) {
+                                               if (flags & NV_RX_SUBSTRACT1) {
+                                                       len--;
+                                               }
+                                       }
+                                       /* the rest are hard errors */
+                                       else {
+                                               if (flags & NV_RX_MISSEDFRAME)
+                                                       dev->stats.rx_missed_errors++;
+                                               if (flags & NV_RX_CRCERR)
+                                                       dev->stats.rx_crc_errors++;
+                                               if (flags & NV_RX_OVERFLOW)
+                                                       dev->stats.rx_over_errors++;
+                                               dev->stats.rx_errors++;
+                                               dev_kfree_skb(skb);
+                                               goto next_pkt;
                                        }
                                }
+                       } else {
+                               dev_kfree_skb(skb);
+                               goto next_pkt;
                        }
                } else {
-                       if (!(Flags & NV_RX2_DESCRIPTORVALID))
+                       if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+                               len = flags & LEN_MASK_V2;
+                               if (unlikely(flags & NV_RX2_ERROR)) {
+                                       if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_ERROR4) {
+                                               len = nv_getlen(dev, skb->data, len);
+                                               if (len < 0) {
+                                                       dev->stats.rx_errors++;
+                                                       dev_kfree_skb(skb);
+                                                       goto next_pkt;
+                                               }
+                                       }
+                                       /* framing errors are soft errors */
+                                       else if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_FRAMINGERR) {
+                                               if (flags & NV_RX2_SUBSTRACT1) {
+                                                       len--;
+                                               }
+                                       }
+                                       /* the rest are hard errors */
+                                       else {
+                                               if (flags & NV_RX2_CRCERR)
+                                                       dev->stats.rx_crc_errors++;
+                                               if (flags & NV_RX2_OVERFLOW)
+                                                       dev->stats.rx_over_errors++;
+                                               dev->stats.rx_errors++;
+                                               dev_kfree_skb(skb);
+                                               goto next_pkt;
+                                       }
+                               }
+                               if (((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUM_IP_TCP) || /*ip and tcp */
+                                   ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUM_IP_UDP))   /*ip and udp */
+                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       } else {
+                               dev_kfree_skb(skb);
                                goto next_pkt;
+                       }
+               }
+               /* got a valid packet - forward it to the network core */
+               skb_put(skb, len);
+               skb->protocol = eth_type_trans(skb, dev);
+               dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
+                                       dev->name, len, skb->protocol);
+               napi_gro_receive(&np->napi, skb);
+               dev->stats.rx_packets++;
+               dev->stats.rx_bytes += len;
+next_pkt:
+               if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
+                       np->get_rx.orig = np->first_rx.orig;
+               if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
+                       np->get_rx_ctx = np->first_rx_ctx;
 
-                       if (Flags & NV_RX2_ERROR) {
-                               if (Flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX2_CRCERR) {
-                                       np->stats.rx_crc_errors++;
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX2_OVERFLOW) {
-                                       np->stats.rx_over_errors++;
-                                       np->stats.rx_errors++;
-                                       goto next_pkt;
-                               }
-                               if (Flags & NV_RX2_ERROR4) {
-                                       len = nv_getlen(dev, np->rx_skbuff[i]->data, len);
+               rx_work++;
+       }
+
+       return rx_work;
+}
+
+static int nv_rx_process_optimized(struct net_device *dev, int limit)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u32 flags;
+       u32 vlanflags = 0;
+       int rx_work = 0;
+       struct sk_buff *skb;
+       int len;
+
+       while((np->get_rx.ex != np->put_rx.ex) &&
+             !((flags = le32_to_cpu(np->get_rx.ex->flaglen)) & NV_RX2_AVAIL) &&
+             (rx_work < limit)) {
+
+               dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
+                                       dev->name, flags);
+
+               /*
+                * the packet is for us - immediately tear down the pci mapping.
+                * TODO: check if a prefetch of the first cacheline improves
+                * the performance.
+                */
+               pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
+                               np->get_rx_ctx->dma_len,
+                               PCI_DMA_FROMDEVICE);
+               skb = np->get_rx_ctx->skb;
+               np->get_rx_ctx->skb = NULL;
+
+               {
+                       int j;
+                       dprintk(KERN_DEBUG "Dumping packet (flags 0x%x).",flags);
+                       for (j=0; j<64; j++) {
+                               if ((j%16) == 0)
+                                       dprintk("\n%03x:", j);
+                               dprintk(" %02x", ((unsigned char*)skb->data)[j]);
+                       }
+                       dprintk("\n");
+               }
+               /* look at what we actually got: */
+               if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+                       len = flags & LEN_MASK_V2;
+                       if (unlikely(flags & NV_RX2_ERROR)) {
+                               if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_ERROR4) {
+                                       len = nv_getlen(dev, skb->data, len);
                                        if (len < 0) {
-                                               np->stats.rx_errors++;
+                                               dev_kfree_skb(skb);
                                                goto next_pkt;
                                        }
                                }
                                /* framing errors are soft errors */
-                               if (Flags & NV_RX2_FRAMINGERR) {
-                                       if (Flags & NV_RX2_SUBSTRACT1) {
+                               else if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_FRAMINGERR) {
+                                       if (flags & NV_RX2_SUBSTRACT1) {
                                                len--;
                                        }
                                }
+                               /* the rest are hard errors */
+                               else {
+                                       dev_kfree_skb(skb);
+                                       goto next_pkt;
+                               }
                        }
-                       if (np->txrxctl_bits & NVREG_TXRXCTL_RXCHECK) {
-                               Flags &= NV_RX2_CHECKSUMMASK;
-                               if (Flags == NV_RX2_CHECKSUMOK1 ||
-                                   Flags == NV_RX2_CHECKSUMOK2 ||
-                                   Flags == NV_RX2_CHECKSUMOK3) {
-                                       dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-                                       np->rx_skbuff[i]->ip_summed = CHECKSUM_UNNECESSARY;
+
+                       if (((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUM_IP_TCP) || /*ip and tcp */
+                           ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUM_IP_UDP))   /*ip and udp */
+                               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+                       /* got a valid packet - forward it to the network core */
+                       skb_put(skb, len);
+                       skb->protocol = eth_type_trans(skb, dev);
+                       prefetch(skb->data);
+
+                       dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: %d bytes, proto %d accepted.\n",
+                               dev->name, len, skb->protocol);
+
+                       if (likely(!np->vlangrp)) {
+                               napi_gro_receive(&np->napi, skb);
+                       } else {
+                               vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+                               if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+                                       vlan_gro_receive(&np->napi, np->vlangrp,
+                                                        vlanflags & NV_RX3_VLAN_TAG_MASK, skb);
                                } else {
-                                       dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
+                                       napi_gro_receive(&np->napi, skb);
                                }
                        }
-               }
-               /* got a valid packet - forward it to the network core */
-               skb = np->rx_skbuff[i];
-               np->rx_skbuff[i] = NULL;
 
-               skb_put(skb, len);
-               skb->protocol = eth_type_trans(skb, dev);
-               dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n",
-                                       dev->name, np->cur_rx, len, skb->protocol);
-               if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) {
-                       vlan_hwaccel_rx(skb, np->vlangrp, vlanflags & NV_RX3_VLAN_TAG_MASK);
+                       dev->stats.rx_packets++;
+                       dev->stats.rx_bytes += len;
                } else {
-                       netif_rx(skb);
+                       dev_kfree_skb(skb);
                }
-               dev->last_rx = jiffies;
-               np->stats.rx_packets++;
-               np->stats.rx_bytes += len;
 next_pkt:
-               np->cur_rx++;
+               if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
+                       np->get_rx.ex = np->first_rx.ex;
+               if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
+                       np->get_rx_ctx = np->first_rx_ctx;
+
+               rx_work++;
        }
+
+       return rx_work;
 }
 
 static void set_bufsize(struct net_device *dev)
@@ -1930,15 +2932,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
                 * Changing the MTU is a rare event, it shouldn't matter.
                 */
                nv_disable_irq(dev);
+               nv_napi_disable(dev);
                netif_tx_lock_bh(dev);
+               netif_addr_lock(dev);
                spin_lock(&np->lock);
                /* stop engines */
-               nv_stop_rx(dev);
-               nv_stop_tx(dev);
+               nv_stop_rxtx(dev);
                nv_txrx_reset(dev);
                /* drain rx queue */
-               nv_drain_rx(dev);
-               nv_drain_tx(dev);
+               nv_drain_rxtx(dev);
                /* reinit driver view of the rx queue */
                set_bufsize(dev);
                if (nv_init_ring(dev)) {
@@ -1955,10 +2957,11 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
                pci_push(base);
 
                /* restart rx engine */
-               nv_start_rx(dev);
-               nv_start_tx(dev);
+               nv_start_rxtx(dev);
                spin_unlock(&np->lock);
+               netif_addr_unlock(dev);
                netif_tx_unlock_bh(dev);
+               nv_napi_enable(dev);
                nv_enable_irq(dev);
        }
        return 0;
@@ -1986,7 +2989,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
        struct fe_priv *np = netdev_priv(dev);
        struct sockaddr *macaddr = (struct sockaddr*)addr;
 
-       if(!is_valid_ether_addr(macaddr->sa_data))
+       if (!is_valid_ether_addr(macaddr->sa_data))
                return -EADDRNOTAVAIL;
 
        /* synchronized against open : rtnl_lock() held by caller */
@@ -1994,6 +2997,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
 
        if (netif_running(dev)) {
                netif_tx_lock_bh(dev);
+               netif_addr_lock(dev);
                spin_lock_irq(&np->lock);
 
                /* stop rx engine */
@@ -2005,6 +3009,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
                /* restart rx engine */
                nv_start_rx(dev);
                spin_unlock_irq(&np->lock);
+               netif_addr_unlock(dev);
                netif_tx_unlock_bh(dev);
        } else {
                nv_copy_mac_to_hw(dev);
@@ -2028,12 +3033,11 @@ static void nv_set_multicast(struct net_device *dev)
        memset(mask, 0, sizeof(mask));
 
        if (dev->flags & IFF_PROMISC) {
-               printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", dev->name);
                pff |= NVREG_PFF_PROMISC;
        } else {
                pff |= NVREG_PFF_MYADDR;
 
-               if (dev->flags & IFF_ALLMULTI || dev->mc_list) {
+               if (dev->flags & IFF_ALLMULTI || !netdev_mc_empty(dev)) {
                        u32 alwaysOff[2];
                        u32 alwaysOn[2];
 
@@ -2041,24 +3045,27 @@ static void nv_set_multicast(struct net_device *dev)
                        if (dev->flags & IFF_ALLMULTI) {
                                alwaysOn[0] = alwaysOn[1] = alwaysOff[0] = alwaysOff[1] = 0;
                        } else {
-                               struct dev_mc_list *walk;
+                               struct netdev_hw_addr *ha;
 
-                               walk = dev->mc_list;
-                               while (walk != NULL) {
+                               netdev_for_each_mc_addr(ha, dev) {
+                                       unsigned char *addr = ha->addr;
                                        u32 a, b;
-                                       a = le32_to_cpu(*(u32 *) walk->dmi_addr);
-                                       b = le16_to_cpu(*(u16 *) (&walk->dmi_addr[4]));
+
+                                       a = le32_to_cpu(*(__le32 *) addr);
+                                       b = le16_to_cpu(*(__le16 *) (&addr[4]));
                                        alwaysOn[0] &= a;
                                        alwaysOff[0] &= ~a;
                                        alwaysOn[1] &= b;
                                        alwaysOff[1] &= ~b;
-                                       walk = walk->next;
                                }
                        }
                        addr[0] = alwaysOn[0];
                        addr[1] = alwaysOn[1];
                        mask[0] = alwaysOn[0] | alwaysOff[0];
                        mask[1] = alwaysOn[1] | alwaysOff[1];
+               } else {
+                       mask[0] = NVREG_MCASTMASKA_NONE;
+                       mask[1] = NVREG_MCASTMASKB_NONE;
                }
        }
        addr[0] |= NVREG_MCASTADDRA_FORCE;
@@ -2095,7 +3102,15 @@ static void nv_update_pause(struct net_device *dev, u32 pause_flags)
        if (np->pause_flags & NV_PAUSEFRAME_TX_CAPABLE) {
                u32 regmisc = readl(base + NvRegMisc1) & ~NVREG_MISC1_PAUSE_TX;
                if (pause_flags & NV_PAUSEFRAME_TX_ENABLE) {
-                       writel(NVREG_TX_PAUSEFRAME_ENABLE,  base + NvRegTxPauseFrame);
+                       u32 pause_enable = NVREG_TX_PAUSEFRAME_ENABLE_V1;
+                       if (np->driver_data & DEV_HAS_PAUSEFRAME_TX_V2)
+                               pause_enable = NVREG_TX_PAUSEFRAME_ENABLE_V2;
+                       if (np->driver_data & DEV_HAS_PAUSEFRAME_TX_V3) {
+                               pause_enable = NVREG_TX_PAUSEFRAME_ENABLE_V3;
+                               /* limit the number of tx pause frames to a default of 8 */
+                               writel(readl(base + NvRegTxPauseFrameLimit)|NVREG_TX_PAUSEFRAMELIMIT_ENABLE, base + NvRegTxPauseFrameLimit);
+                       }
+                       writel(pause_enable,  base + NvRegTxPauseFrame);
                        writel(regmisc|NVREG_MISC1_PAUSE_TX, base + NvRegMisc1);
                        np->pause_flags |= NV_PAUSEFRAME_TX_ENABLE;
                } else {
@@ -2127,7 +3142,9 @@ static int nv_update_linkspeed(struct net_device *dev)
        int newdup = np->duplex;
        int mii_status;
        int retval = 0;
-       u32 control_1000, status_1000, phyreg, pause_flags;
+       u32 control_1000, status_1000, phyreg, pause_flags, txreg;
+       u32 txrxFlags = 0;
+       u32 phy_exp;
 
        /* BMSR_LSTATUS is latched, read it twice:
         * we want the current value.
@@ -2223,16 +3240,25 @@ set_speed:
        np->duplex = newdup;
        np->linkspeed = newls;
 
+       /* The transmitter and receiver must be restarted for safe update */
+       if (readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_START) {
+               txrxFlags |= NV_RESTART_TX;
+               nv_stop_tx(dev);
+       }
+       if (readl(base + NvRegReceiverControl) & NVREG_RCVCTL_START) {
+               txrxFlags |= NV_RESTART_RX;
+               nv_stop_rx(dev);
+       }
+
        if (np->gigabit == PHY_GIGABIT) {
-               phyreg = readl(base + NvRegRandomSeed);
+               phyreg = readl(base + NvRegSlotTime);
                phyreg &= ~(0x3FF00);
-               if ((np->linkspeed & 0xFFF) == NVREG_LINKSPEED_10)
-                       phyreg |= NVREG_RNDSEED_FORCE3;
-               else if ((np->linkspeed & 0xFFF) == NVREG_LINKSPEED_100)
-                       phyreg |= NVREG_RNDSEED_FORCE2;
+               if (((np->linkspeed & 0xFFF) == NVREG_LINKSPEED_10) ||
+                   ((np->linkspeed & 0xFFF) == NVREG_LINKSPEED_100))
+                       phyreg |= NVREG_SLOTTIME_10_100_FULL;
                else if ((np->linkspeed & 0xFFF) == NVREG_LINKSPEED_1000)
-                       phyreg |= NVREG_RNDSEED_FORCE;
-               writel(phyreg, base + NvRegRandomSeed);
+                       phyreg |= NVREG_SLOTTIME_1000_FULL;
+               writel(phyreg, base + NvRegSlotTime);
        }
 
        phyreg = readl(base + NvRegPhyInterface);
@@ -2245,6 +3271,38 @@ set_speed:
                phyreg |= PHY_1000;
        writel(phyreg, base + NvRegPhyInterface);
 
+       phy_exp = mii_rw(dev, np->phyaddr, MII_EXPANSION, MII_READ) & EXPANSION_NWAY; /* autoneg capable */
+       if (phyreg & PHY_RGMII) {
+               if ((np->linkspeed & NVREG_LINKSPEED_MASK) == NVREG_LINKSPEED_1000) {
+                       txreg = NVREG_TX_DEFERRAL_RGMII_1000;
+               } else {
+                       if (!phy_exp && !np->duplex && (np->driver_data & DEV_HAS_COLLISION_FIX)) {
+                               if ((np->linkspeed & NVREG_LINKSPEED_MASK) == NVREG_LINKSPEED_10)
+                                       txreg = NVREG_TX_DEFERRAL_RGMII_STRETCH_10;
+                               else
+                                       txreg = NVREG_TX_DEFERRAL_RGMII_STRETCH_100;
+                       } else {
+                               txreg = NVREG_TX_DEFERRAL_RGMII_10_100;
+                       }
+               }
+       } else {
+               if (!phy_exp && !np->duplex && (np->driver_data & DEV_HAS_COLLISION_FIX))
+                       txreg = NVREG_TX_DEFERRAL_MII_STRETCH;
+               else
+                       txreg = NVREG_TX_DEFERRAL_DEFAULT;
+       }
+       writel(txreg, base + NvRegTxDeferral);
+
+       if (np->desc_ver == DESC_VER_1) {
+               txreg = NVREG_TX_WM_DESC1_DEFAULT;
+       } else {
+               if ((np->linkspeed & NVREG_LINKSPEED_MASK) == NVREG_LINKSPEED_1000)
+                       txreg = NVREG_TX_WM_DESC2_3_1000;
+               else
+                       txreg = NVREG_TX_WM_DESC2_3_DEFAULT;
+       }
+       writel(txreg, base + NvRegTxWatermark);
+
        writel(NVREG_MISC1_FORCE | ( np->duplex ? 0 : NVREG_MISC1_HD),
                base + NvRegMisc1);
        pci_push(base);
@@ -2259,20 +3317,20 @@ set_speed:
                        lpa_pause = lpa & (LPA_PAUSE_CAP| LPA_PAUSE_ASYM);
 
                        switch (adv_pause) {
-                       case (ADVERTISE_PAUSE_CAP):
+                       case ADVERTISE_PAUSE_CAP:
                                if (lpa_pause & LPA_PAUSE_CAP) {
                                        pause_flags |= NV_PAUSEFRAME_RX_ENABLE;
                                        if (np->pause_flags & NV_PAUSEFRAME_TX_REQ)
                                                pause_flags |= NV_PAUSEFRAME_TX_ENABLE;
                                }
                                break;
-                       case (ADVERTISE_PAUSE_ASYM):
+                       case ADVERTISE_PAUSE_ASYM:
                                if (lpa_pause == (LPA_PAUSE_CAP| LPA_PAUSE_ASYM))
                                {
                                        pause_flags |= NV_PAUSEFRAME_TX_ENABLE;
                                }
                                break;
-                       case (ADVERTISE_PAUSE_CAP| ADVERTISE_PAUSE_ASYM):
+                       case ADVERTISE_PAUSE_CAP| ADVERTISE_PAUSE_ASYM:
                                if (lpa_pause & LPA_PAUSE_CAP)
                                {
                                        pause_flags |=  NV_PAUSEFRAME_RX_ENABLE;
@@ -2291,6 +3349,11 @@ set_speed:
        }
        nv_update_pause(dev, pause_flags);
 
+       if (txrxFlags & NV_RESTART_TX)
+               nv_start_tx(dev);
+       if (txrxFlags & NV_RESTART_RX)
+               nv_start_rx(dev);
+
        return retval;
 }
 
@@ -2300,137 +3363,171 @@ static void nv_linkchange(struct net_device *dev)
                if (!netif_carrier_ok(dev)) {
                        netif_carrier_on(dev);
                        printk(KERN_INFO "%s: link up.\n", dev->name);
+                       nv_txrx_gate(dev, false);
                        nv_start_rx(dev);
                }
        } else {
                if (netif_carrier_ok(dev)) {
                        netif_carrier_off(dev);
                        printk(KERN_INFO "%s: link down.\n", dev->name);
+                       nv_txrx_gate(dev, true);
                        nv_stop_rx(dev);
                }
        }
 }
 
-static void nv_link_irq(struct net_device *dev)
+static void nv_link_irq(struct net_device *dev)
+{
+       u8 __iomem *base = get_hwbase(dev);
+       u32 miistat;
+
+       miistat = readl(base + NvRegMIIStatus);
+       writel(NVREG_MIISTAT_LINKCHANGE, base + NvRegMIIStatus);
+       dprintk(KERN_INFO "%s: link change irq, status 0x%x.\n", dev->name, miistat);
+
+       if (miistat & (NVREG_MIISTAT_LINKCHANGE))
+               nv_linkchange(dev);
+       dprintk(KERN_DEBUG "%s: link change notification done.\n", dev->name);
+}
+
+static void nv_msi_workaround(struct fe_priv *np)
+{
+
+       /* Need to toggle the msi irq mask within the ethernet device,
+        * otherwise, future interrupts will not be detected.
+        */
+       if (np->msi_flags & NV_MSI_ENABLED) {
+               u8 __iomem *base = np->base;
+
+               writel(0, base + NvRegMSIIrqMask);
+               writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask);
+       }
+}
+
+static inline int nv_change_interrupt_mode(struct net_device *dev, int total_work)
 {
-       u8 __iomem *base = get_hwbase(dev);
-       u32 miistat;
-
-       miistat = readl(base + NvRegMIIStatus);
-       writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
-       dprintk(KERN_INFO "%s: link change irq, status 0x%x.\n", dev->name, miistat);
+       struct fe_priv *np = netdev_priv(dev);
 
-       if (miistat & (NVREG_MIISTAT_LINKCHANGE))
-               nv_linkchange(dev);
-       dprintk(KERN_DEBUG "%s: link change notification done.\n", dev->name);
+       if (optimization_mode == NV_OPTIMIZATION_MODE_DYNAMIC) {
+               if (total_work > NV_DYNAMIC_THRESHOLD) {
+                       /* transition to poll based interrupts */
+                       np->quiet_count = 0;
+                       if (np->irqmask != NVREG_IRQMASK_CPU) {
+                               np->irqmask = NVREG_IRQMASK_CPU;
+                               return 1;
+                       }
+               } else {
+                       if (np->quiet_count < NV_DYNAMIC_MAX_QUIET_COUNT) {
+                               np->quiet_count++;
+                       } else {
+                               /* reached a period of low activity, switch
+                                  to per tx/rx packet interrupts */
+                               if (np->irqmask != NVREG_IRQMASK_THROUGHPUT) {
+                                       np->irqmask = NVREG_IRQMASK_THROUGHPUT;
+                                       return 1;
+                               }
+                       }
+               }
+       }
+       return 0;
 }
 
-static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
+static irqreturn_t nv_nic_irq(int foo, void *data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
-       u32 events;
-       int i;
 
        dprintk(KERN_DEBUG "%s: nv_nic_irq\n", dev->name);
 
-       for (i=0; ; i++) {
-               if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
-                       events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
-                       writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
-               } else {
-                       events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
-                       writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus);
-               }
-               pci_push(base);
-               dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events);
-               if (!(events & np->irqmask))
-                       break;
+       if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
+               np->events = readl(base + NvRegIrqStatus);
+               writel(np->events, base + NvRegIrqStatus);
+       } else {
+               np->events = readl(base + NvRegMSIXIrqStatus);
+               writel(np->events, base + NvRegMSIXIrqStatus);
+       }
+       dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, np->events);
+       if (!(np->events & np->irqmask))
+               return IRQ_NONE;
 
-               spin_lock(&np->lock);
-               nv_tx_done(dev);
-               spin_unlock(&np->lock);
+       nv_msi_workaround(np);
 
-               nv_rx_process(dev);
-               if (nv_alloc_rx(dev)) {
-                       spin_lock(&np->lock);
-                       if (!np->in_shutdown)
-                               mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-                       spin_unlock(&np->lock);
-               }
+       if (napi_schedule_prep(&np->napi)) {
+               /*
+                * Disable further irq's (msix not enabled with napi)
+                */
+               writel(0, base + NvRegIrqMask);
+               __napi_schedule(&np->napi);
+       }
 
-               if (events & NVREG_IRQ_LINK) {
-                       spin_lock(&np->lock);
-                       nv_link_irq(dev);
-                       spin_unlock(&np->lock);
-               }
-               if (np->need_linktimer && time_after(jiffies, np->link_timeout)) {
-                       spin_lock(&np->lock);
-                       nv_linkchange(dev);
-                       spin_unlock(&np->lock);
-                       np->link_timeout = jiffies + LINK_TIMEOUT;
-               }
-               if (events & (NVREG_IRQ_TX_ERR)) {
-                       dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n",
-                                               dev->name, events);
-               }
-               if (events & (NVREG_IRQ_UNKNOWN)) {
-                       printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n",
-                                               dev->name, events);
-               }
-               if (i > max_interrupt_work) {
-                       spin_lock(&np->lock);
-                       /* disable interrupts on the nic */
-                       if (!(np->msi_flags & NV_MSI_X_ENABLED))
-                               writel(0, base + NvRegIrqMask);
-                       else
-                               writel(np->irqmask, base + NvRegIrqMask);
-                       pci_push(base);
+       dprintk(KERN_DEBUG "%s: nv_nic_irq completed\n", dev->name);
 
-                       if (!np->in_shutdown) {
-                               np->nic_poll_irq = np->irqmask;
-                               mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
-                       }
-                       printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq.\n", dev->name, i);
-                       spin_unlock(&np->lock);
-                       break;
-               }
+       return IRQ_HANDLED;
+}
+
+/**
+ * All _optimized functions are used to help increase performance
+ * (reduce CPU and increase throughput). They use descriptor version 3,
+ * compiler directives, and reduce memory accesses.
+ */
+static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
+{
+       struct net_device *dev = (struct net_device *) data;
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
 
+       dprintk(KERN_DEBUG "%s: nv_nic_irq_optimized\n", dev->name);
+
+       if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
+               np->events = readl(base + NvRegIrqStatus);
+               writel(np->events, base + NvRegIrqStatus);
+       } else {
+               np->events = readl(base + NvRegMSIXIrqStatus);
+               writel(np->events, base + NvRegMSIXIrqStatus);
        }
-       dprintk(KERN_DEBUG "%s: nv_nic_irq completed\n", dev->name);
+       dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, np->events);
+       if (!(np->events & np->irqmask))
+               return IRQ_NONE;
 
-       return IRQ_RETVAL(i);
+       nv_msi_workaround(np);
+
+       if (napi_schedule_prep(&np->napi)) {
+               /*
+                * Disable further irq's (msix not enabled with napi)
+                */
+               writel(0, base + NvRegIrqMask);
+               __napi_schedule(&np->napi);
+       }
+       dprintk(KERN_DEBUG "%s: nv_nic_irq_optimized completed\n", dev->name);
+
+       return IRQ_HANDLED;
 }
 
-static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
+static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
        u32 events;
        int i;
+       unsigned long flags;
 
        dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name);
 
        for (i=0; ; i++) {
                events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL;
                writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus);
-               pci_push(base);
                dprintk(KERN_DEBUG "%s: tx irq: %08x\n", dev->name, events);
                if (!(events & np->irqmask))
                        break;
 
-               spin_lock_irq(&np->lock);
-               nv_tx_done(dev);
-               spin_unlock_irq(&np->lock);
+               spin_lock_irqsave(&np->lock, flags);
+               nv_tx_done_optimized(dev, TX_WORK_PER_LOOP);
+               spin_unlock_irqrestore(&np->lock, flags);
 
-               if (events & (NVREG_IRQ_TX_ERR)) {
-                       dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n",
-                                               dev->name, events);
-               }
-               if (i > max_interrupt_work) {
-                       spin_lock_irq(&np->lock);
+               if (unlikely(i > max_interrupt_work)) {
+                       spin_lock_irqsave(&np->lock, flags);
                        /* disable interrupts on the nic */
                        writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask);
                        pci_push(base);
@@ -2439,8 +3536,8 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
                                np->nic_poll_irq |= NVREG_IRQ_TX_ALL;
                                mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
                        }
+                       spin_unlock_irqrestore(&np->lock, flags);
                        printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i);
-                       spin_unlock_irq(&np->lock);
                        break;
                }
 
@@ -2450,34 +3547,106 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
        return IRQ_RETVAL(i);
 }
 
-static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs)
+static int nv_napi_poll(struct napi_struct *napi, int budget)
+{
+       struct fe_priv *np = container_of(napi, struct fe_priv, napi);
+       struct net_device *dev = np->dev;
+       u8 __iomem *base = get_hwbase(dev);
+       unsigned long flags;
+       int retcode;
+       int rx_count, tx_work=0, rx_work=0;
+
+       do {
+               if (!nv_optimized(np)) {
+                       spin_lock_irqsave(&np->lock, flags);
+                       tx_work += nv_tx_done(dev, np->tx_ring_size);
+                       spin_unlock_irqrestore(&np->lock, flags);
+
+                       rx_count = nv_rx_process(dev, budget - rx_work);
+                       retcode = nv_alloc_rx(dev);
+               } else {
+                       spin_lock_irqsave(&np->lock, flags);
+                       tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
+                       spin_unlock_irqrestore(&np->lock, flags);
+
+                       rx_count = nv_rx_process_optimized(dev,
+                           budget - rx_work);
+                       retcode = nv_alloc_rx_optimized(dev);
+               }
+       } while (retcode == 0 &&
+                rx_count > 0 && (rx_work += rx_count) < budget);
+
+       if (retcode) {
+               spin_lock_irqsave(&np->lock, flags);
+               if (!np->in_shutdown)
+                       mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+               spin_unlock_irqrestore(&np->lock, flags);
+       }
+
+       nv_change_interrupt_mode(dev, tx_work + rx_work);
+
+       if (unlikely(np->events & NVREG_IRQ_LINK)) {
+               spin_lock_irqsave(&np->lock, flags);
+               nv_link_irq(dev);
+               spin_unlock_irqrestore(&np->lock, flags);
+       }
+       if (unlikely(np->need_linktimer && time_after(jiffies, np->link_timeout))) {
+               spin_lock_irqsave(&np->lock, flags);
+               nv_linkchange(dev);
+               spin_unlock_irqrestore(&np->lock, flags);
+               np->link_timeout = jiffies + LINK_TIMEOUT;
+       }
+       if (unlikely(np->events & NVREG_IRQ_RECOVER_ERROR)) {
+               spin_lock_irqsave(&np->lock, flags);
+               if (!np->in_shutdown) {
+                       np->nic_poll_irq = np->irqmask;
+                       np->recover_error = 1;
+                       mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+               }
+               spin_unlock_irqrestore(&np->lock, flags);
+               napi_complete(napi);
+               return rx_work;
+       }
+
+       if (rx_work < budget) {
+               /* re-enable interrupts
+                  (msix not enabled in napi) */
+               napi_complete(napi);
+
+               writel(np->irqmask, base + NvRegIrqMask);
+       }
+       return rx_work;
+}
+
+static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
        u32 events;
        int i;
+       unsigned long flags;
 
        dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name);
 
        for (i=0; ; i++) {
                events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL;
                writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
-               pci_push(base);
                dprintk(KERN_DEBUG "%s: rx irq: %08x\n", dev->name, events);
                if (!(events & np->irqmask))
                        break;
 
-               nv_rx_process(dev);
-               if (nv_alloc_rx(dev)) {
-                       spin_lock_irq(&np->lock);
-                       if (!np->in_shutdown)
-                               mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-                       spin_unlock_irq(&np->lock);
+               if (nv_rx_process_optimized(dev, RX_WORK_PER_LOOP)) {
+                       if (unlikely(nv_alloc_rx_optimized(dev))) {
+                               spin_lock_irqsave(&np->lock, flags);
+                               if (!np->in_shutdown)
+                                       mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+                               spin_unlock_irqrestore(&np->lock, flags);
+                       }
                }
 
-               if (i > max_interrupt_work) {
-                       spin_lock_irq(&np->lock);
+               if (unlikely(i > max_interrupt_work)) {
+                       spin_lock_irqsave(&np->lock, flags);
                        /* disable interrupts on the nic */
                        writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
                        pci_push(base);
@@ -2486,51 +3655,51 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs)
                                np->nic_poll_irq |= NVREG_IRQ_RX_ALL;
                                mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
                        }
+                       spin_unlock_irqrestore(&np->lock, flags);
                        printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i);
-                       spin_unlock_irq(&np->lock);
                        break;
                }
-
        }
        dprintk(KERN_DEBUG "%s: nv_nic_irq_rx completed\n", dev->name);
 
        return IRQ_RETVAL(i);
 }
 
-static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
+static irqreturn_t nv_nic_irq_other(int foo, void *data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
        u32 events;
        int i;
+       unsigned long flags;
 
        dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name);
 
        for (i=0; ; i++) {
                events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_OTHER;
                writel(NVREG_IRQ_OTHER, base + NvRegMSIXIrqStatus);
-               pci_push(base);
                dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events);
                if (!(events & np->irqmask))
                        break;
 
+               /* check tx in case we reached max loop limit in tx isr */
+               spin_lock_irqsave(&np->lock, flags);
+               nv_tx_done_optimized(dev, TX_WORK_PER_LOOP);
+               spin_unlock_irqrestore(&np->lock, flags);
+
                if (events & NVREG_IRQ_LINK) {
-                       spin_lock_irq(&np->lock);
+                       spin_lock_irqsave(&np->lock, flags);
                        nv_link_irq(dev);
-                       spin_unlock_irq(&np->lock);
+                       spin_unlock_irqrestore(&np->lock, flags);
                }
                if (np->need_linktimer && time_after(jiffies, np->link_timeout)) {
-                       spin_lock_irq(&np->lock);
+                       spin_lock_irqsave(&np->lock, flags);
                        nv_linkchange(dev);
-                       spin_unlock_irq(&np->lock);
+                       spin_unlock_irqrestore(&np->lock, flags);
                        np->link_timeout = jiffies + LINK_TIMEOUT;
                }
-               if (events & (NVREG_IRQ_UNKNOWN)) {
-                       printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n",
-                                               dev->name, events);
-               }
-               if (i > max_interrupt_work) {
+               if (events & NVREG_IRQ_RECOVER_ERROR) {
                        spin_lock_irq(&np->lock);
                        /* disable interrupts on the nic */
                        writel(NVREG_IRQ_OTHER, base + NvRegIrqMask);
@@ -2538,12 +3707,26 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
 
                        if (!np->in_shutdown) {
                                np->nic_poll_irq |= NVREG_IRQ_OTHER;
+                               np->recover_error = 1;
                                mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
                        }
-                       printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i);
                        spin_unlock_irq(&np->lock);
                        break;
                }
+               if (unlikely(i > max_interrupt_work)) {
+                       spin_lock_irqsave(&np->lock, flags);
+                       /* disable interrupts on the nic */
+                       writel(NVREG_IRQ_OTHER, base + NvRegIrqMask);
+                       pci_push(base);
+
+                       if (!np->in_shutdown) {
+                               np->nic_poll_irq |= NVREG_IRQ_OTHER;
+                               mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+                       }
+                       spin_unlock_irqrestore(&np->lock, flags);
+                       printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i);
+                       break;
+               }
 
        }
        dprintk(KERN_DEBUG "%s: nv_nic_irq_other completed\n", dev->name);
@@ -2551,7 +3734,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
        return IRQ_RETVAL(i);
 }
 
-static irqreturn_t nv_nic_irq_test(int foo, void *data, struct pt_regs *regs)
+static irqreturn_t nv_nic_irq_test(int foo, void *data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
@@ -2572,6 +3755,8 @@ static irqreturn_t nv_nic_irq_test(int foo, void *data, struct pt_regs *regs)
        if (!(events & NVREG_IRQ_TIMER))
                return IRQ_RETVAL(0);
 
+       nv_msi_workaround(np);
+
        spin_lock(&np->lock);
        np->intr_test = 1;
        spin_unlock(&np->lock);
@@ -2613,6 +3798,16 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
        u8 __iomem *base = get_hwbase(dev);
        int ret = 1;
        int i;
+       irqreturn_t (*handler)(int foo, void *data);
+
+       if (intr_test) {
+               handler = nv_nic_irq_test;
+       } else {
+               if (nv_optimized(np))
+                       handler = nv_nic_irq_optimized;
+               else
+                       handler = nv_nic_irq;
+       }
 
        if (np->msi_flags & NV_MSI_X_CAPABLE) {
                for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
@@ -2622,21 +3817,27 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
                        np->msi_flags |= NV_MSI_X_ENABLED;
                        if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT && !intr_test) {
                                /* Request irq for rx handling */
-                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, &nv_nic_irq_rx, IRQF_SHARED, dev->name, dev) != 0) {
+                               sprintf(np->name_rx, "%s-rx", dev->name);
+                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector,
+                                               nv_nic_irq_rx, IRQF_SHARED, np->name_rx, dev) != 0) {
                                        printk(KERN_INFO "forcedeth: request_irq failed for rx %d\n", ret);
                                        pci_disable_msix(np->pci_dev);
                                        np->msi_flags &= ~NV_MSI_X_ENABLED;
                                        goto out_err;
                                }
                                /* Request irq for tx handling */
-                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, &nv_nic_irq_tx, IRQF_SHARED, dev->name, dev) != 0) {
+                               sprintf(np->name_tx, "%s-tx", dev->name);
+                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector,
+                                               nv_nic_irq_tx, IRQF_SHARED, np->name_tx, dev) != 0) {
                                        printk(KERN_INFO "forcedeth: request_irq failed for tx %d\n", ret);
                                        pci_disable_msix(np->pci_dev);
                                        np->msi_flags &= ~NV_MSI_X_ENABLED;
                                        goto out_free_rx;
                                }
                                /* Request irq for link and timer handling */
-                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector, &nv_nic_irq_other, IRQF_SHARED, dev->name, dev) != 0) {
+                               sprintf(np->name_other, "%s-other", dev->name);
+                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector,
+                                               nv_nic_irq_other, IRQF_SHARED, np->name_other, dev) != 0) {
                                        printk(KERN_INFO "forcedeth: request_irq failed for link %d\n", ret);
                                        pci_disable_msix(np->pci_dev);
                                        np->msi_flags &= ~NV_MSI_X_ENABLED;
@@ -2650,10 +3851,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
                                set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER);
                        } else {
                                /* Request irq for all interrupts */
-                               if ((!intr_test &&
-                                    request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-                                   (intr_test &&
-                                    request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0)) {
+                               if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, handler, IRQF_SHARED, dev->name, dev) != 0) {
                                        printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
                                        pci_disable_msix(np->pci_dev);
                                        np->msi_flags &= ~NV_MSI_X_ENABLED;
@@ -2669,11 +3867,12 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
        if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
                if ((ret = pci_enable_msi(np->pci_dev)) == 0) {
                        np->msi_flags |= NV_MSI_ENABLED;
-                       if ((!intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-                           (intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0)) {
+                       dev->irq = np->pci_dev->irq;
+                       if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0) {
                                printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
                                pci_disable_msi(np->pci_dev);
                                np->msi_flags &= ~NV_MSI_ENABLED;
+                               dev->irq = np->pci_dev->irq;
                                goto out_err;
                        }
 
@@ -2685,8 +3884,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
                }
        }
        if (ret != 0) {
-               if ((!intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-                   (intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0))
+               if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0)
                        goto out_err;
 
        }
@@ -2737,7 +3935,7 @@ static void nv_do_nic_poll(unsigned long data)
                if (np->msi_flags & NV_MSI_X_ENABLED)
                        disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
                else
-                       disable_irq_lockdep(dev->irq);
+                       disable_irq_lockdep(np->pci_dev->irq);
                mask = np->irqmask;
        } else {
                if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
@@ -2753,33 +3951,81 @@ static void nv_do_nic_poll(unsigned long data)
                        mask |= NVREG_IRQ_OTHER;
                }
        }
-       np->nic_poll_irq = 0;
+       /* disable_irq() contains synchronize_irq, thus no irq handler can run now */
+
+       if (np->recover_error) {
+               np->recover_error = 0;
+               printk(KERN_INFO "%s: MAC in recoverable error state\n", dev->name);
+               if (netif_running(dev)) {
+                       netif_tx_lock_bh(dev);
+                       netif_addr_lock(dev);
+                       spin_lock(&np->lock);
+                       /* stop engines */
+                       nv_stop_rxtx(dev);
+                       if (np->driver_data & DEV_HAS_POWER_CNTRL)
+                               nv_mac_reset(dev);
+                       nv_txrx_reset(dev);
+                       /* drain rx and tx queues */
+                       nv_drain_rxtx(dev);
+                       /* reinit driver view of the rx queue */
+                       set_bufsize(dev);
+                       if (nv_init_ring(dev)) {
+                               if (!np->in_shutdown)
+                                       mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+                       }
+                       /* reinit nic view of the rx queue */
+                       writel(np->rx_buf_sz, base + NvRegOffloadConfig);
+                       setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING);
+                       writel( ((np->rx_ring_size-1) << NVREG_RINGSZ_RXSHIFT) + ((np->tx_ring_size-1) << NVREG_RINGSZ_TXSHIFT),
+                               base + NvRegRingSizes);
+                       pci_push(base);
+                       writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+                       pci_push(base);
+                       /* clear interrupts */
+                       if (!(np->msi_flags & NV_MSI_X_ENABLED))
+                               writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
+                       else
+                               writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus);
 
-       /* FIXME: Do we need synchronize_irq(dev->irq) here? */
+                       /* restart rx and tx engines */
+                       nv_start_rxtx(dev);
+                       spin_unlock(&np->lock);
+                       netif_addr_unlock(dev);
+                       netif_tx_unlock_bh(dev);
+               }
+       }
 
        writel(mask, base + NvRegIrqMask);
        pci_push(base);
 
        if (!using_multi_irqs(dev)) {
-               nv_nic_irq(0, dev, NULL);
+               np->nic_poll_irq = 0;
+               if (nv_optimized(np))
+                       nv_nic_irq_optimized(0, dev);
+               else
+                       nv_nic_irq(0, dev);
                if (np->msi_flags & NV_MSI_X_ENABLED)
                        enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
                else
-                       enable_irq_lockdep(dev->irq);
+                       enable_irq_lockdep(np->pci_dev->irq);
        } else {
                if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
-                       nv_nic_irq_rx(0, dev, NULL);
+                       np->nic_poll_irq &= ~NVREG_IRQ_RX_ALL;
+                       nv_nic_irq_rx(0, dev);
                        enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
                }
                if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
-                       nv_nic_irq_tx(0, dev, NULL);
+                       np->nic_poll_irq &= ~NVREG_IRQ_TX_ALL;
+                       nv_nic_irq_tx(0, dev);
                        enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
                }
                if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
-                       nv_nic_irq_other(0, dev, NULL);
+                       np->nic_poll_irq &= ~NVREG_IRQ_OTHER;
+                       nv_nic_irq_other(0, dev);
                        enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
                }
        }
+
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2793,56 +4039,18 @@ static void nv_do_stats_poll(unsigned long data)
 {
        struct net_device *dev = (struct net_device *) data;
        struct fe_priv *np = netdev_priv(dev);
-       u8 __iomem *base = get_hwbase(dev);
 
-       np->estats.tx_bytes += readl(base + NvRegTxCnt);
-       np->estats.tx_zero_rexmt += readl(base + NvRegTxZeroReXmt);
-       np->estats.tx_one_rexmt += readl(base + NvRegTxOneReXmt);
-       np->estats.tx_many_rexmt += readl(base + NvRegTxManyReXmt);
-       np->estats.tx_late_collision += readl(base + NvRegTxLateCol);
-       np->estats.tx_fifo_errors += readl(base + NvRegTxUnderflow);
-       np->estats.tx_carrier_errors += readl(base + NvRegTxLossCarrier);
-       np->estats.tx_excess_deferral += readl(base + NvRegTxExcessDef);
-       np->estats.tx_retry_error += readl(base + NvRegTxRetryErr);
-       np->estats.tx_deferral += readl(base + NvRegTxDef);
-       np->estats.tx_packets += readl(base + NvRegTxFrame);
-       np->estats.tx_pause += readl(base + NvRegTxPause);
-       np->estats.rx_frame_error += readl(base + NvRegRxFrameErr);
-       np->estats.rx_extra_byte += readl(base + NvRegRxExtraByte);
-       np->estats.rx_late_collision += readl(base + NvRegRxLateCol);
-       np->estats.rx_runt += readl(base + NvRegRxRunt);
-       np->estats.rx_frame_too_long += readl(base + NvRegRxFrameTooLong);
-       np->estats.rx_over_errors += readl(base + NvRegRxOverflow);
-       np->estats.rx_crc_errors += readl(base + NvRegRxFCSErr);
-       np->estats.rx_frame_align_error += readl(base + NvRegRxFrameAlignErr);
-       np->estats.rx_length_error += readl(base + NvRegRxLenErr);
-       np->estats.rx_unicast += readl(base + NvRegRxUnicast);
-       np->estats.rx_multicast += readl(base + NvRegRxMulticast);
-       np->estats.rx_broadcast += readl(base + NvRegRxBroadcast);
-       np->estats.rx_bytes += readl(base + NvRegRxCnt);
-       np->estats.rx_pause += readl(base + NvRegRxPause);
-       np->estats.rx_drop_frame += readl(base + NvRegRxDropFrame);
-       np->estats.rx_packets =
-               np->estats.rx_unicast +
-               np->estats.rx_multicast +
-               np->estats.rx_broadcast;
-       np->estats.rx_errors_total =
-               np->estats.rx_crc_errors +
-               np->estats.rx_over_errors +
-               np->estats.rx_frame_error +
-               (np->estats.rx_frame_align_error - np->estats.rx_extra_byte) +
-               np->estats.rx_late_collision +
-               np->estats.rx_runt +
-               np->estats.rx_frame_too_long;
+       nv_get_hw_stats(dev);
 
        if (!np->in_shutdown)
-               mod_timer(&np->stats_poll, jiffies + STATS_INTERVAL);
+               mod_timer(&np->stats_poll,
+                       round_jiffies(jiffies + STATS_INTERVAL));
 }
 
 static void nv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
        struct fe_priv *np = netdev_priv(dev);
-       strcpy(info->driver, "forcedeth");
+       strcpy(info->driver, DRV_NAME);
        strcpy(info->version, FORCEDETH_VERSION);
        strcpy(info->bus_info, pci_name(np->pci_dev));
 }
@@ -2990,13 +4198,25 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 
        netif_carrier_off(dev);
        if (netif_running(dev)) {
+               unsigned long flags;
+
                nv_disable_irq(dev);
                netif_tx_lock_bh(dev);
-               spin_lock(&np->lock);
+               netif_addr_lock(dev);
+               /* with plain spinlock lockdep complains */
+               spin_lock_irqsave(&np->lock, flags);
                /* stop engines */
-               nv_stop_rx(dev);
-               nv_stop_tx(dev);
-               spin_unlock(&np->lock);
+               /* FIXME:
+                * this can take some time, and interrupts are disabled
+                * due to spin_lock_irqsave, but let's hope no daemon
+                * is going to change the settings very often...
+                * Worst case:
+                * NV_RXSTOP_DELAY1MAX + NV_TXSTOP_DELAY1MAX
+                * + some minor delays, which is up to a second approximately
+                */
+               nv_stop_rxtx(dev);
+               spin_unlock_irqrestore(&np->lock, flags);
+               netif_addr_unlock(dev);
                netif_tx_unlock_bh(dev);
        }
 
@@ -3033,9 +4253,18 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
                if (netif_running(dev))
                        printk(KERN_INFO "%s: link down.\n", dev->name);
                bmcr = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
-               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-               mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
-
+               if (np->phy_model == PHY_MODEL_MARVELL_E3016) {
+                       bmcr |= BMCR_ANENABLE;
+                       /* reset the phy in order for settings to stick,
+                        * and cause autoneg to start */
+                       if (phy_reset(dev, bmcr)) {
+                               printk(KERN_INFO "%s: phy reset failed\n", dev->name);
+                               return -EINVAL;
+                       }
+               } else {
+                       bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+                       mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+               }
        } else {
                int adv, bmcr;
 
@@ -3075,23 +4304,24 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
                        bmcr |= BMCR_FULLDPLX;
                if (np->fixed_mode & (ADVERTISE_100HALF|ADVERTISE_100FULL))
                        bmcr |= BMCR_SPEED100;
-               mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
                if (np->phy_oui == PHY_OUI_MARVELL) {
-                       /* reset the phy */
-                       if (phy_reset(dev)) {
+                       /* reset the phy in order for forced mode settings to stick */
+                       if (phy_reset(dev, bmcr)) {
                                printk(KERN_INFO "%s: phy reset failed\n", dev->name);
                                return -EINVAL;
                        }
-               } else if (netif_running(dev)) {
-                       /* Wait a bit and then reconfigure the nic. */
-                       udelay(10);
-                       nv_linkchange(dev);
+               } else {
+                       mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+                       if (netif_running(dev)) {
+                               /* Wait a bit and then reconfigure the nic. */
+                               udelay(10);
+                               nv_linkchange(dev);
+                       }
                }
        }
 
        if (netif_running(dev)) {
-               nv_start_rx(dev);
-               nv_start_tx(dev);
+               nv_start_rxtx(dev);
                nv_enable_irq(dev);
        }
 
@@ -3132,22 +4362,31 @@ static int nv_nway_reset(struct net_device *dev)
                if (netif_running(dev)) {
                        nv_disable_irq(dev);
                        netif_tx_lock_bh(dev);
+                       netif_addr_lock(dev);
                        spin_lock(&np->lock);
                        /* stop engines */
-                       nv_stop_rx(dev);
-                       nv_stop_tx(dev);
+                       nv_stop_rxtx(dev);
                        spin_unlock(&np->lock);
+                       netif_addr_unlock(dev);
                        netif_tx_unlock_bh(dev);
                        printk(KERN_INFO "%s: link down.\n", dev->name);
                }
 
                bmcr = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
-               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-               mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+               if (np->phy_model == PHY_MODEL_MARVELL_E3016) {
+                       bmcr |= BMCR_ANENABLE;
+                       /* reset the phy in order for settings to stick */
+                       if (phy_reset(dev, bmcr)) {
+                               printk(KERN_INFO "%s: phy reset failed\n", dev->name);
+                               return -EINVAL;
+                       }
+               } else {
+                       bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+                       mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+               }
 
                if (netif_running(dev)) {
-                       nv_start_rx(dev);
-                       nv_start_tx(dev);
+                       nv_start_rxtx(dev);
                        nv_enable_irq(dev);
                }
                ret = 0;
@@ -3187,7 +4426,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 {
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
-       u8 *rxtx_ring, *rx_skbuff, *tx_skbuff, *rx_dma, *tx_dma, *tx_dma_len;
+       u8 *rxtx_ring, *rx_skbuff, *tx_skbuff;
        dma_addr_t ring_addr;
 
        if (ring->rx_pending < RX_RING_MIN ||
@@ -3204,7 +4443,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
        }
 
        /* allocate new rings */
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+       if (!nv_optimized(np)) {
                rxtx_ring = pci_alloc_consistent(np->pci_dev,
                                            sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending),
                                            &ring_addr);
@@ -3213,15 +4452,12 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                                            sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending),
                                            &ring_addr);
        }
-       rx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->rx_pending, GFP_KERNEL);
-       rx_dma = kmalloc(sizeof(dma_addr_t) * ring->rx_pending, GFP_KERNEL);
-       tx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->tx_pending, GFP_KERNEL);
-       tx_dma = kmalloc(sizeof(dma_addr_t) * ring->tx_pending, GFP_KERNEL);
-       tx_dma_len = kmalloc(sizeof(unsigned int) * ring->tx_pending, GFP_KERNEL);
-       if (!rxtx_ring || !rx_skbuff || !rx_dma || !tx_skbuff || !tx_dma || !tx_dma_len) {
+       rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL);
+       tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL);
+       if (!rxtx_ring || !rx_skbuff || !tx_skbuff) {
                /* fall back to old rings */
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       if(rxtx_ring)
+               if (!nv_optimized(np)) {
+                       if (rxtx_ring)
                                pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending),
                                                    rxtx_ring, ring_addr);
                } else {
@@ -3231,28 +4467,22 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                }
                if (rx_skbuff)
                        kfree(rx_skbuff);
-               if (rx_dma)
-                       kfree(rx_dma);
                if (tx_skbuff)
                        kfree(tx_skbuff);
-               if (tx_dma)
-                       kfree(tx_dma);
-               if (tx_dma_len)
-                       kfree(tx_dma_len);
                goto exit;
        }
 
        if (netif_running(dev)) {
                nv_disable_irq(dev);
+               nv_napi_disable(dev);
                netif_tx_lock_bh(dev);
+               netif_addr_lock(dev);
                spin_lock(&np->lock);
                /* stop engines */
-               nv_stop_rx(dev);
-               nv_stop_tx(dev);
+               nv_stop_rxtx(dev);
                nv_txrx_reset(dev);
                /* drain queues */
-               nv_drain_rx(dev);
-               nv_drain_tx(dev);
+               nv_drain_rxtx(dev);
                /* delete queues */
                free_rings(dev);
        }
@@ -3260,27 +4490,20 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
        /* set new values */
        np->rx_ring_size = ring->rx_pending;
        np->tx_ring_size = ring->tx_pending;
-       np->tx_limit_stop = ring->tx_pending - TX_LIMIT_DIFFERENCE;
-       np->tx_limit_start = ring->tx_pending - TX_LIMIT_DIFFERENCE - 1;
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+
+       if (!nv_optimized(np)) {
                np->rx_ring.orig = (struct ring_desc*)rxtx_ring;
                np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size];
        } else {
                np->rx_ring.ex = (struct ring_desc_ex*)rxtx_ring;
                np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
        }
-       np->rx_skbuff = (struct sk_buff**)rx_skbuff;
-       np->rx_dma = (dma_addr_t*)rx_dma;
-       np->tx_skbuff = (struct sk_buff**)tx_skbuff;
-       np->tx_dma = (dma_addr_t*)tx_dma;
-       np->tx_dma_len = (unsigned int*)tx_dma_len;
+       np->rx_skb = (struct nv_skb_map*)rx_skbuff;
+       np->tx_skb = (struct nv_skb_map*)tx_skbuff;
        np->ring_addr = ring_addr;
 
-       memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size);
-       memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size);
-       memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
-       memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
-       memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
+       memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size);
+       memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size);
 
        if (netif_running(dev)) {
                /* reinit driver view of the queues */
@@ -3300,10 +4523,11 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                pci_push(base);
 
                /* restart engines */
-               nv_start_rx(dev);
-               nv_start_tx(dev);
+               nv_start_rxtx(dev);
                spin_unlock(&np->lock);
+               netif_addr_unlock(dev);
                netif_tx_unlock_bh(dev);
+               nv_napi_enable(dev);
                nv_enable_irq(dev);
        }
        return 0;
@@ -3340,11 +4564,12 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
        if (netif_running(dev)) {
                nv_disable_irq(dev);
                netif_tx_lock_bh(dev);
+               netif_addr_lock(dev);
                spin_lock(&np->lock);
                /* stop engines */
-               nv_stop_rx(dev);
-               nv_stop_tx(dev);
+               nv_stop_rxtx(dev);
                spin_unlock(&np->lock);
+               netif_addr_unlock(dev);
                netif_tx_unlock_bh(dev);
        }
 
@@ -3384,8 +4609,7 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
        }
 
        if (netif_running(dev)) {
-               nv_start_rx(dev);
-               nv_start_tx(dev);
+               nv_start_rxtx(dev);
                nv_enable_irq(dev);
        }
        return 0;
@@ -3394,7 +4618,7 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
 static u32 nv_get_rx_csum(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
-       return (np->txrxctl_bits & NVREG_TXRXCTL_RXCHECK) != 0;
+       return (np->rx_csum) != 0;
 }
 
 static int nv_set_rx_csum(struct net_device *dev, u32 data)
@@ -3404,22 +4628,15 @@ static int nv_set_rx_csum(struct net_device *dev, u32 data)
        int retcode = 0;
 
        if (np->driver_data & DEV_HAS_CHECKSUM) {
-
-               if (((np->txrxctl_bits & NVREG_TXRXCTL_RXCHECK) && data) ||
-                   (!(np->txrxctl_bits & NVREG_TXRXCTL_RXCHECK) && !data)) {
-                       /* already set or unset */
-                       return 0;
-               }
-
                if (data) {
+                       np->rx_csum = 1;
                        np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
-               } else if (!(np->vlanctl_bits & NVREG_VLANCONTROL_ENABLE)) {
-                       np->txrxctl_bits &= ~NVREG_TXRXCTL_RXCHECK;
                } else {
-                       printk(KERN_INFO "Can not disable rx checksum if vlan is enabled\n");
-                       return -EINVAL;
+                       np->rx_csum = 0;
+                       /* vlan is dependent on rx checksum offload */
+                       if (!(np->vlanctl_bits & NVREG_VLANCONTROL_ENABLE))
+                               np->txrxctl_bits &= ~NVREG_TXRXCTL_RXCHECK;
                }
-
                if (netif_running(dev)) {
                        spin_lock_irq(&np->lock);
                        writel(np->txrxctl_bits, base + NvRegTxRxControl);
@@ -3437,7 +4654,7 @@ static int nv_set_tx_csum(struct net_device *dev, u32 data)
        struct fe_priv *np = netdev_priv(dev);
 
        if (np->driver_data & DEV_HAS_CHECKSUM)
-               return ethtool_op_set_tx_hw_csum(dev, data);
+               return ethtool_op_set_tx_csum(dev, data);
        else
                return -EOPNOTSUPP;
 }
@@ -3452,14 +4669,28 @@ static int nv_set_sg(struct net_device *dev, u32 data)
                return -EOPNOTSUPP;
 }
 
-static int nv_get_stats_count(struct net_device *dev)
+static int nv_get_sset_count(struct net_device *dev, int sset)
 {
        struct fe_priv *np = netdev_priv(dev);
 
-       if (np->driver_data & DEV_HAS_STATISTICS)
-               return (sizeof(struct nv_ethtool_stats)/sizeof(u64));
-       else
-               return 0;
+       switch (sset) {
+       case ETH_SS_TEST:
+               if (np->driver_data & DEV_HAS_TEST_EXTENDED)
+                       return NV_TEST_COUNT_EXTENDED;
+               else
+                       return NV_TEST_COUNT_BASE;
+       case ETH_SS_STATS:
+               if (np->driver_data & DEV_HAS_STATISTICS_V3)
+                       return NV_DEV_STATISTICS_V3_COUNT;
+               else if (np->driver_data & DEV_HAS_STATISTICS_V2)
+                       return NV_DEV_STATISTICS_V2_COUNT;
+               else if (np->driver_data & DEV_HAS_STATISTICS_V1)
+                       return NV_DEV_STATISTICS_V1_COUNT;
+               else
+                       return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *estats, u64 *buffer)
@@ -3469,17 +4700,7 @@ static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *e
        /* update stats */
        nv_do_stats_poll((unsigned long)dev);
 
-       memcpy(buffer, &np->estats, nv_get_stats_count(dev)*sizeof(u64));
-}
-
-static int nv_self_test_count(struct net_device *dev)
-{
-       struct fe_priv *np = netdev_priv(dev);
-
-       if (np->driver_data & DEV_HAS_TEST_EXTENDED)
-               return NV_TEST_COUNT_EXTENDED;
-       else
-               return NV_TEST_COUNT_BASE;
+       memcpy(buffer, &np->estats, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
 }
 
 static int nv_link_test(struct net_device *dev)
@@ -3595,7 +4816,7 @@ static int nv_loopback_test(struct net_device *dev)
        struct sk_buff *tx_skb, *rx_skb;
        dma_addr_t test_dma_addr;
        u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
-       u32 Flags;
+       u32 flags;
        int len, i, pkt_len;
        u8 *pkt_data;
        u32 filter_flags = 0;
@@ -3626,25 +4847,31 @@ static int nv_loopback_test(struct net_device *dev)
        pci_push(base);
 
        /* restart rx engine */
-       nv_start_rx(dev);
-       nv_start_tx(dev);
+       nv_start_rxtx(dev);
 
        /* setup packet for tx */
        pkt_len = ETH_DATA_LEN;
        tx_skb = dev_alloc_skb(pkt_len);
+       if (!tx_skb) {
+               printk(KERN_ERR "dev_alloc_skb() failed during loopback test"
+                        " of %s\n", dev->name);
+               ret = 0;
+               goto out;
+       }
+       test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
+                                      skb_tailroom(tx_skb),
+                                      PCI_DMA_FROMDEVICE);
        pkt_data = skb_put(tx_skb, pkt_len);
        for (i = 0; i < pkt_len; i++)
                pkt_data[i] = (u8)(i & 0xff);
-       test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
-                                      tx_skb->end-tx_skb->data, PCI_DMA_FROMDEVICE);
 
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               np->tx_ring.orig[0].PacketBuffer = cpu_to_le32(test_dma_addr);
-               np->tx_ring.orig[0].FlagLen = cpu_to_le32((pkt_len-1) | np->tx_flags | tx_flags_extra);
+       if (!nv_optimized(np)) {
+               np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
+               np->tx_ring.orig[0].flaglen = cpu_to_le32((pkt_len-1) | np->tx_flags | tx_flags_extra);
        } else {
-               np->tx_ring.ex[0].PacketBufferHigh = cpu_to_le64(test_dma_addr) >> 32;
-               np->tx_ring.ex[0].PacketBufferLow = cpu_to_le64(test_dma_addr) & 0x0FFFFFFFF;
-               np->tx_ring.ex[0].FlagLen = cpu_to_le32((pkt_len-1) | np->tx_flags | tx_flags_extra);
+               np->tx_ring.ex[0].bufhigh = cpu_to_le32(dma_high(test_dma_addr));
+               np->tx_ring.ex[0].buflow = cpu_to_le32(dma_low(test_dma_addr));
+               np->tx_ring.ex[0].flaglen = cpu_to_le32((pkt_len-1) | np->tx_flags | tx_flags_extra);
        }
        writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
        pci_push(get_hwbase(dev));
@@ -3652,22 +4879,22 @@ static int nv_loopback_test(struct net_device *dev)
        msleep(500);
 
        /* check for rx of the packet */
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               Flags = le32_to_cpu(np->rx_ring.orig[0].FlagLen);
+       if (!nv_optimized(np)) {
+               flags = le32_to_cpu(np->rx_ring.orig[0].flaglen);
                len = nv_descr_getlength(&np->rx_ring.orig[0], np->desc_ver);
 
        } else {
-               Flags = le32_to_cpu(np->rx_ring.ex[0].FlagLen);
+               flags = le32_to_cpu(np->rx_ring.ex[0].flaglen);
                len = nv_descr_getlength_ex(&np->rx_ring.ex[0], np->desc_ver);
        }
 
-       if (Flags & NV_RX_AVAIL) {
+       if (flags & NV_RX_AVAIL) {
                ret = 0;
        } else if (np->desc_ver == DESC_VER_1) {
-               if (Flags & NV_RX_ERROR)
+               if (flags & NV_RX_ERROR)
                        ret = 0;
        } else {
-               if (Flags & NV_RX2_ERROR) {
+               if (flags & NV_RX2_ERROR) {
                        ret = 0;
                }
        }
@@ -3678,7 +4905,7 @@ static int nv_loopback_test(struct net_device *dev)
                        dprintk(KERN_DEBUG "%s: loopback len mismatch %d vs %d\n",
                                dev->name, len, pkt_len);
                } else {
-                       rx_skb = np->rx_skbuff[0];
+                       rx_skb = np->rx_skb[0].skb;
                        for (i = 0; i < pkt_len; i++) {
                                if (rx_skb->data[i] != (u8)(i & 0xff)) {
                                        ret = 0;
@@ -3692,18 +4919,16 @@ static int nv_loopback_test(struct net_device *dev)
                dprintk(KERN_DEBUG "%s: loopback - did not receive test packet\n", dev->name);
        }
 
-       pci_unmap_page(np->pci_dev, test_dma_addr,
-                      tx_skb->end-tx_skb->data,
+       pci_unmap_single(np->pci_dev, test_dma_addr,
+                      (skb_end_pointer(tx_skb) - tx_skb->data),
                       PCI_DMA_TODEVICE);
        dev_kfree_skb_any(tx_skb);
-
+ out:
        /* stop engines */
-       nv_stop_rx(dev);
-       nv_stop_tx(dev);
+       nv_stop_rxtx(dev);
        nv_txrx_reset(dev);
        /* drain rx queue */
-       nv_drain_rx(dev);
-       nv_drain_tx(dev);
+       nv_drain_rxtx(dev);
 
        if (netif_running(dev)) {
                writel(misc1_flags, base + NvRegMisc1);
@@ -3719,7 +4944,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
        int result;
-       memset(buffer, 0, nv_self_test_count(dev)*sizeof(u64));
+       memset(buffer, 0, nv_get_sset_count(dev, ETH_SS_TEST)*sizeof(u64));
 
        if (!nv_link_test(dev)) {
                test->flags |= ETH_TEST_FL_FAILED;
@@ -3729,7 +4954,9 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
        if (test->flags & ETH_TEST_FL_OFFLINE) {
                if (netif_running(dev)) {
                        netif_stop_queue(dev);
+                       nv_napi_disable(dev);
                        netif_tx_lock_bh(dev);
+                       netif_addr_lock(dev);
                        spin_lock_irq(&np->lock);
                        nv_disable_hw_interrupts(dev, np->irqmask);
                        if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
@@ -3738,13 +4965,12 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
                                writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus);
                        }
                        /* stop engines */
-                       nv_stop_rx(dev);
-                       nv_stop_tx(dev);
+                       nv_stop_rxtx(dev);
                        nv_txrx_reset(dev);
                        /* drain rx queue */
-                       nv_drain_rx(dev);
-                       nv_drain_tx(dev);
+                       nv_drain_rxtx(dev);
                        spin_unlock_irq(&np->lock);
+                       netif_addr_unlock(dev);
                        netif_tx_unlock_bh(dev);
                }
 
@@ -3784,9 +5010,9 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
                        writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
                        pci_push(base);
                        /* restart rx engine */
-                       nv_start_rx(dev);
-                       nv_start_tx(dev);
+                       nv_start_rxtx(dev);
                        netif_start_queue(dev);
+                       nv_napi_enable(dev);
                        nv_enable_hw_interrupts(dev, np->irqmask);
                }
        }
@@ -3796,15 +5022,15 @@ static void nv_get_strings(struct net_device *dev, u32 stringset, u8 *buffer)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(buffer, &nv_estats_str, nv_get_stats_count(dev)*sizeof(struct nv_ethtool_str));
+               memcpy(buffer, &nv_estats_str, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(struct nv_ethtool_str));
                break;
        case ETH_SS_TEST:
-               memcpy(buffer, &nv_etests_str, nv_self_test_count(dev)*sizeof(struct nv_ethtool_str));
+               memcpy(buffer, &nv_etests_str, nv_get_sset_count(dev, ETH_SS_TEST)*sizeof(struct nv_ethtool_str));
                break;
        }
 }
 
-static struct ethtool_ops ops = {
+static const struct ethtool_ops ops = {
        .get_drvinfo = nv_get_drvinfo,
        .get_link = ethtool_op_get_link,
        .get_wol = nv_get_wol,
@@ -3814,8 +5040,6 @@ static struct ethtool_ops ops = {
        .get_regs_len = nv_get_regs_len,
        .get_regs = nv_get_regs,
        .nway_reset = nv_nway_reset,
-       .get_perm_addr = ethtool_op_get_perm_addr,
-       .get_tso = ethtool_op_get_tso,
        .set_tso = nv_set_tso,
        .get_ringparam = nv_get_ringparam,
        .set_ringparam = nv_set_ringparam,
@@ -3823,14 +5047,11 @@ static struct ethtool_ops ops = {
        .set_pauseparam = nv_set_pauseparam,
        .get_rx_csum = nv_get_rx_csum,
        .set_rx_csum = nv_set_rx_csum,
-       .get_tx_csum = ethtool_op_get_tx_csum,
        .set_tx_csum = nv_set_tx_csum,
-       .get_sg = ethtool_op_get_sg,
        .set_sg = nv_set_sg,
        .get_strings = nv_get_strings,
-       .get_stats_count = nv_get_stats_count,
        .get_ethtool_stats = nv_get_ethtool_stats,
-       .self_test_count = nv_self_test_count,
+       .get_sset_count = nv_get_sset_count,
        .self_test = nv_self_test,
 };
 
@@ -3855,12 +5076,89 @@ static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
        writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
 
        spin_unlock_irq(&np->lock);
-};
+}
 
-static void nv_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+/* The mgmt unit and driver use a semaphore to access the phy during init */
+static int nv_mgmt_acquire_sema(struct net_device *dev)
 {
-       /* nothing to do */
-};
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       int i;
+       u32 tx_ctrl, mgmt_sema;
+
+       for (i = 0; i < 10; i++) {
+               mgmt_sema = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_MGMT_SEMA_MASK;
+               if (mgmt_sema == NVREG_XMITCTL_MGMT_SEMA_FREE)
+                       break;
+               msleep(500);
+       }
+
+       if (mgmt_sema != NVREG_XMITCTL_MGMT_SEMA_FREE)
+               return 0;
+
+       for (i = 0; i < 2; i++) {
+               tx_ctrl = readl(base + NvRegTransmitterControl);
+               tx_ctrl |= NVREG_XMITCTL_HOST_SEMA_ACQ;
+               writel(tx_ctrl, base + NvRegTransmitterControl);
+
+               /* verify that semaphore was acquired */
+               tx_ctrl = readl(base + NvRegTransmitterControl);
+               if (((tx_ctrl & NVREG_XMITCTL_HOST_SEMA_MASK) == NVREG_XMITCTL_HOST_SEMA_ACQ) &&
+                   ((tx_ctrl & NVREG_XMITCTL_MGMT_SEMA_MASK) == NVREG_XMITCTL_MGMT_SEMA_FREE)) {
+                       np->mgmt_sema = 1;
+                       return 1;
+               }
+               else
+                       udelay(50);
+       }
+
+       return 0;
+}
+
+static void nv_mgmt_release_sema(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       u32 tx_ctrl;
+
+       if (np->driver_data & DEV_HAS_MGMT_UNIT) {
+               if (np->mgmt_sema) {
+                       tx_ctrl = readl(base + NvRegTransmitterControl);
+                       tx_ctrl &= ~NVREG_XMITCTL_HOST_SEMA_ACQ;
+                       writel(tx_ctrl, base + NvRegTransmitterControl);
+               }
+       }
+}
+
+
+static int nv_mgmt_get_version(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       u32 data_ready = readl(base + NvRegTransmitterControl);
+       u32 data_ready2 = 0;
+       unsigned long start;
+       int ready = 0;
+
+       writel(NVREG_MGMTUNITGETVERSION, base + NvRegMgmtUnitGetVersion);
+       writel(data_ready ^ NVREG_XMITCTL_DATA_START, base + NvRegTransmitterControl);
+       start = jiffies;
+       while (time_before(jiffies, start + 5*HZ)) {
+               data_ready2 = readl(base + NvRegTransmitterControl);
+               if ((data_ready & NVREG_XMITCTL_DATA_READY) != (data_ready2 & NVREG_XMITCTL_DATA_READY)) {
+                       ready = 1;
+                       break;
+               }
+               schedule_timeout_uninterruptible(1);
+       }
+
+       if (!ready || (data_ready2 & NVREG_XMITCTL_DATA_ERROR))
+               return 0;
+
+       np->mgmt_version = readl(base + NvRegMgmtUnitVersion) & NVREG_MGMTUNITVERSION;
+
+       return 1;
+}
 
 static int nv_open(struct net_device *dev)
 {
@@ -3868,17 +5166,22 @@ static int nv_open(struct net_device *dev)
        u8 __iomem *base = get_hwbase(dev);
        int ret = 1;
        int oom, i;
+       u32 low;
 
        dprintk(KERN_DEBUG "nv_open: begin\n");
 
-       /* 1) erase previous misconfiguration */
+       /* power up phy */
+       mii_rw(dev, np->phyaddr, MII_BMCR,
+              mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ) & ~BMCR_PDOWN);
+
+       nv_txrx_gate(dev, false);
+       /* erase previous misconfiguration */
        if (np->driver_data & DEV_HAS_POWER_CNTRL)
                nv_mac_reset(dev);
-       /* 4.1-1: stop adapter: ignored, 4.3 seems to be overkill */
        writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA);
        writel(0, base + NvRegMulticastAddrB);
-       writel(0, base + NvRegMulticastMaskA);
-       writel(0, base + NvRegMulticastMaskB);
+       writel(NVREG_MCASTMASKA_NONE, base + NvRegMulticastMaskA);
+       writel(NVREG_MCASTMASKB_NONE, base + NvRegMulticastMaskB);
        writel(0, base + NvRegPacketFilterFlags);
 
        writel(0, base + NvRegTransmitterControl);
@@ -3889,28 +5192,27 @@ static int nv_open(struct net_device *dev)
        if (np->pause_flags & NV_PAUSEFRAME_TX_CAPABLE)
                writel(NVREG_TX_PAUSEFRAME_DISABLE,  base + NvRegTxPauseFrame);
 
-       /* 2) initialize descriptor rings */
+       /* initialize descriptor rings */
        set_bufsize(dev);
        oom = nv_init_ring(dev);
 
        writel(0, base + NvRegLinkSpeed);
-       writel(0, base + NvRegUnknownTransmitterReg);
+       writel(readl(base + NvRegTransmitPoll) & NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll);
        nv_txrx_reset(dev);
        writel(0, base + NvRegUnknownSetupReg6);
 
        np->in_shutdown = 0;
 
-       /* 3) set mac address */
-       nv_copy_mac_to_hw(dev);
-
-       /* 4) give hw rings */
+       /* give hw rings */
        setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING);
        writel( ((np->rx_ring_size-1) << NVREG_RINGSZ_RXSHIFT) + ((np->tx_ring_size-1) << NVREG_RINGSZ_TXSHIFT),
                base + NvRegRingSizes);
 
-       /* 5) continue setup */
        writel(np->linkspeed, base + NvRegLinkSpeed);
-       writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3);
+       if (np->desc_ver == DESC_VER_1)
+               writel(NVREG_TX_WM_DESC1_DEFAULT, base + NvRegTxWatermark);
+       else
+               writel(NVREG_TX_WM_DESC2_3_DEFAULT, base + NvRegTxWatermark);
        writel(np->txrxctl_bits, base + NvRegTxRxControl);
        writel(np->vlanctl_bits, base + NvRegVlanControl);
        pci_push(base);
@@ -3919,21 +5221,32 @@ static int nv_open(struct net_device *dev)
                        NV_SETUP5_DELAY, NV_SETUP5_DELAYMAX,
                        KERN_INFO "open: SetupReg5, Bit 31 remained off\n");
 
-       writel(0, base + NvRegUnknownSetupReg4);
+       writel(0, base + NvRegMIIMask);
        writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
-       writel(NVREG_MIISTAT_MASK2, base + NvRegMIIStatus);
+       writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus);
 
-       /* 6) continue setup */
        writel(NVREG_MISC1_FORCE | NVREG_MISC1_HD, base + NvRegMisc1);
        writel(readl(base + NvRegTransmitterStatus), base + NvRegTransmitterStatus);
        writel(NVREG_PFF_ALWAYS, base + NvRegPacketFilterFlags);
        writel(np->rx_buf_sz, base + NvRegOffloadConfig);
 
        writel(readl(base + NvRegReceiverStatus), base + NvRegReceiverStatus);
-       get_random_bytes(&i, sizeof(i));
-       writel(NVREG_RNDSEED_FORCE | (i&NVREG_RNDSEED_MASK), base + NvRegRandomSeed);
-       writel(NVREG_UNKSETUP1_VAL, base + NvRegUnknownSetupReg1);
-       writel(NVREG_UNKSETUP2_VAL, base + NvRegUnknownSetupReg2);
+
+       get_random_bytes(&low, sizeof(low));
+       low &= NVREG_SLOTTIME_MASK;
+       if (np->desc_ver == DESC_VER_1) {
+               writel(low|NVREG_SLOTTIME_DEFAULT, base + NvRegSlotTime);
+       } else {
+               if (!(np->driver_data & DEV_HAS_GEAR_MODE)) {
+                       /* setup legacy backoff */
+                       writel(NVREG_SLOTTIME_LEGBF_ENABLED|NVREG_SLOTTIME_10_100_FULL|low, base + NvRegSlotTime);
+               } else {
+                       writel(NVREG_SLOTTIME_10_100_FULL, base + NvRegSlotTime);
+                       nv_gear_backoff_reseed(dev);
+               }
+       }
+       writel(NVREG_TX_DEFERRAL_DEFAULT, base + NvRegTxDeferral);
+       writel(NVREG_RX_DEFERRAL_DEFAULT, base + NvRegRxDeferral);
        if (poll_interval == -1) {
                if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT)
                        writel(NVREG_POLL_DEFAULT_THROUGHPUT, base + NvRegPollingInterval);
@@ -3946,7 +5259,7 @@ static int nv_open(struct net_device *dev)
        writel((np->phyaddr << NVREG_ADAPTCTL_PHYSHIFT)|NVREG_ADAPTCTL_PHYVALID|NVREG_ADAPTCTL_RUNNING,
                        base + NvRegAdapterControl);
        writel(NVREG_MIISPEED_BIT8|NVREG_MIIDELAY, base + NvRegMIISpeed);
-       writel(NVREG_UNKSETUP4_VAL, base + NvRegUnknownSetupReg4);
+       writel(NVREG_MII_LINKCHANGE, base + NvRegMIIMask);
        if (np->wolenabled)
                writel(NVREG_WAKEUPFLAGS_ENABLE , base + NvRegWakeUpFlags);
 
@@ -3960,7 +5273,7 @@ static int nv_open(struct net_device *dev)
 
        nv_disable_hw_interrupts(dev, np->irqmask);
        pci_push(base);
-       writel(NVREG_MIISTAT_MASK2, base + NvRegMIIStatus);
+       writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus);
        writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
        pci_push(base);
 
@@ -3974,8 +5287,8 @@ static int nv_open(struct net_device *dev)
        spin_lock_irq(&np->lock);
        writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA);
        writel(0, base + NvRegMulticastAddrB);
-       writel(0, base + NvRegMulticastMaskA);
-       writel(0, base + NvRegMulticastMaskB);
+       writel(NVREG_MCASTMASKA_NONE, base + NvRegMulticastMaskA);
+       writel(NVREG_MCASTMASKB_NONE, base + NvRegMulticastMaskB);
        writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + NvRegPacketFilterFlags);
        /* One manual link speed update: Interrupts are enabled, future link
         * speed changes cause interrupts and are handled by nv_link_irq().
@@ -3983,34 +5296,36 @@ static int nv_open(struct net_device *dev)
        {
                u32 miistat;
                miistat = readl(base + NvRegMIIStatus);
-               writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
+               writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus);
                dprintk(KERN_INFO "startup: got 0x%08x.\n", miistat);
        }
        /* set linkspeed to invalid value, thus force nv_update_linkspeed
         * to init hw */
        np->linkspeed = 0;
        ret = nv_update_linkspeed(dev);
-       nv_start_rx(dev);
-       nv_start_tx(dev);
+       nv_start_rxtx(dev);
        netif_start_queue(dev);
+       nv_napi_enable(dev);
+
        if (ret) {
                netif_carrier_on(dev);
        } else {
-               printk("%s: no link during initialization.\n", dev->name);
+               printk(KERN_INFO "%s: no link during initialization.\n", dev->name);
                netif_carrier_off(dev);
        }
        if (oom)
                mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
 
        /* start statistics timer */
-       if (np->driver_data & DEV_HAS_STATISTICS)
-               mod_timer(&np->stats_poll, jiffies + STATS_INTERVAL);
+       if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3))
+               mod_timer(&np->stats_poll,
+                       round_jiffies(jiffies + STATS_INTERVAL));
 
        spin_unlock_irq(&np->lock);
 
        return 0;
 out_drain:
-       drain_ring(dev);
+       nv_drain_rxtx(dev);
        return ret;
 }
 
@@ -4022,7 +5337,8 @@ static int nv_close(struct net_device *dev)
        spin_lock_irq(&np->lock);
        np->in_shutdown = 1;
        spin_unlock_irq(&np->lock);
-       synchronize_irq(dev->irq);
+       nv_napi_disable(dev);
+       synchronize_irq(np->pci_dev->irq);
 
        del_timer_sync(&np->oom_kick);
        del_timer_sync(&np->nic_poll);
@@ -4030,8 +5346,7 @@ static int nv_close(struct net_device *dev)
 
        netif_stop_queue(dev);
        spin_lock_irq(&np->lock);
-       nv_stop_tx(dev);
-       nv_stop_rx(dev);
+       nv_stop_rxtx(dev);
        nv_txrx_reset(dev);
 
        /* disable interrupts on the nic or we will lock up */
@@ -4044,22 +5359,56 @@ static int nv_close(struct net_device *dev)
 
        nv_free_irq(dev);
 
-       drain_ring(dev);
+       nv_drain_rxtx(dev);
 
-       if (np->wolenabled)
+       if (np->wolenabled || !phy_power_down) {
+               nv_txrx_gate(dev, false);
+               writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + NvRegPacketFilterFlags);
                nv_start_rx(dev);
-
-       /* special op: write back the misordered MAC address - otherwise
-        * the next nv_probe would see a wrong address.
-        */
-       writel(np->orig_mac[0], base + NvRegMacAddrA);
-       writel(np->orig_mac[1], base + NvRegMacAddrB);
+       } else {
+               /* power down phy */
+               mii_rw(dev, np->phyaddr, MII_BMCR,
+                      mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ)|BMCR_PDOWN);
+               nv_txrx_gate(dev, true);
+       }
 
        /* FIXME: power down nic */
 
        return 0;
 }
 
+static const struct net_device_ops nv_netdev_ops = { /* used when !nv_optimized(np) */
+       .ndo_open               = nv_open,
+       .ndo_stop               = nv_close,
+       .ndo_get_stats          = nv_get_stats,
+       .ndo_start_xmit         = nv_start_xmit, /* differs from the _optimized table */
+       .ndo_tx_timeout         = nv_tx_timeout,
+       .ndo_change_mtu         = nv_change_mtu,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_mac_address    = nv_set_mac_address,
+       .ndo_set_multicast_list = nv_set_multicast,
+       .ndo_vlan_rx_register   = nv_vlan_rx_register,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = nv_poll_controller,
+#endif
+};
+/*
+ * Netdev callback table that nv_probe() installs when nv_optimized(np) is
+ * true. It matches nv_netdev_ops entry for entry except that
+ * .ndo_start_xmit points at nv_start_xmit_optimized.
+ */
+static const struct net_device_ops nv_netdev_ops_optimized = {
+       .ndo_open               = nv_open,
+       .ndo_stop               = nv_close,
+       .ndo_get_stats          = nv_get_stats,
+       .ndo_start_xmit         = nv_start_xmit_optimized,
+       .ndo_tx_timeout         = nv_tx_timeout,
+       .ndo_change_mtu         = nv_change_mtu,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_mac_address    = nv_set_mac_address,
+       .ndo_set_multicast_list = nv_set_multicast,
+       .ndo_vlan_rx_register   = nv_vlan_rx_register,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = nv_poll_controller,
+#endif
+};
+
 static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 {
        struct net_device *dev;
@@ -4067,7 +5416,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        unsigned long addr;
        u8 __iomem *base;
        int err, i;
-       u32 powerstate;
+       u32 powerstate, txreg;
+       u32 phystate_orig = 0, phystate;
+       int phyinitialized = 0;
+       static int printed_version;
+
+       if (!printed_version++)
+               printk(KERN_INFO "%s: Reverse Engineered nForce ethernet"
+                      " driver. Version %s.\n", DRV_NAME, FORCEDETH_VERSION);
 
        dev = alloc_etherdev(sizeof(struct fe_priv));
        err = -ENOMEM;
@@ -4075,9 +5431,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                goto out;
 
        np = netdev_priv(dev);
+       np->dev = dev;
        np->pci_dev = pci_dev;
        spin_lock_init(&np->lock);
-       SET_MODULE_OWNER(dev);
        SET_NETDEV_DEV(dev, &pci_dev->dev);
 
        init_timer(&np->oom_kick);
@@ -4091,11 +5447,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        np->stats_poll.function = &nv_do_stats_poll;    /* timer handler */
 
        err = pci_enable_device(pci_dev);
-       if (err) {
-               printk(KERN_INFO "forcedeth: pci_enable_dev failed (%d) for device %s\n",
-                               err, pci_name(pci_dev));
+       if (err)
                goto out_free;
-       }
 
        pci_set_master(pci_dev);
 
@@ -4103,7 +5456,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        if (err < 0)
                goto out_disable;
 
-       if (id->driver_data & (DEV_HAS_VLAN|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_STATISTICS))
+       if (id->driver_data & (DEV_HAS_VLAN|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3))
+               np->register_size = NV_PCI_REGSZ_VER3;
+       else if (id->driver_data & DEV_HAS_STATISTICS_V1)
                np->register_size = NV_PCI_REGSZ_VER2;
        else
                np->register_size = NV_PCI_REGSZ_VER1;
@@ -4122,13 +5477,15 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                }
        }
        if (i == DEVICE_COUNT_RESOURCE) {
-               printk(KERN_INFO "forcedeth: Couldn't find register window for device %s.\n",
-                                       pci_name(pci_dev));
+               dev_printk(KERN_INFO, &pci_dev->dev,
+                          "Couldn't find register window\n");
                goto out_relreg;
        }
 
        /* copy of driver data */
        np->driver_data = id->driver_data;
+       /* copy of device id */
+       np->device_id = id->device;
 
        /* handle different descriptor versions */
        if (id->driver_data & DEV_HAS_HIGH_DMA) {
@@ -4136,16 +5493,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                np->desc_ver = DESC_VER_3;
                np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
                if (dma_64bit) {
-                       if (pci_set_dma_mask(pci_dev, DMA_39BIT_MASK)) {
-                               printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
-                                      pci_name(pci_dev));
-                       } else {
+                       if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(39)))
+                               dev_printk(KERN_INFO, &pci_dev->dev,
+                                       "64-bit DMA failed, using 32-bit addressing\n");
+                       else
                                dev->features |= NETIF_F_HIGHDMA;
-                               printk(KERN_INFO "forcedeth: using HIGHDMA\n");
-                       }
-                       if (pci_set_consistent_dma_mask(pci_dev, DMA_39BIT_MASK)) {
-                               printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed, using 32-bit ring buffers for device %s.\n",
-                                      pci_name(pci_dev));
+                       if (pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(39))) {
+                               dev_printk(KERN_INFO, &pci_dev->dev,
+                                       "64-bit DMA (consistent) failed, using 32-bit ring buffers\n");
                        }
                }
        } else if (id->driver_data & DEV_HAS_LARGEDESC) {
@@ -4163,31 +5518,23 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                np->pkt_limit = NV_PKTLIMIT_2;
 
        if (id->driver_data & DEV_HAS_CHECKSUM) {
+               np->rx_csum = 1;
                np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
-               dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
-#ifdef NETIF_F_TSO
+               dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
                dev->features |= NETIF_F_TSO;
-#endif
-       }
+               dev->features |= NETIF_F_GRO;
+       }
 
        np->vlanctl_bits = 0;
        if (id->driver_data & DEV_HAS_VLAN) {
                np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
                dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
-               dev->vlan_rx_register = nv_vlan_rx_register;
-               dev->vlan_rx_kill_vid = nv_vlan_rx_kill_vid;
-       }
-
-       np->msi_flags = 0;
-       if ((id->driver_data & DEV_HAS_MSI) && msi) {
-               np->msi_flags |= NV_MSI_CAPABLE;
-       }
-       if ((id->driver_data & DEV_HAS_MSI_X) && msix) {
-               np->msi_flags |= NV_MSI_X_CAPABLE;
        }
 
        np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
-       if (id->driver_data & DEV_HAS_PAUSEFRAME_TX) {
+       if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
+           (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
+           (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V3)) {
                np->pause_flags |= NV_PAUSEFRAME_TX_CAPABLE | NV_PAUSEFRAME_TX_REQ;
        }
 
@@ -4202,10 +5549,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 
        np->rx_ring_size = RX_RING_DEFAULT;
        np->tx_ring_size = TX_RING_DEFAULT;
-       np->tx_limit_stop = np->tx_ring_size - TX_LIMIT_DIFFERENCE;
-       np->tx_limit_start = np->tx_ring_size - TX_LIMIT_DIFFERENCE - 1;
 
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+       if (!nv_optimized(np)) {
                np->rx_ring.orig = pci_alloc_consistent(pci_dev,
                                        sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size),
                                        &np->ring_addr);
@@ -4220,31 +5565,18 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                        goto out_unmap;
                np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
        }
-       np->rx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->rx_ring_size, GFP_KERNEL);
-       np->rx_dma = kmalloc(sizeof(dma_addr_t) * np->rx_ring_size, GFP_KERNEL);
-       np->tx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->tx_ring_size, GFP_KERNEL);
-       np->tx_dma = kmalloc(sizeof(dma_addr_t) * np->tx_ring_size, GFP_KERNEL);
-       np->tx_dma_len = kmalloc(sizeof(unsigned int) * np->tx_ring_size, GFP_KERNEL);
-       if (!np->rx_skbuff || !np->rx_dma || !np->tx_skbuff || !np->tx_dma || !np->tx_dma_len)
+       np->rx_skb = kcalloc(np->rx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL);
+       np->tx_skb = kcalloc(np->tx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL);
+       if (!np->rx_skb || !np->tx_skb)
                goto out_freering;
-       memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size);
-       memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size);
-       memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
-       memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
-       memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
-
-       dev->open = nv_open;
-       dev->stop = nv_close;
-       dev->hard_start_xmit = nv_start_xmit;
-       dev->get_stats = nv_get_stats;
-       dev->change_mtu = nv_change_mtu;
-       dev->set_mac_address = nv_set_mac_address;
-       dev->set_multicast_list = nv_set_multicast;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       dev->poll_controller = nv_poll_controller;
-#endif
+
+       if (!nv_optimized(np))
+               dev->netdev_ops = &nv_netdev_ops;
+       else
+               dev->netdev_ops = &nv_netdev_ops_optimized;
+
+       netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
        SET_ETHTOOL_OPS(dev, &ops);
-       dev->tx_timeout = nv_tx_timeout;
        dev->watchdog_timeo = NV_WATCHDOG_TIMEO;
 
        pci_set_drvdata(pci_dev, dev);
@@ -4254,12 +5586,43 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        np->orig_mac[0] = readl(base + NvRegMacAddrA);
        np->orig_mac[1] = readl(base + NvRegMacAddrB);
 
-       dev->dev_addr[0] = (np->orig_mac[1] >>  8) & 0xff;
-       dev->dev_addr[1] = (np->orig_mac[1] >>  0) & 0xff;
-       dev->dev_addr[2] = (np->orig_mac[0] >> 24) & 0xff;
-       dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff;
-       dev->dev_addr[4] = (np->orig_mac[0] >>  8) & 0xff;
-       dev->dev_addr[5] = (np->orig_mac[0] >>  0) & 0xff;
+       /* check the workaround bit for correct mac address order */
+       txreg = readl(base + NvRegTransmitPoll);
+       if (id->driver_data & DEV_HAS_CORRECT_MACADDR) {
+               /* mac address is already in correct order */
+               dev->dev_addr[0] = (np->orig_mac[0] >>  0) & 0xff;
+               dev->dev_addr[1] = (np->orig_mac[0] >>  8) & 0xff;
+               dev->dev_addr[2] = (np->orig_mac[0] >> 16) & 0xff;
+               dev->dev_addr[3] = (np->orig_mac[0] >> 24) & 0xff;
+               dev->dev_addr[4] = (np->orig_mac[1] >>  0) & 0xff;
+               dev->dev_addr[5] = (np->orig_mac[1] >>  8) & 0xff;
+       } else if (txreg & NVREG_TRANSMITPOLL_MAC_ADDR_REV) {
+               /* mac address is already in correct order */
+               dev->dev_addr[0] = (np->orig_mac[0] >>  0) & 0xff;
+               dev->dev_addr[1] = (np->orig_mac[0] >>  8) & 0xff;
+               dev->dev_addr[2] = (np->orig_mac[0] >> 16) & 0xff;
+               dev->dev_addr[3] = (np->orig_mac[0] >> 24) & 0xff;
+               dev->dev_addr[4] = (np->orig_mac[1] >>  0) & 0xff;
+               dev->dev_addr[5] = (np->orig_mac[1] >>  8) & 0xff;
+               /*
+                * Set orig mac address back to the reversed version.
+                * This flag will be cleared during low power transition.
+                * Therefore, we should always put back the reversed address.
+                */
+               np->orig_mac[0] = (dev->dev_addr[5] << 0) + (dev->dev_addr[4] << 8) +
+                       (dev->dev_addr[3] << 16) + (dev->dev_addr[2] << 24);
+               np->orig_mac[1] = (dev->dev_addr[1] << 0) + (dev->dev_addr[0] << 8);
+       } else {
+               /* need to reverse mac address to correct order */
+               dev->dev_addr[0] = (np->orig_mac[1] >>  8) & 0xff;
+               dev->dev_addr[1] = (np->orig_mac[1] >>  0) & 0xff;
+               dev->dev_addr[2] = (np->orig_mac[0] >> 24) & 0xff;
+               dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff;
+               dev->dev_addr[4] = (np->orig_mac[0] >>  8) & 0xff;
+               dev->dev_addr[5] = (np->orig_mac[0] >>  0) & 0xff;
+               writel(txreg|NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll);
+               printk(KERN_DEBUG "nv_probe: set workaround bit for reversed mac addr\n");
+       }
        memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
        if (!is_valid_ether_addr(dev->perm_addr)) {
@@ -4267,35 +5630,36 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                 * Bad mac address. At least one bios sets the mac address
                 * to 01:23:45:67:89:ab
                 */
-               printk(KERN_ERR "%s: Invalid Mac address detected: %02x:%02x:%02x:%02x:%02x:%02x\n",
-                       pci_name(pci_dev),
-                       dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2],
-                       dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]);
-               printk(KERN_ERR "Please complain to your hardware vendor. Switching to a random MAC.\n");
-               dev->dev_addr[0] = 0x00;
-               dev->dev_addr[1] = 0x00;
-               dev->dev_addr[2] = 0x6c;
-               get_random_bytes(&dev->dev_addr[3], 3);
+               dev_printk(KERN_ERR, &pci_dev->dev,
+                       "Invalid Mac address detected: %pM\n",
+                       dev->dev_addr);
+               dev_printk(KERN_ERR, &pci_dev->dev,
+                       "Please complain to your hardware vendor. Switching to a random MAC.\n");
+               random_ether_addr(dev->dev_addr);
        }
 
-       dprintk(KERN_DEBUG "%s: MAC Address %02x:%02x:%02x:%02x:%02x:%02x\n", pci_name(pci_dev),
-                       dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2],
-                       dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]);
+       dprintk(KERN_DEBUG "%s: MAC Address %pM\n",
+               pci_name(pci_dev), dev->dev_addr);
+
+       /* set mac address */
+       nv_copy_mac_to_hw(dev);
+
+       /* Workaround current PCI init glitch:  wakeup bits aren't
+        * being set from PCI PM capability.
+        */
+       device_init_wakeup(&pci_dev->dev, 1);
 
        /* disable WOL */
        writel(0, base + NvRegWakeUpFlags);
        np->wolenabled = 0;
 
        if (id->driver_data & DEV_HAS_POWER_CNTRL) {
-               u8 revision_id;
-               pci_read_config_byte(pci_dev, PCI_REVISION_ID, &revision_id);
 
                /* take phy and nic out of low power mode */
                powerstate = readl(base + NvRegPowerState2);
                powerstate &= ~NVREG_POWERSTATE2_POWERUP_MASK;
-               if ((id->device == PCI_DEVICE_ID_NVIDIA_NVENET_12 ||
-                    id->device == PCI_DEVICE_ID_NVIDIA_NVENET_13) &&
-                   revision_id >= 0xA3)
+               if ((id->driver_data & DEV_NEED_LOW_POWER_FIX) &&
+                   pci_dev->revision >= 0xA3)
                        powerstate |= NVREG_POWERSTATE2_POWERUP_REV_A3;
                writel(powerstate, base + NvRegPowerState2);
        }
@@ -4305,14 +5669,35 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        } else {
                np->tx_flags = NV_TX2_VALID;
        }
-       if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) {
-               np->irqmask = NVREG_IRQMASK_THROUGHPUT;
-               if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */
-                       np->msi_flags |= 0x0003;
-       } else {
+
+       np->msi_flags = 0;
+       if ((id->driver_data & DEV_HAS_MSI) && msi) {
+               np->msi_flags |= NV_MSI_CAPABLE;
+       }
+       if ((id->driver_data & DEV_HAS_MSI_X) && msix) {
+               /* msix has had reported issues when modifying irqmask
+                  as in the case of napi, therefore, disable for now
+               */
+#if 0
+               np->msi_flags |= NV_MSI_X_CAPABLE;
+#endif
+       }
+
+       if (optimization_mode == NV_OPTIMIZATION_MODE_CPU) {
                np->irqmask = NVREG_IRQMASK_CPU;
                if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */
                        np->msi_flags |= 0x0001;
+       } else if (optimization_mode == NV_OPTIMIZATION_MODE_DYNAMIC &&
+                  !(id->driver_data & DEV_NEED_TIMERIRQ)) {
+               /* start off in throughput mode */
+               np->irqmask = NVREG_IRQMASK_THROUGHPUT;
+               /* remove support for msix mode */
+               np->msi_flags &= ~NV_MSI_X_CAPABLE;
+       } else {
+               optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT;
+               np->irqmask = NVREG_IRQMASK_THROUGHPUT;
+               if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */
+                       np->msi_flags |= 0x0003;
        }
 
        if (id->driver_data & DEV_NEED_TIMERIRQ)
@@ -4326,6 +5711,50 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                np->need_linktimer = 0;
        }
 
+       /* Limit the number of tx's outstanding for hw bug */
+       if (id->driver_data & DEV_NEED_TX_LIMIT) {
+               np->tx_limit = 1;
+               if (((id->driver_data & DEV_NEED_TX_LIMIT2) == DEV_NEED_TX_LIMIT2) &&
+                   pci_dev->revision >= 0xA2)
+                       np->tx_limit = 0;
+       }
+
+       /* clear phy state and temporarily halt phy interrupts */
+       writel(0, base + NvRegMIIMask);
+       phystate = readl(base + NvRegAdapterControl);
+       if (phystate & NVREG_ADAPTCTL_RUNNING) {
+               phystate_orig = 1;
+               phystate &= ~NVREG_ADAPTCTL_RUNNING;
+               writel(phystate, base + NvRegAdapterControl);
+       }
+       writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus);
+
+       if (id->driver_data & DEV_HAS_MGMT_UNIT) {
+               /* management unit running on the mac? */
+               if ((readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_MGMT_ST) &&
+                   (readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_PHY_INIT) &&
+                   nv_mgmt_acquire_sema(dev) &&
+                   nv_mgmt_get_version(dev)) {
+                       np->mac_in_use = 1;
+                       if (np->mgmt_version > 0) {
+                               np->mac_in_use = readl(base + NvRegMgmtUnitControl) & NVREG_MGMTUNITCONTROL_INUSE;
+                       }
+                       dprintk(KERN_INFO "%s: mgmt unit is running. mac in use %x.\n",
+                               pci_name(pci_dev), np->mac_in_use);
+                       /* management unit setup the phy already? */
+                       if (np->mac_in_use &&
+                           ((readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK) ==
+                            NVREG_XMITCTL_SYNC_PHY_INIT)) {
+                               /* phy is inited by mgmt unit */
+                               phyinitialized = 1;
+                               dprintk(KERN_INFO "%s: Phy already initialized by mgmt unit.\n",
+                                       pci_name(pci_dev));
+                       } else {
+                               /* we need to init the phy */
+                       }
+               }
+       }
+
        /* find a suitable phy */
        for (i = 1; i <= 32; i++) {
                int id1, id2;
@@ -4342,22 +5771,39 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                if (id2 < 0 || id2 == 0xffff)
                        continue;
 
+               np->phy_model = id2 & PHYID2_MODEL_MASK;
                id1 = (id1 & PHYID1_OUI_MASK) << PHYID1_OUI_SHFT;
                id2 = (id2 & PHYID2_OUI_MASK) >> PHYID2_OUI_SHFT;
                dprintk(KERN_DEBUG "%s: open: Found PHY %04x:%04x at address %d.\n",
                        pci_name(pci_dev), id1, id2, phyaddr);
                np->phyaddr = phyaddr;
                np->phy_oui = id1 | id2;
+
+               /* Realtek hardcoded phy id1 to all zero's on certain phys */
+               if (np->phy_oui == PHY_OUI_REALTEK2)
+                       np->phy_oui = PHY_OUI_REALTEK;
+               /* Setup phy revision for Realtek */
+               if (np->phy_oui == PHY_OUI_REALTEK && np->phy_model == PHY_MODEL_REALTEK_8211)
+                       np->phy_rev = mii_rw(dev, phyaddr, MII_RESV1, MII_READ) & PHY_REV_MASK;
+
                break;
        }
        if (i == 33) {
-               printk(KERN_INFO "%s: open: Could not find a valid PHY.\n",
-                      pci_name(pci_dev));
+               dev_printk(KERN_INFO, &pci_dev->dev,
+                       "open: Could not find a valid PHY.\n");
                goto out_error;
        }
 
-       /* reset it */
-       phy_init(dev);
+       if (!phyinitialized) {
+               /* reset it */
+               phy_init(dev);
+       } else {
+               /* see if it is a gigabit phy */
+               u32 mii_status = mii_rw(dev, np->phyaddr, MII_BMSR, MII_READ);
+               if (mii_status & PHY_GIGABIT) {
+                       np->gigabit = PHY_GIGABIT;
+               }
+       }
 
        /* set default link speed settings */
        np->linkspeed = NVREG_LINKSPEED_FORCE|NVREG_LINKSPEED_10;
@@ -4366,16 +5812,43 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 
        err = register_netdev(dev);
        if (err) {
-               printk(KERN_INFO "forcedeth: unable to register netdev: %d\n", err);
+               dev_printk(KERN_INFO, &pci_dev->dev,
+                          "unable to register netdev: %d\n", err);
                goto out_error;
        }
-       printk(KERN_INFO "%s: forcedeth.c: subsystem: %05x:%04x bound to %s\n",
-                       dev->name, pci_dev->subsystem_vendor, pci_dev->subsystem_device,
-                       pci_name(pci_dev));
+
+       dev_printk(KERN_INFO, &pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, "
+                  "addr %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
+                  dev->name,
+                  np->phy_oui,
+                  np->phyaddr,
+                  dev->dev_addr[0],
+                  dev->dev_addr[1],
+                  dev->dev_addr[2],
+                  dev->dev_addr[3],
+                  dev->dev_addr[4],
+                  dev->dev_addr[5]);
+
+       dev_printk(KERN_INFO, &pci_dev->dev, "%s%s%s%s%s%s%s%s%s%sdesc-v%u\n",
+                  dev->features & NETIF_F_HIGHDMA ? "highdma " : "",
+                  dev->features & (NETIF_F_IP_CSUM | NETIF_F_SG) ?
+                       "csum " : "",
+                  dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX) ?
+                       "vlan " : "",
+                  id->driver_data & DEV_HAS_POWER_CNTRL ? "pwrctl " : "",
+                  id->driver_data & DEV_HAS_MGMT_UNIT ? "mgmt " : "",
+                  id->driver_data & DEV_NEED_TIMERIRQ ? "timirq " : "",
+                  np->gigabit == PHY_GIGABIT ? "gbit " : "",
+                  np->need_linktimer ? "lnktim " : "",
+                  np->msi_flags & NV_MSI_CAPABLE ? "msi " : "",
+                  np->msi_flags & NV_MSI_X_CAPABLE ? "msi-x " : "",
+                  np->desc_ver);
 
        return 0;
 
 out_error:
+       if (phystate_orig)
+               writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
        pci_set_drvdata(pci_dev, NULL);
 out_freering:
        free_rings(dev);
@@ -4391,12 +5864,56 @@ out:
        return err;
 }
 
+/*
+ * nv_restore_phy - restore PHY-related changes on a Realtek 8201
+ * @dev: network device whose PHY is restored
+ *
+ * Acts only when the PHY is a Realtek 8201 and the phy_cross module
+ * parameter equals NV_CROSSOVER_DETECTION_DISABLED; in every other case
+ * the PHY is left untouched.  When it does act, it rewrites the field
+ * selected by PHY_REALTEK_INIT_MSK1 in PHY_REALTEK_INIT_REG2 and then
+ * restarts auto negotiation.
+ */
+static void nv_restore_phy(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+       u16 init_reg2, bmcr;
+
+       if (np->phy_oui != PHY_OUI_REALTEK ||
+           np->phy_model != PHY_MODEL_REALTEK_8201 ||
+           phy_cross != NV_CROSSOVER_DETECTION_DISABLED)
+               return;
+
+       /*
+        * NOTE(review): the INIT3 ... INIT1 writes to INIT_REG1 presumably
+        * bracket access to INIT_REG2 - confirm against the PHY datasheet.
+        */
+       mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT3);
+       init_reg2 = mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, MII_READ);
+       init_reg2 = (init_reg2 & ~PHY_REALTEK_INIT_MSK1) | PHY_REALTEK_INIT8;
+       mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG2, init_reg2);
+       mii_rw(dev, np->phyaddr, PHY_REALTEK_INIT_REG1, PHY_REALTEK_INIT1);
+
+       /* restart auto negotiation */
+       bmcr = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
+       bmcr |= BMCR_ANRESTART | BMCR_ANENABLE;
+       mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+}
+
+/*
+ * nv_restore_mac_addr - write the saved MAC register values back
+ * @pci_dev: PCI device whose driver data is the net_device
+ *
+ * special op: writes np->orig_mac[0] and np->orig_mac[1] back to
+ * NvRegMacAddrA and NvRegMacAddrB (the misordered MAC address) and clears
+ * NVREG_TRANSMITPOLL_MAC_ADDR_REV in NvRegTransmitPoll - otherwise the
+ * next nv_probe would see a wrong address.
+ */
+static void nv_restore_mac_addr(struct pci_dev *pci_dev)
+{
+       struct net_device *netdev = pci_get_drvdata(pci_dev);
+       struct fe_priv *priv = netdev_priv(netdev);
+       u8 __iomem *regs = get_hwbase(netdev);
+       u32 txpoll;
+
+       writel(priv->orig_mac[0], regs + NvRegMacAddrA);
+       writel(priv->orig_mac[1], regs + NvRegMacAddrB);
+
+       /* read-modify-write so only the MAC_ADDR_REV bit changes */
+       txpoll = readl(regs + NvRegTransmitPoll);
+       txpoll &= ~NVREG_TRANSMITPOLL_MAC_ADDR_REV;
+       writel(txpoll, regs + NvRegTransmitPoll);
+}
+
 static void __devexit nv_remove(struct pci_dev *pci_dev)
 {
        struct net_device *dev = pci_get_drvdata(pci_dev);
 
        unregister_netdev(dev);
 
+       nv_restore_mac_addr(pci_dev);
+
+       /* restore any phy related changes */
+       nv_restore_phy(dev);
+
+       nv_mgmt_release_sema(dev);
+
        /* free all structures */
        free_rings(dev);
        iounmap(get_hwbase(dev));
@@ -4406,114 +5923,272 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
        pci_set_drvdata(pci_dev, NULL);
 }
 
-static struct pci_device_id pci_tbl[] = {
+#ifdef CONFIG_PM
+/*
+ * nv_suspend - PCI suspend callback
+ * @pdev:  PCI device being suspended
+ * @state: power-management message, passed on to pci_choose_state()
+ *
+ * Closes the interface if it is running, detaches it, copies the device
+ * register window into np->saved_config_space, then saves PCI state,
+ * sets up wake-up according to np->wolenabled, disables the device and
+ * moves it to the chosen power state.  Always returns 0.
+ */
+static int nv_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       int i;
+
+       /* bring a running interface down through the regular close path */
+       if (netif_running(dev))
+               nv_close(dev);
+       netif_device_detach(dev);
+
+       /*
+        * save non-pci configuration space
+        *
+        * A window of register_size bytes holds register_size/sizeof(u32)
+        * words, indices 0 .. count-1.  The bound is therefore '<'; '<='
+        * would read one word past the window and store one slot past the
+        * matching index of saved_config_space.
+        * NOTE(review): assumes register_size is the window length in
+        * bytes - confirm where it is assigned.
+        */
+       for (i = 0; i < np->register_size/sizeof(u32); i++)
+               np->saved_config_space[i] = readl(base + i*sizeof(u32));
+
+       pci_save_state(pdev);
+       pci_enable_wake(pdev, pci_choose_state(pdev, state), np->wolenabled);
+       pci_disable_device(pdev);
+       pci_set_power_state(pdev, pci_choose_state(pdev, state));
+       return 0;
+}
+
+/*
+ * nv_resume - PCI resume callback
+ * @pdev: PCI device being resumed
+ *
+ * Returns the device to D0, restores PCI state and the register window
+ * saved by nv_suspend(), reapplies the MSI private config write on
+ * devices flagged DEV_NEED_MSI_FIX, reinitialises the PHY and reattaches
+ * the interface.  If the interface is marked running it is reopened and
+ * its multicast setup is reapplied.
+ *
+ * Returns the result of nv_open() when the interface is reopened,
+ * otherwise 0.
+ */
+static int nv_resume(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct fe_priv *np = netdev_priv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       int i, rc = 0;
+
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+       /* ack any pending wake events, disable PME */
+       pci_enable_wake(pdev, PCI_D0, 0);
+
+       /*
+        * restore non-pci configuration space; the bound matches the save
+        * loop in nv_suspend() so only words that were saved are written.
+        */
+       for (i = 0; i < np->register_size/sizeof(u32); i++)
+               writel(np->saved_config_space[i], base+i*sizeof(u32));
+
+       if (np->driver_data & DEV_NEED_MSI_FIX)
+               pci_write_config_dword(pdev, NV_MSI_PRIV_OFFSET, NV_MSI_PRIV_VALUE);
+
+       /* restore phy state, including autoneg */
+       phy_init(dev);
+
+       netif_device_attach(dev);
+       if (netif_running(dev)) {
+               rc = nv_open(dev);
+               nv_set_multicast(dev);
+       }
+       return rc;
+}
+
+/*
+ * nv_shutdown - PCI shutdown callback
+ * @pdev: PCI device being shut down
+ *
+ * Closes the interface if it is running and disables the PCI device.
+ * What else happens depends on system_state:
+ *  - not SYSTEM_POWER_OFF: the original MAC registers are written back;
+ *  - SYSTEM_POWER_OFF: wake-up is set up according to np->wolenabled
+ *    and the device is put into D3hot.
+ */
+static void nv_shutdown(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct fe_priv *priv = netdev_priv(netdev);
+
+       if (netif_running(netdev))
+               nv_close(netdev);
+
+       /*
+        * A kernel started by kexec must find the original MAC, so put it
+        * back - except on a real poweroff, where restoring it would leave
+        * the MAC used for WOL reversed on at least some boards.
+        */
+       if (system_state != SYSTEM_POWER_OFF)
+               nv_restore_mac_addr(pdev);
+
+       pci_disable_device(pdev);
+
+       /*
+        * Reinitialising from D3 hot apparently does not work, so the
+        * device only enters D3 when the system really powers off.
+        */
+       if (system_state != SYSTEM_POWER_OFF)
+               return;
+
+       /* prefer wake from D3cold; fall back to D3hot if that call fails */
+       if (pci_enable_wake(pdev, PCI_D3cold, priv->wolenabled) != 0)
+               pci_enable_wake(pdev, PCI_D3hot, priv->wolenabled);
+       pci_set_power_state(pdev, PCI_D3hot);
+}
+#else
+#define nv_suspend NULL
+#define nv_shutdown NULL
+#define nv_resume NULL
+#endif /* CONFIG_PM */
+
+static DEFINE_PCI_DEVICE_TABLE(pci_tbl) = {
        {       /* nForce Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_1),
+               PCI_DEVICE(0x10DE, 0x01C3),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
        },
        {       /* nForce2 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_2),
+               PCI_DEVICE(0x10DE, 0x0066),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
        },
        {       /* nForce3 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_3),
+               PCI_DEVICE(0x10DE, 0x00D6),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
        },
        {       /* nForce3 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_4),
+               PCI_DEVICE(0x10DE, 0x0086),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
        },
        {       /* nForce3 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5),
+               PCI_DEVICE(0x10DE, 0x008C),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
        },
        {       /* nForce3 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6),
+               PCI_DEVICE(0x10DE, 0x00E6),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
        },
        {       /* nForce3 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7),
+               PCI_DEVICE(0x10DE, 0x00DF),
                .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
        },
        {       /* CK804 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+               PCI_DEVICE(0x10DE, 0x0056),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_STATISTICS_V1|DEV_NEED_TX_LIMIT,
        },
        {       /* CK804 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+               PCI_DEVICE(0x10DE, 0x0057),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_STATISTICS_V1|DEV_NEED_TX_LIMIT,
        },
        {       /* MCP04 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+               PCI_DEVICE(0x10DE, 0x0037),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_STATISTICS_V1|DEV_NEED_TX_LIMIT,
        },
        {       /* MCP04 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+               PCI_DEVICE(0x10DE, 0x0038),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_STATISTICS_V1|DEV_NEED_TX_LIMIT,
        },
        {       /* MCP51 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL,
+               PCI_DEVICE(0x10DE, 0x0268),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_STATISTICS_V1|DEV_NEED_LOW_POWER_FIX,
        },
        {       /* MCP51 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_13),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL,
+               PCI_DEVICE(0x10DE, 0x0269),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_STATISTICS_V1|DEV_NEED_LOW_POWER_FIX,
        },
        {       /* MCP55 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0372),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_NEED_TX_LIMIT|DEV_NEED_MSI_FIX,
        },
        {       /* MCP55 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0373),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_NEED_TX_LIMIT|DEV_NEED_MSI_FIX,
        },
        {       /* MCP61 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_16),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x03E5),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_MSI_FIX,
        },
        {       /* MCP61 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_17),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x03E6),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_MSI_FIX,
        },
        {       /* MCP61 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_18),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x03EE),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_MSI_FIX,
        },
        {       /* MCP61 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_19),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x03EF),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_MSI_FIX,
        },
        {       /* MCP65 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_20),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0450),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_TX_LIMIT|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
        },
        {       /* MCP65 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_21),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0451),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_TX_LIMIT|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
        },
        {       /* MCP65 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_22),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0452),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_TX_LIMIT|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
        },
        {       /* MCP65 Ethernet Controller */
-               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_23),
-               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS|DEV_HAS_TEST_EXTENDED,
+               PCI_DEVICE(0x10DE, 0x0453),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_NEED_TX_LIMIT|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP67 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x054C),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP67 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x054D),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP67 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x054E),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP67 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x054F),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP73 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x07DC),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP73 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x07DD),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP73 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x07DE),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP73 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x07DF),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_HAS_GEAR_MODE|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP77 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0760),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V2|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP77 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0761),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V2|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP77 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0762),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V2|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP77 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0763),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V2|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP79 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0AB0),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V3|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP79 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0AB1),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V3|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP79 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0AB2),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V3|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP79 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0AB3),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V3|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_NEED_TX_LIMIT2|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX|DEV_NEED_MSI_FIX,
+       },
+       {       /* MCP89 Ethernet Controller */
+               PCI_DEVICE(0x10DE, 0x0D7D),
+               .driver_data = DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX_V3|DEV_HAS_STATISTICS_V3|DEV_HAS_TEST_EXTENDED|DEV_HAS_CORRECT_MACADDR|DEV_HAS_COLLISION_FIX|DEV_HAS_GEAR_MODE|DEV_NEED_PHY_INIT_FIX,
        },
        {0,},
 };
 
 static struct pci_driver driver = {
-       .name = "forcedeth",
-       .id_table = pci_tbl,
-       .probe = nv_probe,
-       .remove = __devexit_p(nv_remove),
+       .name           = DRV_NAME,
+       .id_table       = pci_tbl,
+       .probe          = nv_probe,
+       .remove         = __devexit_p(nv_remove),
+       .suspend        = nv_suspend,
+       .resume         = nv_resume,
+       .shutdown       = nv_shutdown,
 };
 
-
 static int __init init_nic(void)
 {
-       printk(KERN_INFO "forcedeth.c: Reverse Engineered nForce ethernet driver. Version %s.\n", FORCEDETH_VERSION);
-       return pci_module_init(&driver);
+       return pci_register_driver(&driver);
 }
 
 static void __exit exit_nic(void)
@@ -4524,7 +6199,7 @@ static void __exit exit_nic(void)
 module_param(max_interrupt_work, int, 0);
 MODULE_PARM_DESC(max_interrupt_work, "forcedeth maximum events handled per interrupt");
 module_param(optimization_mode, int, 0);
-MODULE_PARM_DESC(optimization_mode, "In throughput mode (0), every tx & rx packet will generate an interrupt. In CPU mode (1), interrupts are controlled by a timer.");
+MODULE_PARM_DESC(optimization_mode, "In throughput mode (0), every tx & rx packet will generate an interrupt. In CPU mode (1), interrupts are controlled by a timer. In dynamic mode (2), the mode toggles between throughput and CPU mode based on network load.");
 module_param(poll_interval, int, 0);
 MODULE_PARM_DESC(poll_interval, "Interval determines how frequent timer interrupt is generated by [(time_in_micro_secs * 100) / (2^10)]. Min is 0 and Max is 65535.");
 module_param(msi, int, 0);
@@ -4533,6 +6208,10 @@ module_param(msix, int, 0);
 MODULE_PARM_DESC(msix, "MSIX interrupts are enabled by setting to 1 and disabled by setting to 0.");
 module_param(dma_64bit, int, 0);
 MODULE_PARM_DESC(dma_64bit, "High DMA is enabled by setting to 1 and disabled by setting to 0.");
+module_param(phy_cross, int, 0);
+MODULE_PARM_DESC(phy_cross, "Phy crossover detection for Realtek 8201 phy is enabled by setting to 1 and disabled by setting to 0.");
+module_param(phy_power_down, int, 0);
+MODULE_PARM_DESC(phy_power_down, "Power down phy and disable link when interface is down (1), or leave phy powered up (0).");
 
 MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>");
 MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver");