virtio: finer-grained features for virtio_net
[safe/jmp/linux-2.6] / drivers / net / s2io.c
index e2c206c..523478e 100644 (file)
@@ -50,6 +50,8 @@
  *                 Possible values '1' for enable , '0' for disable.
  *                 Default is '2' - which means disable in promisc mode
  *                 and enable in non-promiscuous mode.
+ * multiq: This parameter is used to enable/disable MULTIQUEUE support.
+ *      Possible values '1' for enable and '0' for disable. Default is '0'.
  ************************************************************************/
 
 #include <linux/module.h>
@@ -84,7 +86,7 @@
 #include "s2io.h"
 #include "s2io-regs.h"
 
-#define DRV_VERSION "2.0.26.15-1"
+#define DRV_VERSION "2.0.26.23"
 
 /* S2io Driver name & version. */
 static char s2io_driver_name[] = "Neterion";
@@ -115,20 +117,6 @@ static inline int RXD_IS_UP2DT(struct RxD_t *rxdp)
 
 #define LINK_IS_UP(val64) (!(val64 & (ADAPTER_STATUS_RMAC_REMOTE_FAULT | \
                                      ADAPTER_STATUS_RMAC_LOCAL_FAULT)))
-#define TASKLET_IN_USE test_and_set_bit(0, (&sp->tasklet_status))
-#define PANIC  1
-#define LOW    2
-static inline int rx_buffer_level(struct s2io_nic * sp, int rxb_size, int ring)
-{
-       struct mac_info *mac_control;
-
-       mac_control = &sp->mac_control;
-       if (rxb_size <= rxd_count[sp->rxd_mode])
-               return PANIC;
-       else if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16)
-               return  LOW;
-       return 0;
-}
 
 static inline int is_s2io_card_up(const struct s2io_nic * sp)
 {
@@ -386,6 +374,26 @@ static void s2io_vlan_rx_register(struct net_device *dev,
 /* A flag indicating whether 'RX_PA_CFG_STRIP_VLAN_TAG' bit is set or not */
 static int vlan_strip_flag;
 
+/* Unregister the vlan */
+static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid)
+{
+       int i;
+       struct s2io_nic *nic = dev->priv;
+       unsigned long flags[MAX_TX_FIFOS];
+       struct mac_info *mac_control = &nic->mac_control;
+       struct config_param *config = &nic->config;
+
+       for (i = 0; i < config->tx_fifo_num; i++)
+               spin_lock_irqsave(&mac_control->fifos[i].tx_lock, flags[i]);
+
+       if (nic->vlgrp)
+               vlan_group_set_device(nic->vlgrp, vid, NULL);
+
+       for (i = config->tx_fifo_num - 1; i >= 0; i--)
+               spin_unlock_irqrestore(&mac_control->fifos[i].tx_lock,
+                       flags[i]);
+}
+
 /*
  * Constants to be programmed into the Xena's registers, to configure
  * the XAUI.
@@ -456,10 +464,9 @@ MODULE_VERSION(DRV_VERSION);
 
 
 /* Module Loadable parameters. */
-S2IO_PARM_INT(tx_fifo_num, 1);
+S2IO_PARM_INT(tx_fifo_num, FIFO_DEFAULT_NUM);
 S2IO_PARM_INT(rx_ring_num, 1);
-
-
+S2IO_PARM_INT(multiq, 0);
 S2IO_PARM_INT(rx_ring_mode, 1);
 S2IO_PARM_INT(use_continuous_tx_intrs, 1);
 S2IO_PARM_INT(rmac_pause_time, 0x100);
@@ -469,6 +476,8 @@ S2IO_PARM_INT(shared_splits, 0);
 S2IO_PARM_INT(tmac_util_period, 5);
 S2IO_PARM_INT(rmac_util_period, 5);
 S2IO_PARM_INT(l3l4hdr_size, 128);
+/* 0 is no steering, 1 is Priority steering, 2 is Default steering */
+S2IO_PARM_INT(tx_steering_type, TX_DEFAULT_STEERING);
 /* Frequency of Rx desc syncs expressed as power of 2 */
 S2IO_PARM_INT(rxsync_frequency, 3);
 /* Interrupt type. Values can be 0(INTA), 2(MSI_X) */
@@ -533,6 +542,101 @@ static struct pci_driver s2io_driver = {
 /* A simplifier macro used both by init and free shared_mem Fns(). */
 #define TXD_MEM_PAGE_CNT(len, per_each) ((len+per_each - 1) / per_each)
 
+/* netqueue manipulation helper functions */
+static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
+{
+       int i;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq) {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       netif_stop_subqueue(sp->dev, i);
+       } else
+#endif
+       {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_STOP;
+               netif_stop_queue(sp->dev);
+       }
+}
+
+static inline void s2io_stop_tx_queue(struct s2io_nic *sp, int fifo_no)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq)
+               netif_stop_subqueue(sp->dev, fifo_no);
+       else
+#endif
+       {
+               sp->mac_control.fifos[fifo_no].queue_state =
+                       FIFO_QUEUE_STOP;
+               netif_stop_queue(sp->dev);
+       }
+}
+
+static inline void s2io_start_all_tx_queue(struct s2io_nic *sp)
+{
+       int i;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq) {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       netif_start_subqueue(sp->dev, i);
+       } else
+#endif
+       {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
+               netif_start_queue(sp->dev);
+       }
+}
+
+static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq)
+               netif_start_subqueue(sp->dev, fifo_no);
+       else
+#endif
+       {
+               sp->mac_control.fifos[fifo_no].queue_state =
+                       FIFO_QUEUE_START;
+               netif_start_queue(sp->dev);
+       }
+}
+
+static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp)
+{
+       int i;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq) {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       netif_wake_subqueue(sp->dev, i);
+       } else
+#endif
+       {
+               for (i = 0; i < sp->config.tx_fifo_num; i++)
+                       sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
+               netif_wake_queue(sp->dev);
+       }
+}
+
+static inline void s2io_wake_tx_queue(
+       struct fifo_info *fifo, int cnt, u8 multiq)
+{
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (multiq) {
+               if (cnt && __netif_subqueue_stopped(fifo->dev, fifo->fifo_no))
+                       netif_wake_subqueue(fifo->dev, fifo->fifo_no);
+       } else
+#endif
+       if (cnt && (fifo->queue_state == FIFO_QUEUE_STOP)) {
+               if (netif_queue_stopped(fifo->dev)) {
+                       fifo->queue_state = FIFO_QUEUE_START;
+                       netif_wake_queue(fifo->dev);
+               }
+       }
+}
+
 /**
  * init_shared_mem - Allocation and Initialization of Memory
  * @nic: Device private variable.
@@ -614,6 +718,7 @@ static int init_shared_mem(struct s2io_nic *nic)
                mac_control->fifos[i].fifo_no = i;
                mac_control->fifos[i].nic = nic;
                mac_control->fifos[i].max_txds = MAX_SKB_FRAGS + 2;
+               mac_control->fifos[i].dev = dev;
 
                for (j = 0; j < page_num; j++) {
                        int k = 0;
@@ -704,6 +809,7 @@ static int init_shared_mem(struct s2io_nic *nic)
                    config->rx_cfg[i].num_rxd - 1;
                mac_control->rings[i].nic = nic;
                mac_control->rings[i].ring_no = i;
+               mac_control->rings[i].lro = lro_enable;
 
                blk_cnt = config->rx_cfg[i].num_rxd /
                                (rxd_count[nic->rxd_mode] + 1);
@@ -1079,8 +1185,67 @@ static int s2io_print_pci_mode(struct s2io_nic *nic)
 }
 
 /**
+ *  init_tti - Initialize the transmit traffic interrupt scheme
+ *  @nic: device private variable
+ *  @link: link status (UP/DOWN) used to enable/disable continuous
+ *  transmit interrupts
+ *  Description: Configures transmit traffic interrupts for each Tx FIFO
+ *  Return Value:  SUCCESS on success and
+ *  FAILURE if a TTI command does not complete
+ */
+
+static int init_tti(struct s2io_nic *nic, int link)
+{
+       struct XENA_dev_config __iomem *bar0 = nic->bar0;
+       register u64 val64 = 0;
+       int i;
+       struct config_param *config;
+
+       config = &nic->config;
+
+       for (i = 0; i < config->tx_fifo_num; i++) {
+               /*
+                * TTI Initialization. Default Tx timer gets us about
+                * 250 interrupts per sec. Continuous interrupts are enabled
+                * by default, but only while the link is up.
+                */
+               if (nic->device_type == XFRAME_II_DEVICE) {
+                       int count = (nic->config.bus_speed * 125)/2;
+                       val64 = TTI_DATA1_MEM_TX_TIMER_VAL(count);
+               } else
+                       val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078);
+
+               val64 |= TTI_DATA1_MEM_TX_URNG_A(0xA) |
+                               TTI_DATA1_MEM_TX_URNG_B(0x10) |
+                               TTI_DATA1_MEM_TX_URNG_C(0x30) |
+                               TTI_DATA1_MEM_TX_TIMER_AC_EN;
+
+               if (use_continuous_tx_intrs && (link == LINK_UP))
+                       val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN;
+               writeq(val64, &bar0->tti_data1_mem);
+
+               val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) |
+                               TTI_DATA2_MEM_TX_UFC_B(0x20) |
+                               TTI_DATA2_MEM_TX_UFC_C(0x40) |
+                               TTI_DATA2_MEM_TX_UFC_D(0x80);
+
+               writeq(val64, &bar0->tti_data2_mem);
+
+               val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD |
+                               TTI_CMD_MEM_OFFSET(i);
+               writeq(val64, &bar0->tti_command_mem);
+
+               if (wait_for_cmd_complete(&bar0->tti_command_mem,
+                       TTI_CMD_MEM_STROBE_NEW_CMD, S2IO_BIT_RESET) != SUCCESS)
+                       return FAILURE;
+       }
+
+       return SUCCESS;
+}
+
+/**
  *  init_nic - Initialization of hardware
- *  @nic: device peivate variable
+ *  @nic: device private variable
  *  Description: The function sequentially configures every block
  *  of the H/W from their reset values.
  *  Return Value:  SUCCESS on success and
@@ -1185,9 +1350,9 @@ static int init_nic(struct s2io_nic *nic)
 
        for (i = 0, j = 0; i < config->tx_fifo_num; i++) {
                val64 |=
-                   vBIT(config->tx_cfg[i].fifo_len - 1, ((i * 32) + 19),
+                   vBIT(config->tx_cfg[i].fifo_len - 1, ((j * 32) + 19),
                         13) | vBIT(config->tx_cfg[i].fifo_priority,
-                                   ((i * 32) + 5), 3);
+                                   ((j * 32) + 5), 3);
 
                if (i == (config->tx_fifo_num - 1)) {
                        if (i % 2 == 0)
@@ -1198,17 +1363,25 @@ static int init_nic(struct s2io_nic *nic)
                case 1:
                        writeq(val64, &bar0->tx_fifo_partition_0);
                        val64 = 0;
+                       j = 0;
                        break;
                case 3:
                        writeq(val64, &bar0->tx_fifo_partition_1);
                        val64 = 0;
+                       j = 0;
                        break;
                case 5:
                        writeq(val64, &bar0->tx_fifo_partition_2);
                        val64 = 0;
+                       j = 0;
                        break;
                case 7:
                        writeq(val64, &bar0->tx_fifo_partition_3);
+                       val64 = 0;
+                       j = 0;
+                       break;
+               default:
+                       j++;
                        break;
                }
        }
@@ -1294,11 +1467,11 @@ static int init_nic(struct s2io_nic *nic)
 
        /*
         * Filling Tx round robin registers
-        * as per the number of FIFOs
+        * as per the number of FIFOs for equal scheduling priority
         */
        switch (config->tx_fifo_num) {
        case 1:
-               val64 = 0x0000000000000000ULL;
+               val64 = 0x0;
                writeq(val64, &bar0->tx_w_round_robin_0);
                writeq(val64, &bar0->tx_w_round_robin_1);
                writeq(val64, &bar0->tx_w_round_robin_2);
@@ -1306,87 +1479,78 @@ static int init_nic(struct s2io_nic *nic)
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 2:
-               val64 = 0x0000010000010000ULL;
+               val64 = 0x0001000100010001ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0100000100000100ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0001000001000001ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0000010000010000ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0100000000000000ULL;
+               val64 = 0x0001000100000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 3:
-               val64 = 0x0001000102000001ULL;
+               val64 = 0x0001020001020001ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0001020000010001ULL;
+               val64 = 0x0200010200010200ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0200000100010200ULL;
+               val64 = 0x0102000102000102ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0001000102000001ULL;
+               val64 = 0x0001020001020001ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0001020000000000ULL;
+               val64 = 0x0200010200000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 4:
-               val64 = 0x0001020300010200ULL;
+               val64 = 0x0001020300010203ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0100000102030001ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0200010000010203ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0001020001000001ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0203000100000000ULL;
+               val64 = 0x0001020300000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 5:
-               val64 = 0x0001000203000102ULL;
+               val64 = 0x0001020304000102ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0001020001030004ULL;
+               val64 = 0x0304000102030400ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0001000203000102ULL;
+               val64 = 0x0102030400010203ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0001020001030004ULL;
+               val64 = 0x0400010203040001ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0001000000000000ULL;
+               val64 = 0x0203040000000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 6:
-               val64 = 0x0001020304000102ULL;
+               val64 = 0x0001020304050001ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0304050001020001ULL;
+               val64 = 0x0203040500010203ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0203000100000102ULL;
+               val64 = 0x0405000102030405ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0304000102030405ULL;
+               val64 = 0x0001020304050001ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0001000200000000ULL;
+               val64 = 0x0203040500000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 7:
-               val64 = 0x0001020001020300ULL;
+               val64 = 0x0001020304050600ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0102030400010203ULL;
+               val64 = 0x0102030405060001ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0405060001020001ULL;
+               val64 = 0x0203040506000102ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0304050000010200ULL;
+               val64 = 0x0304050600010203ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0102030000000000ULL;
+               val64 = 0x0405060000000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        case 8:
-               val64 = 0x0001020300040105ULL;
+               val64 = 0x0001020304050607ULL;
                writeq(val64, &bar0->tx_w_round_robin_0);
-               val64 = 0x0200030106000204ULL;
                writeq(val64, &bar0->tx_w_round_robin_1);
-               val64 = 0x0103000502010007ULL;
                writeq(val64, &bar0->tx_w_round_robin_2);
-               val64 = 0x0304010002060500ULL;
                writeq(val64, &bar0->tx_w_round_robin_3);
-               val64 = 0x0103020400000000ULL;
+               val64 = 0x0001020300000000ULL;
                writeq(val64, &bar0->tx_w_round_robin_4);
                break;
        }
@@ -1397,113 +1561,112 @@ static int init_nic(struct s2io_nic *nic)
        writeq(val64, &bar0->tx_fifo_partition_0);
 
        /* Filling the Rx round robin registers as per the
-        * number of Rings and steering based on QoS.
-         */
+        * number of Rings and steering based on QoS with
+        * equal priority.
+        */
        switch (config->rx_ring_num) {
        case 1:
+               val64 = 0x0;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
                val64 = 0x8080808080808080ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 2:
-               val64 = 0x0000010000010000ULL;
+               val64 = 0x0001000100010001ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0100000100000100ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0001000001000001ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0000010000010000ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0100000000000000ULL;
+               val64 = 0x0001000100000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080808040404040ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 3:
-               val64 = 0x0001000102000001ULL;
+               val64 = 0x0001020001020001ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0001020000010001ULL;
+               val64 = 0x0200010200010200ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0200000100010200ULL;
+               val64 = 0x0102000102000102ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0001000102000001ULL;
+               val64 = 0x0001020001020001ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0001020000000000ULL;
+               val64 = 0x0200010200000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080804040402020ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 4:
-               val64 = 0x0001020300010200ULL;
+               val64 = 0x0001020300010203ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0100000102030001ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0200010000010203ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0001020001000001ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0203000100000000ULL;
+               val64 = 0x0001020300000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080404020201010ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 5:
-               val64 = 0x0001000203000102ULL;
+               val64 = 0x0001020304000102ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0001020001030004ULL;
+               val64 = 0x0304000102030400ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0001000203000102ULL;
+               val64 = 0x0102030400010203ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0001020001030004ULL;
+               val64 = 0x0400010203040001ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0001000000000000ULL;
+               val64 = 0x0203040000000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080404020201008ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 6:
-               val64 = 0x0001020304000102ULL;
+               val64 = 0x0001020304050001ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0304050001020001ULL;
+               val64 = 0x0203040500010203ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0203000100000102ULL;
+               val64 = 0x0405000102030405ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0304000102030405ULL;
+               val64 = 0x0001020304050001ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0001000200000000ULL;
+               val64 = 0x0203040500000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080404020100804ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 7:
-               val64 = 0x0001020001020300ULL;
+               val64 = 0x0001020304050600ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0102030400010203ULL;
+               val64 = 0x0102030405060001ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0405060001020001ULL;
+               val64 = 0x0203040506000102ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0304050000010200ULL;
+               val64 = 0x0304050600010203ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0102030000000000ULL;
+               val64 = 0x0405060000000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8080402010080402ULL;
                writeq(val64, &bar0->rts_qos_steering);
                break;
        case 8:
-               val64 = 0x0001020300040105ULL;
+               val64 = 0x0001020304050607ULL;
                writeq(val64, &bar0->rx_w_round_robin_0);
-               val64 = 0x0200030106000204ULL;
                writeq(val64, &bar0->rx_w_round_robin_1);
-               val64 = 0x0103000502010007ULL;
                writeq(val64, &bar0->rx_w_round_robin_2);
-               val64 = 0x0304010002060500ULL;
                writeq(val64, &bar0->rx_w_round_robin_3);
-               val64 = 0x0103020400000000ULL;
+               val64 = 0x0001020300000000ULL;
                writeq(val64, &bar0->rx_w_round_robin_4);
 
                val64 = 0x8040201008040201ULL;
@@ -1563,58 +1726,14 @@ static int init_nic(struct s2io_nic *nic)
            MAC_RX_LINK_UTIL_VAL(rmac_util_period);
        writeq(val64, &bar0->mac_link_util);
 
-
        /*
         * Initializing the Transmit and Receive Traffic Interrupt
         * Scheme.
         */
-       /*
-        * TTI Initialization. Default Tx timer gets us about
-        * 250 interrupts per sec. Continuous interrupts are enabled
-        * by default.
-        */
-       if (nic->device_type == XFRAME_II_DEVICE) {
-               int count = (nic->config.bus_speed * 125)/2;
-               val64 = TTI_DATA1_MEM_TX_TIMER_VAL(count);
-       } else {
-
-               val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078);
-       }
-       val64 |= TTI_DATA1_MEM_TX_URNG_A(0xA) |
-           TTI_DATA1_MEM_TX_URNG_B(0x10) |
-           TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN;
-               if (use_continuous_tx_intrs)
-                       val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN;
-       writeq(val64, &bar0->tti_data1_mem);
-
-       val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) |
-           TTI_DATA2_MEM_TX_UFC_B(0x20) |
-           TTI_DATA2_MEM_TX_UFC_C(0x40) | TTI_DATA2_MEM_TX_UFC_D(0x80);
-       writeq(val64, &bar0->tti_data2_mem);
 
-       val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD;
-       writeq(val64, &bar0->tti_command_mem);
-
-       /*
-        * Once the operation completes, the Strobe bit of the command
-        * register will be reset. We poll for this particular condition
-        * We wait for a maximum of 500ms for the operation to complete,
-        * if it's not complete by then we return error.
-        */
-       time = 0;
-       while (TRUE) {
-               val64 = readq(&bar0->tti_command_mem);
-               if (!(val64 & TTI_CMD_MEM_STROBE_NEW_CMD)) {
-                       break;
-               }
-               if (time > 10) {
-                       DBG_PRINT(ERR_DBG, "%s: TTI init Failed\n",
-                                 dev->name);
-                       return -ENODEV;
-               }
-               msleep(50);
-               time++;
-       }
+       /* Initialize TTI */
+       if (SUCCESS != init_tti(nic, nic->last_link_state))
+               return -ENODEV;
 
        /* RTI Initialization */
        if (nic->device_type == XFRAME_II_DEVICE) {
@@ -2325,7 +2444,7 @@ static void free_tx_buffers(struct s2io_nic *nic)
        for (i = 0; i < config->tx_fifo_num; i++) {
                unsigned long flags;
                spin_lock_irqsave(&mac_control->fifos[i].tx_lock, flags);
-               for (j = 0; j < config->tx_cfg[i].fifo_len - 1; j++) {
+               for (j = 0; j < config->tx_cfg[i].fifo_len; j++) {
                        txdp = (struct TxD *) \
                        mac_control->fifos[i].list_info[j].list_virt_addr;
                        skb = s2io_txdl_getskb(&mac_control->fifos[i], txdp, j);
@@ -2380,8 +2499,7 @@ static void stop_nic(struct s2io_nic *nic)
 
 /**
  *  fill_rx_buffers - Allocates the Rx side skbs
- *  @nic:  device private variable
- *  @ring_no: ring number
+ *  @ring: per ring structure
  *  Description:
  *  The function allocates Rx side skbs and puts the physical
  *  address of these buffers into the RxD buffer pointers, so that the NIC
@@ -2399,112 +2517,94 @@ static void stop_nic(struct s2io_nic *nic)
  *  SUCCESS on success or an appropriate -ve value on failure.
  */
 
-static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
+static int fill_rx_buffers(struct ring_info *ring)
 {
-       struct net_device *dev = nic->dev;
        struct sk_buff *skb;
        struct RxD_t *rxdp;
-       int off, off1, size, block_no, block_no1;
+       int off, size, block_no, block_no1;
        u32 alloc_tab = 0;
        u32 alloc_cnt;
-       struct mac_info *mac_control;
-       struct config_param *config;
        u64 tmp;
        struct buffAdd *ba;
-       unsigned long flags;
        struct RxD_t *first_rxdp = NULL;
        u64 Buffer0_ptr = 0, Buffer1_ptr = 0;
+       int rxd_index = 0;
        struct RxD1 *rxdp1;
        struct RxD3 *rxdp3;
-       struct swStat *stats = &nic->mac_control.stats_info->sw_stat;
+       struct swStat *stats = &ring->nic->mac_control.stats_info->sw_stat;
 
-       mac_control = &nic->mac_control;
-       config = &nic->config;
-       alloc_cnt = mac_control->rings[ring_no].pkt_cnt -
-           atomic_read(&nic->rx_bufs_left[ring_no]);
+       alloc_cnt = ring->pkt_cnt - ring->rx_bufs_left;
 
-       block_no1 = mac_control->rings[ring_no].rx_curr_get_info.block_index;
-       off1 = mac_control->rings[ring_no].rx_curr_get_info.offset;
+       block_no1 = ring->rx_curr_get_info.block_index;
        while (alloc_tab < alloc_cnt) {
-               block_no = mac_control->rings[ring_no].rx_curr_put_info.
-                   block_index;
-               off = mac_control->rings[ring_no].rx_curr_put_info.offset;
+               block_no = ring->rx_curr_put_info.block_index;
 
-               rxdp = mac_control->rings[ring_no].
-                               rx_blocks[block_no].rxds[off].virt_addr;
+               off = ring->rx_curr_put_info.offset;
+
+               rxdp = ring->rx_blocks[block_no].rxds[off].virt_addr;
 
-               if ((block_no == block_no1) && (off == off1) &&
-                                       (rxdp->Host_Control)) {
+               rxd_index = off + 1;
+               if (block_no)
+                       rxd_index += (block_no * ring->rxd_count);
+
+               if ((block_no == block_no1) && 
+                       (off == ring->rx_curr_get_info.offset) &&
+                       (rxdp->Host_Control)) {
                        DBG_PRINT(INTR_DBG, "%s: Get and Put",
-                                 dev->name);
+                               ring->dev->name);
                        DBG_PRINT(INTR_DBG, " info equated\n");
                        goto end;
                }
-               if (off && (off == rxd_count[nic->rxd_mode])) {
-                       mac_control->rings[ring_no].rx_curr_put_info.
-                           block_index++;
-                       if (mac_control->rings[ring_no].rx_curr_put_info.
-                           block_index == mac_control->rings[ring_no].
-                                       block_count)
-                               mac_control->rings[ring_no].rx_curr_put_info.
-                                       block_index = 0;
-                       block_no = mac_control->rings[ring_no].
-                                       rx_curr_put_info.block_index;
-                       if (off == rxd_count[nic->rxd_mode])
-                               off = 0;
-                       mac_control->rings[ring_no].rx_curr_put_info.
-                               offset = off;
-                       rxdp = mac_control->rings[ring_no].
-                               rx_blocks[block_no].block_virt_addr;
+               if (off && (off == ring->rxd_count)) {
+                       ring->rx_curr_put_info.block_index++;
+                       if (ring->rx_curr_put_info.block_index ==
+                                                       ring->block_count)
+                               ring->rx_curr_put_info.block_index = 0;
+                       block_no = ring->rx_curr_put_info.block_index;
+                       off = 0;
+                       ring->rx_curr_put_info.offset = off;
+                       rxdp = ring->rx_blocks[block_no].block_virt_addr;
                        DBG_PRINT(INTR_DBG, "%s: Next block at: %p\n",
-                                 dev->name, rxdp);
-               }
-               if(!napi) {
-                       spin_lock_irqsave(&nic->put_lock, flags);
-                       mac_control->rings[ring_no].put_pos =
-                       (block_no * (rxd_count[nic->rxd_mode] + 1)) + off;
-                       spin_unlock_irqrestore(&nic->put_lock, flags);
-               } else {
-                       mac_control->rings[ring_no].put_pos =
-                       (block_no * (rxd_count[nic->rxd_mode] + 1)) + off;
+                                 ring->dev->name, rxdp);
+
                }
+
                if ((rxdp->Control_1 & RXD_OWN_XENA) &&
-                       ((nic->rxd_mode == RXD_MODE_3B) &&
+                       ((ring->rxd_mode == RXD_MODE_3B) &&
                                (rxdp->Control_2 & s2BIT(0)))) {
-                       mac_control->rings[ring_no].rx_curr_put_info.
-                                       offset = off;
+                       ring->rx_curr_put_info.offset = off;
                        goto end;
                }
                /* calculate size of skb based on ring mode */
-               size = dev->mtu + HEADER_ETHERNET_II_802_3_SIZE +
+               size = ring->mtu + HEADER_ETHERNET_II_802_3_SIZE +
                                HEADER_802_2_SIZE + HEADER_SNAP_SIZE;
-               if (nic->rxd_mode == RXD_MODE_1)
+               if (ring->rxd_mode == RXD_MODE_1)
                        size += NET_IP_ALIGN;
                else
-                       size = dev->mtu + ALIGN_SIZE + BUF0_LEN + 4;
+                       size = ring->mtu + ALIGN_SIZE + BUF0_LEN + 4;
 
                /* allocate skb */
                skb = dev_alloc_skb(size);
                if(!skb) {
-                       DBG_PRINT(INFO_DBG, "%s: Out of ", dev->name);
+                       DBG_PRINT(INFO_DBG, "%s: Out of ", ring->dev->name);
                        DBG_PRINT(INFO_DBG, "memory to allocate SKBs\n");
                        if (first_rxdp) {
                                wmb();
                                first_rxdp->Control_1 |= RXD_OWN_XENA;
                        }
-                       nic->mac_control.stats_info->sw_stat. \
-                               mem_alloc_fail_cnt++;
+                       stats->mem_alloc_fail_cnt++;
+                               
                        return -ENOMEM ;
                }
-               nic->mac_control.stats_info->sw_stat.mem_allocated
-                       += skb->truesize;
-               if (nic->rxd_mode == RXD_MODE_1) {
+               stats->mem_allocated += skb->truesize;
+
+               if (ring->rxd_mode == RXD_MODE_1) {
                        /* 1 buffer mode - normal operation mode */
                        rxdp1 = (struct RxD1*)rxdp;
                        memset(rxdp, 0, sizeof(struct RxD1));
                        skb_reserve(skb, NET_IP_ALIGN);
                        rxdp1->Buffer0_ptr = pci_map_single
-                           (nic->pdev, skb->data, size - NET_IP_ALIGN,
+                           (ring->pdev, skb->data, size - NET_IP_ALIGN,
                                PCI_DMA_FROMDEVICE);
                        if( (rxdp1->Buffer0_ptr == 0) ||
                                (rxdp1->Buffer0_ptr ==
@@ -2513,8 +2613,8 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 
                        rxdp->Control_2 =
                                SET_BUFFER0_SIZE_1(size - NET_IP_ALIGN);
-
-               } else if (nic->rxd_mode == RXD_MODE_3B) {
+                       rxdp->Host_Control = (unsigned long) (skb);
+               } else if (ring->rxd_mode == RXD_MODE_3B) {
                        /*
                         * 2 buffer mode -
                         * 2 buffer mode provides 128
@@ -2530,7 +2630,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                        rxdp3->Buffer0_ptr = Buffer0_ptr;
                        rxdp3->Buffer1_ptr = Buffer1_ptr;
 
-                       ba = &mac_control->rings[ring_no].ba[block_no][off];
+                       ba = &ring->ba[block_no][off];
                        skb_reserve(skb, BUF0_LEN);
                        tmp = (u64)(unsigned long) skb->data;
                        tmp += ALIGN_SIZE;
@@ -2540,10 +2640,10 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 
                        if (!(rxdp3->Buffer0_ptr))
                                rxdp3->Buffer0_ptr =
-                                  pci_map_single(nic->pdev, ba->ba_0, BUF0_LEN,
-                                          PCI_DMA_FROMDEVICE);
+                                  pci_map_single(ring->pdev, ba->ba_0,
+                                       BUF0_LEN, PCI_DMA_FROMDEVICE);
                        else
-                               pci_dma_sync_single_for_device(nic->pdev,
+                               pci_dma_sync_single_for_device(ring->pdev,
                                (dma_addr_t) rxdp3->Buffer0_ptr,
                                    BUF0_LEN, PCI_DMA_FROMDEVICE);
                        if( (rxdp3->Buffer0_ptr == 0) ||
@@ -2551,7 +2651,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                                goto pci_map_failed;
 
                        rxdp->Control_2 = SET_BUFFER0_SIZE_3(BUF0_LEN);
-                       if (nic->rxd_mode == RXD_MODE_3B) {
+                       if (ring->rxd_mode == RXD_MODE_3B) {
                                /* Two buffer mode */
 
                                /*
@@ -2559,39 +2659,42 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                                  * L4 payload
                                  */
                                 rxdp3->Buffer2_ptr = pci_map_single
-                               (nic->pdev, skb->data, dev->mtu + 4,
+                               (ring->pdev, skb->data, ring->mtu + 4,
                                                 PCI_DMA_FROMDEVICE);
 
                                 if( (rxdp3->Buffer2_ptr == 0) ||
                                         (rxdp3->Buffer2_ptr == DMA_ERROR_CODE))
                                         goto pci_map_failed;
 
-                               rxdp3->Buffer1_ptr =
-                                               pci_map_single(nic->pdev,
+                               if (!rxdp3->Buffer1_ptr)
+                                       rxdp3->Buffer1_ptr =
+                                               pci_map_single(ring->pdev,
                                                 ba->ba_1, BUF1_LEN,
                                                 PCI_DMA_FROMDEVICE);
+
                                 if( (rxdp3->Buffer1_ptr == 0) ||
                                         (rxdp3->Buffer1_ptr == DMA_ERROR_CODE)) {
                                         pci_unmap_single
-                                               (nic->pdev,
-                                               (dma_addr_t)rxdp3->Buffer2_ptr,
-                                               dev->mtu + 4,
+                                               (ring->pdev,
+                                               (dma_addr_t)
+                                               rxdp3->Buffer2_ptr,
+                                               ring->mtu + 4,
                                                 PCI_DMA_FROMDEVICE);
                                         goto pci_map_failed;
                                 }
                                 rxdp->Control_2 |= SET_BUFFER1_SIZE_3(1);
                                 rxdp->Control_2 |= SET_BUFFER2_SIZE_3
-                                                               (dev->mtu + 4);
+                                                               (ring->mtu + 4);
                         }
                         rxdp->Control_2 |= s2BIT(0);
+                       rxdp->Host_Control = (unsigned long) (skb);
                 }
-               rxdp->Host_Control = (unsigned long) (skb);
                 if (alloc_tab & ((1 << rxsync_frequency) - 1))
                         rxdp->Control_1 |= RXD_OWN_XENA;
                 off++;
-               if (off == (rxd_count[nic->rxd_mode] + 1))
+               if (off == (ring->rxd_count + 1))
                         off = 0;
-               mac_control->rings[ring_no].rx_curr_put_info.offset = off;
+               ring->rx_curr_put_info.offset = off;
 
                 rxdp->Control_2 |= SET_RXD_MARKER;
                 if (!(alloc_tab & ((1 << rxsync_frequency) - 1))) {
@@ -2601,7 +2704,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                        }
                        first_rxdp = rxdp;
                }
-               atomic_inc(&nic->rx_bufs_left[ring_no]);
+               ring->rx_bufs_left += 1;
                alloc_tab++;
        }
 
@@ -2673,7 +2776,7 @@ static void free_rxd_blk(struct s2io_nic *sp, int ring_no, int blk)
                }
                sp->mac_control.stats_info->sw_stat.mem_freed += skb->truesize;
                dev_kfree_skb(skb);
-               atomic_dec(&sp->rx_bufs_left[ring_no]);
+               mac_control->rings[ring_no].rx_bufs_left -= 1;
        }
 }
 
@@ -2704,7 +2807,7 @@ static void free_rx_buffers(struct s2io_nic *sp)
                mac_control->rings[i].rx_curr_get_info.block_index = 0;
                mac_control->rings[i].rx_curr_put_info.offset = 0;
                mac_control->rings[i].rx_curr_get_info.offset = 0;
-               atomic_set(&sp->rx_bufs_left[i], 0);
+               mac_control->rings[i].rx_bufs_left = 0;
                DBG_PRINT(INIT_DBG, "%s:Freed 0x%x Rx Buffers on ring%d\n",
                          dev->name, buf_cnt, i);
        }
@@ -2754,7 +2857,7 @@ static int s2io_poll(struct napi_struct *napi, int budget)
        netif_rx_complete(dev, napi);
 
        for (i = 0; i < config->rx_ring_num; i++) {
-               if (fill_rx_buffers(nic, i) == -ENOMEM) {
+               if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) {
                        DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name);
                        DBG_PRINT(INFO_DBG, " in Rx Poll!!\n");
                        break;
@@ -2767,7 +2870,7 @@ static int s2io_poll(struct napi_struct *napi, int budget)
 
 no_rx:
        for (i = 0; i < config->rx_ring_num; i++) {
-               if (fill_rx_buffers(nic, i) == -ENOMEM) {
+               if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) {
                        DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name);
                        DBG_PRINT(INFO_DBG, " in Rx Poll!!\n");
                        break;
@@ -2818,7 +2921,7 @@ static void s2io_netpoll(struct net_device *dev)
                rx_intr_handler(&mac_control->rings[i]);
 
        for (i = 0; i < config->rx_ring_num; i++) {
-               if (fill_rx_buffers(nic, i) == -ENOMEM) {
+               if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) {
                        DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name);
                        DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n");
                        break;
@@ -2843,9 +2946,7 @@ static void s2io_netpoll(struct net_device *dev)
  */
 static void rx_intr_handler(struct ring_info *ring_data)
 {
-       struct s2io_nic *nic = ring_data->nic;
-       struct net_device *dev = (struct net_device *) nic->dev;
-       int get_block, put_block, put_offset;
+       int get_block, put_block;
        struct rx_curr_get_info get_info, put_info;
        struct RxD_t *rxdp;
        struct sk_buff *skb;
@@ -2854,19 +2955,11 @@ static void rx_intr_handler(struct ring_info *ring_data)
        struct RxD1* rxdp1;
        struct RxD3* rxdp3;
 
-       spin_lock(&nic->rx_lock);
-
        get_info = ring_data->rx_curr_get_info;
        get_block = get_info.block_index;
        memcpy(&put_info, &ring_data->rx_curr_put_info, sizeof(put_info));
        put_block = put_info.block_index;
        rxdp = ring_data->rx_blocks[get_block].rxds[get_info.offset].virt_addr;
-       if (!napi) {
-               spin_lock(&nic->put_lock);
-               put_offset = ring_data->put_pos;
-               spin_unlock(&nic->put_lock);
-       } else
-               put_offset = ring_data->put_pos;
 
        while (RXD_IS_UP2DT(rxdp)) {
                /*
@@ -2875,34 +2968,34 @@ static void rx_intr_handler(struct ring_info *ring_data)
                 */
                if ((get_block == put_block) &&
                    (get_info.offset + 1) == put_info.offset) {
-                       DBG_PRINT(INTR_DBG, "%s: Ring Full\n",dev->name);
+                       DBG_PRINT(INTR_DBG, "%s: Ring Full\n",
+                               ring_data->dev->name);
                        break;
                }
                skb = (struct sk_buff *) ((unsigned long)rxdp->Host_Control);
                if (skb == NULL) {
                        DBG_PRINT(ERR_DBG, "%s: The skb is ",
-                                 dev->name);
+                                 ring_data->dev->name);
                        DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
-                       spin_unlock(&nic->rx_lock);
                        return;
                }
-               if (nic->rxd_mode == RXD_MODE_1) {
+               if (ring_data->rxd_mode == RXD_MODE_1) {
                        rxdp1 = (struct RxD1*)rxdp;
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
+                       pci_unmap_single(ring_data->pdev, (dma_addr_t)
                                rxdp1->Buffer0_ptr,
-                               dev->mtu +
+                               ring_data->mtu +
                                HEADER_ETHERNET_II_802_3_SIZE +
                                HEADER_802_2_SIZE +
                                HEADER_SNAP_SIZE,
                                PCI_DMA_FROMDEVICE);
-               } else if (nic->rxd_mode == RXD_MODE_3B) {
+               } else if (ring_data->rxd_mode == RXD_MODE_3B) {
                        rxdp3 = (struct RxD3*)rxdp;
-                       pci_dma_sync_single_for_cpu(nic->pdev, (dma_addr_t)
+                       pci_dma_sync_single_for_cpu(ring_data->pdev, (dma_addr_t)
                                rxdp3->Buffer0_ptr,
                                BUF0_LEN, PCI_DMA_FROMDEVICE);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
+                       pci_unmap_single(ring_data->pdev, (dma_addr_t)
                                rxdp3->Buffer2_ptr,
-                               dev->mtu + 4,
+                               ring_data->mtu + 4,
                                PCI_DMA_FROMDEVICE);
                }
                prefetch(skb->data);
@@ -2911,7 +3004,7 @@ static void rx_intr_handler(struct ring_info *ring_data)
                ring_data->rx_curr_get_info.offset = get_info.offset;
                rxdp = ring_data->rx_blocks[get_block].
                                rxds[get_info.offset].virt_addr;
-               if (get_info.offset == rxd_count[nic->rxd_mode]) {
+               if (get_info.offset == rxd_count[ring_data->rxd_mode]) {
                        get_info.offset = 0;
                        ring_data->rx_curr_get_info.offset = get_info.offset;
                        get_block++;
@@ -2921,26 +3014,26 @@ static void rx_intr_handler(struct ring_info *ring_data)
                        rxdp = ring_data->rx_blocks[get_block].block_virt_addr;
                }
 
-               nic->pkts_to_process -= 1;
-               if ((napi) && (!nic->pkts_to_process))
-                       break;
+               if(ring_data->nic->config.napi){
+                       ring_data->nic->pkts_to_process -= 1;
+                       if (!ring_data->nic->pkts_to_process)
+                               break;
+               }
                pkt_cnt++;
                if ((indicate_max_pkts) && (pkt_cnt > indicate_max_pkts))
                        break;
        }
-       if (nic->lro) {
+       if (ring_data->lro) {
                /* Clear all LRO sessions before exiting */
                for (i=0; i<MAX_LRO_SESSIONS; i++) {
-                       struct lro *lro = &nic->lro0_n[i];
+                       struct lro *lro = &ring_data->lro0_n[i];
                        if (lro->in_use) {
-                               update_L3L4_header(nic, lro);
-                               queue_rx_frame(lro->parent);
+                               update_L3L4_header(ring_data->nic, lro);
+                               queue_rx_frame(lro->parent, lro->vlan_tag);
                                clear_lro_session(lro);
                        }
                }
        }
-
-       spin_unlock(&nic->rx_lock);
 }
 
 /**
@@ -2958,10 +3051,10 @@ static void rx_intr_handler(struct ring_info *ring_data)
 static void tx_intr_handler(struct fifo_info *fifo_data)
 {
        struct s2io_nic *nic = fifo_data->nic;
-       struct net_device *dev = (struct net_device *) nic->dev;
        struct tx_curr_get_info get_info, put_info;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        struct TxD *txdlp;
+       int pkt_cnt = 0;
        unsigned long flags = 0;
        u8 err_mask;
 
@@ -3022,6 +3115,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data)
                        DBG_PRINT(ERR_DBG, "in Tx Free Intr\n");
                        return;
                }
+               pkt_cnt++;
 
                /* Updating the statistics block */
                nic->stats.tx_bytes += skb->len;
@@ -3037,8 +3131,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data)
                    get_info.offset;
        }
 
-       if (netif_queue_stopped(dev))
-               netif_wake_queue(dev);
+       s2io_wake_tx_queue(fifo_data, pkt_cnt, nic->config.multiq);
 
        spin_unlock_irqrestore(&fifo_data->tx_lock, flags);
 }
@@ -3919,8 +4012,7 @@ static int s2io_open(struct net_device *dev)
                err = -ENODEV;
                goto hw_init_failed;
        }
-
-       netif_start_queue(dev);
+       s2io_start_all_tx_queue(sp);
        return 0;
 
 hw_init_failed:
@@ -3965,8 +4057,7 @@ static int s2io_close(struct net_device *dev)
        if (!is_s2io_card_up(sp))
                return 0;
 
-       netif_stop_queue(dev);
-
+       s2io_stop_all_tx_queue(sp);
        /* delete all populated mac entries */
        for (offset = 1; offset < config->max_mc_addr; offset++) {
                tmp64 = do_s2io_read_unicast_mc(sp, offset);
@@ -3974,7 +4065,6 @@ static int s2io_close(struct net_device *dev)
                        do_s2io_delete_unicast_mc(sp, tmp64);
        }
 
-       /* Reset card, kill tasklet and free Tx and Rx buffers. */
        s2io_card_down(sp);
 
        return 0;
@@ -4002,11 +4092,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        struct TxFIFO_element __iomem *tx_fifo;
        unsigned long flags = 0;
        u16 vlan_tag = 0;
-       int vlan_priority = 0;
        struct fifo_info *fifo = NULL;
        struct mac_info *mac_control;
        struct config_param *config;
+       int do_spin_lock = 1;
        int offload_type;
+       int enable_per_list_interrupt = 0;
        struct swStat *stats = &sp->mac_control.stats_info->sw_stat;
 
        mac_control = &sp->mac_control;
@@ -4028,15 +4119,67 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        queue = 0;
-       /* Get Fifo number to Transmit based on vlan priority */
-       if (sp->vlgrp && vlan_tx_tag_present(skb)) {
+       if (sp->vlgrp && vlan_tx_tag_present(skb))
                vlan_tag = vlan_tx_tag_get(skb);
-               vlan_priority = vlan_tag >> 13;
-               queue = config->fifo_mapping[vlan_priority];
+       if (sp->config.tx_steering_type == TX_DEFAULT_STEERING) {
+               if (skb->protocol == htons(ETH_P_IP)) {
+                       struct iphdr *ip;
+                       struct tcphdr *th;
+                       ip = ip_hdr(skb);
+
+                       if ((ip->frag_off & htons(IP_OFFSET|IP_MF)) == 0) {
+                               th = (struct tcphdr *)(((unsigned char *)ip) +
+                                               ip->ihl*4);
+
+                               if (ip->protocol == IPPROTO_TCP) {
+                                       queue_len = sp->total_tcp_fifos;
+                                       queue = (ntohs(th->source) +
+                                                       ntohs(th->dest)) &
+                                           sp->fifo_selector[queue_len - 1];
+                                       if (queue >= queue_len)
+                                               queue = queue_len - 1;
+                               } else if (ip->protocol == IPPROTO_UDP) {
+                                       queue_len = sp->total_udp_fifos;
+                                       queue = (ntohs(th->source) +
+                                                       ntohs(th->dest)) &
+                                           sp->fifo_selector[queue_len - 1];
+                                       if (queue >= queue_len)
+                                               queue = queue_len - 1;
+                                       queue += sp->udp_fifo_idx;
+                                       if (skb->len > 1024)
+                                               enable_per_list_interrupt = 1;
+                                       do_spin_lock = 0;
+                               }
+                       }
+               }
+       } else if (sp->config.tx_steering_type == TX_PRIORITY_STEERING)
+               /* get fifo number based on skb->priority value */
+               queue = config->fifo_mapping
+                                       [skb->priority & (MAX_TX_FIFOS - 1)];
+       fifo = &mac_control->fifos[queue];
+
+       if (do_spin_lock)
+               spin_lock_irqsave(&fifo->tx_lock, flags);
+       else {
+               if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
+                       return NETDEV_TX_LOCKED;
+       }
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (sp->config.multiq) {
+               if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
+                       spin_unlock_irqrestore(&fifo->tx_lock, flags);
+                       return NETDEV_TX_BUSY;
+               }
+       } else
+#endif
+       if (unlikely(fifo->queue_state == FIFO_QUEUE_STOP)) {
+               if (netif_queue_stopped(dev)) {
+                       spin_unlock_irqrestore(&fifo->tx_lock, flags);
+                       return NETDEV_TX_BUSY;
+               }
        }
 
-       fifo = &mac_control->fifos[queue];
-       spin_lock_irqsave(&fifo->tx_lock, flags);
        put_off = (u16) fifo->tx_curr_put_info.offset;
        get_off = (u16) fifo->tx_curr_get_info.offset;
        txdp = (struct TxD *) fifo->list_info[put_off].list_virt_addr;
@@ -4046,7 +4189,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        if (txdp->Host_Control ||
                   ((put_off+1) == queue_len ? 0 : (put_off+1)) == get_off) {
                DBG_PRINT(TX_DBG, "Error in xmit, No free TXDs.\n");
-               netif_stop_queue(dev);
+               s2io_stop_tx_queue(sp, fifo->fifo_no);
                dev_kfree_skb(skb);
                spin_unlock_irqrestore(&fifo->tx_lock, flags);
                return 0;
@@ -4065,8 +4208,10 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        txdp->Control_1 |= TXD_GATHER_CODE_FIRST;
        txdp->Control_1 |= TXD_LIST_OWN_XENA;
        txdp->Control_2 |= TXD_INT_NUMBER(fifo->fifo_no);
-
-       if (sp->vlgrp && vlan_tx_tag_present(skb)) {
+       if (enable_per_list_interrupt)
+               if (put_off & (queue_len >> 5))
+                       txdp->Control_2 |= TXD_INT_TYPE_PER_LIST;
+       if (vlan_tag) {
                txdp->Control_2 |= TXD_VLAN_ENABLE;
                txdp->Control_2 |= TXD_VLAN_TAG(vlan_tag);
        }
@@ -4081,11 +4226,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
                txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
                txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
 #ifdef __BIG_ENDIAN
+               /* both variants do cpu_to_be64(be32_to_cpu(...)) */
                fifo->ufo_in_band_v[put_off] =
-                               (u64)skb_shinfo(skb)->ip6_frag_id;
+                               (__force u64)skb_shinfo(skb)->ip6_frag_id;
 #else
                fifo->ufo_in_band_v[put_off] =
-                               (u64)skb_shinfo(skb)->ip6_frag_id << 32;
+                               (__force u64)skb_shinfo(skb)->ip6_frag_id << 32;
 #endif
                txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v;
                txdp->Buffer_Pointer = pci_map_single(sp->pdev,
@@ -4152,16 +4298,19 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
                DBG_PRINT(TX_DBG,
                          "No free TxDs for xmit, Put: 0x%x Get:0x%x\n",
                          put_off, get_off);
-               netif_stop_queue(dev);
+               s2io_stop_tx_queue(sp, fifo->fifo_no);
        }
        mac_control->stats_info->sw_stat.mem_allocated += skb->truesize;
        dev->trans_start = jiffies;
        spin_unlock_irqrestore(&fifo->tx_lock, flags);
 
+       if (sp->config.intr_type == MSI_X)
+               tx_intr_handler(fifo);
+
        return 0;
 pci_map_failed:
        stats->pci_map_fail_cnt++;
-       netif_stop_queue(dev);
+       s2io_stop_tx_queue(sp, fifo->fifo_no);
        stats->mem_freed += skb->truesize;
        dev_kfree_skb(skb);
        spin_unlock_irqrestore(&fifo->tx_lock, flags);
@@ -4178,31 +4327,11 @@ s2io_alarm_handle(unsigned long data)
        mod_timer(&sp->alarm_timer, jiffies + HZ / 2);
 }
 
-static int s2io_chk_rx_buffers(struct s2io_nic *sp, int rng_n)
+static int s2io_chk_rx_buffers(struct ring_info *ring)
 {
-       int rxb_size, level;
-
-       if (!sp->lro) {
-               rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
-               level = rx_buffer_level(sp, rxb_size, rng_n);
-
-               if ((level == PANIC) && (!TASKLET_IN_USE)) {
-                       int ret;
-                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
-                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
-                       if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
-                               DBG_PRINT(INFO_DBG, "Out of memory in %s",
-                                         __FUNCTION__);
-                               clear_bit(0, (&sp->tasklet_status));
-                               return -1;
-                       }
-                       clear_bit(0, (&sp->tasklet_status));
-               } else if (level == LOW)
-                       tasklet_schedule(&sp->task);
-
-       } else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
-                       DBG_PRINT(INFO_DBG, "%s:Out of memory", sp->dev->name);
-                       DBG_PRINT(INFO_DBG, " in Rx Intr!!\n");
+       if (fill_rx_buffers(ring) == -ENOMEM) {
+               DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name);
+               DBG_PRINT(INFO_DBG, " in Rx Intr!!\n");
        }
        return 0;
 }
@@ -4216,7 +4345,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id)
                return IRQ_HANDLED;
 
        rx_intr_handler(ring);
-       s2io_chk_rx_buffers(sp, ring->ring_no);
+       s2io_chk_rx_buffers(ring);
 
        return IRQ_HANDLED;
 }
@@ -4573,7 +4702,7 @@ static void s2io_handle_errors(void * dev_id)
        return;
 
 reset:
-       netif_stop_queue(dev);
+       s2io_stop_all_tx_queue(sp);
        schedule_work(&sp->rst_timer_task);
        sw_stat->soft_reset_cnt++;
        return;
@@ -4674,7 +4803,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id)
                 */
                if (!config->napi) {
                        for (i = 0; i < config->rx_ring_num; i++)
-                               s2io_chk_rx_buffers(sp, i);
+                               s2io_chk_rx_buffers(&mac_control->rings[i]);
                }
                writeq(sp->general_int_mask, &bar0->general_int_mask);
                readl(&bar0->general_int_status);
@@ -4731,6 +4860,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
        struct s2io_nic *sp = dev->priv;
        struct mac_info *mac_control;
        struct config_param *config;
+       int i;
 
 
        mac_control = &sp->mac_control;
@@ -4750,6 +4880,13 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
        sp->stats.rx_length_errors =
                le64_to_cpu(mac_control->stats_info->rmac_long_frms);
 
+       /* collect per-ring rx_packets and rx_bytes */
+       sp->stats.rx_packets = sp->stats.rx_bytes = 0;
+       for (i = 0; i < config->rx_ring_num; i++) {
+               sp->stats.rx_packets += mac_control->rings[i].rx_packets;
+               sp->stats.rx_bytes += mac_control->rings[i].rx_bytes;
+       }
+
        return (&sp->stats);
 }
 
@@ -6560,16 +6697,15 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
 
        dev->mtu = new_mtu;
        if (netif_running(dev)) {
+               s2io_stop_all_tx_queue(sp);
                s2io_card_down(sp);
-               netif_stop_queue(dev);
                ret = s2io_card_up(sp);
                if (ret) {
                        DBG_PRINT(ERR_DBG, "%s: Device bring up failed\n",
                                  __FUNCTION__);
                        return ret;
                }
-               if (netif_queue_stopped(dev))
-                       netif_wake_queue(dev);
+               s2io_wake_all_tx_queue(sp);
        } else { /* Device is down */
                struct XENA_dev_config __iomem *bar0 = sp->bar0;
                u64 val64 = new_mtu;
@@ -6581,49 +6717,6 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 /**
- *  s2io_tasklet - Bottom half of the ISR.
- *  @dev_adr : address of the device structure in dma_addr_t format.
- *  Description:
- *  This is the tasklet or the bottom half of the ISR. This is
- *  an extension of the ISR which is scheduled by the scheduler to be run
- *  when the load on the CPU is low. All low priority tasks of the ISR can
- *  be pushed into the tasklet. For now the tasklet is used only to
- *  replenish the Rx buffers in the Rx buffer descriptors.
- *  Return value:
- *  void.
- */
-
-static void s2io_tasklet(unsigned long dev_addr)
-{
-       struct net_device *dev = (struct net_device *) dev_addr;
-       struct s2io_nic *sp = dev->priv;
-       int i, ret;
-       struct mac_info *mac_control;
-       struct config_param *config;
-
-       mac_control = &sp->mac_control;
-       config = &sp->config;
-
-       if (!TASKLET_IN_USE) {
-               for (i = 0; i < config->rx_ring_num; i++) {
-                       ret = fill_rx_buffers(sp, i);
-                       if (ret == -ENOMEM) {
-                               DBG_PRINT(INFO_DBG, "%s: Out of ",
-                                         dev->name);
-                               DBG_PRINT(INFO_DBG, "memory in tasklet\n");
-                               break;
-                       } else if (ret == -EFILL) {
-                               DBG_PRINT(INFO_DBG,
-                                         "%s: Rx Ring %d is full\n",
-                                         dev->name, i);
-                               break;
-                       }
-               }
-               clear_bit(0, (&sp->tasklet_status));
-       }
-}
-
-/**
  * s2io_set_link - Set the LInk status
  * @data: long pointer to device private structue
  * Description: Sets the link status for the adapter
@@ -6677,7 +6770,7 @@ static void s2io_set_link(struct work_struct *work)
                        } else {
                                DBG_PRINT(ERR_DBG, "%s: Error: ", dev->name);
                                DBG_PRINT(ERR_DBG, "device is not Quiescent\n");
-                               netif_stop_queue(dev);
+                               s2io_stop_all_tx_queue(nic);
                        }
                }
                val64 = readq(&bar0->adapter_control);
@@ -6904,11 +6997,11 @@ static int s2io_add_isr(struct s2io_nic * sp)
                                if(!(sp->msix_info[i].addr &&
                                        sp->msix_info[i].data)) {
                                        DBG_PRINT(ERR_DBG, "%s @ Addr:0x%llx "
-                                               "Data:0x%lx\n",sp->desc[i],
+                                               "Data:0x%llx\n",sp->desc[i],
                                                (unsigned long long)
                                                sp->msix_info[i].addr,
-                                               (unsigned long)
-                                               ntohl(sp->msix_info[i].data));
+                                               (unsigned long long)
+                                               sp->msix_info[i].data);
                                } else {
                                        msix_tx_cnt++;
                                }
@@ -6922,11 +7015,11 @@ static int s2io_add_isr(struct s2io_nic * sp)
                                if(!(sp->msix_info[i].addr &&
                                        sp->msix_info[i].data)) {
                                        DBG_PRINT(ERR_DBG, "%s @ Addr:0x%llx "
-                                               "Data:0x%lx\n",sp->desc[i],
+                                               "Data:0x%llx\n",sp->desc[i],
                                                (unsigned long long)
                                                sp->msix_info[i].addr,
-                                               (unsigned long)
-                                               ntohl(sp->msix_info[i].data));
+                                               (unsigned long long)
+                                               sp->msix_info[i].data);
                                } else {
                                        msix_rx_cnt++;
                                }
@@ -6972,7 +7065,6 @@ static void do_s2io_card_down(struct s2io_nic * sp, int do_io)
 {
        int cnt = 0;
        struct XENA_dev_config __iomem *bar0 = sp->bar0;
-       unsigned long flags;
        register u64 val64 = 0;
        struct config_param *config;
        config = &sp->config;
@@ -6997,9 +7089,6 @@ static void do_s2io_card_down(struct s2io_nic * sp, int do_io)
 
        s2io_rem_isr(sp);
 
-       /* Kill tasklet. */
-       tasklet_kill(&sp->task);
-
        /* Check if the device is Quiescent and then Reset the NIC */
        while(do_io) {
                /* As per the HW requirement we need to replenish the
@@ -7034,9 +7123,7 @@ static void do_s2io_card_down(struct s2io_nic * sp, int do_io)
        free_tx_buffers(sp);
 
        /* Free all Rx buffers */
-       spin_lock_irqsave(&sp->rx_lock, flags);
        free_rx_buffers(sp);
-       spin_unlock_irqrestore(&sp->rx_lock, flags);
 
        clear_bit(__S2IO_STATE_LINK_TASK, &(sp->state));
 }
@@ -7072,7 +7159,9 @@ static int s2io_card_up(struct s2io_nic * sp)
        config = &sp->config;
 
        for (i = 0; i < config->rx_ring_num; i++) {
-               if ((ret = fill_rx_buffers(sp, i))) {
+               mac_control->rings[i].mtu = dev->mtu;
+               ret = fill_rx_buffers(&mac_control->rings[i]);
+               if (ret) {
                        DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n",
                                  dev->name);
                        s2io_reset(sp);
@@ -7080,7 +7169,7 @@ static int s2io_card_up(struct s2io_nic * sp)
                        return -ENOMEM;
                }
                DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i,
-                         atomic_read(&sp->rx_bufs_left[i]));
+                         mac_control->rings[i].rx_bufs_left);
        }
 
        /* Initialise napi */
@@ -7125,9 +7214,6 @@ static int s2io_card_up(struct s2io_nic * sp)
 
        S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2));
 
-       /* Enable tasklet for the device */
-       tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
-
        /*  Enable select interrupts */
        en_dis_err_alarms(sp, ENA_ALL_INTRS, ENABLE_INTRS);
        if (sp->config.intr_type != INTA)
@@ -7167,7 +7253,7 @@ static void s2io_restart_nic(struct work_struct *work)
                DBG_PRINT(ERR_DBG, "%s: Device bring up failed\n",
                          dev->name);
        }
-       netif_wake_queue(dev);
+       s2io_wake_all_tx_queue(sp);
        DBG_PRINT(ERR_DBG, "%s: was reset by Tx watchdog timer\n",
                  dev->name);
 out_unlock:
@@ -7218,7 +7304,7 @@ static void s2io_tx_watchdog(struct net_device *dev)
 static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
 {
        struct s2io_nic *sp = ring_data->nic;
-       struct net_device *dev = (struct net_device *) sp->dev;
+       struct net_device *dev = (struct net_device *) ring_data->dev;
        struct sk_buff *skb = (struct sk_buff *)
                ((unsigned long) rxdp->Host_Control);
        int ring_no = ring_data->ring_no;
@@ -7295,19 +7381,19 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                        sp->mac_control.stats_info->sw_stat.mem_freed
                                += skb->truesize;
                        dev_kfree_skb(skb);
-                       atomic_dec(&sp->rx_bufs_left[ring_no]);
+                       ring_data->rx_bufs_left -= 1;
                        rxdp->Host_Control = 0;
                        return 0;
                }
        }
 
        /* Updating statistics */
-       sp->stats.rx_packets++;
+       ring_data->rx_packets++;
        rxdp->Host_Control = 0;
        if (sp->rxd_mode == RXD_MODE_1) {
                int len = RXD_GET_BUFFER0_SIZE_1(rxdp->Control_2);
 
-               sp->stats.rx_bytes += len;
+               ring_data->rx_bytes += len;
                skb_put(skb, len);
 
        } else if (sp->rxd_mode == RXD_MODE_3B) {
@@ -7318,13 +7404,13 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                unsigned char *buff = skb_push(skb, buf0_len);
 
                struct buffAdd *ba = &ring_data->ba[get_block][get_off];
-               sp->stats.rx_bytes += buf0_len + buf2_len;
+               ring_data->rx_bytes += buf0_len + buf2_len;
                memcpy(buff, ba->ba_0, buf0_len);
                skb_put(skb, buf2_len);
        }
 
-       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
-           (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
+       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!ring_data->lro) ||
+           (ring_data->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
            (sp->rx_csum)) {
                l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
                l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
@@ -7335,14 +7421,14 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                         * a flag in the RxD.
                         */
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       if (sp->lro) {
+                       if (ring_data->lro) {
                                u32 tcp_len;
                                u8 *tcp;
                                int ret = 0;
 
-                               ret = s2io_club_tcp_session(skb->data, &tcp,
-                                                           &tcp_len, &lro,
-                                                           rxdp, sp);
+                               ret = s2io_club_tcp_session(ring_data,
+                                       skb->data, &tcp, &tcp_len, &lro,
+                                       rxdp, sp);
                                switch (ret) {
                                        case 3: /* Begin anew */
                                                lro->parent = skb;
@@ -7357,7 +7443,8 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                                        {
                                                lro_append_pkt(sp, lro,
                                                        skb, tcp_len);
-                                               queue_rx_frame(lro->parent);
+                                               queue_rx_frame(lro->parent,
+                                                       lro->vlan_tag);
                                                clear_lro_session(lro);
                                                sp->mac_control.stats_info->
                                                    sw_stat.flush_max_pkts++;
@@ -7368,7 +7455,8 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                                                        lro->frags_len;
                                                sp->mac_control.stats_info->
                                                     sw_stat.sending_both++;
-                                               queue_rx_frame(lro->parent);
+                                               queue_rx_frame(lro->parent,
+                                                       lro->vlan_tag);
                                                clear_lro_session(lro);
                                                goto send_up;
                                        case 0: /* sessions exceeded */
@@ -7394,34 +7482,15 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp)
                         */
                        skb->ip_summed = CHECKSUM_NONE;
                }
-       } else {
+       } else
                skb->ip_summed = CHECKSUM_NONE;
-       }
+
        sp->mac_control.stats_info->sw_stat.mem_freed += skb->truesize;
-       if (!sp->lro) {
-               skb->protocol = eth_type_trans(skb, dev);
-               if ((sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2) &&
-                       vlan_strip_flag)) {
-                       /* Queueing the vlan frame to the upper layer */
-                       if (napi)
-                               vlan_hwaccel_receive_skb(skb, sp->vlgrp,
-                                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-                       else
-                               vlan_hwaccel_rx(skb, sp->vlgrp,
-                                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-               } else {
-                       if (napi)
-                               netif_receive_skb(skb);
-                       else
-                               netif_rx(skb);
-               }
-       } else {
 send_up:
-               queue_rx_frame(skb);
-       }
+       queue_rx_frame(skb, RXD_GET_VLAN_TAG(rxdp->Control_2));
        dev->last_rx = jiffies;
 aggregate:
-       atomic_dec(&sp->rx_bufs_left[ring_no]);
+       sp->mac_control.rings[ring_no].rx_bufs_left -= 1;
        return SUCCESS;
 }
 
@@ -7443,8 +7512,10 @@ static void s2io_link(struct s2io_nic * sp, int link)
        struct net_device *dev = (struct net_device *) sp->dev;
 
        if (link != sp->last_link_state) {
+               init_tti(sp, link);
                if (link == LINK_DOWN) {
                        DBG_PRINT(ERR_DBG, "%s: Link down\n", dev->name);
+                       s2io_stop_all_tx_queue(sp);
                        netif_carrier_off(dev);
                        if(sp->mac_control.stats_info->sw_stat.link_up_cnt)
                        sp->mac_control.stats_info->sw_stat.link_up_time =
@@ -7457,6 +7528,7 @@ static void s2io_link(struct s2io_nic * sp, int link)
                                jiffies - sp->start_time;
                        sp->mac_control.stats_info->sw_stat.link_up_cnt++;
                        netif_carrier_on(dev);
+                       s2io_wake_all_tx_queue(sp);
                }
        }
        sp->last_link_state = link;
@@ -7493,26 +7565,56 @@ static void s2io_init_pci(struct s2io_nic * sp)
        pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd);
 }
 
-static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type)
+static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type,
+       u8 *dev_multiq)
 {
        if ((tx_fifo_num > MAX_TX_FIFOS) ||
-               (tx_fifo_num < FIFO_DEFAULT_NUM)) {
+               (tx_fifo_num < 1)) {
                DBG_PRINT(ERR_DBG, "s2io: Requested number of tx fifos "
                        "(%d) not supported\n", tx_fifo_num);
-               tx_fifo_num =
-                       ((tx_fifo_num > MAX_TX_FIFOS)? MAX_TX_FIFOS :
-                       ((tx_fifo_num < FIFO_DEFAULT_NUM) ? FIFO_DEFAULT_NUM :
-                       tx_fifo_num));
+
+               if (tx_fifo_num < 1)
+                       tx_fifo_num = 1;
+               else
+                       tx_fifo_num = MAX_TX_FIFOS;
+
                DBG_PRINT(ERR_DBG, "s2io: Default to %d ", tx_fifo_num);
                DBG_PRINT(ERR_DBG, "tx fifos\n");
        }
 
-       if ( rx_ring_num > 8) {
-               DBG_PRINT(ERR_DBG, "s2io: Requested number of Rx rings not "
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+       if (multiq) {
+               DBG_PRINT(ERR_DBG, "s2io: Multiqueue support not enabled\n");
+               multiq = 0;
+       }
+#endif
+       if (multiq)
+               *dev_multiq = multiq;
+
+       if (tx_steering_type && (1 == tx_fifo_num)) {
+               if (tx_steering_type != TX_DEFAULT_STEERING)
+                       DBG_PRINT(ERR_DBG,
+                               "s2io: Tx steering is not supported with "
+                               "one fifo. Disabling Tx steering.\n");
+               tx_steering_type = NO_STEERING;
+       }
+
+       if ((tx_steering_type < NO_STEERING) ||
+               (tx_steering_type > TX_DEFAULT_STEERING)) {
+               DBG_PRINT(ERR_DBG, "s2io: Requested transmit steering not "
+                        "supported\n");
+               DBG_PRINT(ERR_DBG, "s2io: Disabling transmit steering\n");
+               tx_steering_type = NO_STEERING;
+       }
+
+       if (rx_ring_num > MAX_RX_RINGS) {
+               DBG_PRINT(ERR_DBG, "s2io: Requested number of rx rings not "
                         "supported\n");
-               DBG_PRINT(ERR_DBG, "s2io: Default to 8 Rx rings\n");
-               rx_ring_num = 8;
+               DBG_PRINT(ERR_DBG, "s2io: Default to %d rx rings\n",
+                       MAX_RX_RINGS);
+               rx_ring_num = MAX_RX_RINGS;
        }
+
        if (*dev_intr_type != INTA)
                napi = 0;
 
@@ -7541,7 +7643,7 @@ static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type)
 /**
  * rts_ds_steer - Receive traffic steering based on IPv4 or IPv6 TOS
  * or Traffic class respectively.
- * @nic: device peivate variable
+ * @nic: device private variable
  * Description: The function configures the receive steering to
  * desired receive ring.
  * Return Value:  SUCCESS on success and
@@ -7598,9 +7700,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
        struct config_param *config;
        int mode;
        u8 dev_intr_type = intr_type;
+       u8 dev_multiq = 0;
        DECLARE_MAC_BUF(mac);
 
-       if ((ret = s2io_verify_parm(pdev, &dev_intr_type)))
+       ret = s2io_verify_parm(pdev, &dev_intr_type, &dev_multiq);
+       if (ret)
                return ret;
 
        if ((ret = pci_enable_device(pdev))) {
@@ -7631,7 +7735,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                pci_disable_device(pdev);
                return -ENODEV;
        }
-
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (dev_multiq)
+               dev = alloc_etherdev_mq(sizeof(struct s2io_nic), tx_fifo_num);
+       else
+#endif
        dev = alloc_etherdev(sizeof(struct s2io_nic));
        if (dev == NULL) {
                DBG_PRINT(ERR_DBG, "Device allocation failed\n");
@@ -7680,17 +7788,45 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
        config = &sp->config;
 
        config->napi = napi;
+       config->tx_steering_type = tx_steering_type;
 
        /* Tx side parameters. */
-       config->tx_fifo_num = tx_fifo_num;
-       for (i = 0; i < MAX_TX_FIFOS; i++) {
+       if (config->tx_steering_type == TX_PRIORITY_STEERING)
+               config->tx_fifo_num = MAX_TX_FIFOS;
+       else
+               config->tx_fifo_num = tx_fifo_num;
+
+       /* Initialize the fifos used for tx steering */
+       if (config->tx_fifo_num < 5) {
+               if (config->tx_fifo_num == 1)
+                       sp->total_tcp_fifos = 1;
+               else
+                       sp->total_tcp_fifos = config->tx_fifo_num - 1;
+               sp->udp_fifo_idx = config->tx_fifo_num - 1;
+               sp->total_udp_fifos = 1;
+               sp->other_fifo_idx = sp->total_tcp_fifos - 1;
+       } else {
+               sp->total_tcp_fifos = (config->tx_fifo_num -
+                       FIFO_UDP_MAX_NUM - FIFO_OTHER_MAX_NUM);
+               sp->udp_fifo_idx = sp->total_tcp_fifos;
+               sp->total_udp_fifos = FIFO_UDP_MAX_NUM;
+               sp->other_fifo_idx = sp->udp_fifo_idx + FIFO_UDP_MAX_NUM;
+       }
+
+       config->multiq = dev_multiq;
+       for (i = 0; i < config->tx_fifo_num; i++) {
                config->tx_cfg[i].fifo_len = tx_fifo_len[i];
                config->tx_cfg[i].fifo_priority = i;
        }
 
        /* mapping the QoS priority to the configured fifos */
        for (i = 0; i < MAX_TX_FIFOS; i++)
-               config->fifo_mapping[i] = fifo_map[config->tx_fifo_num][i];
+               config->fifo_mapping[i] = fifo_map[config->tx_fifo_num - 1][i];
+
+       /* map the hashing selector table to the configured fifos */
+       for (i = 0; i < config->tx_fifo_num; i++)
+               sp->fifo_selector[i] = fifo_selector[i];
+
 
        config->tx_intr_type = TXD_INT_TYPE_UTILZ;
        for (i = 0; i < config->tx_fifo_num; i++) {
@@ -7706,10 +7842,15 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
        /* Rx side parameters. */
        config->rx_ring_num = rx_ring_num;
-       for (i = 0; i < MAX_RX_RINGS; i++) {
+       for (i = 0; i < config->rx_ring_num; i++) {
                config->rx_cfg[i].num_rxd = rx_ring_sz[i] *
                    (rxd_count[sp->rxd_mode] + 1);
                config->rx_cfg[i].ring_priority = i;
+               mac_control->rings[i].rx_bufs_left = 0;
+               mac_control->rings[i].rxd_mode = sp->rxd_mode;
+               mac_control->rings[i].rxd_count = rxd_count[sp->rxd_mode];
+               mac_control->rings[i].pdev = sp->pdev;
+               mac_control->rings[i].dev = sp->dev;
        }
 
        for (i = 0; i < rx_ring_num; i++) {
@@ -7724,10 +7865,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
        mac_control->mc_pause_threshold_q4q7 = mc_pause_threshold_q4q7;
 
 
-       /* Initialize Ring buffer parameters. */
-       for (i = 0; i < config->rx_ring_num; i++)
-               atomic_set(&sp->rx_bufs_left[i], 0);
-
        /*  initialize the shared memory used by the NIC and the host */
        if (init_shared_mem(sp)) {
                DBG_PRINT(ERR_DBG, "%s: Memory allocation failed\n",
@@ -7775,6 +7912,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
        SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
        dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
        dev->vlan_rx_register = s2io_vlan_rx_register;
+       dev->vlan_rx_kill_vid = (void *)s2io_vlan_rx_kill_vid;
 
        /*
         * will use eth_mac_addr() for  dev->set_mac_address
@@ -7795,7 +7933,10 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                dev->features |= NETIF_F_UFO;
                dev->features |= NETIF_F_HW_CSUM;
        }
-
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+       if (config->multiq)
+               dev->features |= NETIF_F_MULTI_QUEUE;
+#endif
        dev->tx_timeout = &s2io_tx_watchdog;
        dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
        INIT_WORK(&sp->rst_timer_task, s2io_restart_nic);
@@ -7878,20 +8019,15 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
        s2io_reset(sp);
 
        /*
-        * Initialize the tasklet status and link state flags
+        * Initialize link state flags
         * and the card state parameter
         */
-       sp->tasklet_status = 0;
        sp->state = 0;
 
        /* Initialize spinlocks */
        for (i = 0; i < sp->config.tx_fifo_num; i++)
                spin_lock_init(&mac_control->fifos[i].tx_lock);
 
-       if (!napi)
-               spin_lock_init(&sp->put_lock);
-       spin_lock_init(&sp->rx_lock);
-
        /*
         * SXE-002: Configure link and activity LED to init state
         * on driver load.
@@ -7944,6 +8080,13 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
        if (napi)
                DBG_PRINT(ERR_DBG, "%s: NAPI enabled\n", dev->name);
+
+       DBG_PRINT(ERR_DBG, "%s: Using %d Tx fifo(s)\n", dev->name,
+               sp->config.tx_fifo_num);
+
+       DBG_PRINT(ERR_DBG, "%s: Using %d Rx ring(s)\n", dev->name,
+                 sp->config.rx_ring_num);
+
        switch(sp->config.intr_type) {
                case INTA:
                    DBG_PRINT(ERR_DBG, "%s: Interrupt type INTA\n", dev->name);
@@ -7952,6 +8095,29 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                    DBG_PRINT(ERR_DBG, "%s: Interrupt type MSI-X\n", dev->name);
                    break;
        }
+       if (sp->config.multiq) {
+               for (i = 0; i < sp->config.tx_fifo_num; i++) /* per fifo */
+                       mac_control->fifos[i].multiq = config->multiq;
+               DBG_PRINT(ERR_DBG, "%s: Multiqueue support enabled\n",
+                       dev->name);
+       } else
+               DBG_PRINT(ERR_DBG, "%s: Multiqueue support disabled\n",
+                       dev->name);
+
+       switch (sp->config.tx_steering_type) {
+       case NO_STEERING:
+               DBG_PRINT(ERR_DBG, "%s: No steering enabled for"
+                       " transmit\n", dev->name);
+               break;
+       case TX_PRIORITY_STEERING:
+               DBG_PRINT(ERR_DBG, "%s: Priority steering enabled for"
+                       " transmit\n", dev->name);
+               break;
+       case TX_DEFAULT_STEERING:
+               DBG_PRINT(ERR_DBG, "%s: Default steering enabled for"
+                       " transmit\n", dev->name);
+       }
+
        if (sp->lro)
                DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
                          dev->name);
@@ -8046,7 +8212,8 @@ module_init(s2io_starter);
 module_exit(s2io_closer);
 
 static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
-               struct tcphdr **tcp, struct RxD_t *rxdp)
+               struct tcphdr **tcp, struct RxD_t *rxdp,
+               struct s2io_nic *sp)
 {
        int ip_off;
        u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len;
@@ -8057,19 +8224,20 @@ static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
                return -1;
        }
 
-       /* TODO:
-        * By default the VLAN field in the MAC is stripped by the card, if this
-        * feature is turned off in rx_pa_cfg register, then the ip_off field
-        * has to be shifted by a further 2 bytes
-        */
-       switch (l2_type) {
-               case 0: /* DIX type */
-               case 4: /* DIX type with VLAN */
-                       ip_off = HEADER_ETHERNET_II_802_3_SIZE;
-                       break;
+       /* Checking for DIX type or DIX type with VLAN */
+       if ((l2_type == 0)
+               || (l2_type == 4)) {
+               ip_off = HEADER_ETHERNET_II_802_3_SIZE;
+               /*
+                * If vlan stripping is disabled and the frame is VLAN tagged,
+                * shift the IP header offset by the size of the VLAN header.
+                */
+               if ((!vlan_strip_flag) &&
+                       (rxdp->Control_1 & RXD_FRAME_VLAN_TAG))
+                       ip_off += HEADER_VLAN_SIZE;
+       } else {
                /* LLC, SNAP etc are considered non-mergeable */
-               default:
-                       return -1;
+               return -1;
        }
 
        *ip = (struct iphdr *)((u8 *)buffer + ip_off);
@@ -8096,26 +8264,27 @@ static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
 }
 
 static void initiate_new_session(struct lro *lro, u8 *l2h,
-                    struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+       struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len, u16 vlan_tag)
 {
        DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
        lro->l2h = l2h;
        lro->iph = ip;
        lro->tcph = tcp;
        lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
-       lro->tcp_ack = ntohl(tcp->ack_seq);
+       lro->tcp_ack = tcp->ack_seq;
        lro->sg_num = 1;
        lro->total_len = ntohs(ip->tot_len);
        lro->frags_len = 0;
+       lro->vlan_tag = vlan_tag;
        /*
         * check if we saw TCP timestamp. Other consistency checks have
         * already been done.
         */
        if (tcp->doff == 8) {
-               u32 *ptr;
-               ptr = (u32 *)(tcp+1);
+               __be32 *ptr;
+               ptr = (__be32 *)(tcp+1);
                lro->saw_ts = 1;
-               lro->cur_tsval = *(ptr+1);
+               lro->cur_tsval = ntohl(*(ptr+1));
                lro->cur_tsecr = *(ptr+2);
        }
        lro->in_use = 1;
@@ -8141,7 +8310,7 @@ static void update_L3L4_header(struct s2io_nic *sp, struct lro *lro)
 
        /* Update tsecr field if this session has timestamps enabled */
        if (lro->saw_ts) {
-               u32 *ptr = (u32 *)(tcp + 1);
+               __be32 *ptr = (__be32 *)(tcp + 1);
                *(ptr+2) = lro->cur_tsecr;
        }
 
@@ -8166,10 +8335,10 @@ static void aggregate_new_rx(struct lro *lro, struct iphdr *ip,
        lro->window = tcp->window;
 
        if (lro->saw_ts) {
-               u32 *ptr;
+               __be32 *ptr;
                /* Update tsecr and tsval from this packet */
-               ptr = (u32 *) (tcp + 1);
-               lro->cur_tsval = *(ptr + 1);
+               ptr = (__be32 *)(tcp+1);
+               lro->cur_tsval = ntohl(*(ptr+1));
                lro->cur_tsecr = *(ptr + 2);
        }
 }
@@ -8220,11 +8389,11 @@ static int verify_l3_l4_lro_capable(struct lro *l_lro, struct iphdr *ip,
 
                /* Ensure timestamp value increases monotonically */
                if (l_lro)
-                       if (l_lro->cur_tsval > *((u32 *)(ptr+2)))
+                       if (l_lro->cur_tsval > ntohl(*((__be32 *)(ptr+2))))
                                return -1;
 
                /* timestamp echo reply should be non-zero */
-               if (*((u32 *)(ptr+6)) == 0)
+               if (*((__be32 *)(ptr+6)) == 0)
                        return -1;
        }
 
@@ -8232,25 +8401,27 @@ static int verify_l3_l4_lro_capable(struct lro *l_lro, struct iphdr *ip,
 }
 
 static int
-s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro,
-                     struct RxD_t *rxdp, struct s2io_nic *sp)
+s2io_club_tcp_session(struct ring_info *ring_data, u8 *buffer, u8 **tcp,
+       u32 *tcp_len, struct lro **lro, struct RxD_t *rxdp,
+       struct s2io_nic *sp)
 {
        struct iphdr *ip;
        struct tcphdr *tcph;
        int ret = 0, i;
+       u16 vlan_tag = 0;
 
        if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp,
-                                        rxdp))) {
+                                        rxdp, sp))) {
                DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
                          ip->saddr, ip->daddr);
-       } else {
+       } else
                return ret;
-       }
 
+       vlan_tag = RXD_GET_VLAN_TAG(rxdp->Control_2);
        tcph = (struct tcphdr *)*tcp;
        *tcp_len = get_l4_pyld_length(ip, tcph);
        for (i=0; i<MAX_LRO_SESSIONS; i++) {
-               struct lro *l_lro = &sp->lro0_n[i];
+               struct lro *l_lro = &ring_data->lro0_n[i];
                if (l_lro->in_use) {
                        if (check_for_socket_match(l_lro, ip, tcph))
                                continue;
@@ -8288,7 +8459,7 @@ s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro,
                }
 
                for (i=0; i<MAX_LRO_SESSIONS; i++) {
-                       struct lro *l_lro = &sp->lro0_n[i];
+                       struct lro *l_lro = &ring_data->lro0_n[i];
                        if (!(l_lro->in_use)) {
                                *lro = l_lro;
                                ret = 3; /* Begin anew */
@@ -8306,7 +8477,8 @@ s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro,
 
        switch (ret) {
                case 3:
-                       initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+                       initiate_new_session(*lro, buffer, ip, tcph, *tcp_len,
+                                                               vlan_tag);
                        break;
                case 2:
                        update_L3L4_header(sp, *lro);
@@ -8334,15 +8506,25 @@ static void clear_lro_session(struct lro *lro)
        memset(lro, 0, lro_struct_size);
 }
 
-static void queue_rx_frame(struct sk_buff *skb)
+static void queue_rx_frame(struct sk_buff *skb, u16 vlan_tag)
 {
        struct net_device *dev = skb->dev;
+       struct s2io_nic *sp = dev->priv;
 
        skb->protocol = eth_type_trans(skb, dev);
-       if (napi)
-               netif_receive_skb(skb);
-       else
-               netif_rx(skb);
+       if (sp->vlgrp && vlan_tag
+               && (vlan_strip_flag)) {
+               /* Queueing the vlan frame to the upper layer */
+               if (sp->config.napi)
+                       vlan_hwaccel_receive_skb(skb, sp->vlgrp, vlan_tag);
+               else
+                       vlan_hwaccel_rx(skb, sp->vlgrp, vlan_tag);
+       } else {
+               if (sp->config.napi)
+                       netif_receive_skb(skb);
+               else
+                       netif_rx(skb);
+       }
 }
 
 static void lro_append_pkt(struct s2io_nic *sp, struct lro *lro,