string: factorize skip_spaces and export it to be generally available
[safe/jmp/linux-2.6] / drivers / net / bnx2x_main.c
index 6bd9219..77ba135 100644 (file)
@@ -10,7 +10,7 @@
  * Written by: Eliezer Tamir
  * Based on code from Michael Chan's bnx2 driver
  * UDP CSUM errata workaround by Arik Gendelman
- * Slowpath rework by Vladislav Zolotarov
+ * Slowpath and fastpath rework by Vladislav Zolotarov
  * Statistics and Link management by Yitchak Gertner
  *
  */
 #include <linux/prefetch.h>
 #include <linux/zlib.h>
 #include <linux/io.h>
+#include <linux/stringify.h>
+
 
-#include "bnx2x_reg.h"
-#include "bnx2x_fw_defs.h"
-#include "bnx2x_hsi.h"
-#include "bnx2x_link.h"
 #include "bnx2x.h"
 #include "bnx2x_init.h"
+#include "bnx2x_init_ops.h"
+#include "bnx2x_dump.h"
 
-#define DRV_MODULE_VERSION     "1.45.26"
-#define DRV_MODULE_RELDATE     "2009/01/26"
+#define DRV_MODULE_VERSION     "1.52.1-5"
+#define DRV_MODULE_RELDATE     "2009/11/09"
 #define BNX2X_BC_VER           0x040200
 
+#include <linux/firmware.h>
+#include "bnx2x_fw_file_hdr.h"
+/* FW files */
+#define FW_FILE_VERSION                                        \
+       __stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
+       __stringify(BCM_5710_FW_MINOR_VERSION) "."      \
+       __stringify(BCM_5710_FW_REVISION_VERSION) "."   \
+       __stringify(BCM_5710_FW_ENGINEERING_VERSION)
+#define FW_FILE_NAME_E1                "bnx2x-e1-" FW_FILE_VERSION ".fw"
+#define FW_FILE_NAME_E1H       "bnx2x-e1h-" FW_FILE_VERSION ".fw"
+
 /* Time in jiffies before concluding the transmitter is hung */
 #define TX_TIMEOUT             (5*HZ)
 
@@ -72,31 +83,44 @@ MODULE_AUTHOR("Eliezer Tamir");
 MODULE_DESCRIPTION("Broadcom NetXtreme II BCM57710/57711/57711E Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
+MODULE_FIRMWARE(FW_FILE_NAME_E1);
+MODULE_FIRMWARE(FW_FILE_NAME_E1H);
 
 static int multi_mode = 1;
 module_param(multi_mode, int, 0);
+MODULE_PARM_DESC(multi_mode, " Multi queue mode "
+                            "(0 Disable; 1 Enable (default))");
 
-static int disable_tpa;
-static int poll;
-static int debug;
-static int load_count[3]; /* 0-common, 1-port0, 2-port1 */
+static int num_queues;
+module_param(num_queues, int, 0);
+MODULE_PARM_DESC(num_queues, " Number of queues for multi_mode=1"
+                               " (default is as a number of CPUs)");
 
+static int disable_tpa;
 module_param(disable_tpa, int, 0);
+MODULE_PARM_DESC(disable_tpa, " Disable the TPA (LRO) feature");
 
 static int int_mode;
 module_param(int_mode, int, 0);
 MODULE_PARM_DESC(int_mode, " Force interrupt mode (1 INT#x; 2 MSI)");
 
+static int dropless_fc;
+module_param(dropless_fc, int, 0);
+MODULE_PARM_DESC(dropless_fc, " Pause on exhausted host ring");
+
+static int poll;
 module_param(poll, int, 0);
+MODULE_PARM_DESC(poll, " Use polling (for debug)");
 
 static int mrrs = -1;
 module_param(mrrs, int, 0);
 MODULE_PARM_DESC(mrrs, " Force Max Read Req Size (0..3) (for debug)");
 
+static int debug;
 module_param(debug, int, 0);
-MODULE_PARM_DESC(disable_tpa, "disable the TPA (LRO) feature");
-MODULE_PARM_DESC(poll, "use polling (for debug)");
-MODULE_PARM_DESC(debug, "default debug msglevel");
+MODULE_PARM_DESC(debug, " Default debug msglevel");
+
+static int load_count[3]; /* 0-common, 1-port0, 2-port1 */
 
 static struct workqueue_struct *bnx2x_wq;
 
@@ -117,12 +141,9 @@ static struct {
 
 
 static const struct pci_device_id bnx2x_pci_tbl[] = {
-       { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_57710,
-               PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM57710 },
-       { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_57711,
-               PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM57711 },
-       { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_57711E,
-               PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM57711E },
+       { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57710), BCM57710 },
+       { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57711), BCM57711 },
+       { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57711E), BCM57711E },
        { 0 }
 };
 
@@ -135,7 +156,7 @@ MODULE_DEVICE_TABLE(pci, bnx2x_pci_tbl);
 /* used only at init
  * locking is done by mcp
  */
-static void bnx2x_reg_wr_ind(struct bnx2x *bp, u32 addr, u32 val)
+void bnx2x_reg_wr_ind(struct bnx2x *bp, u32 addr, u32 val)
 {
        pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS, addr);
        pci_write_config_dword(bp->pdev, PCICFG_GRC_DATA, val);
@@ -182,7 +203,7 @@ static void bnx2x_post_dmae(struct bnx2x *bp, struct dmae_command *dmae,
 void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
                      u32 len32)
 {
-       struct dmae_command *dmae = &bp->init_dmae;
+       struct dmae_command dmae;
        u32 *wb_comp = bnx2x_sp(bp, wb_comp);
        int cnt = 200;
 
@@ -195,43 +216,43 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
                return;
        }
 
-       mutex_lock(&bp->dmae_mutex);
-
-       memset(dmae, 0, sizeof(struct dmae_command));
+       memset(&dmae, 0, sizeof(struct dmae_command));
 
-       dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
-                       DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
-                       DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+       dmae.opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                      DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
+                      DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
 #ifdef __BIG_ENDIAN
-                       DMAE_CMD_ENDIANITY_B_DW_SWAP |
+                      DMAE_CMD_ENDIANITY_B_DW_SWAP |
 #else
-                       DMAE_CMD_ENDIANITY_DW_SWAP |
+                      DMAE_CMD_ENDIANITY_DW_SWAP |
 #endif
-                       (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
-                       (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
-       dmae->src_addr_lo = U64_LO(dma_addr);
-       dmae->src_addr_hi = U64_HI(dma_addr);
-       dmae->dst_addr_lo = dst_addr >> 2;
-       dmae->dst_addr_hi = 0;
-       dmae->len = len32;
-       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_comp));
-       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_comp));
-       dmae->comp_val = DMAE_COMP_VAL;
-
-       DP(BNX2X_MSG_OFF, "dmae: opcode 0x%08x\n"
+                      (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                      (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+       dmae.src_addr_lo = U64_LO(dma_addr);
+       dmae.src_addr_hi = U64_HI(dma_addr);
+       dmae.dst_addr_lo = dst_addr >> 2;
+       dmae.dst_addr_hi = 0;
+       dmae.len = len32;
+       dmae.comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_comp));
+       dmae.comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_comp));
+       dmae.comp_val = DMAE_COMP_VAL;
+
+       DP(BNX2X_MSG_OFF, "DMAE: opcode 0x%08x\n"
           DP_LEVEL "src_addr  [%x:%08x]  len [%d *4]  "
                    "dst_addr [%x:%08x (%08x)]\n"
           DP_LEVEL "comp_addr [%x:%08x]  comp_val 0x%08x\n",
-          dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo,
-          dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, dst_addr,
-          dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val);
+          dmae.opcode, dmae.src_addr_hi, dmae.src_addr_lo,
+          dmae.len, dmae.dst_addr_hi, dmae.dst_addr_lo, dst_addr,
+          dmae.comp_addr_hi, dmae.comp_addr_lo, dmae.comp_val);
        DP(BNX2X_MSG_OFF, "data [0x%08x 0x%08x 0x%08x 0x%08x]\n",
           bp->slowpath->wb_data[0], bp->slowpath->wb_data[1],
           bp->slowpath->wb_data[2], bp->slowpath->wb_data[3]);
 
+       mutex_lock(&bp->dmae_mutex);
+
        *wb_comp = 0;
 
-       bnx2x_post_dmae(bp, dmae, INIT_DMAE_C(bp));
+       bnx2x_post_dmae(bp, &dmae, INIT_DMAE_C(bp));
 
        udelay(5);
 
@@ -239,7 +260,7 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
                DP(BNX2X_MSG_OFF, "wb_comp 0x%08x\n", *wb_comp);
 
                if (!cnt) {
-                       BNX2X_ERR("dmae timeout!\n");
+                       BNX2X_ERR("DMAE timeout!\n");
                        break;
                }
                cnt--;
@@ -255,7 +276,7 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
 
 void bnx2x_read_dmae(struct bnx2x *bp, u32 src_addr, u32 len32)
 {
-       struct dmae_command *dmae = &bp->init_dmae;
+       struct dmae_command dmae;
        u32 *wb_comp = bnx2x_sp(bp, wb_comp);
        int cnt = 200;
 
@@ -270,48 +291,48 @@ void bnx2x_read_dmae(struct bnx2x *bp, u32 src_addr, u32 len32)
                return;
        }
 
-       mutex_lock(&bp->dmae_mutex);
-
-       memset(bnx2x_sp(bp, wb_data[0]), 0, sizeof(u32) * 4);
-       memset(dmae, 0, sizeof(struct dmae_command));
+       memset(&dmae, 0, sizeof(struct dmae_command));
 
-       dmae->opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
-                       DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
-                       DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+       dmae.opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
+                      DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
+                      DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
 #ifdef __BIG_ENDIAN
-                       DMAE_CMD_ENDIANITY_B_DW_SWAP |
+                      DMAE_CMD_ENDIANITY_B_DW_SWAP |
 #else
-                       DMAE_CMD_ENDIANITY_DW_SWAP |
+                      DMAE_CMD_ENDIANITY_DW_SWAP |
 #endif
-                       (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
-                       (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
-       dmae->src_addr_lo = src_addr >> 2;
-       dmae->src_addr_hi = 0;
-       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_data));
-       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_data));
-       dmae->len = len32;
-       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_comp));
-       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_comp));
-       dmae->comp_val = DMAE_COMP_VAL;
-
-       DP(BNX2X_MSG_OFF, "dmae: opcode 0x%08x\n"
+                      (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                      (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+       dmae.src_addr_lo = src_addr >> 2;
+       dmae.src_addr_hi = 0;
+       dmae.dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_data));
+       dmae.dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_data));
+       dmae.len = len32;
+       dmae.comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_comp));
+       dmae.comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_comp));
+       dmae.comp_val = DMAE_COMP_VAL;
+
+       DP(BNX2X_MSG_OFF, "DMAE: opcode 0x%08x\n"
           DP_LEVEL "src_addr  [%x:%08x]  len [%d *4]  "
                    "dst_addr [%x:%08x (%08x)]\n"
           DP_LEVEL "comp_addr [%x:%08x]  comp_val 0x%08x\n",
-          dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo,
-          dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, src_addr,
-          dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val);
+          dmae.opcode, dmae.src_addr_hi, dmae.src_addr_lo,
+          dmae.len, dmae.dst_addr_hi, dmae.dst_addr_lo, src_addr,
+          dmae.comp_addr_hi, dmae.comp_addr_lo, dmae.comp_val);
+
+       mutex_lock(&bp->dmae_mutex);
 
+       memset(bnx2x_sp(bp, wb_data[0]), 0, sizeof(u32) * 4);
        *wb_comp = 0;
 
-       bnx2x_post_dmae(bp, dmae, INIT_DMAE_C(bp));
+       bnx2x_post_dmae(bp, &dmae, INIT_DMAE_C(bp));
 
        udelay(5);
 
        while (*wb_comp != DMAE_COMP_VAL) {
 
                if (!cnt) {
-                       BNX2X_ERR("dmae timeout!\n");
+                       BNX2X_ERR("DMAE timeout!\n");
                        break;
                }
                cnt--;
@@ -328,6 +349,21 @@ void bnx2x_read_dmae(struct bnx2x *bp, u32 src_addr, u32 len32)
        mutex_unlock(&bp->dmae_mutex);
 }
 
+void bnx2x_write_dmae_phys_len(struct bnx2x *bp, dma_addr_t phys_addr,
+                              u32 addr, u32 len)
+{
+       int offset = 0;
+
+       while (len > DMAE_LEN32_WR_MAX) {
+               bnx2x_write_dmae(bp, phys_addr + offset,
+                                addr + offset, DMAE_LEN32_WR_MAX);
+               offset += DMAE_LEN32_WR_MAX * 4;
+               len -= DMAE_LEN32_WR_MAX;
+       }
+
+       bnx2x_write_dmae(bp, phys_addr + offset, addr + offset, len);
+}
+
 /* used only for slowpath so not inlined */
 static void bnx2x_wb_wr(struct bnx2x *bp, int reg, u32 val_hi, u32 val_lo)
 {
@@ -473,13 +509,14 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
 static void bnx2x_fw_dump(struct bnx2x *bp)
 {
        u32 mark, offset;
-       u32 data[9];
+       __be32 data[9];
        int word;
 
        mark = REG_RD(bp, MCP_REG_MCPR_SCRATCH + 0xf104);
        mark = ((mark + 0x3) & ~0x3);
-       printk(KERN_ERR PFX "begin fw dump (mark 0x%x)\n" KERN_ERR, mark);
+       printk(KERN_ERR PFX "begin fw dump (mark 0x%x)\n", mark);
 
+       printk(KERN_ERR PFX);
        for (offset = mark - 0x08000000; offset <= 0xF900; offset += 0x8*4) {
                for (word = 0; word < 8; word++)
                        data[word] = htonl(REG_RD(bp, MCP_REG_MCPR_SCRATCH +
@@ -494,7 +531,7 @@ static void bnx2x_fw_dump(struct bnx2x *bp)
                data[8] = 0x0;
                printk(KERN_CONT "%s", (char *)data);
        }
-       printk("\n" KERN_ERR PFX "end of fw dump\n");
+       printk(KERN_ERR PFX "end of fw dump\n");
 }
 
 static void bnx2x_panic_dump(struct bnx2x *bp)
@@ -507,82 +544,102 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 
        BNX2X_ERR("begin crash dump -----------------\n");
 
+       /* Indices */
+       /* Common */
+       BNX2X_ERR("def_c_idx(%u)  def_u_idx(%u)  def_x_idx(%u)"
+                 "  def_t_idx(%u)  def_att_idx(%u)  attn_state(%u)"
+                 "  spq_prod_idx(%u)\n",
+                 bp->def_c_idx, bp->def_u_idx, bp->def_x_idx, bp->def_t_idx,
+                 bp->def_att_idx, bp->attn_state, bp->spq_prod_idx);
+
+       /* Rx */
        for_each_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
-               struct eth_tx_db_data *hw_prods = fp->hw_tx_prods;
 
-               BNX2X_ERR("queue[%d]: tx_pkt_prod(%x)  tx_pkt_cons(%x)"
-                         "  tx_bd_prod(%x)  tx_bd_cons(%x)  *tx_cons_sb(%x)\n",
-                         i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
-                         fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb));
-               BNX2X_ERR("          rx_bd_prod(%x)  rx_bd_cons(%x)"
+               BNX2X_ERR("fp%d: rx_bd_prod(%x)  rx_bd_cons(%x)"
                          "  *rx_bd_cons_sb(%x)  rx_comp_prod(%x)"
                          "  rx_comp_cons(%x)  *rx_cons_sb(%x)\n",
-                         fp->rx_bd_prod, fp->rx_bd_cons,
+                         i, fp->rx_bd_prod, fp->rx_bd_cons,
                          le16_to_cpu(*fp->rx_bd_cons_sb), fp->rx_comp_prod,
                          fp->rx_comp_cons, le16_to_cpu(*fp->rx_cons_sb));
-               BNX2X_ERR("          rx_sge_prod(%x)  last_max_sge(%x)"
-                         "  fp_c_idx(%x)  *sb_c_idx(%x)  fp_u_idx(%x)"
-                         "  *sb_u_idx(%x)  bd data(%x,%x)\n",
-                         fp->rx_sge_prod, fp->last_max_sge, fp->fp_c_idx,
-                         fp->status_blk->c_status_block.status_block_index,
-                         fp->fp_u_idx,
-                         fp->status_blk->u_status_block.status_block_index,
-                         hw_prods->packets_prod, hw_prods->bds_prod);
-
-               start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
-               end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
-               for (j = start; j < end; j++) {
-                       struct sw_tx_bd *sw_bd = &fp->tx_buf_ring[j];
+               BNX2X_ERR("      rx_sge_prod(%x)  last_max_sge(%x)"
+                         "  fp_u_idx(%x) *sb_u_idx(%x)\n",
+                         fp->rx_sge_prod, fp->last_max_sge,
+                         le16_to_cpu(fp->fp_u_idx),
+                         fp->status_blk->u_status_block.status_block_index);
+       }
 
-                       BNX2X_ERR("packet[%x]=[%p,%x]\n", j,
-                                 sw_bd->skb, sw_bd->first_bd);
-               }
+       /* Tx */
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
 
-               start = TX_BD(fp->tx_bd_cons - 10);
-               end = TX_BD(fp->tx_bd_cons + 254);
-               for (j = start; j < end; j++) {
-                       u32 *tx_bd = (u32 *)&fp->tx_desc_ring[j];
+               BNX2X_ERR("fp%d: tx_pkt_prod(%x)  tx_pkt_cons(%x)"
+                         "  tx_bd_prod(%x)  tx_bd_cons(%x)  *tx_cons_sb(%x)\n",
+                         i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
+                         fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb));
+               BNX2X_ERR("      fp_c_idx(%x)  *sb_c_idx(%x)"
+                         "  tx_db_prod(%x)\n", le16_to_cpu(fp->fp_c_idx),
+                         fp->status_blk->c_status_block.status_block_index,
+                         fp->tx_db.data.prod);
+       }
 
-                       BNX2X_ERR("tx_bd[%x]=[%x:%x:%x:%x]\n",
-                                 j, tx_bd[0], tx_bd[1], tx_bd[2], tx_bd[3]);
-               }
+       /* Rings */
+       /* Rx */
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
 
                start = RX_BD(le16_to_cpu(*fp->rx_cons_sb) - 10);
                end = RX_BD(le16_to_cpu(*fp->rx_cons_sb) + 503);
-               for (j = start; j < end; j++) {
+               for (j = start; j != end; j = RX_BD(j + 1)) {
                        u32 *rx_bd = (u32 *)&fp->rx_desc_ring[j];
                        struct sw_rx_bd *sw_bd = &fp->rx_buf_ring[j];
 
-                       BNX2X_ERR("rx_bd[%x]=[%x:%x]  sw_bd=[%p]\n",
-                                 j, rx_bd[1], rx_bd[0], sw_bd->skb);
+                       BNX2X_ERR("fp%d: rx_bd[%x]=[%x:%x]  sw_bd=[%p]\n",
+                                 i, j, rx_bd[1], rx_bd[0], sw_bd->skb);
                }
 
                start = RX_SGE(fp->rx_sge_prod);
                end = RX_SGE(fp->last_max_sge);
-               for (j = start; j < end; j++) {
+               for (j = start; j != end; j = RX_SGE(j + 1)) {
                        u32 *rx_sge = (u32 *)&fp->rx_sge_ring[j];
                        struct sw_rx_page *sw_page = &fp->rx_page_ring[j];
 
-                       BNX2X_ERR("rx_sge[%x]=[%x:%x]  sw_page=[%p]\n",
-                                 j, rx_sge[1], rx_sge[0], sw_page->page);
+                       BNX2X_ERR("fp%d: rx_sge[%x]=[%x:%x]  sw_page=[%p]\n",
+                                 i, j, rx_sge[1], rx_sge[0], sw_page->page);
                }
 
                start = RCQ_BD(fp->rx_comp_cons - 10);
                end = RCQ_BD(fp->rx_comp_cons + 503);
-               for (j = start; j < end; j++) {
+               for (j = start; j != end; j = RCQ_BD(j + 1)) {
                        u32 *cqe = (u32 *)&fp->rx_comp_ring[j];
 
-                       BNX2X_ERR("cqe[%x]=[%x:%x:%x:%x]\n",
-                                 j, cqe[0], cqe[1], cqe[2], cqe[3]);
+                       BNX2X_ERR("fp%d: cqe[%x]=[%x:%x:%x:%x]\n",
+                                 i, j, cqe[0], cqe[1], cqe[2], cqe[3]);
                }
        }
 
-       BNX2X_ERR("def_c_idx(%u)  def_u_idx(%u)  def_x_idx(%u)"
-                 "  def_t_idx(%u)  def_att_idx(%u)  attn_state(%u)"
-                 "  spq_prod_idx(%u)\n",
-                 bp->def_c_idx, bp->def_u_idx, bp->def_x_idx, bp->def_t_idx,
-                 bp->def_att_idx, bp->attn_state, bp->spq_prod_idx);
+       /* Tx */
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
+
+               start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
+               end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
+               for (j = start; j != end; j = TX_BD(j + 1)) {
+                       struct sw_tx_bd *sw_bd = &fp->tx_buf_ring[j];
+
+                       BNX2X_ERR("fp%d: packet[%x]=[%p,%x]\n",
+                                 i, j, sw_bd->skb, sw_bd->first_bd);
+               }
+
+               start = TX_BD(fp->tx_bd_cons - 10);
+               end = TX_BD(fp->tx_bd_cons + 254);
+               for (j = start; j != end; j = TX_BD(j + 1)) {
+                       u32 *tx_bd = (u32 *)&fp->tx_desc_ring[j];
+
+                       BNX2X_ERR("fp%d: tx_bd[%x]=[%x:%x:%x:%x]\n",
+                                 i, j, tx_bd[0], tx_bd[1], tx_bd[2], tx_bd[3]);
+               }
+       }
 
        bnx2x_fw_dump(bp);
        bnx2x_mc_assert(bp);
@@ -625,6 +682,11 @@ static void bnx2x_int_enable(struct bnx2x *bp)
           val, port, addr, (msix ? "MSI-X" : (msi ? "MSI" : "INTx")));
 
        REG_WR(bp, addr, val);
+       /*
+        * Ensure that HC_CONFIG is written before leading/trailing edge config
+        */
+       mmiowb();
+       barrier();
 
        if (CHIP_IS_E1H(bp)) {
                /* init leading/trailing edge */
@@ -639,6 +701,9 @@ static void bnx2x_int_enable(struct bnx2x *bp)
                REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
                REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
        }
+
+       /* Make sure that interrupts are indeed enabled from here on */
+       mmiowb();
 }
 
 static void bnx2x_int_disable(struct bnx2x *bp)
@@ -670,6 +735,8 @@ static void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw)
 
        /* disable interrupt handling */
        atomic_inc(&bp->intr_sem);
+       smp_wmb(); /* Ensure that bp->intr_sem update is SMP-safe */
+
        if (disable_hw)
                /* prevent the HW from sending interrupts */
                bnx2x_int_disable(bp);
@@ -678,6 +745,9 @@ static void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw)
        if (msix) {
                synchronize_irq(bp->msix_table[0].vector);
                offset = 1;
+#ifdef BCM_CNIC
+               offset++;
+#endif
                for_each_queue(bp, i)
                        synchronize_irq(bp->msix_table[i + offset].vector);
        } else
@@ -711,23 +781,19 @@ static inline void bnx2x_ack_sb(struct bnx2x *bp, u8 sb_id,
        DP(BNX2X_MSG_OFF, "write 0x%08x to HC addr 0x%x\n",
           (*(u32 *)&igu_ack), hc_addr);
        REG_WR(bp, hc_addr, (*(u32 *)&igu_ack));
+
+       /* Make sure that ACK is written */
+       mmiowb();
+       barrier();
 }
 
-static inline u16 bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
+static inline void bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
 {
        struct host_status_block *fpsb = fp->status_blk;
-       u16 rc = 0;
 
        barrier(); /* status block is written to by the chip */
-       if (fp->fp_c_idx != fpsb->c_status_block.status_block_index) {
-               fp->fp_c_idx = fpsb->c_status_block.status_block_index;
-               rc |= 1;
-       }
-       if (fp->fp_u_idx != fpsb->u_status_block.status_block_index) {
-               fp->fp_u_idx = fpsb->u_status_block.status_block_index;
-               rc |= 2;
-       }
-       return rc;
+       fp->fp_c_idx = fpsb->c_status_block.status_block_index;
+       fp->fp_u_idx = fpsb->u_status_block.status_block_index;
 }
 
 static u16 bnx2x_ack_int(struct bnx2x *bp)
@@ -747,22 +813,11 @@ static u16 bnx2x_ack_int(struct bnx2x *bp)
  * fast path service functions
  */
 
-static inline int bnx2x_has_tx_work(struct bnx2x_fastpath *fp)
-{
-       u16 tx_cons_sb;
-
-       /* Tell compiler that status block fields can change */
-       barrier();
-       tx_cons_sb = le16_to_cpu(*fp->tx_cons_sb);
-       return (fp->tx_pkt_cons != tx_cons_sb);
-}
-
 static inline int bnx2x_has_tx_work_unload(struct bnx2x_fastpath *fp)
 {
        /* Tell compiler that consumer and producer can change */
        barrier();
        return (fp->tx_pkt_prod != fp->tx_pkt_cons);
-
 }
 
 /* free skb in the packet ring at pos idx
@@ -772,54 +827,53 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp,
                             u16 idx)
 {
        struct sw_tx_bd *tx_buf = &fp->tx_buf_ring[idx];
-       struct eth_tx_bd *tx_bd;
+       struct eth_tx_start_bd *tx_start_bd;
+       struct eth_tx_bd *tx_data_bd;
        struct sk_buff *skb = tx_buf->skb;
        u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
        int nbd;
 
+       /* prefetch skb end pointer to speedup dev_kfree_skb() */
+       prefetch(&skb->end);
+
        DP(BNX2X_MSG_OFF, "pkt_idx %d  buff @(%p)->skb %p\n",
           idx, tx_buf, skb);
 
        /* unmap first bd */
        DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
-       tx_bd = &fp->tx_desc_ring[bd_idx];
-       pci_unmap_single(bp->pdev, BD_UNMAP_ADDR(tx_bd),
-                        BD_UNMAP_LEN(tx_bd), PCI_DMA_TODEVICE);
+       tx_start_bd = &fp->tx_desc_ring[bd_idx].start_bd;
+       pci_unmap_single(bp->pdev, BD_UNMAP_ADDR(tx_start_bd),
+                        BD_UNMAP_LEN(tx_start_bd), PCI_DMA_TODEVICE);
 
-       nbd = le16_to_cpu(tx_bd->nbd) - 1;
-       new_cons = nbd + tx_buf->first_bd;
+       nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
 #ifdef BNX2X_STOP_ON_ERROR
-       if (nbd > (MAX_SKB_FRAGS + 2)) {
+       if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
                BNX2X_ERR("BAD nbd!\n");
                bnx2x_panic();
        }
 #endif
+       new_cons = nbd + tx_buf->first_bd;
 
-       /* Skip a parse bd and the TSO split header bd
-          since they have no mapping */
-       if (nbd)
-               bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
+       /* Get the next bd */
+       bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
 
-       if (tx_bd->bd_flags.as_bitfield & (ETH_TX_BD_FLAGS_IP_CSUM |
-                                          ETH_TX_BD_FLAGS_TCP_CSUM |
-                                          ETH_TX_BD_FLAGS_SW_LSO)) {
-               if (--nbd)
-                       bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
-               tx_bd = &fp->tx_desc_ring[bd_idx];
-               /* is this a TSO split header bd? */
-               if (tx_bd->bd_flags.as_bitfield & ETH_TX_BD_FLAGS_SW_LSO) {
-                       if (--nbd)
-                               bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
-               }
+       /* Skip a parse bd... */
+       --nbd;
+       bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
+
+       /* ...and the TSO split header bd since they have no mapping */
+       if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
+               --nbd;
+               bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
        }
 
        /* now free frags */
        while (nbd > 0) {
 
                DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
-               tx_bd = &fp->tx_desc_ring[bd_idx];
-               pci_unmap_page(bp->pdev, BD_UNMAP_ADDR(tx_bd),
-                              BD_UNMAP_LEN(tx_bd), PCI_DMA_TODEVICE);
+               tx_data_bd = &fp->tx_desc_ring[bd_idx].reg_bd;
+               pci_unmap_page(bp->pdev, BD_UNMAP_ADDR(tx_data_bd),
+                              BD_UNMAP_LEN(tx_data_bd), PCI_DMA_TODEVICE);
                if (--nbd)
                        bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
        }
@@ -856,16 +910,25 @@ static inline u16 bnx2x_tx_avail(struct bnx2x_fastpath *fp)
        return (s16)(fp->bp->tx_ring_size) - used;
 }
 
-static void bnx2x_tx_int(struct bnx2x_fastpath *fp, int work)
+static inline int bnx2x_has_tx_work(struct bnx2x_fastpath *fp)
+{
+       u16 hw_cons;
+
+       /* Tell compiler that status block fields can change */
+       barrier();
+       hw_cons = le16_to_cpu(*fp->tx_cons_sb);
+       return hw_cons != fp->tx_pkt_cons;
+}
+
+static int bnx2x_tx_int(struct bnx2x_fastpath *fp)
 {
        struct bnx2x *bp = fp->bp;
        struct netdev_queue *txq;
        u16 hw_cons, sw_cons, bd_cons = fp->tx_bd_cons;
-       int done = 0;
 
 #ifdef BNX2X_STOP_ON_ERROR
        if (unlikely(bp->panic))
-               return;
+               return -1;
 #endif
 
        txq = netdev_get_tx_queue(bp->dev, fp->index);
@@ -889,36 +952,33 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp, int work)
 */
                bd_cons = bnx2x_free_tx_pkt(bp, fp, pkt_cons);
                sw_cons++;
-               done++;
-
-               if (done == work)
-                       break;
        }
 
        fp->tx_pkt_cons = sw_cons;
        fp->tx_bd_cons = bd_cons;
 
-       /* Need to make the tx_bd_cons update visible to start_xmit()
-        * before checking for netif_tx_queue_stopped().  Without the
-        * memory barrier, there is a small possibility that start_xmit()
-        * will miss it and cause the queue to be stopped forever.
-        */
-       smp_mb();
-
        /* TBD need a thresh? */
        if (unlikely(netif_tx_queue_stopped(txq))) {
 
-               __netif_tx_lock(txq, smp_processor_id());
+               /* Need to make the tx_bd_cons update visible to start_xmit()
+                * before checking for netif_tx_queue_stopped().  Without the
+                * memory barrier, there is a small possibility that
+                * start_xmit() will miss it and cause the queue to be stopped
+                * forever.
+                */
+               smp_mb();
 
                if ((netif_tx_queue_stopped(txq)) &&
                    (bp->state == BNX2X_STATE_OPEN) &&
                    (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3))
                        netif_tx_wake_queue(txq);
-
-               __netif_tx_unlock(txq);
        }
+       return 0;
 }
 
+#ifdef BCM_CNIC
+static void bnx2x_cnic_cfc_comp(struct bnx2x *bp, int cid);
+#endif
 
 static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
                           union eth_rx_cqe *rr_cqe)
@@ -929,12 +989,12 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
 
        DP(BNX2X_MSG_SP,
           "fp %d  cid %d  got ramrod #%d  state is %x  type is %d\n",
-          FP_IDX(fp), cid, command, bp->state,
+          fp->index, cid, command, bp->state,
           rr_cqe->ramrod_cqe.ramrod_type);
 
        bp->spq_left++;
 
-       if (FP_IDX(fp)) {
+       if (fp->index) {
                switch (command | fp->state) {
                case (RAMROD_CMD_ID_ETH_CLIENT_SETUP |
                                                BNX2X_FP_STATE_OPENING):
@@ -975,15 +1035,24 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
                bnx2x_fp(bp, cid, state) = BNX2X_FP_STATE_CLOSED;
                break;
 
+#ifdef BCM_CNIC
+       case (RAMROD_CMD_ID_ETH_CFC_DEL | BNX2X_STATE_OPEN):
+               DP(NETIF_MSG_IFDOWN, "got delete ramrod for CID %d\n", cid);
+               bnx2x_cnic_cfc_comp(bp, cid);
+               break;
+#endif
 
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_OPEN):
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_DIAG):
                DP(NETIF_MSG_IFUP, "got set mac ramrod\n");
-               bp->set_mac_pending = 0;
+               bp->set_mac_pending--;
+               smp_wmb();
                break;
 
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_CLOSING_WAIT4_HALT):
                DP(NETIF_MSG_IFDOWN, "got (un)set mac ramrod\n");
+               bp->set_mac_pending--;
+               smp_wmb();
                break;
 
        default:
@@ -1411,7 +1480,7 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 
        for (i = 0; i < sizeof(struct ustorm_eth_rx_producers)/4; i++)
                REG_WR(bp, BAR_USTRORM_INTMEM +
-                      USTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
+                      USTORM_RX_PRODS_OFFSET(BP_PORT(bp), fp->cl_id) + i*4,
                       ((u32 *)&rx_prods)[i]);
 
        mmiowb(); /* keep prod updates ordered */
@@ -1452,7 +1521,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 
        DP(NETIF_MSG_RX_STATUS,
           "queue[%d]:  hw_comp_cons %u  sw_comp_cons %u\n",
-          FP_IDX(fp), hw_comp_cons, sw_comp_cons);
+          fp->index, hw_comp_cons, sw_comp_cons);
 
        while (sw_comp_cons != hw_comp_cons) {
                struct sw_rx_bd *rx_buf = NULL;
@@ -1465,6 +1534,13 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                bd_prod = RX_BD(bd_prod);
                bd_cons = RX_BD(bd_cons);
 
+               /* Prefetch the page containing the BD descriptor
+                  at producer's index. It will be needed when new skb is
+                  allocated */
+               prefetch((void *)(PAGE_ALIGN((unsigned long)
+                                            (&fp->rx_desc_ring[bd_prod])) -
+                                 PAGE_SIZE + 1));
+
                cqe = &fp->rx_comp_ring[comp_ring_cons];
                cqe_fp_flags = cqe->fast_path_cqe.type_error_flags;
 
@@ -1484,6 +1560,8 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                } else {
                        rx_buf = &fp->rx_buf_ring[bd_cons];
                        skb = rx_buf->skb;
+                       prefetch(skb);
+                       prefetch((u8 *)skb + 256);
                        len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
                        pad = cqe->fast_path_cqe.placement_offset;
 
@@ -1521,7 +1599,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                                                    len, cqe, comp_ring_cons);
 #ifdef BNX2X_STOP_ON_ERROR
                                        if (bp->panic)
-                                               return -EINVAL;
+                                               return 0;
 #endif
 
                                        bnx2x_update_sge_prod(fp,
@@ -1573,7 +1651,8 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 
                                skb = new_skb;
 
-                       } else if (bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0) {
+                       } else
+                       if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
                                pci_unmap_single(bp->pdev,
                                        pci_unmap_addr(rx_buf, mapping),
                                                 bp->rx_buf_size,
@@ -1603,6 +1682,7 @@ reuse_rx:
                }
 
                skb_record_rx_queue(skb, fp->index);
+
 #ifdef BCM_VLAN
                if ((bp->vlgrp != NULL) && (bp->flags & HW_VLAN_RX_FLAG) &&
                    (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) &
@@ -1648,7 +1728,6 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
 {
        struct bnx2x_fastpath *fp = fp_cookie;
        struct bnx2x *bp = fp->bp;
-       int index = FP_IDX(fp);
 
        /* Return here if interrupt is disabled */
        if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
@@ -1657,20 +1736,20 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
        }
 
        DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB [%d:%d]\n",
-          index, FP_SB_ID(fp));
-       bnx2x_ack_sb(bp, FP_SB_ID(fp), USTORM_ID, 0, IGU_INT_DISABLE, 0);
+          fp->index, fp->sb_id);
+       bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
 
 #ifdef BNX2X_STOP_ON_ERROR
        if (unlikely(bp->panic))
                return IRQ_HANDLED;
 #endif
 
+       /* Handle Rx and Tx according to MSI-X vector */
        prefetch(fp->rx_cons_sb);
        prefetch(fp->tx_cons_sb);
-       prefetch(&fp->status_blk->c_status_block.status_block_index);
        prefetch(&fp->status_blk->u_status_block.status_block_index);
-
-       napi_schedule(&bnx2x_fp(bp, index, napi));
+       prefetch(&fp->status_blk->c_status_block.status_block_index);
+       napi_schedule(&bnx2x_fp(bp, fp->index, napi));
 
        return IRQ_HANDLED;
 }
@@ -1680,13 +1759,14 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
        struct bnx2x *bp = netdev_priv(dev_instance);
        u16 status = bnx2x_ack_int(bp);
        u16 mask;
+       int i;
 
        /* Return here if interrupt is shared and it's not for us */
        if (unlikely(status == 0)) {
                DP(NETIF_MSG_INTR, "not our interrupt!\n");
                return IRQ_NONE;
        }
-       DP(NETIF_MSG_INTR, "got an interrupt  status %u\n", status);
+       DP(NETIF_MSG_INTR, "got an interrupt  status 0x%x\n", status);
 
        /* Return here if interrupt is disabled */
        if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
@@ -1699,20 +1779,37 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
                return IRQ_HANDLED;
 #endif
 
-       mask = 0x2 << bp->fp[0].sb_id;
-       if (status & mask) {
-               struct bnx2x_fastpath *fp = &bp->fp[0];
+       for (i = 0; i < BNX2X_NUM_QUEUES(bp); i++) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
 
-               prefetch(fp->rx_cons_sb);
-               prefetch(fp->tx_cons_sb);
-               prefetch(&fp->status_blk->c_status_block.status_block_index);
-               prefetch(&fp->status_blk->u_status_block.status_block_index);
+               mask = 0x2 << fp->sb_id;
+               if (status & mask) {
+                       /* Handle Rx and Tx according to SB id */
+                       prefetch(fp->rx_cons_sb);
+                       prefetch(&fp->status_blk->u_status_block.
+                                               status_block_index);
+                       prefetch(fp->tx_cons_sb);
+                       prefetch(&fp->status_blk->c_status_block.
+                                               status_block_index);
+                       napi_schedule(&bnx2x_fp(bp, fp->index, napi));
+                       status &= ~mask;
+               }
+       }
 
-               napi_schedule(&bnx2x_fp(bp, 0, napi));
+#ifdef BCM_CNIC
+       mask = 0x2 << CNIC_SB_ID(bp);
+       if (status & (mask | 0x1)) {
+               struct cnic_ops *c_ops = NULL;
+
+               rcu_read_lock();
+               c_ops = rcu_dereference(bp->cnic_ops);
+               if (c_ops)
+                       c_ops->cnic_handler(bp->cnic_data, NULL);
+               rcu_read_unlock();
 
                status &= ~mask;
        }
-
+#endif
 
        if (unlikely(status & 0x1)) {
                queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
@@ -2018,13 +2115,16 @@ static void bnx2x_calc_fc_adv(struct bnx2x *bp)
                bp->port.advertising &= ~(ADVERTISED_Asym_Pause |
                                          ADVERTISED_Pause);
                break;
+
        case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH:
                bp->port.advertising |= (ADVERTISED_Asym_Pause |
                                         ADVERTISED_Pause);
                break;
+
        case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC:
                bp->port.advertising |= ADVERTISED_Asym_Pause;
                break;
+
        default:
                bp->port.advertising &= ~(ADVERTISED_Asym_Pause |
                                          ADVERTISED_Pause);
@@ -2034,12 +2134,30 @@ static void bnx2x_calc_fc_adv(struct bnx2x *bp)
 
 static void bnx2x_link_report(struct bnx2x *bp)
 {
+       if (bp->flags & MF_FUNC_DIS) {
+               netif_carrier_off(bp->dev);
+               printk(KERN_ERR PFX "%s NIC Link is Down\n", bp->dev->name);
+               return;
+       }
+
        if (bp->link_vars.link_up) {
+               u16 line_speed;
+
                if (bp->state == BNX2X_STATE_OPEN)
                        netif_carrier_on(bp->dev);
                printk(KERN_INFO PFX "%s NIC Link is Up, ", bp->dev->name);
 
-               printk("%d Mbps ", bp->link_vars.line_speed);
+               line_speed = bp->link_vars.line_speed;
+               if (IS_E1HMF(bp)) {
+                       u16 vn_max_rate;
+
+                       vn_max_rate =
+                               ((bp->mf_config & FUNC_MF_CFG_MAX_BW_MASK) >>
+                                FUNC_MF_CFG_MAX_BW_SHIFT) * 100;
+                       if (vn_max_rate < line_speed)
+                               line_speed = vn_max_rate;
+               }
+               printk("%d Mbps ", line_speed);
 
                if (bp->link_vars.duplex == DUPLEX_FULL)
                        printk("full duplex");
@@ -2049,7 +2167,8 @@ static void bnx2x_link_report(struct bnx2x *bp)
                if (bp->link_vars.flow_ctrl != BNX2X_FLOW_CTRL_NONE) {
                        if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX) {
                                printk(", receive ");
-                               if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
+                               if (bp->link_vars.flow_ctrl &
+                                   BNX2X_FLOW_CTRL_TX)
                                        printk("& transmit ");
                        } else {
                                printk(", transmit ");
@@ -2064,7 +2183,7 @@ static void bnx2x_link_report(struct bnx2x *bp)
        }
 }
 
-static u8 bnx2x_initial_phy_init(struct bnx2x *bp)
+static u8 bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode)
 {
        if (!BP_NOMCP(bp)) {
                u8 rc;
@@ -2072,26 +2191,30 @@ static u8 bnx2x_initial_phy_init(struct bnx2x *bp)
                /* Initialize link parameters structure variables */
                /* It is recommended to turn off RX FC for jumbo frames
                   for better performance */
-               if (IS_E1HMF(bp))
-                       bp->link_params.req_fc_auto_adv = BNX2X_FLOW_CTRL_BOTH;
-               else if (bp->dev->mtu > 5000)
+               if (bp->dev->mtu > 5000)
                        bp->link_params.req_fc_auto_adv = BNX2X_FLOW_CTRL_TX;
                else
                        bp->link_params.req_fc_auto_adv = BNX2X_FLOW_CTRL_BOTH;
 
                bnx2x_acquire_phy_lock(bp);
+
+               if (load_mode == LOAD_DIAG)
+                       bp->link_params.loopback_mode = LOOPBACK_XGXS_10;
+
                rc = bnx2x_phy_init(&bp->link_params, &bp->link_vars);
+
                bnx2x_release_phy_lock(bp);
 
                bnx2x_calc_fc_adv(bp);
 
-               if (bp->link_vars.link_up)
+               if (CHIP_REV_IS_SLOW(bp) && bp->link_vars.link_up) {
+                       bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
                        bnx2x_link_report(bp);
-
+               }
 
                return rc;
        }
-       BNX2X_ERR("Bootcode is missing -not initializing link\n");
+       BNX2X_ERR("Bootcode is missing - can not initialize link\n");
        return -EINVAL;
 }
 
@@ -2104,7 +2227,7 @@ static void bnx2x_link_set(struct bnx2x *bp)
 
                bnx2x_calc_fc_adv(bp);
        } else
-               BNX2X_ERR("Bootcode is missing -not setting link\n");
+               BNX2X_ERR("Bootcode is missing - can not set link\n");
 }
 
 static void bnx2x__link_reset(struct bnx2x *bp)
@@ -2114,7 +2237,7 @@ static void bnx2x__link_reset(struct bnx2x *bp)
                bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1);
                bnx2x_release_phy_lock(bp);
        } else
-               BNX2X_ERR("Bootcode is missing -not resetting link\n");
+               BNX2X_ERR("Bootcode is missing - can not reset link\n");
 }
 
 static u8 bnx2x_link_test(struct bnx2x *bp)
@@ -2163,6 +2286,52 @@ static void bnx2x_init_port_minmax(struct bnx2x *bp)
        bp->cmng.fair_vars.fairness_timeout = fair_periodic_timeout_usec / 4;
 }
 
+/* Calculates the sum of vn_min_rates.
+   It's needed for further normalizing of the min_rates.
+   Returns:
+     sum of vn_min_rates.
+       or
+     0 - if all the min_rates are 0.
+     In the later case fainess algorithm should be deactivated.
+     If not all min_rates are zero then those that are zeroes will be set to 1.
+ */
+static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
+{
+       int all_zero = 1;
+       int port = BP_PORT(bp);
+       int vn;
+
+       bp->vn_weight_sum = 0;
+       for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+               int func = 2*vn + port;
+               u32 vn_cfg = SHMEM_RD(bp, mf_cfg.func_mf_config[func].config);
+               u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
+                                  FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
+
+               /* Skip hidden vns */
+               if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE)
+                       continue;
+
+               /* If min rate is zero - set it to 1 */
+               if (!vn_min_rate)
+                       vn_min_rate = DEF_MIN_RATE;
+               else
+                       all_zero = 0;
+
+               bp->vn_weight_sum += vn_min_rate;
+       }
+
+       /* ... only if all min rates are zeros - disable fairness */
+       if (all_zero) {
+               bp->cmng.flags.cmng_enables &=
+                                       ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
+               DP(NETIF_MSG_IFUP, "All MIN values are zeroes"
+                  "  fairness will be disabled\n");
+       } else
+               bp->cmng.flags.cmng_enables |=
+                                       CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
+}
+
 static void bnx2x_init_vn_minmax(struct bnx2x *bp, int func)
 {
        struct rate_shaping_vars_per_vn m_rs_vn;
@@ -2179,17 +2348,14 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int func)
        } else {
                vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
                                FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
-               /* If fairness is enabled (not all min rates are zeroes) and
-                  if current min rate is zero - set it to 1.
-                  This is a requirement of the algorithm. */
-               if (bp->vn_weight_sum && (vn_min_rate == 0))
+               /* If min rate is zero - set it to 1 */
+               if (!vn_min_rate)
                        vn_min_rate = DEF_MIN_RATE;
                vn_max_rate = ((vn_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
                                FUNC_MF_CFG_MAX_BW_SHIFT) * 100;
        }
-
        DP(NETIF_MSG_IFUP,
-          "func %d: vn_min_rate=%d  vn_max_rate=%d  vn_weight_sum=%d\n",
+          "func %d: vn_min_rate %d  vn_max_rate %d  vn_weight_sum %d\n",
           func, vn_min_rate, vn_max_rate, bp->vn_weight_sum);
 
        memset(&m_rs_vn, 0, sizeof(struct rate_shaping_vars_per_vn));
@@ -2240,7 +2406,7 @@ static void bnx2x_link_attn(struct bnx2x *bp)
        if (bp->link_vars.link_up) {
 
                /* dropless flow control */
-               if (CHIP_IS_E1H(bp)) {
+               if (CHIP_IS_E1H(bp) && bp->dropless_fc) {
                        int port = BP_PORT(bp);
                        u32 pause_enabled = 0;
 
@@ -2248,7 +2414,7 @@ static void bnx2x_link_attn(struct bnx2x *bp)
                                pause_enabled = 1;
 
                        REG_WR(bp, BAR_USTRORM_INTMEM +
-                              USTORM_PAUSE_ENABLED_OFFSET(port),
+                              USTORM_ETH_PAUSE_ENABLED_OFFSET(port),
                               pause_enabled);
                }
 
@@ -2260,8 +2426,7 @@ static void bnx2x_link_attn(struct bnx2x *bp)
                        memset(&(pstats->mac_stx[0]), 0,
                               sizeof(struct mac_stx));
                }
-               if ((bp->state == BNX2X_STATE_OPEN) ||
-                   (bp->state == BNX2X_STATE_DISABLED))
+               if (bp->state == BNX2X_STATE_OPEN)
                        bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
        }
 
@@ -2273,14 +2438,12 @@ static void bnx2x_link_attn(struct bnx2x *bp)
                int func;
                int vn;
 
+               /* Set the attention towards other drivers on the same port */
                for (vn = VN_0; vn < E1HVN_MAX; vn++) {
                        if (vn == BP_E1HVN(bp))
                                continue;
 
                        func = ((vn << 1) | port);
-
-                       /* Set the attention towards other drivers
-                          on the same port */
                        REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
                               (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
                }
@@ -2306,7 +2469,7 @@ static void bnx2x_link_attn(struct bnx2x *bp)
 
 static void bnx2x__link_status_update(struct bnx2x *bp)
 {
-       if (bp->state != BNX2X_STATE_OPEN)
+       if ((bp->state != BNX2X_STATE_OPEN) || (bp->flags & MF_FUNC_DIS))
                return;
 
        bnx2x_link_status_update(&bp->link_params, &bp->link_vars);
@@ -2316,6 +2479,8 @@ static void bnx2x__link_status_update(struct bnx2x *bp)
        else
                bnx2x_stats_handle(bp, STATS_EVENT_STOP);
 
+       bnx2x_calc_vn_weight_sum(bp);
+
        /* indicate link status */
        bnx2x_link_report(bp);
 }
@@ -2344,20 +2509,189 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
  * General service functions
  */
 
-/* the slow path queue is odd since completions arrive on the fastpath ring */
-static int bnx2x_sp_post(struct bnx2x *bp, int command, int cid,
-                        u32 data_hi, u32 data_lo, int common)
+/* send the MCP a request, block until there is a reply */
+u32 bnx2x_fw_command(struct bnx2x *bp, u32 command)
 {
        int func = BP_FUNC(bp);
+       u32 seq = ++bp->fw_seq;
+       u32 rc = 0;
+       u32 cnt = 1;
+       u8 delay = CHIP_REV_IS_SLOW(bp) ? 100 : 10;
 
-       DP(BNX2X_MSG_SP/*NETIF_MSG_TIMER*/,
-          "SPQE (%x:%x)  command %d  hw_cid %x  data (%x:%x)  left %x\n",
-          (u32)U64_HI(bp->spq_mapping), (u32)(U64_LO(bp->spq_mapping) +
-          (void *)bp->spq_prod_bd - (void *)bp->spq), command,
-          HW_CID(bp, cid), data_hi, data_lo, bp->spq_left);
+       mutex_lock(&bp->fw_mb_mutex);
+       SHMEM_WR(bp, func_mb[func].drv_mb_header, (command | seq));
+       DP(BNX2X_MSG_MCP, "wrote command (%x) to FW MB\n", (command | seq));
 
-#ifdef BNX2X_STOP_ON_ERROR
-       if (unlikely(bp->panic))
+       do {
+               /* let the FW do it's magic ... */
+               msleep(delay);
+
+               rc = SHMEM_RD(bp, func_mb[func].fw_mb_header);
+
+               /* Give the FW up to 5 second (500*10ms) */
+       } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 500));
+
+       DP(BNX2X_MSG_MCP, "[after %d ms] read (%x) seq is (%x) from FW MB\n",
+          cnt*delay, rc, seq);
+
+       /* is this a reply to our command? */
+       if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK))
+               rc &= FW_MSG_CODE_MASK;
+       else {
+               /* FW BUG! */
+               BNX2X_ERR("FW failed to respond!\n");
+               bnx2x_fw_dump(bp);
+               rc = 0;
+       }
+       mutex_unlock(&bp->fw_mb_mutex);
+
+       return rc;
+}
+
+static void bnx2x_set_storm_rx_mode(struct bnx2x *bp);
+static void bnx2x_set_eth_mac_addr_e1h(struct bnx2x *bp, int set);
+static void bnx2x_set_rx_mode(struct net_device *dev);
+
+static void bnx2x_e1h_disable(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
+
+       netif_tx_disable(bp->dev);
+
+       REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 0);
+
+       netif_carrier_off(bp->dev);
+}
+
+static void bnx2x_e1h_enable(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
+
+       REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 1);
+
+       /* Tx queue should be only reenabled */
+       netif_tx_wake_all_queues(bp->dev);
+
+       /*
+        * Should not call netif_carrier_on since it will be called if the link
+        * is up when checking for link state
+        */
+}
+
+static void bnx2x_update_min_max(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
+       int vn, i;
+
+       /* Init rate shaping and fairness contexts */
+       bnx2x_init_port_minmax(bp);
+
+       bnx2x_calc_vn_weight_sum(bp);
+
+       for (vn = VN_0; vn < E1HVN_MAX; vn++)
+               bnx2x_init_vn_minmax(bp, 2*vn + port);
+
+       if (bp->port.pmf) {
+               int func;
+
+               /* Set the attention towards other drivers on the same port */
+               for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+                       if (vn == BP_E1HVN(bp))
+                               continue;
+
+                       func = ((vn << 1) | port);
+                       REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
+                              (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
+               }
+
+               /* Store it to internal memory */
+               for (i = 0; i < sizeof(struct cmng_struct_per_port) / 4; i++)
+                       REG_WR(bp, BAR_XSTRORM_INTMEM +
+                              XSTORM_CMNG_PER_PORT_VARS_OFFSET(port) + i*4,
+                              ((u32 *)(&bp->cmng))[i]);
+       }
+}
+
+static void bnx2x_dcc_event(struct bnx2x *bp, u32 dcc_event)
+{
+       DP(BNX2X_MSG_MCP, "dcc_event 0x%x\n", dcc_event);
+
+       if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) {
+
+               /*
+                * This is the only place besides the function initialization
+                * where the bp->flags can change so it is done without any
+                * locks
+                */
+               if (bp->mf_config & FUNC_MF_CFG_FUNC_DISABLED) {
+                       DP(NETIF_MSG_IFDOWN, "mf_cfg function disabled\n");
+                       bp->flags |= MF_FUNC_DIS;
+
+                       bnx2x_e1h_disable(bp);
+               } else {
+                       DP(NETIF_MSG_IFUP, "mf_cfg function enabled\n");
+                       bp->flags &= ~MF_FUNC_DIS;
+
+                       bnx2x_e1h_enable(bp);
+               }
+               dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF;
+       }
+       if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) {
+
+               bnx2x_update_min_max(bp);
+               dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION;
+       }
+
+       /* Report results to MCP */
+       if (dcc_event)
+               bnx2x_fw_command(bp, DRV_MSG_CODE_DCC_FAILURE);
+       else
+               bnx2x_fw_command(bp, DRV_MSG_CODE_DCC_OK);
+}
+
+/* must be called under the spq lock */
+static inline struct eth_spe *bnx2x_sp_get_next(struct bnx2x *bp)
+{
+       struct eth_spe *next_spe = bp->spq_prod_bd;
+
+       if (bp->spq_prod_bd == bp->spq_last_bd) {
+               bp->spq_prod_bd = bp->spq;
+               bp->spq_prod_idx = 0;
+               DP(NETIF_MSG_TIMER, "end of spq\n");
+       } else {
+               bp->spq_prod_bd++;
+               bp->spq_prod_idx++;
+       }
+       return next_spe;
+}
+
+/* must be called under the spq lock */
+static inline void bnx2x_sp_prod_update(struct bnx2x *bp)
+{
+       int func = BP_FUNC(bp);
+
+       /* Make sure that BD data is updated before writing the producer */
+       wmb();
+
+       REG_WR(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
+              bp->spq_prod_idx);
+       mmiowb();
+}
+
+/* the slow path queue is odd since completions arrive on the fastpath ring */
+static int bnx2x_sp_post(struct bnx2x *bp, int command, int cid,
+                        u32 data_hi, u32 data_lo, int common)
+{
+       struct eth_spe *spe;
+
+       DP(BNX2X_MSG_SP/*NETIF_MSG_TIMER*/,
+          "SPQE (%x:%x)  command %d  hw_cid %x  data (%x:%x)  left %x\n",
+          (u32)U64_HI(bp->spq_mapping), (u32)(U64_LO(bp->spq_mapping) +
+          (void *)bp->spq_prod_bd - (void *)bp->spq), command,
+          HW_CID(bp, cid), data_hi, data_lo, bp->spq_left);
+
+#ifdef BNX2X_STOP_ON_ERROR
+       if (unlikely(bp->panic))
                return -EIO;
 #endif
 
@@ -2370,33 +2704,23 @@ static int bnx2x_sp_post(struct bnx2x *bp, int command, int cid,
                return -EBUSY;
        }
 
+       spe = bnx2x_sp_get_next(bp);
+
        /* CID needs port number to be encoded int it */
-       bp->spq_prod_bd->hdr.conn_and_cmd_data =
+       spe->hdr.conn_and_cmd_data =
                        cpu_to_le32(((command << SPE_HDR_CMD_ID_SHIFT) |
                                     HW_CID(bp, cid)));
-       bp->spq_prod_bd->hdr.type = cpu_to_le16(ETH_CONNECTION_TYPE);
+       spe->hdr.type = cpu_to_le16(ETH_CONNECTION_TYPE);
        if (common)
-               bp->spq_prod_bd->hdr.type |=
+               spe->hdr.type |=
                        cpu_to_le16((1 << SPE_HDR_COMMON_RAMROD_SHIFT));
 
-       bp->spq_prod_bd->data.mac_config_addr.hi = cpu_to_le32(data_hi);
-       bp->spq_prod_bd->data.mac_config_addr.lo = cpu_to_le32(data_lo);
+       spe->data.mac_config_addr.hi = cpu_to_le32(data_hi);
+       spe->data.mac_config_addr.lo = cpu_to_le32(data_lo);
 
        bp->spq_left--;
 
-       if (bp->spq_prod_bd == bp->spq_last_bd) {
-               bp->spq_prod_bd = bp->spq;
-               bp->spq_prod_idx = 0;
-               DP(NETIF_MSG_TIMER, "end of spq\n");
-
-       } else {
-               bp->spq_prod_bd++;
-               bp->spq_prod_idx++;
-       }
-
-       REG_WR(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
-              bp->spq_prod_idx);
-
+       bnx2x_sp_prod_update(bp);
        spin_unlock_bh(&bp->spq_lock);
        return 0;
 }
@@ -2563,11 +2887,28 @@ static void bnx2x_attn_int_asserted(struct bnx2x *bp, u32 asserted)
        }
 }
 
+static inline void bnx2x_fan_failure(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
+
+       /* mark the failure */
+       bp->link_params.ext_phy_config &= ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK;
+       bp->link_params.ext_phy_config |= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE;
+       SHMEM_WR(bp, dev_info.port_hw_config[port].external_phy_config,
+                bp->link_params.ext_phy_config);
+
+       /* log the failure */
+       printk(KERN_ERR PFX "Fan Failure on Network Controller %s has caused"
+              " the driver to shutdown the card to prevent permanent"
+              " damage.  Please contact Dell Support for assistance\n",
+              bp->dev->name);
+}
+
 static inline void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn)
 {
        int port = BP_PORT(bp);
        int reg_offset;
-       u32 val;
+       u32 val, swap_val, swap_override;
 
        reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 :
                             MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0);
@@ -2580,36 +2921,32 @@ static inline void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn)
 
                BNX2X_ERR("SPIO5 hw attention\n");
 
+               /* Fan failure attention */
                switch (XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config)) {
                case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
-                       /* Fan failure attention */
-
+                       /* Low power mode is controlled by GPIO 2 */
+                       bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_2,
+                                      MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
                        /* The PHY reset is controlled by GPIO 1 */
                        bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
                                       MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
-                       /* Low power mode is controlled by GPIO 2 */
-                       bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_2,
+                       break;
+
+               case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727:
+                       /* The PHY reset is controlled by GPIO 1 */
+                       /* fake the port number to cancel the swap done in
+                          set_gpio() */
+                       swap_val = REG_RD(bp, NIG_REG_PORT_SWAP);
+                       swap_override = REG_RD(bp, NIG_REG_STRAP_OVERRIDE);
+                       port = (swap_val && swap_override) ^ 1;
+                       bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
                                       MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
-                       /* mark the failure */
-                       bp->link_params.ext_phy_config &=
-                                       ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK;
-                       bp->link_params.ext_phy_config |=
-                                       PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE;
-                       SHMEM_WR(bp,
-                                dev_info.port_hw_config[port].
-                                                       external_phy_config,
-                                bp->link_params.ext_phy_config);
-                       /* log the failure */
-                       printk(KERN_ERR PFX "Fan Failure on Network"
-                              " Controller %s has caused the driver to"
-                              " shutdown the card to prevent permanent"
-                              " damage.  Please contact Dell Support for"
-                              " assistance\n", bp->dev->name);
                        break;
 
                default:
                        break;
                }
+               bnx2x_fan_failure(bp);
        }
 
        if (attn & (AEU_INPUTS_ATTN_BITS_GPIO3_FUNCTION_0 |
@@ -2626,7 +2963,7 @@ static inline void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn)
                REG_WR(bp, reg_offset, val);
 
                BNX2X_ERR("FATAL HW block attention set0 0x%x\n",
-                         (attn & HW_INTERRUT_ASSERT_SET_0));
+                         (u32)(attn & HW_INTERRUT_ASSERT_SET_0));
                bnx2x_panic();
        }
 }
@@ -2635,7 +2972,7 @@ static inline void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
 {
        u32 val;
 
-       if (attn & BNX2X_DOORQ_ASSERT) {
+       if (attn & AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT) {
 
                val = REG_RD(bp, DORQ_REG_DORQ_INT_STS_CLR);
                BNX2X_ERR("DB hw attention 0x%x\n", val);
@@ -2657,7 +2994,7 @@ static inline void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
                REG_WR(bp, reg_offset, val);
 
                BNX2X_ERR("FATAL HW block attention set1 0x%x\n",
-                         (attn & HW_INTERRUT_ASSERT_SET_1));
+                         (u32)(attn & HW_INTERRUT_ASSERT_SET_1));
                bnx2x_panic();
        }
 }
@@ -2697,7 +3034,7 @@ static inline void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn)
                REG_WR(bp, reg_offset, val);
 
                BNX2X_ERR("FATAL HW block attention set2 0x%x\n",
-                         (attn & HW_INTERRUT_ASSERT_SET_2));
+                         (u32)(attn & HW_INTERRUT_ASSERT_SET_2));
                bnx2x_panic();
        }
 }
@@ -2712,9 +3049,14 @@ static inline void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn)
                        int func = BP_FUNC(bp);
 
                        REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0);
+                       bp->mf_config = SHMEM_RD(bp,
+                                          mf_cfg.func_mf_config[func].config);
+                       val = SHMEM_RD(bp, func_mb[func].drv_status);
+                       if (val & DRV_STATUS_DCC_EVENT_MASK)
+                               bnx2x_dcc_event(bp,
+                                           (val & DRV_STATUS_DCC_EVENT_MASK));
                        bnx2x__link_status_update(bp);
-                       if (SHMEM_RD(bp, func_mb[func].drv_status) &
-                                                       DRV_STATUS_PMF)
+                       if ((bp->port.pmf == 0) && (val & DRV_STATUS_PMF))
                                bnx2x_pmf_update(bp);
 
                } else if (attn & BNX2X_MC_ASSERT_BITS) {
@@ -2912,6 +3254,17 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
                return IRQ_HANDLED;
 #endif
 
+#ifdef BCM_CNIC
+       {
+               struct cnic_ops *c_ops;
+
+               rcu_read_lock();
+               c_ops = rcu_dereference(bp->cnic_ops);
+               if (c_ops)
+                       c_ops->cnic_handler(bp->cnic_data, NULL);
+               rcu_read_unlock();
+       }
+#endif
        queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
 
        return IRQ_HANDLED;
@@ -2994,8 +3347,8 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
 
 #define UPDATE_EXTEND_TSTAT(s, t) \
        do { \
-               diff = le32_to_cpu(tclient->s) - old_tclient->s; \
-               old_tclient->s = le32_to_cpu(tclient->s); \
+               diff = le32_to_cpu(tclient->s) - le32_to_cpu(old_tclient->s); \
+               old_tclient->s = tclient->s; \
                ADD_EXTEND_64(qstats->t##_hi, qstats->t##_lo, diff); \
        } while (0)
 
@@ -3008,8 +3361,8 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
 
 #define UPDATE_EXTEND_XSTAT(s, t) \
        do { \
-               diff = le32_to_cpu(xclient->s) - old_xclient->s; \
-               old_xclient->s = le32_to_cpu(xclient->s); \
+               diff = le32_to_cpu(xclient->s) - le32_to_cpu(old_xclient->s); \
+               old_xclient->s = xclient->s; \
                ADD_EXTEND_64(qstats->t##_hi, qstats->t##_lo, diff); \
        } while (0)
 
@@ -3073,53 +3426,6 @@ static void bnx2x_storm_stats_post(struct bnx2x *bp)
        }
 }
 
-static void bnx2x_stats_init(struct bnx2x *bp)
-{
-       int port = BP_PORT(bp);
-       int i;
-
-       bp->stats_pending = 0;
-       bp->executer_idx = 0;
-       bp->stats_counter = 0;
-
-       /* port stats */
-       if (!BP_NOMCP(bp))
-               bp->port.port_stx = SHMEM_RD(bp, port_mb[port].port_stx);
-       else
-               bp->port.port_stx = 0;
-       DP(BNX2X_MSG_STATS, "port_stx 0x%x\n", bp->port.port_stx);
-
-       memset(&(bp->port.old_nig_stats), 0, sizeof(struct nig_stats));
-       bp->port.old_nig_stats.brb_discard =
-                       REG_RD(bp, NIG_REG_STAT0_BRB_DISCARD + port*0x38);
-       bp->port.old_nig_stats.brb_truncate =
-                       REG_RD(bp, NIG_REG_STAT0_BRB_TRUNCATE + port*0x38);
-       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT0 + port*0x50,
-                   &(bp->port.old_nig_stats.egress_mac_pkt0_lo), 2);
-       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT1 + port*0x50,
-                   &(bp->port.old_nig_stats.egress_mac_pkt1_lo), 2);
-
-       /* function stats */
-       for_each_queue(bp, i) {
-               struct bnx2x_fastpath *fp = &bp->fp[i];
-
-               memset(&fp->old_tclient, 0,
-                      sizeof(struct tstorm_per_client_stats));
-               memset(&fp->old_uclient, 0,
-                      sizeof(struct ustorm_per_client_stats));
-               memset(&fp->old_xclient, 0,
-                      sizeof(struct xstorm_per_client_stats));
-               memset(&fp->eth_q_stats, 0, sizeof(struct bnx2x_eth_q_stats));
-       }
-
-       memset(&bp->dev->stats, 0, sizeof(struct net_device_stats));
-       memset(&bp->eth_stats, 0, sizeof(struct bnx2x_eth_stats));
-
-       bp->stats_state = STATS_STATE_DISABLED;
-       if (IS_E1HMF(bp) && bp->port.pmf && bp->port.port_stx)
-               bnx2x_stats_handle(bp, STATS_EVENT_PMF);
-}
-
 static void bnx2x_hw_stats_post(struct bnx2x *bp)
 {
        struct dmae_command *dmae = &bp->stats_dmae;
@@ -3518,7 +3824,10 @@ static void bnx2x_bmac_stats_update(struct bnx2x *bp)
        struct bmac_stats *new = bnx2x_sp(bp, mac_stats.bmac_stats);
        struct host_port_stats *pstats = bnx2x_sp(bp, port_stats);
        struct bnx2x_eth_stats *estats = &bp->eth_stats;
-       struct regpair diff;
+       struct {
+               u32 lo;
+               u32 hi;
+       } diff;
 
        UPDATE_STAT64(rx_stat_grerb, rx_stat_ifhcinbadoctets);
        UPDATE_STAT64(rx_stat_grfcs, rx_stat_dot3statsfcserrors);
@@ -3624,7 +3933,10 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp)
        struct nig_stats *old = &(bp->port.old_nig_stats);
        struct host_port_stats *pstats = bnx2x_sp(bp, port_stats);
        struct bnx2x_eth_stats *estats = &bp->eth_stats;
-       struct regpair diff;
+       struct {
+               u32 lo;
+               u32 hi;
+       } diff;
        u32 nig_timer_max;
 
        if (bp->link_vars.mac_type == MAC_TYPE_BMAC)
@@ -3634,7 +3946,7 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp)
                bnx2x_emac_stats_update(bp);
 
        else { /* unreached */
-               BNX2X_ERR("stats updated by dmae but no MAC active\n");
+               BNX2X_ERR("stats updated by DMAE but no MAC active\n");
                return -1;
        }
 
@@ -3674,7 +3986,8 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
        struct bnx2x_eth_stats *estats = &bp->eth_stats;
        int i;
 
-       memset(&(fstats->total_bytes_received_hi), 0,
+       memcpy(&(fstats->total_bytes_received_hi),
+              &(bnx2x_sp(bp, func_stats_base)->total_bytes_received_hi),
               sizeof(struct host_func_stats) - 2*sizeof(u32));
        estats->error_bytes_received_hi = 0;
        estats->error_bytes_received_lo = 0;
@@ -3722,11 +4035,24 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
                }
 
                qstats->total_bytes_received_hi =
-               qstats->valid_bytes_received_hi =
-                               le32_to_cpu(tclient->total_rcv_bytes.hi);
+                       le32_to_cpu(tclient->rcv_broadcast_bytes.hi);
                qstats->total_bytes_received_lo =
+                       le32_to_cpu(tclient->rcv_broadcast_bytes.lo);
+
+               ADD_64(qstats->total_bytes_received_hi,
+                      le32_to_cpu(tclient->rcv_multicast_bytes.hi),
+                      qstats->total_bytes_received_lo,
+                      le32_to_cpu(tclient->rcv_multicast_bytes.lo));
+
+               ADD_64(qstats->total_bytes_received_hi,
+                      le32_to_cpu(tclient->rcv_unicast_bytes.hi),
+                      qstats->total_bytes_received_lo,
+                      le32_to_cpu(tclient->rcv_unicast_bytes.lo));
+
+               qstats->valid_bytes_received_hi =
+                                       qstats->total_bytes_received_hi;
                qstats->valid_bytes_received_lo =
-                               le32_to_cpu(tclient->total_rcv_bytes.lo);
+                                       qstats->total_bytes_received_lo;
 
                qstats->error_bytes_received_hi =
                                le32_to_cpu(tclient->rcv_error_bytes.hi);
@@ -3759,9 +4085,19 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
                UPDATE_EXTEND_USTAT(bcast_no_buff_pkts, no_buff_discard);
 
                qstats->total_bytes_transmitted_hi =
-                               le32_to_cpu(xclient->total_sent_bytes.hi);
+                               le32_to_cpu(xclient->unicast_bytes_sent.hi);
                qstats->total_bytes_transmitted_lo =
-                               le32_to_cpu(xclient->total_sent_bytes.lo);
+                               le32_to_cpu(xclient->unicast_bytes_sent.lo);
+
+               ADD_64(qstats->total_bytes_transmitted_hi,
+                      le32_to_cpu(xclient->multicast_bytes_sent.hi),
+                      qstats->total_bytes_transmitted_lo,
+                      le32_to_cpu(xclient->multicast_bytes_sent.lo));
+
+               ADD_64(qstats->total_bytes_transmitted_hi,
+                      le32_to_cpu(xclient->broadcast_bytes_sent.hi),
+                      qstats->total_bytes_transmitted_lo,
+                      le32_to_cpu(xclient->broadcast_bytes_sent.lo));
 
                UPDATE_EXTEND_XSTAT(unicast_pkts_sent,
                                        total_unicast_packets_transmitted);
@@ -3961,6 +4297,8 @@ static void bnx2x_stats_update(struct bnx2x *bp)
        bnx2x_drv_stats_update(bp);
 
        if (bp->msglevel & NETIF_MSG_TIMER) {
+               struct bnx2x_fastpath *fp0_rx = bp->fp;
+               struct bnx2x_fastpath *fp0_tx = bp->fp;
                struct tstorm_per_client_stats *old_tclient =
                                                        &bp->fp->old_tclient;
                struct bnx2x_eth_q_stats *qstats = &bp->fp->eth_q_stats;
@@ -3971,13 +4309,13 @@ static void bnx2x_stats_update(struct bnx2x *bp)
                printk(KERN_DEBUG "%s:\n", bp->dev->name);
                printk(KERN_DEBUG "  tx avail (%4x)  tx hc idx (%x)"
                                  "  tx pkt (%lx)\n",
-                      bnx2x_tx_avail(bp->fp),
-                      le16_to_cpu(*bp->fp->tx_cons_sb), nstats->tx_packets);
+                      bnx2x_tx_avail(fp0_tx),
+                      le16_to_cpu(*fp0_tx->tx_cons_sb), nstats->tx_packets);
                printk(KERN_DEBUG "  rx usage (%4x)  rx hc idx (%x)"
                                  "  rx pkt (%lx)\n",
-                      (u16)(le16_to_cpu(*bp->fp->rx_cons_sb) -
-                            bp->fp->rx_comp_cons),
-                      le16_to_cpu(*bp->fp->rx_cons_sb), nstats->rx_packets);
+                      (u16)(le16_to_cpu(*fp0_rx->rx_cons_sb) -
+                            fp0_rx->rx_comp_cons),
+                      le16_to_cpu(*fp0_rx->rx_cons_sb), nstats->rx_packets);
                printk(KERN_DEBUG "  %s (Xoff events %u)  brb drops %u  "
                                  "brb truncate %u\n",
                       (netif_queue_stopped(bp->dev) ? "Xoff" : "Xon"),
@@ -3988,12 +4326,12 @@ static void bnx2x_stats_update(struct bnx2x *bp)
                        "mac_discard %u  mac_filter_discard %u  "
                        "xxovrflow_discard %u  brb_truncate_discard %u  "
                        "ttl0_discard %u\n",
-                      old_tclient->checksum_discard,
+                      le32_to_cpu(old_tclient->checksum_discard),
                       bnx2x_hilo(&qstats->etherstatsoverrsizepkts_hi),
                       bnx2x_hilo(&qstats->no_buff_discard_hi),
                       estats->mac_discard, estats->mac_filter_discard,
                       estats->xxoverflow_discard, estats->brb_truncate_discard,
-                      old_tclient->ttl0_discard);
+                      le32_to_cpu(old_tclient->ttl0_discard));
 
                for_each_queue(bp, i) {
                        printk(KERN_DEBUG "[%d]: %lu\t%lu\t%lu\n", i,
@@ -4123,37 +4461,207 @@ static void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
        bnx2x_stats_stm[state][event].action(bp);
        bp->stats_state = bnx2x_stats_stm[state][event].next_state;
 
+       /* Make sure the state has been "changed" */
+       smp_wmb();
+
        if ((event != STATS_EVENT_UPDATE) || (bp->msglevel & NETIF_MSG_TIMER))
                DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
                   state, event, bp->stats_state);
 }
 
-static void bnx2x_timer(unsigned long data)
+static void bnx2x_port_stats_base_init(struct bnx2x *bp)
 {
-       struct bnx2x *bp = (struct bnx2x *) data;
+       struct dmae_command *dmae;
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-       if (!netif_running(bp->dev))
+       /* sanity */
+       if (!bp->port.pmf || !bp->port.port_stx) {
+               BNX2X_ERR("BUG!\n");
                return;
+       }
 
-       if (atomic_read(&bp->intr_sem) != 0)
-               goto timer_restart;
+       bp->executer_idx = 0;
 
-       if (poll) {
-               struct bnx2x_fastpath *fp = &bp->fp[0];
-               int rc;
+       dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+       dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                       DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
+                       DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                       DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                       DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                       (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                       (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+       dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats));
+       dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats));
+       dmae->dst_addr_lo = bp->port.port_stx >> 2;
+       dmae->dst_addr_hi = 0;
+       dmae->len = sizeof(struct host_port_stats) >> 2;
+       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_val = DMAE_COMP_VAL;
 
-               bnx2x_tx_int(fp, 1000);
-               rc = bnx2x_rx_int(fp, 1000);
+       *stats_comp = 0;
+       bnx2x_hw_stats_post(bp);
+       bnx2x_stats_comp(bp);
+}
+
+static void bnx2x_func_stats_base_init(struct bnx2x *bp)
+{
+       int vn, vn_max = IS_E1HMF(bp) ? E1HVN_MAX : E1VN_MAX;
+       int port = BP_PORT(bp);
+       int func;
+       u32 func_stx;
+
+       /* sanity */
+       if (!bp->port.pmf || !bp->func_stx) {
+               BNX2X_ERR("BUG!\n");
+               return;
        }
 
-       if (!BP_NOMCP(bp)) {
-               int func = BP_FUNC(bp);
-               u32 drv_pulse;
-               u32 mcp_pulse;
+       /* save our func_stx */
+       func_stx = bp->func_stx;
 
-               ++bp->fw_drv_pulse_wr_seq;
-               bp->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK;
-               /* TBD - add SYSTEM_TIME */
+       for (vn = VN_0; vn < vn_max; vn++) {
+               func = 2*vn + port;
+
+               bp->func_stx = SHMEM_RD(bp, func_mb[func].fw_mb_param);
+               bnx2x_func_stats_init(bp);
+               bnx2x_hw_stats_post(bp);
+               bnx2x_stats_comp(bp);
+       }
+
+       /* restore our func_stx */
+       bp->func_stx = func_stx;
+}
+
+static void bnx2x_func_stats_base_update(struct bnx2x *bp)
+{
+       struct dmae_command *dmae = &bp->stats_dmae;
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+
+       /* sanity */
+       if (!bp->func_stx) {
+               BNX2X_ERR("BUG!\n");
+               return;
+       }
+
+       bp->executer_idx = 0;
+       memset(dmae, 0, sizeof(struct dmae_command));
+
+       dmae->opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
+                       DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
+                       DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                       DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                       DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                       (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                       (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+       dmae->src_addr_lo = bp->func_stx >> 2;
+       dmae->src_addr_hi = 0;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, func_stats_base));
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, func_stats_base));
+       dmae->len = sizeof(struct host_func_stats) >> 2;
+       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_val = DMAE_COMP_VAL;
+
+       *stats_comp = 0;
+       bnx2x_hw_stats_post(bp);
+       bnx2x_stats_comp(bp);
+}
+
+static void bnx2x_stats_init(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
+       int func = BP_FUNC(bp);
+       int i;
+
+       bp->stats_pending = 0;
+       bp->executer_idx = 0;
+       bp->stats_counter = 0;
+
+       /* port and func stats for management */
+       if (!BP_NOMCP(bp)) {
+               bp->port.port_stx = SHMEM_RD(bp, port_mb[port].port_stx);
+               bp->func_stx = SHMEM_RD(bp, func_mb[func].fw_mb_param);
+
+       } else {
+               bp->port.port_stx = 0;
+               bp->func_stx = 0;
+       }
+       DP(BNX2X_MSG_STATS, "port_stx 0x%x  func_stx 0x%x\n",
+          bp->port.port_stx, bp->func_stx);
+
+       /* port stats */
+       memset(&(bp->port.old_nig_stats), 0, sizeof(struct nig_stats));
+       bp->port.old_nig_stats.brb_discard =
+                       REG_RD(bp, NIG_REG_STAT0_BRB_DISCARD + port*0x38);
+       bp->port.old_nig_stats.brb_truncate =
+                       REG_RD(bp, NIG_REG_STAT0_BRB_TRUNCATE + port*0x38);
+       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT0 + port*0x50,
+                   &(bp->port.old_nig_stats.egress_mac_pkt0_lo), 2);
+       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT1 + port*0x50,
+                   &(bp->port.old_nig_stats.egress_mac_pkt1_lo), 2);
+
+       /* function stats */
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
+
+               memset(&fp->old_tclient, 0,
+                      sizeof(struct tstorm_per_client_stats));
+               memset(&fp->old_uclient, 0,
+                      sizeof(struct ustorm_per_client_stats));
+               memset(&fp->old_xclient, 0,
+                      sizeof(struct xstorm_per_client_stats));
+               memset(&fp->eth_q_stats, 0, sizeof(struct bnx2x_eth_q_stats));
+       }
+
+       memset(&bp->dev->stats, 0, sizeof(struct net_device_stats));
+       memset(&bp->eth_stats, 0, sizeof(struct bnx2x_eth_stats));
+
+       bp->stats_state = STATS_STATE_DISABLED;
+
+       if (bp->port.pmf) {
+               if (bp->port.port_stx)
+                       bnx2x_port_stats_base_init(bp);
+
+               if (bp->func_stx)
+                       bnx2x_func_stats_base_init(bp);
+
+       } else if (bp->func_stx)
+               bnx2x_func_stats_base_update(bp);
+}
+
+static void bnx2x_timer(unsigned long data)
+{
+       struct bnx2x *bp = (struct bnx2x *) data;
+
+       if (!netif_running(bp->dev))
+               return;
+
+       if (atomic_read(&bp->intr_sem) != 0)
+               goto timer_restart;
+
+       if (poll) {
+               struct bnx2x_fastpath *fp = &bp->fp[0];
+               int rc;
+
+               bnx2x_tx_int(fp);
+               rc = bnx2x_rx_int(fp, 1000);
+       }
+
+       if (!BP_NOMCP(bp)) {
+               int func = BP_FUNC(bp);
+               u32 drv_pulse;
+               u32 mcp_pulse;
+
+               ++bp->fw_drv_pulse_wr_seq;
+               bp->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK;
+               /* TBD - add SYSTEM_TIME */
                drv_pulse = bp->fw_drv_pulse_wr_seq;
                SHMEM_WR(bp, func_mb[func].drv_pulse_mb, drv_pulse);
 
@@ -4170,8 +4678,7 @@ static void bnx2x_timer(unsigned long data)
                }
        }
 
-       if ((bp->state == BNX2X_STATE_OPEN) ||
-           (bp->state == BNX2X_STATE_DISABLED))
+       if (bp->state == BNX2X_STATE_OPEN)
                bnx2x_stats_handle(bp, STATS_EVENT_UPDATE);
 
 timer_restart:
@@ -4190,12 +4697,13 @@ static void bnx2x_zero_sb(struct bnx2x *bp, int sb_id)
 {
        int port = BP_PORT(bp);
 
-       bnx2x_init_fill(bp, BAR_USTRORM_INTMEM +
-                       USTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), 0,
-                       sizeof(struct ustorm_status_block)/4);
-       bnx2x_init_fill(bp, BAR_CSTRORM_INTMEM +
-                       CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), 0,
-                       sizeof(struct cstorm_status_block)/4);
+       /* "CSTORM" */
+       bnx2x_init_fill(bp, CSEM_REG_FAST_MEMORY +
+                       CSTORM_SB_HOST_STATUS_BLOCK_U_OFFSET(port, sb_id), 0,
+                       CSTORM_SB_STATUS_BLOCK_U_SIZE / 4);
+       bnx2x_init_fill(bp, CSEM_REG_FAST_MEMORY +
+                       CSTORM_SB_HOST_STATUS_BLOCK_C_OFFSET(port, sb_id), 0,
+                       CSTORM_SB_STATUS_BLOCK_C_SIZE / 4);
 }
 
 static void bnx2x_init_sb(struct bnx2x *bp, struct host_status_block *sb,
@@ -4211,17 +4719,17 @@ static void bnx2x_init_sb(struct bnx2x *bp, struct host_status_block *sb,
                                            u_status_block);
        sb->u_status_block.status_block_id = sb_id;
 
-       REG_WR(bp, BAR_USTRORM_INTMEM +
-              USTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id), U64_LO(section));
-       REG_WR(bp, BAR_USTRORM_INTMEM +
-              ((USTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
+       REG_WR(bp, BAR_CSTRORM_INTMEM +
+              CSTORM_SB_HOST_SB_ADDR_U_OFFSET(port, sb_id), U64_LO(section));
+       REG_WR(bp, BAR_CSTRORM_INTMEM +
+              ((CSTORM_SB_HOST_SB_ADDR_U_OFFSET(port, sb_id)) + 4),
               U64_HI(section));
-       REG_WR8(bp, BAR_USTRORM_INTMEM + FP_USB_FUNC_OFF +
-               USTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
+       REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_USB_FUNC_OFF +
+               CSTORM_SB_HOST_STATUS_BLOCK_U_OFFSET(port, sb_id), func);
 
        for (index = 0; index < HC_USTORM_SB_NUM_INDICES; index++)
-               REG_WR16(bp, BAR_USTRORM_INTMEM +
-                        USTORM_SB_HC_DISABLE_OFFSET(port, sb_id, index), 1);
+               REG_WR16(bp, BAR_CSTRORM_INTMEM +
+                        CSTORM_SB_HC_DISABLE_U_OFFSET(port, sb_id, index), 1);
 
        /* CSTORM */
        section = ((u64)mapping) + offsetof(struct host_status_block,
@@ -4229,16 +4737,16 @@ static void bnx2x_init_sb(struct bnx2x *bp, struct host_status_block *sb,
        sb->c_status_block.status_block_id = sb_id;
 
        REG_WR(bp, BAR_CSTRORM_INTMEM +
-              CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id), U64_LO(section));
+              CSTORM_SB_HOST_SB_ADDR_C_OFFSET(port, sb_id), U64_LO(section));
        REG_WR(bp, BAR_CSTRORM_INTMEM +
-              ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
+              ((CSTORM_SB_HOST_SB_ADDR_C_OFFSET(port, sb_id)) + 4),
               U64_HI(section));
        REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_CSB_FUNC_OFF +
-               CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
+               CSTORM_SB_HOST_STATUS_BLOCK_C_OFFSET(port, sb_id), func);
 
        for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++)
                REG_WR16(bp, BAR_CSTRORM_INTMEM +
-                        CSTORM_SB_HC_DISABLE_OFFSET(port, sb_id, index), 1);
+                        CSTORM_SB_HC_DISABLE_C_OFFSET(port, sb_id, index), 1);
 
        bnx2x_ack_sb(bp, sb_id, CSTORM_ID, 0, IGU_INT_ENABLE, 0);
 }
@@ -4247,18 +4755,18 @@ static void bnx2x_zero_def_sb(struct bnx2x *bp)
 {
        int func = BP_FUNC(bp);
 
-       bnx2x_init_fill(bp, BAR_USTRORM_INTMEM +
-                       USTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), 0,
-                       sizeof(struct ustorm_def_status_block)/4);
-       bnx2x_init_fill(bp, BAR_CSTRORM_INTMEM +
-                       CSTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), 0,
-                       sizeof(struct cstorm_def_status_block)/4);
-       bnx2x_init_fill(bp, BAR_XSTRORM_INTMEM +
-                       XSTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), 0,
-                       sizeof(struct xstorm_def_status_block)/4);
-       bnx2x_init_fill(bp, BAR_TSTRORM_INTMEM +
+       bnx2x_init_fill(bp, TSEM_REG_FAST_MEMORY +
                        TSTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), 0,
                        sizeof(struct tstorm_def_status_block)/4);
+       bnx2x_init_fill(bp, CSEM_REG_FAST_MEMORY +
+                       CSTORM_DEF_SB_HOST_STATUS_BLOCK_U_OFFSET(func), 0,
+                       sizeof(struct cstorm_def_status_block_u)/4);
+       bnx2x_init_fill(bp, CSEM_REG_FAST_MEMORY +
+                       CSTORM_DEF_SB_HOST_STATUS_BLOCK_C_OFFSET(func), 0,
+                       sizeof(struct cstorm_def_status_block_c)/4);
+       bnx2x_init_fill(bp, XSEM_REG_FAST_MEMORY +
+                       XSTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), 0,
+                       sizeof(struct xstorm_def_status_block)/4);
 }
 
 static void bnx2x_init_def_sb(struct bnx2x *bp,
@@ -4308,17 +4816,17 @@ static void bnx2x_init_def_sb(struct bnx2x *bp,
                                            u_def_status_block);
        def_sb->u_def_status_block.status_block_id = sb_id;
 
-       REG_WR(bp, BAR_USTRORM_INTMEM +
-              USTORM_DEF_SB_HOST_SB_ADDR_OFFSET(func), U64_LO(section));
-       REG_WR(bp, BAR_USTRORM_INTMEM +
-              ((USTORM_DEF_SB_HOST_SB_ADDR_OFFSET(func)) + 4),
+       REG_WR(bp, BAR_CSTRORM_INTMEM +
+              CSTORM_DEF_SB_HOST_SB_ADDR_U_OFFSET(func), U64_LO(section));
+       REG_WR(bp, BAR_CSTRORM_INTMEM +
+              ((CSTORM_DEF_SB_HOST_SB_ADDR_U_OFFSET(func)) + 4),
               U64_HI(section));
-       REG_WR8(bp, BAR_USTRORM_INTMEM + DEF_USB_FUNC_OFF +
-               USTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), func);
+       REG_WR8(bp, BAR_CSTRORM_INTMEM + DEF_USB_FUNC_OFF +
+               CSTORM_DEF_SB_HOST_STATUS_BLOCK_U_OFFSET(func), func);
 
        for (index = 0; index < HC_USTORM_DEF_SB_NUM_INDICES; index++)
-               REG_WR16(bp, BAR_USTRORM_INTMEM +
-                        USTORM_DEF_SB_HC_DISABLE_OFFSET(func, index), 1);
+               REG_WR16(bp, BAR_CSTRORM_INTMEM +
+                        CSTORM_DEF_SB_HC_DISABLE_U_OFFSET(func, index), 1);
 
        /* CSTORM */
        section = ((u64)mapping) + offsetof(struct host_def_status_block,
@@ -4326,16 +4834,16 @@ static void bnx2x_init_def_sb(struct bnx2x *bp,
        def_sb->c_def_status_block.status_block_id = sb_id;
 
        REG_WR(bp, BAR_CSTRORM_INTMEM +
-              CSTORM_DEF_SB_HOST_SB_ADDR_OFFSET(func), U64_LO(section));
+              CSTORM_DEF_SB_HOST_SB_ADDR_C_OFFSET(func), U64_LO(section));
        REG_WR(bp, BAR_CSTRORM_INTMEM +
-              ((CSTORM_DEF_SB_HOST_SB_ADDR_OFFSET(func)) + 4),
+              ((CSTORM_DEF_SB_HOST_SB_ADDR_C_OFFSET(func)) + 4),
               U64_HI(section));
        REG_WR8(bp, BAR_CSTRORM_INTMEM + DEF_CSB_FUNC_OFF +
-               CSTORM_DEF_SB_HOST_STATUS_BLOCK_OFFSET(func), func);
+               CSTORM_DEF_SB_HOST_STATUS_BLOCK_C_OFFSET(func), func);
 
        for (index = 0; index < HC_CSTORM_DEF_SB_NUM_INDICES; index++)
                REG_WR16(bp, BAR_CSTRORM_INTMEM +
-                        CSTORM_DEF_SB_HC_DISABLE_OFFSET(func, index), 1);
+                        CSTORM_DEF_SB_HC_DISABLE_C_OFFSET(func, index), 1);
 
        /* TSTORM */
        section = ((u64)mapping) + offsetof(struct host_def_status_block,
@@ -4386,24 +4894,24 @@ static void bnx2x_update_coalesce(struct bnx2x *bp)
                int sb_id = bp->fp[i].sb_id;
 
                /* HC_INDEX_U_ETH_RX_CQ_CONS */
-               REG_WR8(bp, BAR_USTRORM_INTMEM +
-                       USTORM_SB_HC_TIMEOUT_OFFSET(port, sb_id,
-                                                   U_SB_ETH_RX_CQ_INDEX),
-                       bp->rx_ticks/12);
-               REG_WR16(bp, BAR_USTRORM_INTMEM +
-                        USTORM_SB_HC_DISABLE_OFFSET(port, sb_id,
-                                                    U_SB_ETH_RX_CQ_INDEX),
-                        bp->rx_ticks ? 0 : 1);
+               REG_WR8(bp, BAR_CSTRORM_INTMEM +
+                       CSTORM_SB_HC_TIMEOUT_U_OFFSET(port, sb_id,
+                                                     U_SB_ETH_RX_CQ_INDEX),
+                       bp->rx_ticks/(4 * BNX2X_BTR));
+               REG_WR16(bp, BAR_CSTRORM_INTMEM +
+                        CSTORM_SB_HC_DISABLE_U_OFFSET(port, sb_id,
+                                                      U_SB_ETH_RX_CQ_INDEX),
+                        (bp->rx_ticks/(4 * BNX2X_BTR)) ? 0 : 1);
 
                /* HC_INDEX_C_ETH_TX_CQ_CONS */
                REG_WR8(bp, BAR_CSTRORM_INTMEM +
-                       CSTORM_SB_HC_TIMEOUT_OFFSET(port, sb_id,
-                                                   C_SB_ETH_TX_CQ_INDEX),
-                       bp->tx_ticks/12);
+                       CSTORM_SB_HC_TIMEOUT_C_OFFSET(port, sb_id,
+                                                     C_SB_ETH_TX_CQ_INDEX),
+                       bp->tx_ticks/(4 * BNX2X_BTR));
                REG_WR16(bp, BAR_CSTRORM_INTMEM +
-                        CSTORM_SB_HC_DISABLE_OFFSET(port, sb_id,
-                                                    C_SB_ETH_TX_CQ_INDEX),
-                        bp->tx_ticks ? 0 : 1);
+                        CSTORM_SB_HC_DISABLE_C_OFFSET(port, sb_id,
+                                                      C_SB_ETH_TX_CQ_INDEX),
+                        (bp->tx_ticks/(4 * BNX2X_BTR)) ? 0 : 1);
        }
 }
 
@@ -4424,8 +4932,7 @@ static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
                if (fp->tpa_state[i] == BNX2X_TPA_START)
                        pci_unmap_single(bp->pdev,
                                         pci_unmap_addr(rx_buf, mapping),
-                                        bp->rx_buf_size,
-                                        PCI_DMA_FROMDEVICE);
+                                        bp->rx_buf_size, PCI_DMA_FROMDEVICE);
 
                dev_kfree_skb(skb);
                rx_buf->skb = NULL;
@@ -4446,7 +4953,7 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
 
        if (bp->flags & TPA_ENABLE_FLAG) {
 
-               for_each_rx_queue(bp, j) {
+               for_each_queue(bp, j) {
                        struct bnx2x_fastpath *fp = &bp->fp[j];
 
                        for (i = 0; i < max_agg_queues; i++) {
@@ -4469,7 +4976,7 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
                }
        }
 
-       for_each_rx_queue(bp, j) {
+       for_each_queue(bp, j) {
                struct bnx2x_fastpath *fp = &bp->fp[j];
 
                fp->rx_bd_cons = 0;
@@ -4581,21 +5088,25 @@ static void bnx2x_init_tx_ring(struct bnx2x *bp)
 {
        int i, j;
 
-       for_each_tx_queue(bp, j) {
+       for_each_queue(bp, j) {
                struct bnx2x_fastpath *fp = &bp->fp[j];
 
                for (i = 1; i <= NUM_TX_RINGS; i++) {
-                       struct eth_tx_bd *tx_bd =
-                               &fp->tx_desc_ring[TX_DESC_CNT * i - 1];
+                       struct eth_tx_next_bd *tx_next_bd =
+                               &fp->tx_desc_ring[TX_DESC_CNT * i - 1].next_bd;
 
-                       tx_bd->addr_hi =
+                       tx_next_bd->addr_hi =
                                cpu_to_le32(U64_HI(fp->tx_desc_mapping +
                                            BCM_PAGE_SIZE*(i % NUM_TX_RINGS)));
-                       tx_bd->addr_lo =
+                       tx_next_bd->addr_lo =
                                cpu_to_le32(U64_LO(fp->tx_desc_mapping +
                                            BCM_PAGE_SIZE*(i % NUM_TX_RINGS)));
                }
 
+               fp->tx_db.data.header.header = DOORBELL_HDR_DB_TYPE;
+               fp->tx_db.data.zero_fill1 = 0;
+               fp->tx_db.data.prod = 0;
+
                fp->tx_pkt_prod = 0;
                fp->tx_pkt_cons = 0;
                fp->tx_bd_prod = 0;
@@ -4631,16 +5142,16 @@ static void bnx2x_init_context(struct bnx2x *bp)
 {
        int i;
 
+       /* Rx */
        for_each_queue(bp, i) {
                struct eth_context *context = bnx2x_sp(bp, context[i].eth);
                struct bnx2x_fastpath *fp = &bp->fp[i];
                u8 cl_id = fp->cl_id;
-               u8 sb_id = FP_SB_ID(fp);
 
                context->ustorm_st_context.common.sb_index_numbers =
                                                BNX2X_RX_SB_INDEX_NUM;
-               context->ustorm_st_context.common.clientId = FP_CL_ID(fp);
-               context->ustorm_st_context.common.status_block_id = sb_id;
+               context->ustorm_st_context.common.clientId = cl_id;
+               context->ustorm_st_context.common.status_block_id = fp->sb_id;
                context->ustorm_st_context.common.flags =
                        (USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_MC_ALIGNMENT |
                         USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_STATISTICS);
@@ -4656,8 +5167,7 @@ static void bnx2x_init_context(struct bnx2x *bp)
                                                U64_LO(fp->rx_desc_mapping);
                if (!fp->disable_tpa) {
                        context->ustorm_st_context.common.flags |=
-                               (USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA |
-                                USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_SGE_RING);
+                               USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA;
                        context->ustorm_st_context.common.sge_buff_size =
                                (u16)min((u32)SGE_PAGE_SIZE*PAGES_PER_SGE,
                                         (u32)0xffff);
@@ -4665,6 +5175,13 @@ static void bnx2x_init_context(struct bnx2x *bp)
                                                U64_HI(fp->rx_sge_mapping);
                        context->ustorm_st_context.common.sge_page_base_lo =
                                                U64_LO(fp->rx_sge_mapping);
+
+                       context->ustorm_st_context.common.max_sges_for_packet =
+                               SGE_PAGE_ALIGN(bp->dev->mtu) >> SGE_PAGE_SHIFT;
+                       context->ustorm_st_context.common.max_sges_for_packet =
+                               ((context->ustorm_st_context.common.
+                                 max_sges_for_packet + PAGES_PER_SGE - 1) &
+                                (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT;
                }
 
                context->ustorm_ag_context.cdu_usage =
@@ -4672,24 +5189,28 @@ static void bnx2x_init_context(struct bnx2x *bp)
                                               CDU_REGION_NUMBER_UCM_AG,
                                               ETH_CONNECTION_TYPE);
 
+               context->xstorm_ag_context.cdu_reserved =
+                       CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, i),
+                                              CDU_REGION_NUMBER_XCM_AG,
+                                              ETH_CONNECTION_TYPE);
+       }
+
+       /* Tx */
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
+               struct eth_context *context =
+                       bnx2x_sp(bp, context[i].eth);
+
+               context->cstorm_st_context.sb_index_number =
+                                               C_SB_ETH_TX_CQ_INDEX;
+               context->cstorm_st_context.status_block_id = fp->sb_id;
+
                context->xstorm_st_context.tx_bd_page_base_hi =
                                                U64_HI(fp->tx_desc_mapping);
                context->xstorm_st_context.tx_bd_page_base_lo =
                                                U64_LO(fp->tx_desc_mapping);
-               context->xstorm_st_context.db_data_addr_hi =
-                                               U64_HI(fp->tx_prods_mapping);
-               context->xstorm_st_context.db_data_addr_lo =
-                                               U64_LO(fp->tx_prods_mapping);
                context->xstorm_st_context.statistics_data = (fp->cl_id |
                                XSTORM_ETH_ST_CONTEXT_STATISTICS_ENABLE);
-               context->cstorm_st_context.sb_index_number =
-                                               C_SB_ETH_TX_CQ_INDEX;
-               context->cstorm_st_context.status_block_id = sb_id;
-
-               context->xstorm_ag_context.cdu_reserved =
-                       CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, i),
-                                              CDU_REGION_NUMBER_XCM_AG,
-                                              ETH_CONNECTION_TYPE);
        }
 }
 
@@ -4706,7 +5227,7 @@ static void bnx2x_init_ind_table(struct bnx2x *bp)
        for (i = 0; i < TSTORM_INDIRECTION_TABLE_SIZE; i++)
                REG_WR8(bp, BAR_TSTRORM_INTMEM +
                        TSTORM_INDIRECTION_TABLE_OFFSET(func) + i,
-                       BP_CL_ID(bp) + (i % bp->num_rx_queues));
+                       bp->fp->cl_id + (i % bp->num_queues));
 }
 
 static void bnx2x_set_client_config(struct bnx2x *bp)
@@ -4727,18 +5248,6 @@ static void bnx2x_set_client_config(struct bnx2x *bp)
        }
 #endif
 
-       if (bp->flags & TPA_ENABLE_FLAG) {
-               tstorm_client.max_sges_for_packet =
-                       SGE_PAGE_ALIGN(tstorm_client.mtu) >> SGE_PAGE_SHIFT;
-               tstorm_client.max_sges_for_packet =
-                       ((tstorm_client.max_sges_for_packet +
-                         PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >>
-                       PAGES_PER_SGE_SHIFT;
-
-               tstorm_client.config_flags |=
-                               TSTORM_ETH_CLIENT_CONFIG_ENABLE_SGE_RING;
-       }
-
        for_each_queue(bp, i) {
                tstorm_client.statistics_counter_id = bp->fp[i].cl_id;
 
@@ -4758,9 +5267,16 @@ static void bnx2x_set_storm_rx_mode(struct bnx2x *bp)
 {
        struct tstorm_eth_mac_filter_config tstorm_mac_filter = {0};
        int mode = bp->rx_mode;
-       int mask = (1 << BP_L_ID(bp));
+       int mask = bp->rx_mode_cl_mask;
        int func = BP_FUNC(bp);
+       int port = BP_PORT(bp);
        int i;
+       /* All but management unicast packets should pass to the host as well */
+       u32 llh_mask =
+               NIG_LLH0_BRB1_DRV_MASK_REG_LLH0_BRB1_DRV_MASK_BRCST |
+               NIG_LLH0_BRB1_DRV_MASK_REG_LLH0_BRB1_DRV_MASK_MLCST |
+               NIG_LLH0_BRB1_DRV_MASK_REG_LLH0_BRB1_DRV_MASK_VLAN |
+               NIG_LLH0_BRB1_DRV_MASK_REG_LLH0_BRB1_DRV_MASK_NO_VLAN;
 
        DP(NETIF_MSG_IFUP, "rx mode %d  mask 0x%x\n", mode, mask);
 
@@ -4770,23 +5286,33 @@ static void bnx2x_set_storm_rx_mode(struct bnx2x *bp)
                tstorm_mac_filter.mcast_drop_all = mask;
                tstorm_mac_filter.bcast_drop_all = mask;
                break;
+
        case BNX2X_RX_MODE_NORMAL:
                tstorm_mac_filter.bcast_accept_all = mask;
                break;
+
        case BNX2X_RX_MODE_ALLMULTI:
                tstorm_mac_filter.mcast_accept_all = mask;
                tstorm_mac_filter.bcast_accept_all = mask;
                break;
+
        case BNX2X_RX_MODE_PROMISC:
                tstorm_mac_filter.ucast_accept_all = mask;
                tstorm_mac_filter.mcast_accept_all = mask;
                tstorm_mac_filter.bcast_accept_all = mask;
+               /* pass management unicast packets as well */
+               llh_mask |= NIG_LLH0_BRB1_DRV_MASK_REG_LLH0_BRB1_DRV_MASK_UNCST;
                break;
+
        default:
                BNX2X_ERR("BAD rx mode (%d)\n", mode);
                break;
        }
 
+       REG_WR(bp,
+              (port ? NIG_REG_LLH1_BRB1_DRV_MASK : NIG_REG_LLH0_BRB1_DRV_MASK),
+              llh_mask);
+
        for (i = 0; i < sizeof(struct tstorm_eth_mac_filter_config)/4; i++) {
                REG_WR(bp, BAR_TSTRORM_INTMEM +
                       TSTORM_MAC_FILTER_CONFIG_OFFSET(func) + i * 4,
@@ -4804,17 +5330,6 @@ static void bnx2x_init_internal_common(struct bnx2x *bp)
 {
        int i;
 
-       if (bp->flags & TPA_ENABLE_FLAG) {
-               struct tstorm_eth_tpa_exist tpa = {0};
-
-               tpa.tpa_exist = 1;
-
-               REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET,
-                      ((u32 *)&tpa)[0]);
-               REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET + 4,
-                      ((u32 *)&tpa)[1]);
-       }
-
        /* Zero this manually as its initialization is
           currently missing in the initTool */
        for (i = 0; i < (USTORM_AGG_DATA_SIZE >> 2); i++)
@@ -4826,53 +5341,14 @@ static void bnx2x_init_internal_port(struct bnx2x *bp)
 {
        int port = BP_PORT(bp);
 
-       REG_WR(bp, BAR_USTRORM_INTMEM + USTORM_HC_BTR_OFFSET(port), BNX2X_BTR);
-       REG_WR(bp, BAR_CSTRORM_INTMEM + CSTORM_HC_BTR_OFFSET(port), BNX2X_BTR);
+       REG_WR(bp,
+              BAR_CSTRORM_INTMEM + CSTORM_HC_BTR_U_OFFSET(port), BNX2X_BTR);
+       REG_WR(bp,
+              BAR_CSTRORM_INTMEM + CSTORM_HC_BTR_C_OFFSET(port), BNX2X_BTR);
        REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_HC_BTR_OFFSET(port), BNX2X_BTR);
        REG_WR(bp, BAR_XSTRORM_INTMEM + XSTORM_HC_BTR_OFFSET(port), BNX2X_BTR);
 }
 
-/* Calculates the sum of vn_min_rates.
-   It's needed for further normalizing of the min_rates.
-   Returns:
-     sum of vn_min_rates.
-       or
-     0 - if all the min_rates are 0.
-     In the later case fainess algorithm should be deactivated.
-     If not all min_rates are zero then those that are zeroes will be set to 1.
- */
-static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
-{
-       int all_zero = 1;
-       int port = BP_PORT(bp);
-       int vn;
-
-       bp->vn_weight_sum = 0;
-       for (vn = VN_0; vn < E1HVN_MAX; vn++) {
-               int func = 2*vn + port;
-               u32 vn_cfg =
-                       SHMEM_RD(bp, mf_cfg.func_mf_config[func].config);
-               u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
-                                  FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
-
-               /* Skip hidden vns */
-               if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE)
-                       continue;
-
-               /* If min rate is zero - set it to 1 */
-               if (!vn_min_rate)
-                       vn_min_rate = DEF_MIN_RATE;
-               else
-                       all_zero = 0;
-
-               bp->vn_weight_sum += vn_min_rate;
-       }
-
-       /* ... only if all min rates are zeros - disable fairness */
-       if (all_zero)
-               bp->vn_weight_sum = 0;
-}
-
 static void bnx2x_init_internal_func(struct bnx2x *bp)
 {
        struct tstorm_eth_function_common_config tstorm_config = {0};
@@ -4887,6 +5363,12 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
                tstorm_config.config_flags = MULTI_FLAGS(bp);
                tstorm_config.rss_result_mask = MULTI_MASK;
        }
+
+       /* Enable TPA if needed */
+       if (bp->flags & TPA_ENABLE_FLAG)
+               tstorm_config.config_flags |=
+                       TSTORM_ETH_FUNCTION_COMMON_CONFIG_ENABLE_TPA;
+
        if (IS_E1HMF(bp))
                tstorm_config.config_flags |=
                                TSTORM_ETH_FUNCTION_COMMON_CONFIG_E1HOV_IN_CAM;
@@ -4898,6 +5380,7 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
               (*(u32 *)&tstorm_config));
 
        bp->rx_mode = BNX2X_RX_MODE_NONE; /* no rx until link is up */
+       bp->rx_mode_cl_mask = (1 << BP_L_ID(bp));
        bnx2x_set_storm_rx_mode(bp);
 
        for_each_queue(bp, i) {
@@ -4988,18 +5471,26 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
                min((u32)(min((u32)8, (u32)MAX_SKB_FRAGS) *
                          SGE_PAGE_SIZE * PAGES_PER_SGE),
                    (u32)0xffff);
-       for_each_rx_queue(bp, i) {
+       for_each_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
 
                REG_WR(bp, BAR_USTRORM_INTMEM +
-                      USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)),
+                      USTORM_CQE_PAGE_BASE_OFFSET(port, fp->cl_id),
                       U64_LO(fp->rx_comp_mapping));
                REG_WR(bp, BAR_USTRORM_INTMEM +
-                      USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)) + 4,
+                      USTORM_CQE_PAGE_BASE_OFFSET(port, fp->cl_id) + 4,
                       U64_HI(fp->rx_comp_mapping));
 
+               /* Next page */
+               REG_WR(bp, BAR_USTRORM_INTMEM +
+                      USTORM_CQE_PAGE_NEXT_OFFSET(port, fp->cl_id),
+                      U64_LO(fp->rx_comp_mapping + BCM_PAGE_SIZE));
+               REG_WR(bp, BAR_USTRORM_INTMEM +
+                      USTORM_CQE_PAGE_NEXT_OFFSET(port, fp->cl_id) + 4,
+                      U64_HI(fp->rx_comp_mapping + BCM_PAGE_SIZE));
+
                REG_WR16(bp, BAR_USTRORM_INTMEM +
-                        USTORM_MAX_AGG_SIZE_OFFSET(port, FP_CL_ID(fp)),
+                        USTORM_MAX_AGG_SIZE_OFFSET(port, fp->cl_id),
                         max_agg_size);
        }
 
@@ -5015,7 +5506,7 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
                rx_pause.cqe_thr_high = 350;
                rx_pause.sge_thr_high = 0;
 
-               for_each_rx_queue(bp, i) {
+               for_each_queue(bp, i) {
                        struct bnx2x_fastpath *fp = &bp->fp[i];
 
                        if (!fp->disable_tpa) {
@@ -5046,20 +5537,18 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
                bp->link_vars.line_speed = SPEED_10000;
                bnx2x_init_port_minmax(bp);
 
+               if (!BP_NOMCP(bp))
+                       bp->mf_config =
+                             SHMEM_RD(bp, mf_cfg.func_mf_config[func].config);
                bnx2x_calc_vn_weight_sum(bp);
 
                for (vn = VN_0; vn < E1HVN_MAX; vn++)
                        bnx2x_init_vn_minmax(bp, 2*vn + port);
 
                /* Enable rate shaping and fairness */
-               bp->cmng.flags.cmng_enables =
+               bp->cmng.flags.cmng_enables |=
                                        CMNG_FLAGS_PER_PORT_RATE_SHAPING_VN;
-               if (bp->vn_weight_sum)
-                       bp->cmng.flags.cmng_enables |=
-                                       CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
-               else
-                       DP(NETIF_MSG_IFUP, "All MIN values are zeroes"
-                          "  fairness will be disabled\n");
+
        } else {
                /* rate shaping and fairness are disabled */
                DP(NETIF_MSG_IFUP,
@@ -5107,15 +5596,23 @@ static void bnx2x_nic_init(struct bnx2x *bp, u32 load_code)
                fp->state = BNX2X_FP_STATE_CLOSED;
                fp->index = i;
                fp->cl_id = BP_L_ID(bp) + i;
+#ifdef BCM_CNIC
+               fp->sb_id = fp->cl_id + 1;
+#else
                fp->sb_id = fp->cl_id;
+#endif
                DP(NETIF_MSG_IFUP,
-                  "bnx2x_init_sb(%p,%p) index %d  cl_id %d  sb %d\n",
-                  bp, fp->status_blk, i, FP_CL_ID(fp), FP_SB_ID(fp));
+                  "queue[%d]:  bnx2x_init_sb(%p,%p)  cl_id %d  sb %d\n",
+                  i, bp, fp->status_blk, fp->cl_id, fp->sb_id);
                bnx2x_init_sb(bp, fp->status_blk, fp->status_blk_mapping,
-                             FP_SB_ID(fp));
+                             fp->sb_id);
                bnx2x_update_fpsb_idx(fp);
        }
 
+       /* ensure status block indices were read */
+       rmb();
+
+
        bnx2x_init_def_sb(bp, bp->def_status_blk, bp->def_status_blk_mapping,
                          DEF_SB_ID);
        bnx2x_update_dsb_idx(bp);
@@ -5136,6 +5633,11 @@ static void bnx2x_nic_init(struct bnx2x *bp, u32 load_code)
        mmiowb();
 
        bnx2x_int_enable(bp);
+
+       /* Check for SPIO5 */
+       bnx2x_attn_int_deasserted0(bp,
+               REG_RD(bp, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + BP_PORT(bp)*4) &
+                                  AEU_INPUTS_ATTN_BITS_SPIO5);
 }
 
 /* end of nic init */
@@ -5191,13 +5693,15 @@ static void bnx2x_gunzip_end(struct bnx2x *bp)
        }
 }
 
-static int bnx2x_gunzip(struct bnx2x *bp, u8 *zbuf, int len)
+static int bnx2x_gunzip(struct bnx2x *bp, const u8 *zbuf, int len)
 {
        int n, rc;
 
        /* check gzip header */
-       if ((zbuf[0] != 0x1f) || (zbuf[1] != 0x8b) || (zbuf[2] != Z_DEFLATED))
+       if ((zbuf[0] != 0x1f) || (zbuf[1] != 0x8b) || (zbuf[2] != Z_DEFLATED)) {
+               BNX2X_ERR("Bad gzip header\n");
                return -EINVAL;
+       }
 
        n = 10;
 
@@ -5206,7 +5710,7 @@ static int bnx2x_gunzip(struct bnx2x *bp, u8 *zbuf, int len)
        if (zbuf[3] & FNAME)
                while ((zbuf[n++] != 0) && (n < len));
 
-       bp->strm->next_in = zbuf + n;
+       bp->strm->next_in = (typeof(bp->strm->next_in))zbuf + n;
        bp->strm->avail_in = len - n;
        bp->strm->next_out = bp->gunzip_buf;
        bp->strm->avail_out = FW_BUF_SIZE;
@@ -5328,8 +5832,8 @@ static int bnx2x_int_mem_test(struct bnx2x *bp)
        msleep(50);
        REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03);
        msleep(50);
-       bnx2x_init_block(bp, BRB1_COMMON_START, BRB1_COMMON_END);
-       bnx2x_init_block(bp, PRS_COMMON_START, PRS_COMMON_END);
+       bnx2x_init_block(bp, BRB1_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, PRS_BLOCK, COMMON_STAGE);
 
        DP(NETIF_MSG_HW, "part2\n");
 
@@ -5393,9 +5897,9 @@ static int bnx2x_int_mem_test(struct bnx2x *bp)
        msleep(50);
        REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03);
        msleep(50);
-       bnx2x_init_block(bp, BRB1_COMMON_START, BRB1_COMMON_END);
-       bnx2x_init_block(bp, PRS_COMMON_START, PRS_COMMON_END);
-#ifndef BCM_ISCSI
+       bnx2x_init_block(bp, BRB1_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, PRS_BLOCK, COMMON_STAGE);
+#ifndef BCM_CNIC
        /* set NIC mode */
        REG_WR(bp, PRS_REG_NIC_MODE, 1);
 #endif
@@ -5459,32 +5963,107 @@ static void bnx2x_reset_common(struct bnx2x *bp)
        REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, 0x1403);
 }
 
-static int bnx2x_init_common(struct bnx2x *bp)
+static void bnx2x_init_pxp(struct bnx2x *bp)
 {
-       u32 val, i;
+       u16 devctl;
+       int r_order, w_order;
 
-       DP(BNX2X_MSG_MCP, "starting common init  func %d\n", BP_FUNC(bp));
+       pci_read_config_word(bp->pdev,
+                            bp->pcie_cap + PCI_EXP_DEVCTL, &devctl);
+       DP(NETIF_MSG_HW, "read 0x%x from devctl\n", devctl);
+       w_order = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
+       if (bp->mrrs == -1)
+               r_order = ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12);
+       else {
+               DP(NETIF_MSG_HW, "force read order to %d\n", bp->mrrs);
+               r_order = bp->mrrs;
+       }
 
-       bnx2x_reset_common(bp);
-       REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
-       REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, 0xfffc);
+       bnx2x_init_pxp_arb(bp, r_order, w_order);
+}
 
-       bnx2x_init_block(bp, MISC_COMMON_START, MISC_COMMON_END);
-       if (CHIP_IS_E1H(bp))
-               REG_WR(bp, MISC_REG_E1HMF_MODE, IS_E1HMF(bp));
+static void bnx2x_setup_fan_failure_detection(struct bnx2x *bp)
+{
+       u32 val;
+       u8 port;
+       u8 is_required = 0;
 
-       REG_WR(bp, MISC_REG_LCPLL_CTRL_REG_2, 0x100);
-       msleep(30);
-       REG_WR(bp, MISC_REG_LCPLL_CTRL_REG_2, 0x0);
+       val = SHMEM_RD(bp, dev_info.shared_hw_config.config2) &
+             SHARED_HW_CFG_FAN_FAILURE_MASK;
 
-       bnx2x_init_block(bp, PXP_COMMON_START, PXP_COMMON_END);
+       if (val == SHARED_HW_CFG_FAN_FAILURE_ENABLED)
+               is_required = 1;
+
+       /*
+        * The fan failure mechanism is usually related to the PHY type since
+        * the power consumption of the board is affected by the PHY. Currently,
+        * fan is required for most designs with SFX7101, BCM8727 and BCM8481.
+        */
+       else if (val == SHARED_HW_CFG_FAN_FAILURE_PHY_TYPE)
+               for (port = PORT_0; port < PORT_MAX; port++) {
+                       u32 phy_type =
+                               SHMEM_RD(bp, dev_info.port_hw_config[port].
+                                        external_phy_config) &
+                               PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK;
+                       is_required |=
+                               ((phy_type ==
+                                 PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101) ||
+                                (phy_type ==
+                                 PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727) ||
+                                (phy_type ==
+                                 PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8481));
+               }
+
+       DP(NETIF_MSG_HW, "fan detection setting: %d\n", is_required);
+
+       if (is_required == 0)
+               return;
+
+       /* Fan failure is indicated by SPIO 5 */
+       bnx2x_set_spio(bp, MISC_REGISTERS_SPIO_5,
+                      MISC_REGISTERS_SPIO_INPUT_HI_Z);
+
+       /* set to active low mode */
+       val = REG_RD(bp, MISC_REG_SPIO_INT);
+       val |= ((1 << MISC_REGISTERS_SPIO_5) <<
+                               MISC_REGISTERS_SPIO_INT_OLD_SET_POS);
+       REG_WR(bp, MISC_REG_SPIO_INT, val);
+
+       /* enable interrupt to signal the IGU */
+       val = REG_RD(bp, MISC_REG_SPIO_EVENT_EN);
+       val |= (1 << MISC_REGISTERS_SPIO_5);
+       REG_WR(bp, MISC_REG_SPIO_EVENT_EN, val);
+}
+
+static int bnx2x_init_common(struct bnx2x *bp)
+{
+       u32 val, i;
+#ifdef BCM_CNIC
+       u32 wb_write[2];
+#endif
+
+       DP(BNX2X_MSG_MCP, "starting common init  func %d\n", BP_FUNC(bp));
+
+       bnx2x_reset_common(bp);
+       REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
+       REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, 0xfffc);
+
+       bnx2x_init_block(bp, MISC_BLOCK, COMMON_STAGE);
+       if (CHIP_IS_E1H(bp))
+               REG_WR(bp, MISC_REG_E1HMF_MODE, IS_E1HMF(bp));
+
+       REG_WR(bp, MISC_REG_LCPLL_CTRL_REG_2, 0x100);
+       msleep(30);
+       REG_WR(bp, MISC_REG_LCPLL_CTRL_REG_2, 0x0);
+
+       bnx2x_init_block(bp, PXP_BLOCK, COMMON_STAGE);
        if (CHIP_IS_E1(bp)) {
                /* enable HW interrupt from PXP on USDM overflow
                   bit 16 on INT_MASK_0 */
                REG_WR(bp, PXP_REG_PXP_INT_MASK_0, 0);
        }
 
-       bnx2x_init_block(bp, PXP2_COMMON_START, PXP2_COMMON_END);
+       bnx2x_init_block(bp, PXP2_BLOCK, COMMON_STAGE);
        bnx2x_init_pxp(bp);
 
 #ifdef __BIG_ENDIAN
@@ -5504,7 +6083,7 @@ static int bnx2x_init_common(struct bnx2x *bp)
 #endif
 
        REG_WR(bp, PXP2_REG_RQ_CDU_P_SIZE, 2);
-#ifdef BCM_ISCSI
+#ifdef BCM_CNIC
        REG_WR(bp, PXP2_REG_RQ_TM_P_SIZE, 5);
        REG_WR(bp, PXP2_REG_RQ_QM_P_SIZE, 5);
        REG_WR(bp, PXP2_REG_RQ_SRC_P_SIZE, 5);
@@ -5530,87 +6109,77 @@ static int bnx2x_init_common(struct bnx2x *bp)
        REG_WR(bp, PXP2_REG_RQ_DISABLE_INPUTS, 0);
        REG_WR(bp, PXP2_REG_RD_DISABLE_INPUTS, 0);
 
-       bnx2x_init_block(bp, DMAE_COMMON_START, DMAE_COMMON_END);
+       bnx2x_init_block(bp, DMAE_BLOCK, COMMON_STAGE);
 
        /* clean the DMAE memory */
        bp->dmae_ready = 1;
        bnx2x_init_fill(bp, TSEM_REG_PRAM, 0, 8);
 
-       bnx2x_init_block(bp, TCM_COMMON_START, TCM_COMMON_END);
-       bnx2x_init_block(bp, UCM_COMMON_START, UCM_COMMON_END);
-       bnx2x_init_block(bp, CCM_COMMON_START, CCM_COMMON_END);
-       bnx2x_init_block(bp, XCM_COMMON_START, XCM_COMMON_END);
+       bnx2x_init_block(bp, TCM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, UCM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, CCM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, XCM_BLOCK, COMMON_STAGE);
 
        bnx2x_read_dmae(bp, XSEM_REG_PASSIVE_BUFFER, 3);
        bnx2x_read_dmae(bp, CSEM_REG_PASSIVE_BUFFER, 3);
        bnx2x_read_dmae(bp, TSEM_REG_PASSIVE_BUFFER, 3);
        bnx2x_read_dmae(bp, USEM_REG_PASSIVE_BUFFER, 3);
 
-       bnx2x_init_block(bp, QM_COMMON_START, QM_COMMON_END);
+       bnx2x_init_block(bp, QM_BLOCK, COMMON_STAGE);
+
+#ifdef BCM_CNIC
+       wb_write[0] = 0;
+       wb_write[1] = 0;
+       for (i = 0; i < 64; i++) {
+               REG_WR(bp, QM_REG_BASEADDR + i*4, 1024 * 4 * (i%16));
+               bnx2x_init_ind_wr(bp, QM_REG_PTRTBL + i*8, wb_write, 2);
+
+               if (CHIP_IS_E1H(bp)) {
+                       REG_WR(bp, QM_REG_BASEADDR_EXT_A + i*4, 1024*4*(i%16));
+                       bnx2x_init_ind_wr(bp, QM_REG_PTRTBL_EXT_A + i*8,
+                                         wb_write, 2);
+               }
+       }
+#endif
        /* soft reset pulse */
        REG_WR(bp, QM_REG_SOFT_RESET, 1);
        REG_WR(bp, QM_REG_SOFT_RESET, 0);
 
-#ifdef BCM_ISCSI
-       bnx2x_init_block(bp, TIMERS_COMMON_START, TIMERS_COMMON_END);
+#ifdef BCM_CNIC
+       bnx2x_init_block(bp, TIMERS_BLOCK, COMMON_STAGE);
 #endif
 
-       bnx2x_init_block(bp, DQ_COMMON_START, DQ_COMMON_END);
+       bnx2x_init_block(bp, DQ_BLOCK, COMMON_STAGE);
        REG_WR(bp, DORQ_REG_DPM_CID_OFST, BCM_PAGE_SHIFT);
        if (!CHIP_REV_IS_SLOW(bp)) {
                /* enable hw interrupt from doorbell Q */
                REG_WR(bp, DORQ_REG_DORQ_INT_MASK, 0);
        }
 
-       bnx2x_init_block(bp, BRB1_COMMON_START, BRB1_COMMON_END);
-       bnx2x_init_block(bp, PRS_COMMON_START, PRS_COMMON_END);
+       bnx2x_init_block(bp, BRB1_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, PRS_BLOCK, COMMON_STAGE);
        REG_WR(bp, PRS_REG_A_PRSU_20, 0xf);
+#ifndef BCM_CNIC
        /* set NIC mode */
        REG_WR(bp, PRS_REG_NIC_MODE, 1);
+#endif
        if (CHIP_IS_E1H(bp))
                REG_WR(bp, PRS_REG_E1HOV_MODE, IS_E1HMF(bp));
 
-       bnx2x_init_block(bp, TSDM_COMMON_START, TSDM_COMMON_END);
-       bnx2x_init_block(bp, CSDM_COMMON_START, CSDM_COMMON_END);
-       bnx2x_init_block(bp, USDM_COMMON_START, USDM_COMMON_END);
-       bnx2x_init_block(bp, XSDM_COMMON_START, XSDM_COMMON_END);
+       bnx2x_init_block(bp, TSDM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, CSDM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, USDM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, XSDM_BLOCK, COMMON_STAGE);
 
-       if (CHIP_IS_E1H(bp)) {
-               bnx2x_init_fill(bp, TSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp,
-                               TSTORM_INTMEM_ADDR + STORM_INTMEM_SIZE_E1H/2,
-                               0, STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp, CSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp,
-                               CSTORM_INTMEM_ADDR + STORM_INTMEM_SIZE_E1H/2,
-                               0, STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp, XSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp,
-                               XSTORM_INTMEM_ADDR + STORM_INTMEM_SIZE_E1H/2,
-                               0, STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp, USTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1H/2);
-               bnx2x_init_fill(bp,
-                               USTORM_INTMEM_ADDR + STORM_INTMEM_SIZE_E1H/2,
-                               0, STORM_INTMEM_SIZE_E1H/2);
-       } else { /* E1 */
-               bnx2x_init_fill(bp, TSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1);
-               bnx2x_init_fill(bp, CSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1);
-               bnx2x_init_fill(bp, XSTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1);
-               bnx2x_init_fill(bp, USTORM_INTMEM_ADDR, 0,
-                               STORM_INTMEM_SIZE_E1);
-       }
-
-       bnx2x_init_block(bp, TSEM_COMMON_START, TSEM_COMMON_END);
-       bnx2x_init_block(bp, USEM_COMMON_START, USEM_COMMON_END);
-       bnx2x_init_block(bp, CSEM_COMMON_START, CSEM_COMMON_END);
-       bnx2x_init_block(bp, XSEM_COMMON_START, XSEM_COMMON_END);
+       bnx2x_init_fill(bp, TSEM_REG_FAST_MEMORY, 0, STORM_INTMEM_SIZE(bp));
+       bnx2x_init_fill(bp, USEM_REG_FAST_MEMORY, 0, STORM_INTMEM_SIZE(bp));
+       bnx2x_init_fill(bp, CSEM_REG_FAST_MEMORY, 0, STORM_INTMEM_SIZE(bp));
+       bnx2x_init_fill(bp, XSEM_REG_FAST_MEMORY, 0, STORM_INTMEM_SIZE(bp));
+
+       bnx2x_init_block(bp, TSEM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, USEM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, CSEM_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, XSEM_BLOCK, COMMON_STAGE);
 
        /* sync semi rtc */
        REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR,
@@ -5618,16 +6187,28 @@ static int bnx2x_init_common(struct bnx2x *bp)
        REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET,
               0x80000000);
 
-       bnx2x_init_block(bp, UPB_COMMON_START, UPB_COMMON_END);
-       bnx2x_init_block(bp, XPB_COMMON_START, XPB_COMMON_END);
-       bnx2x_init_block(bp, PBF_COMMON_START, PBF_COMMON_END);
+       bnx2x_init_block(bp, UPB_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, XPB_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, PBF_BLOCK, COMMON_STAGE);
 
        REG_WR(bp, SRC_REG_SOFT_RST, 1);
        for (i = SRC_REG_KEYRSS0_0; i <= SRC_REG_KEYRSS1_9; i += 4) {
                REG_WR(bp, i, 0xc0cac01a);
                /* TODO: replace with something meaningful */
        }
-       bnx2x_init_block(bp, SRCH_COMMON_START, SRCH_COMMON_END);
+       bnx2x_init_block(bp, SRCH_BLOCK, COMMON_STAGE);
+#ifdef BCM_CNIC
+       REG_WR(bp, SRC_REG_KEYSEARCH_0, 0x63285672);
+       REG_WR(bp, SRC_REG_KEYSEARCH_1, 0x24b8f2cc);
+       REG_WR(bp, SRC_REG_KEYSEARCH_2, 0x223aef9b);
+       REG_WR(bp, SRC_REG_KEYSEARCH_3, 0x26001e3a);
+       REG_WR(bp, SRC_REG_KEYSEARCH_4, 0x7ae91116);
+       REG_WR(bp, SRC_REG_KEYSEARCH_5, 0x5ce5230b);
+       REG_WR(bp, SRC_REG_KEYSEARCH_6, 0x298d8adf);
+       REG_WR(bp, SRC_REG_KEYSEARCH_7, 0x6eb0ff09);
+       REG_WR(bp, SRC_REG_KEYSEARCH_8, 0x1830f82f);
+       REG_WR(bp, SRC_REG_KEYSEARCH_9, 0x01e46be7);
+#endif
        REG_WR(bp, SRC_REG_SOFT_RST, 0);
 
        if (sizeof(union cdu_context) != 1024)
@@ -5635,16 +6216,11 @@ static int bnx2x_init_common(struct bnx2x *bp)
                printk(KERN_ALERT PFX "please adjust the size of"
                       " cdu_context(%ld)\n", (long)sizeof(union cdu_context));
 
-       bnx2x_init_block(bp, CDU_COMMON_START, CDU_COMMON_END);
+       bnx2x_init_block(bp, CDU_BLOCK, COMMON_STAGE);
        val = (4 << 24) + (0 << 12) + 1024;
        REG_WR(bp, CDU_REG_CDU_GLOBAL_PARAMS, val);
-       if (CHIP_IS_E1(bp)) {
-               /* !!! fix pxp client crdit until excel update */
-               REG_WR(bp, CDU_REG_CDU_DEBUG, 0x264);
-               REG_WR(bp, CDU_REG_CDU_DEBUG, 0);
-       }
 
-       bnx2x_init_block(bp, CFC_COMMON_START, CFC_COMMON_END);
+       bnx2x_init_block(bp, CFC_BLOCK, COMMON_STAGE);
        REG_WR(bp, CFC_REG_INIT_REG, 0x7FF);
        /* enable context validation interrupt from CFC */
        REG_WR(bp, CFC_REG_CFC_INT_MASK, 0);
@@ -5652,20 +6228,20 @@ static int bnx2x_init_common(struct bnx2x *bp)
        /* set the thresholds to prevent CFC/CDU race */
        REG_WR(bp, CFC_REG_DEBUG0, 0x20020000);
 
-       bnx2x_init_block(bp, HC_COMMON_START, HC_COMMON_END);
-       bnx2x_init_block(bp, MISC_AEU_COMMON_START, MISC_AEU_COMMON_END);
+       bnx2x_init_block(bp, HC_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, MISC_AEU_BLOCK, COMMON_STAGE);
 
-       /* PXPCS COMMON comes here */
+       bnx2x_init_block(bp, PXPCS_BLOCK, COMMON_STAGE);
        /* Reset PCIE errors for debug */
        REG_WR(bp, 0x2814, 0xffffffff);
        REG_WR(bp, 0x3820, 0xffffffff);
 
-       /* EMAC0 COMMON comes here */
-       /* EMAC1 COMMON comes here */
-       /* DBU COMMON comes here */
-       /* DBG COMMON comes here */
+       bnx2x_init_block(bp, EMAC0_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, EMAC1_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, DBU_BLOCK, COMMON_STAGE);
+       bnx2x_init_block(bp, DBG_BLOCK, COMMON_STAGE);
 
-       bnx2x_init_block(bp, NIG_COMMON_START, NIG_COMMON_END);
+       bnx2x_init_block(bp, NIG_BLOCK, COMMON_STAGE);
        if (CHIP_IS_E1H(bp)) {
                REG_WR(bp, NIG_REG_LLH_MF_MODE, IS_E1HMF(bp));
                REG_WR(bp, NIG_REG_LLH_E1HOV_MODE, IS_E1HMF(bp));
@@ -5707,30 +6283,16 @@ static int bnx2x_init_common(struct bnx2x *bp)
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8072:
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8073:
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8726:
+       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727:
                bp->port.need_hw_lock = 1;
                break;
 
-       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
-               /* Fan failure is indicated by SPIO 5 */
-               bnx2x_set_spio(bp, MISC_REGISTERS_SPIO_5,
-                              MISC_REGISTERS_SPIO_INPUT_HI_Z);
-
-               /* set to active low mode */
-               val = REG_RD(bp, MISC_REG_SPIO_INT);
-               val |= ((1 << MISC_REGISTERS_SPIO_5) <<
-                                       MISC_REGISTERS_SPIO_INT_OLD_SET_POS);
-               REG_WR(bp, MISC_REG_SPIO_INT, val);
-
-               /* enable interrupt to signal the IGU */
-               val = REG_RD(bp, MISC_REG_SPIO_EVENT_EN);
-               val |= (1 << MISC_REGISTERS_SPIO_5);
-               REG_WR(bp, MISC_REG_SPIO_EVENT_EN, val);
-               break;
-
        default:
                break;
        }
 
+       bnx2x_setup_fan_failure_detection(bp);
+
        /* clear PXP2 attentions */
        REG_RD(bp, PXP2_REG_PXP2_INT_STS_CLR_0);
 
@@ -5749,6 +6311,7 @@ static int bnx2x_init_common(struct bnx2x *bp)
 static int bnx2x_init_port(struct bnx2x *bp)
 {
        int port = BP_PORT(bp);
+       int init_stage = port ? PORT1_STAGE : PORT0_STAGE;
        u32 low, high;
        u32 val;
 
@@ -5756,49 +6319,24 @@ static int bnx2x_init_port(struct bnx2x *bp)
 
        REG_WR(bp, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0);
 
-       /* Port PXP comes here */
-       /* Port PXP2 comes here */
-#ifdef BCM_ISCSI
-       /* Port0  1
-        * Port1  385 */
-       i++;
-       wb_write[0] = ONCHIP_ADDR1(bp->timers_mapping);
-       wb_write[1] = ONCHIP_ADDR2(bp->timers_mapping);
-       REG_WR_DMAE(bp, PXP2_REG_RQ_ONCHIP_AT + i*8, wb_write, 2);
-       REG_WR(bp, PXP2_REG_PSWRQ_TM0_L2P + func*4, PXP_ONE_ILT(i));
-
-       /* Port0  2
-        * Port1  386 */
-       i++;
-       wb_write[0] = ONCHIP_ADDR1(bp->qm_mapping);
-       wb_write[1] = ONCHIP_ADDR2(bp->qm_mapping);
-       REG_WR_DMAE(bp, PXP2_REG_RQ_ONCHIP_AT + i*8, wb_write, 2);
-       REG_WR(bp, PXP2_REG_PSWRQ_QM0_L2P + func*4, PXP_ONE_ILT(i));
+       bnx2x_init_block(bp, PXP_BLOCK, init_stage);
+       bnx2x_init_block(bp, PXP2_BLOCK, init_stage);
 
-       /* Port0  3
-        * Port1  387 */
-       i++;
-       wb_write[0] = ONCHIP_ADDR1(bp->t1_mapping);
-       wb_write[1] = ONCHIP_ADDR2(bp->t1_mapping);
-       REG_WR_DMAE(bp, PXP2_REG_RQ_ONCHIP_AT + i*8, wb_write, 2);
-       REG_WR(bp, PXP2_REG_PSWRQ_SRC0_L2P + func*4, PXP_ONE_ILT(i));
-#endif
-       /* Port CMs come here */
-       bnx2x_init_block(bp, (port ? XCM_PORT1_START : XCM_PORT0_START),
-                            (port ? XCM_PORT1_END : XCM_PORT0_END));
+       bnx2x_init_block(bp, TCM_BLOCK, init_stage);
+       bnx2x_init_block(bp, UCM_BLOCK, init_stage);
+       bnx2x_init_block(bp, CCM_BLOCK, init_stage);
+       bnx2x_init_block(bp, XCM_BLOCK, init_stage);
 
-       /* Port QM comes here */
-#ifdef BCM_ISCSI
-       REG_WR(bp, TM_REG_LIN0_SCAN_TIME + func*4, 1024/64*20);
-       REG_WR(bp, TM_REG_LIN0_MAX_ACTIVE_CID + func*4, 31);
+#ifdef BCM_CNIC
+       REG_WR(bp, QM_REG_CONNNUM_0 + port*4, 1024/16 - 1);
 
-       bnx2x_init_block(bp, func ? TIMERS_PORT1_START : TIMERS_PORT0_START,
-                            func ? TIMERS_PORT1_END : TIMERS_PORT0_END);
+       bnx2x_init_block(bp, TIMERS_BLOCK, init_stage);
+       REG_WR(bp, TM_REG_LIN0_SCAN_TIME + port*4, 20);
+       REG_WR(bp, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31);
 #endif
-       /* Port DQ comes here */
+       bnx2x_init_block(bp, DQ_BLOCK, init_stage);
 
-       bnx2x_init_block(bp, (port ? BRB1_PORT1_START : BRB1_PORT0_START),
-                            (port ? BRB1_PORT1_END : BRB1_PORT0_END));
+       bnx2x_init_block(bp, BRB1_BLOCK, init_stage);
        if (CHIP_REV_IS_SLOW(bp) && !CHIP_IS_E1H(bp)) {
                /* no pause for emulation and FPGA */
                low = 0;
@@ -5822,24 +6360,22 @@ static int bnx2x_init_port(struct bnx2x *bp)
        REG_WR(bp, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port*4, high);
 
 
-       /* Port PRS comes here */
-       /* Port TSDM comes here */
-       /* Port CSDM comes here */
-       /* Port USDM comes here */
-       /* Port XSDM comes here */
-       bnx2x_init_block(bp, port ? TSEM_PORT1_START : TSEM_PORT0_START,
-                            port ? TSEM_PORT1_END : TSEM_PORT0_END);
-       bnx2x_init_block(bp, port ? USEM_PORT1_START : USEM_PORT0_START,
-                            port ? USEM_PORT1_END : USEM_PORT0_END);
-       bnx2x_init_block(bp, port ? CSEM_PORT1_START : CSEM_PORT0_START,
-                            port ? CSEM_PORT1_END : CSEM_PORT0_END);
-       bnx2x_init_block(bp, port ? XSEM_PORT1_START : XSEM_PORT0_START,
-                            port ? XSEM_PORT1_END : XSEM_PORT0_END);
-       /* Port UPB comes here */
-       /* Port XPB comes here */
-
-       bnx2x_init_block(bp, port ? PBF_PORT1_START : PBF_PORT0_START,
-                            port ? PBF_PORT1_END : PBF_PORT0_END);
+       bnx2x_init_block(bp, PRS_BLOCK, init_stage);
+
+       bnx2x_init_block(bp, TSDM_BLOCK, init_stage);
+       bnx2x_init_block(bp, CSDM_BLOCK, init_stage);
+       bnx2x_init_block(bp, USDM_BLOCK, init_stage);
+       bnx2x_init_block(bp, XSDM_BLOCK, init_stage);
+
+       bnx2x_init_block(bp, TSEM_BLOCK, init_stage);
+       bnx2x_init_block(bp, USEM_BLOCK, init_stage);
+       bnx2x_init_block(bp, CSEM_BLOCK, init_stage);
+       bnx2x_init_block(bp, XSEM_BLOCK, init_stage);
+
+       bnx2x_init_block(bp, UPB_BLOCK, init_stage);
+       bnx2x_init_block(bp, XPB_BLOCK, init_stage);
+
+       bnx2x_init_block(bp, PBF_BLOCK, init_stage);
 
        /* configure PBF to work without PAUSE mtu 9000 */
        REG_WR(bp, PBF_REG_P0_PAUSE_ENABLE + port*4, 0);
@@ -5854,33 +6390,19 @@ static int bnx2x_init_port(struct bnx2x *bp)
        msleep(5);
        REG_WR(bp, PBF_REG_INIT_P0 + port*4, 0);
 
-#ifdef BCM_ISCSI
-       /* tell the searcher where the T2 table is */
-       REG_WR(bp, SRC_REG_COUNTFREE0 + func*4, 16*1024/64);
-
-       wb_write[0] = U64_LO(bp->t2_mapping);
-       wb_write[1] = U64_HI(bp->t2_mapping);
-       REG_WR_DMAE(bp, SRC_REG_FIRSTFREE0 + func*4, wb_write, 2);
-       wb_write[0] = U64_LO((u64)bp->t2_mapping + 16*1024 - 64);
-       wb_write[1] = U64_HI((u64)bp->t2_mapping + 16*1024 - 64);
-       REG_WR_DMAE(bp, SRC_REG_LASTFREE0 + func*4, wb_write, 2);
-
-       REG_WR(bp, SRC_REG_NUMBER_HASH_BITS0 + func*4, 10);
-       /* Port SRCH comes here */
+#ifdef BCM_CNIC
+       bnx2x_init_block(bp, SRCH_BLOCK, init_stage);
 #endif
-       /* Port CDU comes here */
-       /* Port CFC comes here */
+       bnx2x_init_block(bp, CDU_BLOCK, init_stage);
+       bnx2x_init_block(bp, CFC_BLOCK, init_stage);
 
        if (CHIP_IS_E1(bp)) {
                REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, 0);
                REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, 0);
        }
-       bnx2x_init_block(bp, port ? HC_PORT1_START : HC_PORT0_START,
-                            port ? HC_PORT1_END : HC_PORT0_END);
+       bnx2x_init_block(bp, HC_BLOCK, init_stage);
 
-       bnx2x_init_block(bp, port ? MISC_AEU_PORT1_START :
-                                   MISC_AEU_PORT0_START,
-                            port ? MISC_AEU_PORT1_END : MISC_AEU_PORT0_END);
+       bnx2x_init_block(bp, MISC_AEU_BLOCK, init_stage);
        /* init aeu_mask_attn_func_0/1:
         *  - SF mode: bits 3-7 are masked. only bits 0-2 are in use
         *  - MF mode: bit 3 is masked. bits 0-2 are in use as in SF
@@ -5888,13 +6410,13 @@ static int bnx2x_init_port(struct bnx2x *bp)
        REG_WR(bp, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4,
               (IS_E1HMF(bp) ? 0xF7 : 0x7));
 
-       /* Port PXPCS comes here */
-       /* Port EMAC0 comes here */
-       /* Port EMAC1 comes here */
-       /* Port DBU comes here */
-       /* Port DBG comes here */
-       bnx2x_init_block(bp, port ? NIG_PORT1_START : NIG_PORT0_START,
-                            port ? NIG_PORT1_END : NIG_PORT0_END);
+       bnx2x_init_block(bp, PXPCS_BLOCK, init_stage);
+       bnx2x_init_block(bp, EMAC0_BLOCK, init_stage);
+       bnx2x_init_block(bp, EMAC1_BLOCK, init_stage);
+       bnx2x_init_block(bp, DBU_BLOCK, init_stage);
+       bnx2x_init_block(bp, DBG_BLOCK, init_stage);
+
+       bnx2x_init_block(bp, NIG_BLOCK, init_stage);
 
        REG_WR(bp, NIG_REG_XGXS_SERDES0_MODE_SEL + port*4, 1);
 
@@ -5903,9 +6425,6 @@ static int bnx2x_init_port(struct bnx2x *bp)
                REG_WR(bp, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port*4,
                       (IS_E1HMF(bp) ? 0x1 : 0x2));
 
-               /* support pause requests from USDM, TSDM and BRB */
-               REG_WR(bp, NIG_REG_LLFC_EGRESS_SRC_ENABLE_0 + port*4, 0x7);
-
                {
                        REG_WR(bp, NIG_REG_LLFC_ENABLE_0 + port*4, 0);
                        REG_WR(bp, NIG_REG_LLFC_OUT_EN_0 + port*4, 0);
@@ -5913,8 +6432,8 @@ static int bnx2x_init_port(struct bnx2x *bp)
                }
        }
 
-       /* Port MCP comes here */
-       /* Port DMAE comes here */
+       bnx2x_init_block(bp, MCP_BLOCK, init_stage);
+       bnx2x_init_block(bp, DMAE_BLOCK, init_stage);
 
        switch (XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config)) {
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8726:
@@ -5949,10 +6468,15 @@ static int bnx2x_init_port(struct bnx2x *bp)
                break;
 
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
+       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727:
                /* add SPIO 5 to group 0 */
-               val = REG_RD(bp, MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0);
+               {
+               u32 reg_addr = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 :
+                                      MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0);
+               val = REG_RD(bp, reg_addr);
                val |= AEU_INPUTS_ATTN_BITS_SPIO5;
-               REG_WR(bp, MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0, val);
+               REG_WR(bp, reg_addr, val);
+               }
                break;
 
        default:
@@ -5976,7 +6500,12 @@ static int bnx2x_init_port(struct bnx2x *bp)
 #define PXP_ONE_ILT(x)         (((x) << 10) | x)
 #define PXP_ILT_RANGE(f, l)    (((l) << 10) | f)
 
+#ifdef BCM_CNIC
+#define CNIC_ILT_LINES         127
+#define CNIC_CTX_PER_ILT       16
+#else
 #define CNIC_ILT_LINES         0
+#endif
 
 static void bnx2x_ilt_wr(struct bnx2x *bp, u32 index, dma_addr_t addr)
 {
@@ -6015,11 +6544,57 @@ static int bnx2x_init_func(struct bnx2x *bp)
                REG_WR(bp, PXP2_REG_PSWRQ_CDU0_L2P + func*4,
                       PXP_ILT_RANGE(i, i + CNIC_ILT_LINES));
 
+#ifdef BCM_CNIC
+       i += 1 + CNIC_ILT_LINES;
+       bnx2x_ilt_wr(bp, i, bp->timers_mapping);
+       if (CHIP_IS_E1(bp))
+               REG_WR(bp, PXP2_REG_PSWRQ_TM0_L2P + func*4, PXP_ONE_ILT(i));
+       else {
+               REG_WR(bp, PXP2_REG_RQ_TM_FIRST_ILT, i);
+               REG_WR(bp, PXP2_REG_RQ_TM_LAST_ILT, i);
+       }
+
+       i++;
+       bnx2x_ilt_wr(bp, i, bp->qm_mapping);
+       if (CHIP_IS_E1(bp))
+               REG_WR(bp, PXP2_REG_PSWRQ_QM0_L2P + func*4, PXP_ONE_ILT(i));
+       else {
+               REG_WR(bp, PXP2_REG_RQ_QM_FIRST_ILT, i);
+               REG_WR(bp, PXP2_REG_RQ_QM_LAST_ILT, i);
+       }
+
+       i++;
+       bnx2x_ilt_wr(bp, i, bp->t1_mapping);
+       if (CHIP_IS_E1(bp))
+               REG_WR(bp, PXP2_REG_PSWRQ_SRC0_L2P + func*4, PXP_ONE_ILT(i));
+       else {
+               REG_WR(bp, PXP2_REG_RQ_SRC_FIRST_ILT, i);
+               REG_WR(bp, PXP2_REG_RQ_SRC_LAST_ILT, i);
+       }
+
+       /* tell the searcher where the T2 table is */
+       REG_WR(bp, SRC_REG_COUNTFREE0 + port*4, 16*1024/64);
+
+       bnx2x_wb_wr(bp, SRC_REG_FIRSTFREE0 + port*16,
+                   U64_LO(bp->t2_mapping), U64_HI(bp->t2_mapping));
+
+       bnx2x_wb_wr(bp, SRC_REG_LASTFREE0 + port*16,
+                   U64_LO((u64)bp->t2_mapping + 16*1024 - 64),
+                   U64_HI((u64)bp->t2_mapping + 16*1024 - 64));
+
+       REG_WR(bp, SRC_REG_NUMBER_HASH_BITS0 + port*4, 10);
+#endif
 
        if (CHIP_IS_E1H(bp)) {
-               for (i = 0; i < 9; i++)
-                       bnx2x_init_block(bp,
-                                        cm_start[func][i], cm_end[func][i]);
+               bnx2x_init_block(bp, MISC_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, TCM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, UCM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, CCM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, XCM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, TSEM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, USEM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, CSEM_BLOCK, FUNC0_STAGE + func);
+               bnx2x_init_block(bp, XSEM_BLOCK, FUNC0_STAGE + func);
 
                REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 1);
                REG_WR(bp, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, bp->e1hov);
@@ -6032,7 +6607,7 @@ static int bnx2x_init_func(struct bnx2x *bp)
                REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, 0);
                REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, 0);
        }
-       bnx2x_init_block(bp, hc_limits[func][0], hc_limits[func][1]);
+       bnx2x_init_block(bp, HC_BLOCK, FUNC0_STAGE + func);
 
        /* Reset PCIE errors for debug */
        REG_WR(bp, 0x2114, 0xffffffff);
@@ -6050,7 +6625,9 @@ static int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
 
        bp->dmae_ready = 0;
        mutex_init(&bp->dmae_mutex);
-       bnx2x_gunzip_init(bp);
+       rc = bnx2x_gunzip_init(bp);
+       if (rc)
+               return rc;
 
        switch (load_code) {
        case FW_MSG_CODE_DRV_LOAD_COMMON:
@@ -6084,16 +6661,16 @@ static int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
                bp->fw_drv_pulse_wr_seq =
                                (SHMEM_RD(bp, func_mb[func].drv_pulse_mb) &
                                 DRV_PULSE_SEQ_MASK);
-               bp->func_stx = SHMEM_RD(bp, func_mb[func].fw_mb_param);
-               DP(BNX2X_MSG_MCP, "drv_pulse 0x%x  func_stx 0x%x\n",
-                  bp->fw_drv_pulse_wr_seq, bp->func_stx);
-       } else
-               bp->func_stx = 0;
+               DP(BNX2X_MSG_MCP, "drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
+       }
 
        /* this needs to be done before gunzip end */
        bnx2x_zero_def_sb(bp);
        for_each_queue(bp, i)
                bnx2x_zero_sb(bp, BP_L_ID(bp) + i);
+#ifdef BCM_CNIC
+       bnx2x_zero_sb(bp, BP_L_ID(bp) + i);
+#endif
 
 init_hw_err:
        bnx2x_gunzip_end(bp);
@@ -6101,44 +6678,6 @@ init_hw_err:
        return rc;
 }
 
-/* send the MCP a request, block until there is a reply */
-static u32 bnx2x_fw_command(struct bnx2x *bp, u32 command)
-{
-       int func = BP_FUNC(bp);
-       u32 seq = ++bp->fw_seq;
-       u32 rc = 0;
-       u32 cnt = 1;
-       u8 delay = CHIP_REV_IS_SLOW(bp) ? 100 : 10;
-
-       SHMEM_WR(bp, func_mb[func].drv_mb_header, (command | seq));
-       DP(BNX2X_MSG_MCP, "wrote command (%x) to FW MB\n", (command | seq));
-
-       do {
-               /* let the FW do it's magic ... */
-               msleep(delay);
-
-               rc = SHMEM_RD(bp, func_mb[func].fw_mb_header);
-
-               /* Give the FW up to 2 second (200*10ms) */
-       } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 200));
-
-       DP(BNX2X_MSG_MCP, "[after %d ms] read (%x) seq is (%x) from FW MB\n",
-          cnt*delay, rc, seq);
-
-       /* is this a reply to our command? */
-       if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK)) {
-               rc &= FW_MSG_CODE_MASK;
-
-       } else {
-               /* FW BUG! */
-               BNX2X_ERR("FW failed to respond!\n");
-               bnx2x_fw_dump(bp);
-               rc = 0;
-       }
-
-       return rc;
-}
-
 static void bnx2x_free_mem(struct bnx2x *bp)
 {
 
@@ -6168,11 +6707,10 @@ static void bnx2x_free_mem(struct bnx2x *bp)
                /* status blocks */
                BNX2X_PCI_FREE(bnx2x_fp(bp, i, status_blk),
                               bnx2x_fp(bp, i, status_blk_mapping),
-                              sizeof(struct host_status_block) +
-                              sizeof(struct eth_tx_db_data));
+                              sizeof(struct host_status_block));
        }
        /* Rx */
-       for_each_rx_queue(bp, i) {
+       for_each_queue(bp, i) {
 
                /* fastpath rx rings: rx_buf rx_desc rx_comp */
                BNX2X_FREE(bnx2x_fp(bp, i, rx_buf_ring));
@@ -6192,13 +6730,13 @@ static void bnx2x_free_mem(struct bnx2x *bp)
                               BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
        }
        /* Tx */
-       for_each_tx_queue(bp, i) {
+       for_each_queue(bp, i) {
 
                /* fastpath tx rings: tx_buf tx_desc */
                BNX2X_FREE(bnx2x_fp(bp, i, tx_buf_ring));
                BNX2X_PCI_FREE(bnx2x_fp(bp, i, tx_desc_ring),
                               bnx2x_fp(bp, i, tx_desc_mapping),
-                              sizeof(struct eth_tx_bd) * NUM_TX_BD);
+                              sizeof(union eth_tx_bd_types) * NUM_TX_BD);
        }
        /* end of fastpath */
 
@@ -6208,11 +6746,13 @@ static void bnx2x_free_mem(struct bnx2x *bp)
        BNX2X_PCI_FREE(bp->slowpath, bp->slowpath_mapping,
                       sizeof(struct bnx2x_slowpath));
 
-#ifdef BCM_ISCSI
+#ifdef BCM_CNIC
        BNX2X_PCI_FREE(bp->t1, bp->t1_mapping, 64*1024);
        BNX2X_PCI_FREE(bp->t2, bp->t2_mapping, 16*1024);
        BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024);
        BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024);
+       BNX2X_PCI_FREE(bp->cnic_sb, bp->cnic_sb_mapping,
+                      sizeof(struct host_status_block));
 #endif
        BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, BCM_PAGE_SIZE);
 
@@ -6249,11 +6789,10 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
                /* status blocks */
                BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, status_blk),
                                &bnx2x_fp(bp, i, status_blk_mapping),
-                               sizeof(struct host_status_block) +
-                               sizeof(struct eth_tx_db_data));
+                               sizeof(struct host_status_block));
        }
        /* Rx */
-       for_each_rx_queue(bp, i) {
+       for_each_queue(bp, i) {
 
                /* fastpath rx rings: rx_buf rx_desc rx_comp */
                BNX2X_ALLOC(bnx2x_fp(bp, i, rx_buf_ring),
@@ -6275,21 +6814,14 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
                                BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
        }
        /* Tx */
-       for_each_tx_queue(bp, i) {
-
-               bnx2x_fp(bp, i, hw_tx_prods) =
-                               (void *)(bnx2x_fp(bp, i, status_blk) + 1);
-
-               bnx2x_fp(bp, i, tx_prods_mapping) =
-                               bnx2x_fp(bp, i, status_blk_mapping) +
-                               sizeof(struct host_status_block);
+       for_each_queue(bp, i) {
 
                /* fastpath tx rings: tx_buf tx_desc */
                BNX2X_ALLOC(bnx2x_fp(bp, i, tx_buf_ring),
                                sizeof(struct sw_tx_bd) * NUM_TX_BD);
                BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, tx_desc_ring),
                                &bnx2x_fp(bp, i, tx_desc_mapping),
-                               sizeof(struct eth_tx_bd) * NUM_TX_BD);
+                               sizeof(union eth_tx_bd_types) * NUM_TX_BD);
        }
        /* end of fastpath */
 
@@ -6299,32 +6831,26 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
        BNX2X_PCI_ALLOC(bp->slowpath, &bp->slowpath_mapping,
                        sizeof(struct bnx2x_slowpath));
 
-#ifdef BCM_ISCSI
+#ifdef BCM_CNIC
        BNX2X_PCI_ALLOC(bp->t1, &bp->t1_mapping, 64*1024);
 
-       /* Initialize T1 */
-       for (i = 0; i < 64*1024; i += 64) {
-               *(u64 *)((char *)bp->t1 + i + 56) = 0x0UL;
-               *(u64 *)((char *)bp->t1 + i + 3) = 0x0UL;
-       }
-
        /* allocate searcher T2 table
           we allocate 1/4 of alloc num for T2
          (which is not entered into the ILT) */
        BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, 16*1024);
 
-       /* Initialize T2 */
+       /* Initialize T2 (for 1024 connections) */
        for (i = 0; i < 16*1024; i += 64)
-               * (u64 *)((char *)bp->t2 + i + 56) = bp->t2_mapping + i + 64;
-
-       /* now fixup the last line in the block to point to the next block */
-       *(u64 *)((char *)bp->t2 + 1024*16-8) = bp->t2_mapping;
+               *(u64 *)((char *)bp->t2 + i + 56) = bp->t2_mapping + i + 64;
 
-       /* Timer block array (MAX_CONN*8) phys uncached for now 1024 conns */
+       /* Timer block array (8*MAX_CONN) phys uncached for now 1024 conns */
        BNX2X_PCI_ALLOC(bp->timers, &bp->timers_mapping, 8*1024);
 
        /* QM queues (128*MAX_CONN) */
        BNX2X_PCI_ALLOC(bp->qm, &bp->qm_mapping, 128*1024);
+
+       BNX2X_PCI_ALLOC(bp->cnic_sb, &bp->cnic_sb_mapping,
+                       sizeof(struct host_status_block));
 #endif
 
        /* Slow path ring */
@@ -6344,7 +6870,7 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
 {
        int i;
 
-       for_each_tx_queue(bp, i) {
+       for_each_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
 
                u16 bd_cons = fp->tx_bd_cons;
@@ -6362,7 +6888,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
 {
        int i, j;
 
-       for_each_rx_queue(bp, j) {
+       for_each_queue(bp, j) {
                struct bnx2x_fastpath *fp = &bp->fp[j];
 
                for (i = 0; i < NUM_RX_BD; i++) {
@@ -6374,8 +6900,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
 
                        pci_unmap_single(bp->pdev,
                                         pci_unmap_addr(rx_buf, mapping),
-                                        bp->rx_buf_size,
-                                        PCI_DMA_FROMDEVICE);
+                                        bp->rx_buf_size, PCI_DMA_FROMDEVICE);
 
                        rx_buf->skb = NULL;
                        dev_kfree_skb(skb);
@@ -6401,6 +6926,9 @@ static void bnx2x_free_msix_irqs(struct bnx2x *bp)
        DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
           bp->msix_table[0].vector);
 
+#ifdef BCM_CNIC
+       offset++;
+#endif
        for_each_queue(bp, i) {
                DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d irq  "
                   "state %x\n", i, bp->msix_table[i + offset].vector,
@@ -6434,6 +6962,12 @@ static int bnx2x_enable_msix(struct bnx2x *bp)
        bp->msix_table[0].entry = igu_vec;
        DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n", igu_vec);
 
+#ifdef BCM_CNIC
+       igu_vec = BP_L_ID(bp) + offset;
+       bp->msix_table[1].entry = igu_vec;
+       DP(NETIF_MSG_IFUP, "msix_table[1].entry = %d (CNIC)\n", igu_vec);
+       offset++;
+#endif
        for_each_queue(bp, i) {
                igu_vec = BP_L_ID(bp) + offset + i;
                bp->msix_table[i + offset].entry = igu_vec;
@@ -6464,10 +6998,14 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp)
                return -EBUSY;
        }
 
+#ifdef BCM_CNIC
+       offset++;
+#endif
        for_each_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
+               snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
+                        bp->dev->name, i);
 
-               sprintf(fp->name, "%s.fp%d", bp->dev->name, i);
                rc = request_irq(bp->msix_table[i + offset].vector,
                                 bnx2x_msix_fp_int, 0, fp->name, fp);
                if (rc) {
@@ -6480,16 +7018,11 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp)
        }
 
        i = BNX2X_NUM_QUEUES(bp);
-       if (is_multi(bp))
-               printk(KERN_INFO PFX
-                      "%s: using MSI-X  IRQs: sp %d  fp %d - %d\n",
-                      bp->dev->name, bp->msix_table[0].vector,
-                      bp->msix_table[offset].vector,
-                      bp->msix_table[offset + i - 1].vector);
-       else
-               printk(KERN_INFO PFX "%s: using MSI-X  IRQs: sp %d  fp %d\n",
-                      bp->dev->name, bp->msix_table[0].vector,
-                      bp->msix_table[offset + i - 1].vector);
+       printk(KERN_INFO PFX "%s: using MSI-X  IRQs: sp %d  fp[%d] %d"
+              " ... fp[%d] %d\n",
+              bp->dev->name, bp->msix_table[0].vector,
+              0, bp->msix_table[offset].vector,
+              i - 1, bp->msix_table[offset + i - 1].vector);
 
        return 0;
 }
@@ -6530,7 +7063,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
 {
        int i;
 
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                napi_enable(&bnx2x_fp(bp, i, napi));
 }
 
@@ -6538,13 +7071,18 @@ static void bnx2x_napi_disable(struct bnx2x *bp)
 {
        int i;
 
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                napi_disable(&bnx2x_fp(bp, i, napi));
 }
 
 static void bnx2x_netif_start(struct bnx2x *bp)
 {
-       if (atomic_dec_and_test(&bp->intr_sem)) {
+       int intr_sem;
+
+       intr_sem = atomic_dec_and_test(&bp->intr_sem);
+       smp_wmb(); /* Ensure that bp->intr_sem update is SMP-safe */
+
+       if (intr_sem) {
                if (netif_running(bp->dev)) {
                        bnx2x_napi_enable(bp);
                        bnx2x_int_enable(bp);
@@ -6558,17 +7096,26 @@ static void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 {
        bnx2x_int_disable_sync(bp, disable_hw);
        bnx2x_napi_disable(bp);
-       if (netif_running(bp->dev)) {
-               netif_tx_disable(bp->dev);
-               bp->dev->trans_start = jiffies; /* prevent tx timeout */
-       }
+       netif_tx_disable(bp->dev);
 }
 
 /*
  * Init service functions
  */
 
-static void bnx2x_set_mac_addr_e1(struct bnx2x *bp, int set)
+/**
+ * Sets a MAC in a CAM for a few L2 Clients for E1 chip
+ *
+ * @param bp driver descriptor
+ * @param set set or clear an entry (1 or 0)
+ * @param mac pointer to a buffer containing a MAC
+ * @param cl_bit_vec bit vector of clients to register a MAC for
+ * @param cam_offset offset in a CAM to use
+ * @param with_bcast set broadcast MAC as well
+ */
+static void bnx2x_set_mac_addr_e1_gen(struct bnx2x *bp, int set, u8 *mac,
+                                     u32 cl_bit_vec, u8 cam_offset,
+                                     u8 with_bcast)
 {
        struct mac_configuration_cmd *config = bnx2x_sp(bp, mac_config);
        int port = BP_PORT(bp);
@@ -6577,24 +7124,25 @@ static void bnx2x_set_mac_addr_e1(struct bnx2x *bp, int set)
         * unicasts 0-31:port0 32-63:port1
         * multicast 64-127:port0 128-191:port1
         */
-       config->hdr.length = 2;
-       config->hdr.offset = port ? 32 : 0;
-       config->hdr.client_id = BP_CL_ID(bp);
+       config->hdr.length = 1 + (with_bcast ? 1 : 0);
+       config->hdr.offset = cam_offset;
+       config->hdr.client_id = 0xff;
        config->hdr.reserved1 = 0;
 
        /* primary MAC */
        config->config_table[0].cam_entry.msb_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[0]);
+                                       swab16(*(u16 *)&mac[0]);
        config->config_table[0].cam_entry.middle_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[2]);
+                                       swab16(*(u16 *)&mac[2]);
        config->config_table[0].cam_entry.lsb_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[4]);
+                                       swab16(*(u16 *)&mac[4]);
        config->config_table[0].cam_entry.flags = cpu_to_le16(port);
        if (set)
                config->config_table[0].target_table_entry.flags = 0;
        else
                CAM_INVALIDATE(config->config_table[0]);
-       config->config_table[0].target_table_entry.client_id = 0;
+       config->config_table[0].target_table_entry.clients_bit_vector =
+                                               cpu_to_le32(cl_bit_vec);
        config->config_table[0].target_table_entry.vlan_id = 0;
 
        DP(NETIF_MSG_IFUP, "%s MAC (%04x:%04x:%04x)\n",
@@ -6604,50 +7152,58 @@ static void bnx2x_set_mac_addr_e1(struct bnx2x *bp, int set)
           config->config_table[0].cam_entry.lsb_mac_addr);
 
        /* broadcast */
-       config->config_table[1].cam_entry.msb_mac_addr = 0xffff;
-       config->config_table[1].cam_entry.middle_mac_addr = 0xffff;
-       config->config_table[1].cam_entry.lsb_mac_addr = 0xffff;
-       config->config_table[1].cam_entry.flags = cpu_to_le16(port);
-       if (set)
-               config->config_table[1].target_table_entry.flags =
-                               TSTORM_CAM_TARGET_TABLE_ENTRY_BROADCAST;
-       else
-               CAM_INVALIDATE(config->config_table[1]);
-       config->config_table[1].target_table_entry.client_id = 0;
-       config->config_table[1].target_table_entry.vlan_id = 0;
+       if (with_bcast) {
+               config->config_table[1].cam_entry.msb_mac_addr =
+                       cpu_to_le16(0xffff);
+               config->config_table[1].cam_entry.middle_mac_addr =
+                       cpu_to_le16(0xffff);
+               config->config_table[1].cam_entry.lsb_mac_addr =
+                       cpu_to_le16(0xffff);
+               config->config_table[1].cam_entry.flags = cpu_to_le16(port);
+               if (set)
+                       config->config_table[1].target_table_entry.flags =
+                                       TSTORM_CAM_TARGET_TABLE_ENTRY_BROADCAST;
+               else
+                       CAM_INVALIDATE(config->config_table[1]);
+               config->config_table[1].target_table_entry.clients_bit_vector =
+                                                       cpu_to_le32(cl_bit_vec);
+               config->config_table[1].target_table_entry.vlan_id = 0;
+       }
 
        bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
                      U64_HI(bnx2x_sp_mapping(bp, mac_config)),
                      U64_LO(bnx2x_sp_mapping(bp, mac_config)), 0);
 }
 
-static void bnx2x_set_mac_addr_e1h(struct bnx2x *bp, int set)
+/**
+ * Sets a MAC in a CAM for a few L2 Clients for E1H chip
+ *
+ * @param bp driver descriptor
+ * @param set set or clear an entry (1 or 0)
+ * @param mac pointer to a buffer containing a MAC
+ * @param cl_bit_vec bit vector of clients to register a MAC for
+ * @param cam_offset offset in a CAM to use
+ */
+static void bnx2x_set_mac_addr_e1h_gen(struct bnx2x *bp, int set, u8 *mac,
+                                      u32 cl_bit_vec, u8 cam_offset)
 {
        struct mac_configuration_cmd_e1h *config =
                (struct mac_configuration_cmd_e1h *)bnx2x_sp(bp, mac_config);
 
-       if (set && (bp->state != BNX2X_STATE_OPEN)) {
-               DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state);
-               return;
-       }
-
-       /* CAM allocation for E1H
-        * unicasts: by func number
-        * multicast: 20+FUNC*20, 20 each
-        */
        config->hdr.length = 1;
-       config->hdr.offset = BP_FUNC(bp);
-       config->hdr.client_id = BP_CL_ID(bp);
+       config->hdr.offset = cam_offset;
+       config->hdr.client_id = 0xff;
        config->hdr.reserved1 = 0;
 
        /* primary MAC */
        config->config_table[0].msb_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[0]);
+                                       swab16(*(u16 *)&mac[0]);
        config->config_table[0].middle_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[2]);
+                                       swab16(*(u16 *)&mac[2]);
        config->config_table[0].lsb_mac_addr =
-                                       swab16(*(u16 *)&bp->dev->dev_addr[4]);
-       config->config_table[0].client_id = BP_L_ID(bp);
+                                       swab16(*(u16 *)&mac[4]);
+       config->config_table[0].clients_bit_vector =
+                                       cpu_to_le32(cl_bit_vec);
        config->config_table[0].vlan_id = 0;
        config->config_table[0].e1hov_id = cpu_to_le16(bp->e1hov);
        if (set)
@@ -6656,11 +7212,11 @@ static void bnx2x_set_mac_addr_e1h(struct bnx2x *bp, int set)
                config->config_table[0].flags =
                                MAC_CONFIGURATION_ENTRY_E1H_ACTION_TYPE;
 
-       DP(NETIF_MSG_IFUP, "%s MAC (%04x:%04x:%04x)  E1HOV %d  CLID %d\n",
+       DP(NETIF_MSG_IFUP, "%s MAC (%04x:%04x:%04x)  E1HOV %d  CLID mask %d\n",
           (set ? "setting" : "clearing"),
           config->config_table[0].msb_mac_addr,
           config->config_table[0].middle_mac_addr,
-          config->config_table[0].lsb_mac_addr, bp->e1hov, BP_L_ID(bp));
+          config->config_table[0].lsb_mac_addr, bp->e1hov, cl_bit_vec);
 
        bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
                      U64_HI(bnx2x_sp_mapping(bp, mac_config)),
@@ -6697,6 +7253,9 @@ static int bnx2x_wait_ramrod(struct bnx2x *bp, int state, int idx,
                }
 
                msleep(1);
+
+               if (bp->panic)
+                       return -EIO;
        }
 
        /* timeout! */
@@ -6709,6 +7268,69 @@ static int bnx2x_wait_ramrod(struct bnx2x *bp, int state, int idx,
        return -EBUSY;
 }
 
+static void bnx2x_set_eth_mac_addr_e1h(struct bnx2x *bp, int set)
+{
+       bp->set_mac_pending++;
+       smp_wmb();
+
+       bnx2x_set_mac_addr_e1h_gen(bp, set, bp->dev->dev_addr,
+                                  (1 << bp->fp->cl_id), BP_FUNC(bp));
+
+       /* Wait for a completion */
+       bnx2x_wait_ramrod(bp, 0, 0, &bp->set_mac_pending, set ? 0 : 1);
+}
+
+static void bnx2x_set_eth_mac_addr_e1(struct bnx2x *bp, int set)
+{
+       bp->set_mac_pending++;
+       smp_wmb();
+
+       bnx2x_set_mac_addr_e1_gen(bp, set, bp->dev->dev_addr,
+                                 (1 << bp->fp->cl_id), (BP_PORT(bp) ? 32 : 0),
+                                 1);
+
+       /* Wait for a completion */
+       bnx2x_wait_ramrod(bp, 0, 0, &bp->set_mac_pending, set ? 0 : 1);
+}
+
+#ifdef BCM_CNIC
+/**
+ * Set iSCSI MAC(s) at the next enties in the CAM after the ETH
+ * MAC(s). This function will wait until the ramdord completion
+ * returns.
+ *
+ * @param bp driver handle
+ * @param set set or clear the CAM entry
+ *
+ * @return 0 if cussess, -ENODEV if ramrod doesn't return.
+ */
+static int bnx2x_set_iscsi_eth_mac_addr(struct bnx2x *bp, int set)
+{
+       u32 cl_bit_vec = (1 << BCM_ISCSI_ETH_CL_ID);
+
+       bp->set_mac_pending++;
+       smp_wmb();
+
+       /* Send a SET_MAC ramrod */
+       if (CHIP_IS_E1(bp))
+               bnx2x_set_mac_addr_e1_gen(bp, set, bp->iscsi_mac,
+                                 cl_bit_vec, (BP_PORT(bp) ? 32 : 0) + 2,
+                                 1);
+       else
+               /* CAM allocation for E1H
+               * unicasts: by func number
+               * multicast: 20+FUNC*20, 20 each
+               */
+               bnx2x_set_mac_addr_e1h_gen(bp, set, bp->iscsi_mac,
+                                  cl_bit_vec, E1H_FUNC_MAX + BP_FUNC(bp));
+
+       /* Wait for a completion when setting */
+       bnx2x_wait_ramrod(bp, 0, 0, &bp->set_mac_pending, set ? 0 : 1);
+
+       return 0;
+}
+#endif
+
 static int bnx2x_setup_leading(struct bnx2x *bp)
 {
        int rc;
@@ -6744,90 +7366,94 @@ static int bnx2x_setup_multi(struct bnx2x *bp, int index)
 
 static int bnx2x_poll(struct napi_struct *napi, int budget);
 
-static void bnx2x_set_int_mode(struct bnx2x *bp)
+static void bnx2x_set_num_queues_msix(struct bnx2x *bp)
+{
+
+       switch (bp->multi_mode) {
+       case ETH_RSS_MODE_DISABLED:
+               bp->num_queues = 1;
+               break;
+
+       case ETH_RSS_MODE_REGULAR:
+               if (num_queues)
+                       bp->num_queues = min_t(u32, num_queues,
+                                                 BNX2X_MAX_QUEUES(bp));
+               else
+                       bp->num_queues = min_t(u32, num_online_cpus(),
+                                                 BNX2X_MAX_QUEUES(bp));
+               break;
+
+
+       default:
+               bp->num_queues = 1;
+               break;
+       }
+}
+
+static int bnx2x_set_num_queues(struct bnx2x *bp)
 {
-       int num_queues;
+       int rc = 0;
 
        switch (int_mode) {
        case INT_MODE_INTx:
        case INT_MODE_MSI:
-               num_queues = 1;
-               bp->num_rx_queues = num_queues;
-               bp->num_tx_queues = num_queues;
-               DP(NETIF_MSG_IFUP,
-                  "set number of queues to %d\n", num_queues);
+               bp->num_queues = 1;
+               DP(NETIF_MSG_IFUP, "set number of queues to 1\n");
                break;
 
        case INT_MODE_MSIX:
        default:
-               if (bp->multi_mode == ETH_RSS_MODE_REGULAR)
-                       num_queues = min_t(u32, num_online_cpus(),
-                                          BNX2X_MAX_QUEUES(bp));
-               else
-                       num_queues = 1;
-               bp->num_rx_queues = num_queues;
-               bp->num_tx_queues = num_queues;
-               DP(NETIF_MSG_IFUP, "set number of rx queues to %d"
-                  "  number of tx queues to %d\n",
-                  bp->num_rx_queues, bp->num_tx_queues);
+               /* Set number of queues according to bp->multi_mode value */
+               bnx2x_set_num_queues_msix(bp);
+
+               DP(NETIF_MSG_IFUP, "set number of queues to %d\n",
+                  bp->num_queues);
+
                /* if we can't use MSI-X we only need one fp,
                 * so try to enable MSI-X with the requested number of fp's
                 * and fallback to MSI or legacy INTx with one fp
                 */
-               if (bnx2x_enable_msix(bp)) {
+               rc = bnx2x_enable_msix(bp);
+               if (rc)
                        /* failed to enable MSI-X */
-                       num_queues = 1;
-                       bp->num_rx_queues = num_queues;
-                       bp->num_tx_queues = num_queues;
-                       if (bp->multi_mode)
-                               BNX2X_ERR("Multi requested but failed to "
-                                         "enable MSI-X  set number of "
-                                         "queues to %d\n", num_queues);
-               }
+                       bp->num_queues = 1;
                break;
        }
-       bp->dev->real_num_tx_queues = bp->num_tx_queues;
+       bp->dev->real_num_tx_queues = bp->num_queues;
+       return rc;
 }
 
-static void bnx2x_set_rx_mode(struct net_device *dev);
+#ifdef BCM_CNIC
+static int bnx2x_cnic_notify(struct bnx2x *bp, int cmd);
+static void bnx2x_setup_cnic_irq_info(struct bnx2x *bp);
+#endif
 
 /* must be called with rtnl_lock */
 static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 {
        u32 load_code;
-       int i, rc = 0;
+       int i, rc;
+
 #ifdef BNX2X_STOP_ON_ERROR
-       DP(NETIF_MSG_IFUP, "enter  load_mode %d\n", load_mode);
        if (unlikely(bp->panic))
                return -EPERM;
 #endif
 
        bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
 
-       bnx2x_set_int_mode(bp);
+       rc = bnx2x_set_num_queues(bp);
 
        if (bnx2x_alloc_mem(bp))
                return -ENOMEM;
 
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                bnx2x_fp(bp, i, disable_tpa) =
                                        ((bp->flags & TPA_ENABLE_FLAG) == 0);
 
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
                               bnx2x_poll, 128);
 
-#ifdef BNX2X_STOP_ON_ERROR
-       for_each_rx_queue(bp, i) {
-               struct bnx2x_fastpath *fp = &bp->fp[i];
-
-               fp->poll_no_work = 0;
-               fp->poll_calls = 0;
-               fp->poll_max_calls = 0;
-               fp->poll_complete = 0;
-               fp->poll_exit = 0;
-       }
-#endif
        bnx2x_napi_enable(bp);
 
        if (bp->flags & USING_MSIX_FLAG) {
@@ -6837,6 +7463,8 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
                        goto load_error1;
                }
        } else {
+               /* Fall to INTx if failed to enable MSI-X due to lack of
+                  memory (in bnx2x_set_num_queues()) */
                if ((rc != -ENOMEM) && (int_mode != INT_MODE_INTx))
                        bnx2x_enable_msi(bp);
                bnx2x_ack_int(bp);
@@ -6874,11 +7502,11 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        } else {
                int port = BP_PORT(bp);
 
-               DP(NETIF_MSG_IFUP, "NO MCP load counts before us %d, %d, %d\n",
+               DP(NETIF_MSG_IFUP, "NO MCP - load counts      %d, %d, %d\n",
                   load_count[0], load_count[1], load_count[2]);
                load_count[0]++;
                load_count[1 + port]++;
-               DP(NETIF_MSG_IFUP, "NO MCP new load counts       %d, %d, %d\n",
+               DP(NETIF_MSG_IFUP, "NO MCP - new load counts  %d, %d, %d\n",
                   load_count[0], load_count[1], load_count[2]);
                if (load_count[0] == 1)
                        load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
@@ -6905,6 +7533,12 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        /* Setup NIC internals and enable interrupts */
        bnx2x_nic_init(bp, load_code);
 
+       if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) &&
+           (bp->common.shmem2_base))
+               SHMEM2_WR(bp, dcc_support,
+                         (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
+                          SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
+
        /* Send LOAD_DONE command to MCP */
        if (!BP_NOMCP(bp)) {
                load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE);
@@ -6920,41 +7554,68 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        rc = bnx2x_setup_leading(bp);
        if (rc) {
                BNX2X_ERR("Setup leading failed!\n");
+#ifndef BNX2X_STOP_ON_ERROR
                goto load_error3;
+#else
+               bp->panic = 1;
+               return -EBUSY;
+#endif
        }
 
        if (CHIP_IS_E1H(bp))
                if (bp->mf_config & FUNC_MF_CFG_FUNC_DISABLED) {
-                       BNX2X_ERR("!!!  mf_cfg function disabled\n");
-                       bp->state = BNX2X_STATE_DISABLED;
+                       DP(NETIF_MSG_IFUP, "mf_cfg function disabled\n");
+                       bp->flags |= MF_FUNC_DIS;
                }
 
-       if (bp->state == BNX2X_STATE_OPEN)
+       if (bp->state == BNX2X_STATE_OPEN) {
+#ifdef BCM_CNIC
+               /* Enable Timer scan */
+               REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + BP_PORT(bp)*4, 1);
+#endif
                for_each_nondefault_queue(bp, i) {
                        rc = bnx2x_setup_multi(bp, i);
                        if (rc)
+#ifdef BCM_CNIC
+                               goto load_error4;
+#else
                                goto load_error3;
+#endif
                }
 
-       if (CHIP_IS_E1(bp))
-               bnx2x_set_mac_addr_e1(bp, 1);
-       else
-               bnx2x_set_mac_addr_e1h(bp, 1);
+               if (CHIP_IS_E1(bp))
+                       bnx2x_set_eth_mac_addr_e1(bp, 1);
+               else
+                       bnx2x_set_eth_mac_addr_e1h(bp, 1);
+#ifdef BCM_CNIC
+               /* Set iSCSI L2 MAC */
+               mutex_lock(&bp->cnic_mutex);
+               if (bp->cnic_eth_dev.drv_state & CNIC_DRV_STATE_REGD) {
+                       bnx2x_set_iscsi_eth_mac_addr(bp, 1);
+                       bp->cnic_flags |= BNX2X_CNIC_FLAG_MAC_SET;
+               }
+               mutex_unlock(&bp->cnic_mutex);
+#endif
+       }
 
        if (bp->port.pmf)
-               bnx2x_initial_phy_init(bp);
+               bnx2x_initial_phy_init(bp, load_mode);
 
        /* Start fast path */
        switch (load_mode) {
        case LOAD_NORMAL:
-               /* Tx queue should be only reenabled */
-               netif_tx_wake_all_queues(bp->dev);
+               if (bp->state == BNX2X_STATE_OPEN) {
+                       /* Tx queue should be only reenabled */
+                       netif_tx_wake_all_queues(bp->dev);
+               }
                /* Initialize the receive filter. */
                bnx2x_set_rx_mode(bp->dev);
                break;
 
        case LOAD_OPEN:
                netif_tx_start_all_queues(bp->dev);
+               if (bp->state != BNX2X_STATE_OPEN)
+                       netif_tx_disable(bp->dev);
                /* Initialize the receive filter. */
                bnx2x_set_rx_mode(bp->dev);
                break;
@@ -6975,9 +7636,19 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        /* start the timer */
        mod_timer(&bp->timer, jiffies + bp->current_interval);
 
+#ifdef BCM_CNIC
+       bnx2x_setup_cnic_irq_info(bp);
+       if (bp->state == BNX2X_STATE_OPEN)
+               bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
+#endif
 
        return 0;
 
+#ifdef BCM_CNIC
+load_error4:
+       /* Disable Timer scan */
+       REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + BP_PORT(bp)*4, 0);
+#endif
 load_error3:
        bnx2x_int_disable_sync(bp, 1);
        if (!BP_NOMCP(bp)) {
@@ -6987,19 +7658,17 @@ load_error3:
        bp->port.pmf = 0;
        /* Free SKBs, SGEs, TPA pool and driver internals */
        bnx2x_free_skbs(bp);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
 load_error2:
        /* Release IRQs */
        bnx2x_free_irq(bp);
 load_error1:
        bnx2x_napi_disable(bp);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                netif_napi_del(&bnx2x_fp(bp, i, napi));
        bnx2x_free_mem(bp);
 
-       /* TBD we really need to reset the chip
-          if we want to recover from this */
        return rc;
 }
 
@@ -7029,7 +7698,7 @@ static int bnx2x_stop_multi(struct bnx2x *bp, int index)
 
 static int bnx2x_stop_leading(struct bnx2x *bp)
 {
-       u16 dsb_sp_prod_idx;
+       __le16 dsb_sp_prod_idx;
        /* if the other port is handling traffic,
           this can take a lot of time */
        int cnt = 500;
@@ -7039,7 +7708,7 @@ static int bnx2x_stop_leading(struct bnx2x *bp)
 
        /* Send HALT ramrod */
        bp->fp[0].state = BNX2X_FP_STATE_HALTING;
-       bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_HALT, 0, 0, BP_CL_ID(bp), 0);
+       bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_HALT, 0, 0, bp->fp->cl_id, 0);
 
        /* Wait for completion */
        rc = bnx2x_wait_ramrod(bp, BNX2X_FP_STATE_HALTED, 0,
@@ -7087,6 +7756,19 @@ static void bnx2x_reset_func(struct bnx2x *bp)
        REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, 0);
        REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, 0);
 
+#ifdef BCM_CNIC
+       /* Disable Timer scan */
+       REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
+       /*
+        * Wait for at least 10ms and up to 2 second for the timers scan to
+        * complete
+        */
+       for (i = 0; i < 200; i++) {
+               msleep(10);
+               if (!REG_RD(bp, TM_REG_LIN0_SCAN_ON + port*4))
+                       break;
+       }
+#endif
        /* Clear ILT */
        base = FUNC_ILT_BASE(func);
        for (i = base; i < base + ILT_PER_FUNC; i++)
@@ -7153,11 +7835,16 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
        u32 reset_code = 0;
        int i, cnt, rc;
 
+#ifdef BCM_CNIC
+       bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
+#endif
        bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
 
+       /* Set "drop all" */
        bp->rx_mode = BNX2X_RX_MODE_NONE;
        bnx2x_set_storm_rx_mode(bp);
 
+       /* Disable HW interrupts, NAPI and Tx */
        bnx2x_netif_stop(bp, 1);
 
        del_timer_sync(&bp->timer);
@@ -7169,14 +7856,13 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
        bnx2x_free_irq(bp);
 
        /* Wait until tx fastpath tasks complete */
-       for_each_tx_queue(bp, i) {
+       for_each_queue(bp, i) {
                struct bnx2x_fastpath *fp = &bp->fp[i];
 
                cnt = 1000;
-               smp_rmb();
                while (bnx2x_has_tx_work_unload(fp)) {
 
-                       bnx2x_tx_int(fp, 1000);
+                       bnx2x_tx_int(fp);
                        if (!cnt) {
                                BNX2X_ERR("timeout waiting for queue[%d]\n",
                                          i);
@@ -7189,7 +7875,6 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
                        }
                        cnt--;
                        msleep(1);
-                       smp_rmb();
                }
        }
        /* Give HW time to discard old tx messages */
@@ -7199,7 +7884,7 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
                struct mac_configuration_cmd *config =
                                                bnx2x_sp(bp, mcast_config);
 
-               bnx2x_set_mac_addr_e1(bp, 0);
+               bnx2x_set_eth_mac_addr_e1(bp, 0);
 
                for (i = 0; i < config->hdr.length; i++)
                        CAM_INVALIDATE(config->config_table[i]);
@@ -7209,9 +7894,12 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
                        config->hdr.offset = BNX2X_MAX_EMUL_MULTI*(1 + port);
                else
                        config->hdr.offset = BNX2X_MAX_MULTICAST*(1 + port);
-               config->hdr.client_id = BP_CL_ID(bp);
+               config->hdr.client_id = bp->fp->cl_id;
                config->hdr.reserved1 = 0;
 
+               bp->set_mac_pending++;
+               smp_wmb();
+
                bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
                              U64_HI(bnx2x_sp_mapping(bp, mcast_config)),
                              U64_LO(bnx2x_sp_mapping(bp, mcast_config)), 0);
@@ -7219,21 +7907,30 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
        } else { /* E1H */
                REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 0);
 
-               bnx2x_set_mac_addr_e1h(bp, 0);
+               bnx2x_set_eth_mac_addr_e1h(bp, 0);
 
                for (i = 0; i < MC_HASH_SIZE; i++)
                        REG_WR(bp, MC_HASH_OFFSET(bp, i), 0);
+
+               REG_WR(bp, MISC_REG_E1HMF_MODE, 0);
+       }
+#ifdef BCM_CNIC
+       /* Clear iSCSI L2 MAC */
+       mutex_lock(&bp->cnic_mutex);
+       if (bp->cnic_flags & BNX2X_CNIC_FLAG_MAC_SET) {
+               bnx2x_set_iscsi_eth_mac_addr(bp, 0);
+               bp->cnic_flags &= ~BNX2X_CNIC_FLAG_MAC_SET;
        }
+       mutex_unlock(&bp->cnic_mutex);
+#endif
 
        if (unload_mode == UNLOAD_NORMAL)
                reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
 
-       else if (bp->flags & NO_WOL_FLAG) {
+       else if (bp->flags & NO_WOL_FLAG)
                reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP;
-               if (CHIP_IS_E1H(bp))
-                       REG_WR(bp, MISC_REG_E1HMF_MODE, 0);
 
-       else if (bp->wol) {
+       else if (bp->wol) {
                u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
                u8 *mac_addr = bp->dev->dev_addr;
                u32 val;
@@ -7273,11 +7970,11 @@ unload_error:
        if (!BP_NOMCP(bp))
                reset_code = bnx2x_fw_command(bp, reset_code);
        else {
-               DP(NETIF_MSG_IFDOWN, "NO MCP load counts      %d, %d, %d\n",
+               DP(NETIF_MSG_IFDOWN, "NO MCP load counts      %d, %d, %d\n",
                   load_count[0], load_count[1], load_count[2]);
                load_count[0]--;
                load_count[1 + port]--;
-               DP(NETIF_MSG_IFDOWN, "NO MCP new load counts  %d, %d, %d\n",
+               DP(NETIF_MSG_IFDOWN, "NO MCP new load counts  %d, %d, %d\n",
                   load_count[0], load_count[1], load_count[2]);
                if (load_count[0] == 0)
                        reset_code = FW_MSG_CODE_DRV_UNLOAD_COMMON;
@@ -7297,13 +7994,14 @@ unload_error:
        /* Report UNLOAD_DONE to MCP */
        if (!BP_NOMCP(bp))
                bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE);
+
        bp->port.pmf = 0;
 
        /* Free SKBs, SGEs, TPA pool and driver internals */
        bnx2x_free_skbs(bp);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                netif_napi_del(&bnx2x_fp(bp, i, napi));
        bnx2x_free_mem(bp);
 
@@ -7321,7 +8019,7 @@ static void bnx2x_reset_task(struct work_struct *work)
 #ifdef BNX2X_STOP_ON_ERROR
        BNX2X_ERR("reset task called but STOP_ON_ERROR defined"
                  " so reset not done to allow debug dump,\n"
-        KERN_ERR " you will need to reboot when done\n");
+                 " you will need to reboot when done\n");
        return;
 #endif
 
@@ -7535,8 +8233,10 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
                       bp->common.flash_size, bp->common.flash_size);
 
        bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
+       bp->common.shmem2_base = REG_RD(bp, MISC_REG_GENERIC_CR_0);
        bp->link_params.shmem_base = bp->common.shmem_base;
-       BNX2X_DEV_INFO("shmem offset is 0x%x\n", bp->common.shmem_base);
+       BNX2X_DEV_INFO("shmem offset 0x%x  shmem2 offset 0x%x\n",
+                      bp->common.shmem_base, bp->common.shmem2_base);
 
        if (!bp->common.shmem_base ||
            (bp->common.shmem_base < 0xA0000) ||
@@ -7576,6 +8276,9 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
                BNX2X_ERR("This driver needs bc_ver %X but found %X,"
                          " please upgrade BC\n", BNX2X_BC_VER, val);
        }
+       bp->link_params.feature_config_flags |=
+               (val >= REQ_BC_VER_4_VRFY_OPT_MDL) ?
+               FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY : 0;
 
        if (BP_E1HVN(bp) == 0) {
                pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_PMC, &pmc);
@@ -7585,7 +8288,7 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
                bp->flags |= NO_WOL_FLAG;
        }
        BNX2X_DEV_INFO("%sWoL capable\n",
-                      (bp->flags & NO_WOL_FLAG) ? "Not " : "");
+                      (bp->flags & NO_WOL_FLAG) ? "not " : "");
 
        val = SHMEM_RD(bp, dev_info.shared_hw_config.part_num);
        val2 = SHMEM_RD(bp, dev_info.shared_hw_config.part_num[4]);
@@ -7736,6 +8439,18 @@ static void __devinit bnx2x_link_settings_supported(struct bnx2x *bp,
                                               SUPPORTED_Asym_Pause);
                        break;
 
+               case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727:
+                       BNX2X_DEV_INFO("ext_phy_type 0x%x (8727)\n",
+                                      ext_phy_type);
+
+                       bp->port.supported |= (SUPPORTED_10000baseT_Full |
+                                              SUPPORTED_1000baseT_Full |
+                                              SUPPORTED_Autoneg |
+                                              SUPPORTED_FIBRE |
+                                              SUPPORTED_Pause |
+                                              SUPPORTED_Asym_Pause);
+                       break;
+
                case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
                        BNX2X_DEV_INFO("ext_phy_type 0x%x (SFX7101)\n",
                                       ext_phy_type);
@@ -7984,12 +8699,21 @@ static void __devinit bnx2x_link_settings_requested(struct bnx2x *bp)
                       bp->link_params.req_flow_ctrl, bp->port.advertising);
 }
 
+static void __devinit bnx2x_set_mac_buf(u8 *mac_buf, u32 mac_lo, u16 mac_hi)
+{
+       mac_hi = cpu_to_be16(mac_hi);
+       mac_lo = cpu_to_be32(mac_lo);
+       memcpy(mac_buf, &mac_hi, sizeof(mac_hi));
+       memcpy(mac_buf + sizeof(mac_hi), &mac_lo, sizeof(mac_lo));
+}
+
 static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp)
 {
        int port = BP_PORT(bp);
        u32 val, val2;
        u32 config;
        u16 i;
+       u32 ext_phy_type;
 
        bp->link_params.bp = bp;
        bp->link_params.port = port;
@@ -7999,6 +8723,17 @@ static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp)
        bp->link_params.ext_phy_config =
                SHMEM_RD(bp,
                         dev_info.port_hw_config[port].external_phy_config);
+       /* BCM8727_NOC => BCM8727 no over current */
+       if (XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config) ==
+           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727_NOC) {
+               bp->link_params.ext_phy_config &=
+                       ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK;
+               bp->link_params.ext_phy_config |=
+                       PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727;
+               bp->link_params.feature_config_flags |=
+                       FEATURE_CONFIG_BCM8727_NOC;
+       }
+
        bp->link_params.speed_cap_mask =
                SHMEM_RD(bp,
                         dev_info.port_hw_config[port].speed_capability_mask);
@@ -8019,13 +8754,12 @@ static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp)
                bp->link_params.xgxs_config_tx[(i << 1) + 1] = (val & 0xffff);
        }
 
+       /* If the device is capable of WoL, set the default state according
+        * to the HW
+        */
        config = SHMEM_RD(bp, dev_info.port_feature_config[port].config);
-       if (config & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_ENABLED)
-               bp->link_params.feature_config_flags |=
-                               FEATURE_CONFIG_MODULE_ENFORCMENT_ENABLED;
-       else
-               bp->link_params.feature_config_flags &=
-                               ~FEATURE_CONFIG_MODULE_ENFORCMENT_ENABLED;
+       bp->wol = (!(bp->flags & NO_WOL_FLAG) &&
+                  (config & PORT_FEATURE_WOL_ENABLED));
 
        BNX2X_DEV_INFO("lane_config 0x%08x  ext_phy_config 0x%08x"
                       "  speed_cap_mask 0x%08x  link_config 0x%08x\n",
@@ -8033,22 +8767,36 @@ static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp)
                       bp->link_params.ext_phy_config,
                       bp->link_params.speed_cap_mask, bp->port.link_config);
 
-       bp->link_params.switch_cfg = (bp->port.link_config &
-                                     PORT_FEATURE_CONNECTED_SWITCH_MASK);
+       bp->link_params.switch_cfg |= (bp->port.link_config &
+                                      PORT_FEATURE_CONNECTED_SWITCH_MASK);
        bnx2x_link_settings_supported(bp, bp->link_params.switch_cfg);
 
        bnx2x_link_settings_requested(bp);
 
+       /*
+        * If connected directly, work with the internal PHY, otherwise, work
+        * with the external PHY
+        */
+       ext_phy_type = XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config);
+       if (ext_phy_type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT)
+               bp->mdio.prtad = bp->link_params.phy_addr;
+
+       else if ((ext_phy_type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE) &&
+                (ext_phy_type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN))
+               bp->mdio.prtad =
+                       XGXS_EXT_PHY_ADDR(bp->link_params.ext_phy_config);
+
        val2 = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_upper);
        val = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_lower);
-       bp->dev->dev_addr[0] = (u8)(val2 >> 8 & 0xff);
-       bp->dev->dev_addr[1] = (u8)(val2 & 0xff);
-       bp->dev->dev_addr[2] = (u8)(val >> 24 & 0xff);
-       bp->dev->dev_addr[3] = (u8)(val >> 16 & 0xff);
-       bp->dev->dev_addr[4] = (u8)(val >> 8  & 0xff);
-       bp->dev->dev_addr[5] = (u8)(val & 0xff);
+       bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2);
        memcpy(bp->link_params.mac_addr, bp->dev->dev_addr, ETH_ALEN);
        memcpy(bp->dev->perm_addr, bp->dev->dev_addr, ETH_ALEN);
+
+#ifdef BCM_CNIC
+       val2 = SHMEM_RD(bp, dev_info.port_hw_config[port].iscsi_mac_upper);
+       val = SHMEM_RD(bp, dev_info.port_hw_config[port].iscsi_mac_lower);
+       bnx2x_set_mac_buf(bp->iscsi_mac, val, val2);
+#endif
 }
 
 static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
@@ -8065,22 +8813,33 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
                bp->mf_config =
                        SHMEM_RD(bp, mf_cfg.func_mf_config[func].config);
 
-               val = (SHMEM_RD(bp, mf_cfg.func_mf_config[func].e1hov_tag) &
+               val = (SHMEM_RD(bp, mf_cfg.func_mf_config[FUNC_0].e1hov_tag) &
                       FUNC_MF_CFG_E1HOV_TAG_MASK);
-               if (val != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) {
-
-                       bp->e1hov = val;
+               if (val != FUNC_MF_CFG_E1HOV_TAG_DEFAULT)
                        bp->e1hmf = 1;
-                       BNX2X_DEV_INFO("MF mode  E1HOV for func %d is %d "
-                                      "(0x%04x)\n",
-                                      func, bp->e1hov, bp->e1hov);
-               } else {
-                       BNX2X_DEV_INFO("Single function mode\n");
-                       if (BP_E1HVN(bp)) {
+               BNX2X_DEV_INFO("%s function mode\n",
+                              IS_E1HMF(bp) ? "multi" : "single");
+
+               if (IS_E1HMF(bp)) {
+                       val = (SHMEM_RD(bp, mf_cfg.func_mf_config[func].
+                                                               e1hov_tag) &
+                              FUNC_MF_CFG_E1HOV_TAG_MASK);
+                       if (val != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) {
+                               bp->e1hov = val;
+                               BNX2X_DEV_INFO("E1HOV for func %d is %d "
+                                              "(0x%04x)\n",
+                                              func, bp->e1hov, bp->e1hov);
+                       } else {
                                BNX2X_ERR("!!!  No valid E1HOV for func %d,"
                                          "  aborting\n", func);
                                rc = -EPERM;
                        }
+               } else {
+                       if (BP_E1HVN(bp)) {
+                               BNX2X_ERR("!!!  VN %d in single function mode,"
+                                         "  aborting\n", BP_E1HVN(bp));
+                               rc = -EPERM;
+                       }
                }
        }
 
@@ -8130,8 +8889,13 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 
        /* Disable interrupt handling until HW is initialized */
        atomic_set(&bp->intr_sem, 1);
+       smp_wmb(); /* Ensure that bp->intr_sem update is SMP-safe */
 
        mutex_init(&bp->port.phy_mutex);
+       mutex_init(&bp->fw_mb_mutex);
+#ifdef BCM_CNIC
+       mutex_init(&bp->cnic_mutex);
+#endif
 
        INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
        INIT_WORK(&bp->reset_task, bnx2x_reset_task);
@@ -8168,6 +8932,11 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
                bp->dev->features |= NETIF_F_LRO;
        }
 
+       if (CHIP_IS_E1(bp))
+               bp->dropless_fc = 0;
+       else
+               bp->dropless_fc = dropless_fc;
+
        bp->mrrs = mrrs;
 
        bp->tx_ring_size = MAX_TX_AVAIL;
@@ -8175,8 +8944,9 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 
        bp->rx_csum = 1;
 
-       bp->tx_ticks = 50;
-       bp->rx_ticks = 25;
+       /* make sure that the numbers are in the right granularity */
+       bp->tx_ticks = (50 / (4 * BNX2X_BTR)) * (4 * BNX2X_BTR);
+       bp->rx_ticks = (25 / (4 * BNX2X_BTR)) * (4 * BNX2X_BTR);
 
        timer_interval = (CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ);
        bp->current_interval = (poll ? poll : timer_interval);
@@ -8202,20 +8972,23 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        cmd->supported = bp->port.supported;
        cmd->advertising = bp->port.advertising;
 
-       if (netif_carrier_ok(dev)) {
+       if ((bp->state == BNX2X_STATE_OPEN) &&
+           !(bp->flags & MF_FUNC_DIS) &&
+           (bp->link_vars.link_up)) {
                cmd->speed = bp->link_vars.line_speed;
                cmd->duplex = bp->link_vars.duplex;
-       } else {
-               cmd->speed = bp->link_params.req_line_speed;
-               cmd->duplex = bp->link_params.req_duplex;
-       }
-       if (IS_E1HMF(bp)) {
-               u16 vn_max_rate;
+               if (IS_E1HMF(bp)) {
+                       u16 vn_max_rate;
 
-               vn_max_rate = ((bp->mf_config & FUNC_MF_CFG_MAX_BW_MASK) >>
+                       vn_max_rate =
+                               ((bp->mf_config & FUNC_MF_CFG_MAX_BW_MASK) >>
                                FUNC_MF_CFG_MAX_BW_SHIFT) * 100;
-               if (vn_max_rate < cmd->speed)
-                       cmd->speed = vn_max_rate;
+                       if (vn_max_rate < cmd->speed)
+                               cmd->speed = vn_max_rate;
+               }
+       } else {
+               cmd->speed = -1;
+               cmd->duplex = -1;
        }
 
        if (bp->link_params.switch_cfg == SWITCH_CFG_10G) {
@@ -8229,6 +9002,7 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8705:
                case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706:
                case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8726:
+               case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727:
                        cmd->port = PORT_FIBRE;
                        break;
 
@@ -8250,7 +9024,7 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        } else
                cmd->port = PORT_TP;
 
-       cmd->phy_address = bp->port.phy_addr;
+       cmd->phy_address = bp->mdio.prtad;
        cmd->transceiver = XCVR_INTERNAL;
 
        if (bp->link_params.req_line_speed == SPEED_AUTO_NEG)
@@ -8423,6 +9197,81 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        return 0;
 }
 
+#define IS_E1_ONLINE(info)     (((info) & RI_E1_ONLINE) == RI_E1_ONLINE)
+#define IS_E1H_ONLINE(info)    (((info) & RI_E1H_ONLINE) == RI_E1H_ONLINE)
+
+static int bnx2x_get_regs_len(struct net_device *dev)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       int regdump_len = 0;
+       int i;
+
+       if (CHIP_IS_E1(bp)) {
+               for (i = 0; i < REGS_COUNT; i++)
+                       if (IS_E1_ONLINE(reg_addrs[i].info))
+                               regdump_len += reg_addrs[i].size;
+
+               for (i = 0; i < WREGS_COUNT_E1; i++)
+                       if (IS_E1_ONLINE(wreg_addrs_e1[i].info))
+                               regdump_len += wreg_addrs_e1[i].size *
+                                       (1 + wreg_addrs_e1[i].read_regs_count);
+
+       } else { /* E1H */
+               for (i = 0; i < REGS_COUNT; i++)
+                       if (IS_E1H_ONLINE(reg_addrs[i].info))
+                               regdump_len += reg_addrs[i].size;
+
+               for (i = 0; i < WREGS_COUNT_E1H; i++)
+                       if (IS_E1H_ONLINE(wreg_addrs_e1h[i].info))
+                               regdump_len += wreg_addrs_e1h[i].size *
+                                       (1 + wreg_addrs_e1h[i].read_regs_count);
+       }
+       regdump_len *= 4;
+       regdump_len += sizeof(struct dump_hdr);
+
+       return regdump_len;
+}
+
+static void bnx2x_get_regs(struct net_device *dev,
+                          struct ethtool_regs *regs, void *_p)
+{
+       u32 *p = _p, i, j;
+       struct bnx2x *bp = netdev_priv(dev);
+       struct dump_hdr dump_hdr = {0};
+
+       regs->version = 0;
+       memset(p, 0, regs->len);
+
+       if (!netif_running(bp->dev))
+               return;
+
+       dump_hdr.hdr_size = (sizeof(struct dump_hdr) / 4) - 1;
+       dump_hdr.dump_sign = dump_sign_all;
+       dump_hdr.xstorm_waitp = REG_RD(bp, XSTORM_WAITP_ADDR);
+       dump_hdr.tstorm_waitp = REG_RD(bp, TSTORM_WAITP_ADDR);
+       dump_hdr.ustorm_waitp = REG_RD(bp, USTORM_WAITP_ADDR);
+       dump_hdr.cstorm_waitp = REG_RD(bp, CSTORM_WAITP_ADDR);
+       dump_hdr.info = CHIP_IS_E1(bp) ? RI_E1_ONLINE : RI_E1H_ONLINE;
+
+       memcpy(p, &dump_hdr, sizeof(struct dump_hdr));
+       p += dump_hdr.hdr_size + 1;
+
+       if (CHIP_IS_E1(bp)) {
+               for (i = 0; i < REGS_COUNT; i++)
+                       if (IS_E1_ONLINE(reg_addrs[i].info))
+                               for (j = 0; j < reg_addrs[i].size; j++)
+                                       *p++ = REG_RD(bp,
+                                                     reg_addrs[i].addr + j*4);
+
+       } else { /* E1H */
+               for (i = 0; i < REGS_COUNT; i++)
+                       if (IS_E1H_ONLINE(reg_addrs[i].info))
+                               for (j = 0; j < reg_addrs[i].size; j++)
+                                       *p++ = REG_RD(bp,
+                                                     reg_addrs[i].addr + j*4);
+       }
+}
+
 #define PHY_FW_VER_LEN                 10
 
 static void bnx2x_get_drvinfo(struct net_device *dev,
@@ -8452,7 +9301,7 @@ static void bnx2x_get_drvinfo(struct net_device *dev,
        info->n_stats = BNX2X_NUM_STATS;
        info->testinfo_len = BNX2X_NUM_TESTS;
        info->eedump_len = bp->common.flash_size;
-       info->regdump_len = 0;
+       info->regdump_len = bnx2x_get_regs_len(dev);
 }
 
 static void bnx2x_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -8520,6 +9369,16 @@ static int bnx2x_nway_reset(struct net_device *dev)
        return 0;
 }
 
+static u32 bnx2x_get_link(struct net_device *dev)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+
+       if (bp->flags & MF_FUNC_DIS)
+               return 0;
+
+       return bp->link_vars.link_up;
+}
+
 static int bnx2x_get_eeprom_len(struct net_device *dev)
 {
        struct bnx2x *bp = netdev_priv(dev);
@@ -8613,7 +9472,7 @@ static void bnx2x_disable_nvram_access(struct bnx2x *bp)
                        MCPR_NVM_ACCESS_ENABLE_WR_EN)));
 }
 
-static int bnx2x_nvram_read_dword(struct bnx2x *bp, u32 offset, u32 *ret_val,
+static int bnx2x_nvram_read_dword(struct bnx2x *bp, u32 offset, __be32 *ret_val,
                                  u32 cmd_flags)
 {
        int count, i, rc;
@@ -8649,8 +9508,7 @@ static int bnx2x_nvram_read_dword(struct bnx2x *bp, u32 offset, u32 *ret_val,
                        /* we read nvram data in cpu order
                         * but ethtool sees it as an array of bytes
                         * converting to big-endian will do the work */
-                       val = cpu_to_be32(val);
-                       *ret_val = val;
+                       *ret_val = cpu_to_be32(val);
                        rc = 0;
                        break;
                }
@@ -8664,7 +9522,7 @@ static int bnx2x_nvram_read(struct bnx2x *bp, u32 offset, u8 *ret_buf,
 {
        int rc;
        u32 cmd_flags;
-       u32 val;
+       __be32 val;
 
        if ((offset & 0x03) || (buf_size & 0x03) || (buf_size == 0)) {
                DP(BNX2X_MSG_NVM,
@@ -8783,7 +9641,7 @@ static int bnx2x_nvram_write1(struct bnx2x *bp, u32 offset, u8 *data_buf,
        int rc;
        u32 cmd_flags;
        u32 align_offset;
-       u32 val;
+       __be32 val;
 
        if (offset + buf_size > bp->common.flash_size) {
                DP(BNX2X_MSG_NVM, "Invalid parameter: offset (0x%x) +"
@@ -8888,7 +9746,8 @@ static int bnx2x_set_eeprom(struct net_device *dev,
                            struct ethtool_eeprom *eeprom, u8 *eebuf)
 {
        struct bnx2x *bp = netdev_priv(dev);
-       int rc;
+       int port = BP_PORT(bp);
+       int rc = 0;
 
        if (!netif_running(dev))
                return -EAGAIN;
@@ -8900,27 +9759,59 @@ static int bnx2x_set_eeprom(struct net_device *dev,
 
        /* parameters already validated in ethtool_set_eeprom */
 
-       /* If the magic number is PHY (0x00504859) upgrade the PHY FW */
-       if (eeprom->magic == 0x00504859)
-               if (bp->port.pmf) {
+       /* PHY eeprom can be accessed only by the PMF */
+       if ((eeprom->magic >= 0x50485900) && (eeprom->magic <= 0x504859FF) &&
+           !bp->port.pmf)
+               return -EINVAL;
+
+       if (eeprom->magic == 0x50485950) {
+               /* 'PHYP' (0x50485950): prepare phy for FW upgrade */
+               bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+
+               bnx2x_acquire_phy_lock(bp);
+               rc |= bnx2x_link_reset(&bp->link_params,
+                                      &bp->link_vars, 0);
+               if (XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config) ==
+                                       PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101)
+                       bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_0,
+                                      MISC_REGISTERS_GPIO_HIGH, port);
+               bnx2x_release_phy_lock(bp);
+               bnx2x_link_report(bp);
 
+       } else if (eeprom->magic == 0x50485952) {
+               /* 'PHYR' (0x50485952): re-init link after FW upgrade */
+               if (bp->state == BNX2X_STATE_OPEN) {
                        bnx2x_acquire_phy_lock(bp);
-                       rc = bnx2x_flash_download(bp, BP_PORT(bp),
-                                            bp->link_params.ext_phy_config,
-                                            (bp->state != BNX2X_STATE_CLOSED),
-                                            eebuf, eeprom->len);
-                       if ((bp->state == BNX2X_STATE_OPEN) ||
-                           (bp->state == BNX2X_STATE_DISABLED)) {
-                               rc |= bnx2x_link_reset(&bp->link_params,
-                                                      &bp->link_vars, 1);
-                               rc |= bnx2x_phy_init(&bp->link_params,
-                                                    &bp->link_vars);
-                       }
+                       rc |= bnx2x_link_reset(&bp->link_params,
+                                              &bp->link_vars, 1);
+
+                       rc |= bnx2x_phy_init(&bp->link_params,
+                                            &bp->link_vars);
                        bnx2x_release_phy_lock(bp);
+                       bnx2x_calc_fc_adv(bp);
+               }
+       } else if (eeprom->magic == 0x53985943) {
+               /* 'PHYC' (0x53985943): PHY FW upgrade completed */
+               if (XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config) ==
+                                      PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101) {
+                       u8 ext_phy_addr =
+                            XGXS_EXT_PHY_ADDR(bp->link_params.ext_phy_config);
 
-               } else /* Only the PMF can access the PHY */
-                       return -EINVAL;
-       else
+                       /* DSP Remove Download Mode */
+                       bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_0,
+                                      MISC_REGISTERS_GPIO_LOW, port);
+
+                       bnx2x_acquire_phy_lock(bp);
+
+                       bnx2x_sfx7101_sp_sw_reset(bp, port, ext_phy_addr);
+
+                       /* wait 0.5 sec to allow it to run */
+                       msleep(500);
+                       bnx2x_ext_phy_hw_reset(bp, port);
+                       msleep(500);
+                       bnx2x_release_phy_lock(bp);
+               }
+       } else
                rc = bnx2x_nvram_write(bp, eeprom->offset, eebuf, eeprom->len);
 
        return rc;
@@ -8939,18 +9830,19 @@ static int bnx2x_get_coalesce(struct net_device *dev,
        return 0;
 }
 
+#define BNX2X_MAX_COALES_TOUT  (0xf0*12) /* Maximal coalescing timeout in us */
 static int bnx2x_set_coalesce(struct net_device *dev,
                              struct ethtool_coalesce *coal)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
        bp->rx_ticks = (u16) coal->rx_coalesce_usecs;
-       if (bp->rx_ticks > 3000)
-               bp->rx_ticks = 3000;
+       if (bp->rx_ticks > BNX2X_MAX_COALES_TOUT)
+               bp->rx_ticks = BNX2X_MAX_COALES_TOUT;
 
        bp->tx_ticks = (u16) coal->tx_coalesce_usecs;
-       if (bp->tx_ticks > 0x3000)
-               bp->tx_ticks = 0x3000;
+       if (bp->tx_ticks > BNX2X_MAX_COALES_TOUT)
+               bp->tx_ticks = BNX2X_MAX_COALES_TOUT;
 
        if (netif_running(dev))
                bnx2x_update_coalesce(bp);
@@ -9002,7 +9894,8 @@ static void bnx2x_get_pauseparam(struct net_device *dev,
 {
        struct bnx2x *bp = netdev_priv(dev);
 
-       epause->autoneg = (bp->link_params.req_flow_ctrl == BNX2X_FLOW_CTRL_AUTO) &&
+       epause->autoneg = (bp->link_params.req_flow_ctrl ==
+                          BNX2X_FLOW_CTRL_AUTO) &&
                          (bp->link_params.req_line_speed == SPEED_AUTO_NEG);
 
        epause->rx_pause = ((bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX) ==
@@ -9137,11 +10030,6 @@ static const struct {
        { "idle check (online)" }
 };
 
-static int bnx2x_self_test_count(struct net_device *dev)
-{
-       return BNX2X_NUM_TESTS;
-}
-
 static int bnx2x_test_registers(struct bnx2x *bp)
 {
        int idx, i, rc = -ENODEV;
@@ -9170,10 +10058,9 @@ static int bnx2x_test_registers(struct bnx2x *bp)
                { XCM_REG_WU_DA_SET_TMR_CNT_FLG_CMD00, 4, 0x00000001 },
                { XCM_REG_WU_DA_CNT_CMD00,             4, 0x00000003 },
                { XCM_REG_GLB_DEL_ACK_MAX_CNT_0,       4, 0x000000ff },
-               { NIG_REG_EGRESS_MNG0_FIFO,           20, 0xffffffff },
                { NIG_REG_LLH0_T_BIT,                  4, 0x00000001 },
-/* 20 */       { NIG_REG_EMAC0_IN_EN,                 4, 0x00000001 },
-               { NIG_REG_BMAC0_IN_EN,                 4, 0x00000001 },
+               { NIG_REG_EMAC0_IN_EN,                 4, 0x00000001 },
+/* 20 */       { NIG_REG_BMAC0_IN_EN,                 4, 0x00000001 },
                { NIG_REG_XCM0_OUT_EN,                 4, 0x00000001 },
                { NIG_REG_BRB0_OUT_EN,                 4, 0x00000001 },
                { NIG_REG_LLH0_XCM_MASK,               4, 0x00000007 },
@@ -9182,8 +10069,8 @@ static int bnx2x_test_registers(struct bnx2x *bp)
                { NIG_REG_LLH0_DEST_MAC_0_0,         160, 0xffffffff },
                { NIG_REG_LLH0_DEST_IP_0_1,          160, 0xffffffff },
                { NIG_REG_LLH0_IPV4_IPV6_0,          160, 0x00000001 },
-/* 30 */       { NIG_REG_LLH0_DEST_UDP_0,           160, 0x0000ffff },
-               { NIG_REG_LLH0_DEST_TCP_0,           160, 0x0000ffff },
+               { NIG_REG_LLH0_DEST_UDP_0,           160, 0x0000ffff },
+/* 30 */       { NIG_REG_LLH0_DEST_TCP_0,           160, 0x0000ffff },
                { NIG_REG_LLH0_VLAN_ID_0,            160, 0x00000fff },
                { NIG_REG_XGXS_SERDES0_MODE_SEL,       4, 0x00000001 },
                { NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0, 4, 0x00000001 },
@@ -9309,12 +10196,14 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
        unsigned int pkt_size, num_pkts, i;
        struct sk_buff *skb;
        unsigned char *packet;
-       struct bnx2x_fastpath *fp = &bp->fp[0];
+       struct bnx2x_fastpath *fp_rx = &bp->fp[0];
+       struct bnx2x_fastpath *fp_tx = &bp->fp[0];
        u16 tx_start_idx, tx_idx;
        u16 rx_start_idx, rx_idx;
-       u16 pkt_prod;
+       u16 pkt_prod, bd_prod;
        struct sw_tx_bd *tx_buf;
-       struct eth_tx_bd *tx_bd;
+       struct eth_tx_start_bd *tx_start_bd;
+       struct eth_tx_parse_bd *pbd = NULL;
        dma_addr_t mapping;
        union eth_rx_cqe *cqe;
        u8 cqe_fp_flags;
@@ -9322,23 +10211,23 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
        u16 len;
        int rc = -ENODEV;
 
-       if (loopback_mode == BNX2X_MAC_LOOPBACK) {
+       /* check the loopback mode */
+       switch (loopback_mode) {
+       case BNX2X_PHY_LOOPBACK:
+               if (bp->link_params.loopback_mode != LOOPBACK_XGXS_10)
+                       return -EINVAL;
+               break;
+       case BNX2X_MAC_LOOPBACK:
                bp->link_params.loopback_mode = LOOPBACK_BMAC;
                bnx2x_phy_init(&bp->link_params, &bp->link_vars);
-
-       } else if (loopback_mode == BNX2X_PHY_LOOPBACK) {
-               u16 cnt = 1000;
-               bp->link_params.loopback_mode = LOOPBACK_XGXS_10;
-               bnx2x_phy_init(&bp->link_params, &bp->link_vars);
-               /* wait until link state is restored */
-               if (link_up)
-                       while (cnt-- && bnx2x_test_link(&bp->link_params,
-                                                       &bp->link_vars))
-                               msleep(10);
-       } else
+               break;
+       default:
                return -EINVAL;
+       }
 
-       pkt_size = 1514;
+       /* prepare the loopback packet */
+       pkt_size = (((bp->dev->mtu < ETH_MAX_PACKET_SIZE) ?
+                    bp->dev->mtu : ETH_MAX_PACKET_SIZE) + ETH_HLEN);
        skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
        if (!skb) {
                rc = -ENOMEM;
@@ -9346,58 +10235,63 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
        }
        packet = skb_put(skb, pkt_size);
        memcpy(packet, bp->dev->dev_addr, ETH_ALEN);
-       memset(packet + ETH_ALEN, 0, (ETH_HLEN - ETH_ALEN));
+       memset(packet + ETH_ALEN, 0, ETH_ALEN);
+       memset(packet + 2*ETH_ALEN, 0x77, (ETH_HLEN - 2*ETH_ALEN));
        for (i = ETH_HLEN; i < pkt_size; i++)
                packet[i] = (unsigned char) (i & 0xff);
 
+       /* send the loopback packet */
        num_pkts = 0;
-       tx_start_idx = le16_to_cpu(*fp->tx_cons_sb);
-       rx_start_idx = le16_to_cpu(*fp->rx_cons_sb);
+       tx_start_idx = le16_to_cpu(*fp_tx->tx_cons_sb);
+       rx_start_idx = le16_to_cpu(*fp_rx->rx_cons_sb);
 
-       pkt_prod = fp->tx_pkt_prod++;
-       tx_buf = &fp->tx_buf_ring[TX_BD(pkt_prod)];
-       tx_buf->first_bd = fp->tx_bd_prod;
+       pkt_prod = fp_tx->tx_pkt_prod++;
+       tx_buf = &fp_tx->tx_buf_ring[TX_BD(pkt_prod)];
+       tx_buf->first_bd = fp_tx->tx_bd_prod;
        tx_buf->skb = skb;
+       tx_buf->flags = 0;
 
-       tx_bd = &fp->tx_desc_ring[TX_BD(fp->tx_bd_prod)];
+       bd_prod = TX_BD(fp_tx->tx_bd_prod);
+       tx_start_bd = &fp_tx->tx_desc_ring[bd_prod].start_bd;
        mapping = pci_map_single(bp->pdev, skb->data,
                                 skb_headlen(skb), PCI_DMA_TODEVICE);
-       tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
-       tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
-       tx_bd->nbd = cpu_to_le16(1);
-       tx_bd->nbytes = cpu_to_le16(skb_headlen(skb));
-       tx_bd->vlan = cpu_to_le16(pkt_prod);
-       tx_bd->bd_flags.as_bitfield = (ETH_TX_BD_FLAGS_START_BD |
-                                      ETH_TX_BD_FLAGS_END_BD);
-       tx_bd->general_data = ((UNICAST_ADDRESS <<
-                               ETH_TX_BD_ETH_ADDR_TYPE_SHIFT) | 1);
+       tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+       tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+       tx_start_bd->nbd = cpu_to_le16(2); /* start + pbd */
+       tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
+       tx_start_bd->vlan = cpu_to_le16(pkt_prod);
+       tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
+       tx_start_bd->general_data = ((UNICAST_ADDRESS <<
+                               ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT) | 1);
+
+       /* turn on parsing and get a BD */
+       bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
+       pbd = &fp_tx->tx_desc_ring[bd_prod].parse_bd;
+
+       memset(pbd, 0, sizeof(struct eth_tx_parse_bd));
 
        wmb();
 
-       fp->hw_tx_prods->bds_prod =
-               cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + 1);
-       mb(); /* FW restriction: must not reorder writing nbd and packets */
-       fp->hw_tx_prods->packets_prod =
-               cpu_to_le32(le32_to_cpu(fp->hw_tx_prods->packets_prod) + 1);
-       DOORBELL(bp, FP_IDX(fp), 0);
+       fp_tx->tx_db.data.prod += 2;
+       barrier();
+       DOORBELL(bp, fp_tx->index, fp_tx->tx_db.raw);
 
        mmiowb();
 
        num_pkts++;
-       fp->tx_bd_prod++;
-       bp->dev->trans_start = jiffies;
+       fp_tx->tx_bd_prod += 2; /* start + pbd */
 
        udelay(100);
 
-       tx_idx = le16_to_cpu(*fp->tx_cons_sb);
+       tx_idx = le16_to_cpu(*fp_tx->tx_cons_sb);
        if (tx_idx != tx_start_idx + num_pkts)
                goto test_loopback_exit;
 
-       rx_idx = le16_to_cpu(*fp->rx_cons_sb);
+       rx_idx = le16_to_cpu(*fp_rx->rx_cons_sb);
        if (rx_idx != rx_start_idx + num_pkts)
                goto test_loopback_exit;
 
-       cqe = &fp->rx_comp_ring[RCQ_BD(fp->rx_comp_cons)];
+       cqe = &fp_rx->rx_comp_ring[RCQ_BD(fp_rx->rx_comp_cons)];
        cqe_fp_flags = cqe->fast_path_cqe.type_error_flags;
        if (CQE_TYPE(cqe_fp_flags) || (cqe_fp_flags & ETH_RX_ERROR_FALGS))
                goto test_loopback_rx_exit;
@@ -9406,7 +10300,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
        if (len != pkt_size)
                goto test_loopback_rx_exit;
 
-       rx_buf = &fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)];
+       rx_buf = &fp_rx->rx_buf_ring[RX_BD(fp_rx->rx_bd_cons)];
        skb = rx_buf->skb;
        skb_reserve(skb, cqe->fast_path_cqe.placement_offset);
        for (i = ETH_HLEN; i < pkt_size; i++)
@@ -9417,14 +10311,14 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
 
 test_loopback_rx_exit:
 
-       fp->rx_bd_cons = NEXT_RX_IDX(fp->rx_bd_cons);
-       fp->rx_bd_prod = NEXT_RX_IDX(fp->rx_bd_prod);
-       fp->rx_comp_cons = NEXT_RCQ_IDX(fp->rx_comp_cons);
-       fp->rx_comp_prod = NEXT_RCQ_IDX(fp->rx_comp_prod);
+       fp_rx->rx_bd_cons = NEXT_RX_IDX(fp_rx->rx_bd_cons);
+       fp_rx->rx_bd_prod = NEXT_RX_IDX(fp_rx->rx_bd_prod);
+       fp_rx->rx_comp_cons = NEXT_RCQ_IDX(fp_rx->rx_comp_cons);
+       fp_rx->rx_comp_prod = NEXT_RCQ_IDX(fp_rx->rx_comp_prod);
 
        /* Update producers */
-       bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
-                            fp->rx_sge_prod);
+       bnx2x_update_rx_prod(bp, fp_rx, fp_rx->rx_bd_prod, fp_rx->rx_comp_prod,
+                            fp_rx->rx_sge_prod);
 
 test_loopback_exit:
        bp->link_params.loopback_mode = LOOPBACK_NONE;
@@ -9434,7 +10328,7 @@ test_loopback_exit:
 
 static int bnx2x_test_loopback(struct bnx2x *bp, u8 link_up)
 {
-       int rc = 0;
+       int rc = 0, res;
 
        if (!netif_running(bp->dev))
                return BNX2X_LOOPBACK_FAILED;
@@ -9442,14 +10336,16 @@ static int bnx2x_test_loopback(struct bnx2x *bp, u8 link_up)
        bnx2x_netif_stop(bp, 1);
        bnx2x_acquire_phy_lock(bp);
 
-       if (bnx2x_run_loopback(bp, BNX2X_MAC_LOOPBACK, link_up)) {
-               DP(NETIF_MSG_PROBE, "MAC loopback failed\n");
-               rc |= BNX2X_MAC_LOOPBACK_FAILED;
+       res = bnx2x_run_loopback(bp, BNX2X_PHY_LOOPBACK, link_up);
+       if (res) {
+               DP(NETIF_MSG_PROBE, "  PHY loopback failed  (res %d)\n", res);
+               rc |= BNX2X_PHY_LOOPBACK_FAILED;
        }
 
-       if (bnx2x_run_loopback(bp, BNX2X_PHY_LOOPBACK, link_up)) {
-               DP(NETIF_MSG_PROBE, "PHY loopback failed\n");
-               rc |= BNX2X_PHY_LOOPBACK_FAILED;
+       res = bnx2x_run_loopback(bp, BNX2X_MAC_LOOPBACK, link_up);
+       if (res) {
+               DP(NETIF_MSG_PROBE, "  MAC loopback failed  (res %d)\n", res);
+               rc |= BNX2X_MAC_LOOPBACK_FAILED;
        }
 
        bnx2x_release_phy_lock(bp);
@@ -9476,14 +10372,14 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
                { 0x778,  0x70 },
                {     0,     0 }
        };
-       u32 buf[0x350 / 4];
+       __be32 buf[0x350 / 4];
        u8 *data = (u8 *)buf;
        int i, rc;
-       u32 magic, csum;
+       u32 magic, crc;
 
        rc = bnx2x_nvram_read(bp, 0, data, 4);
        if (rc) {
-               DP(NETIF_MSG_PROBE, "magic value read (rc -%d)\n", -rc);
+               DP(NETIF_MSG_PROBE, "magic value read (rc %d)\n", rc);
                goto test_nvram_exit;
        }
 
@@ -9500,14 +10396,14 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
                                      nvram_tbl[i].size);
                if (rc) {
                        DP(NETIF_MSG_PROBE,
-                          "nvram_tbl[%d] read data (rc -%d)\n", i, -rc);
+                          "nvram_tbl[%d] read data (rc %d)\n", i, rc);
                        goto test_nvram_exit;
                }
 
-               csum = ether_crc_le(nvram_tbl[i].size, data);
-               if (csum != CRC32_RESIDUAL) {
+               crc = ether_crc_le(nvram_tbl[i].size, data);
+               if (crc != CRC32_RESIDUAL) {
                        DP(NETIF_MSG_PROBE,
-                          "nvram_tbl[%d] csum value (0x%08x)\n", i, csum);
+                          "nvram_tbl[%d] crc value (0x%08x)\n", i, crc);
                        rc = -ENODEV;
                        goto test_nvram_exit;
                }
@@ -9530,17 +10426,19 @@ static int bnx2x_test_intr(struct bnx2x *bp)
                config->hdr.offset = (BP_PORT(bp) ? 32 : 0);
        else
                config->hdr.offset = BP_FUNC(bp);
-       config->hdr.client_id = BP_CL_ID(bp);
+       config->hdr.client_id = bp->fp->cl_id;
        config->hdr.reserved1 = 0;
 
+       bp->set_mac_pending++;
+       smp_wmb();
        rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
                           U64_HI(bnx2x_sp_mapping(bp, mac_config)),
                           U64_LO(bnx2x_sp_mapping(bp, mac_config)), 0);
        if (rc == 0) {
-               bp->set_mac_pending++;
                for (i = 0; i < 10; i++) {
                        if (!bp->set_mac_pending)
                                break;
+                       smp_rmb();
                        msleep_interruptible(10);
                }
                if (i == 10)
@@ -9565,9 +10463,16 @@ static void bnx2x_self_test(struct net_device *dev,
                etest->flags &= ~ETH_TEST_FL_OFFLINE;
 
        if (etest->flags & ETH_TEST_FL_OFFLINE) {
+               int port = BP_PORT(bp);
+               u32 val;
                u8 link_up;
 
-               link_up = bp->link_vars.link_up;
+               /* save current value of input enable for TX port IF */
+               val = REG_RD(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4);
+               /* disable input for TX port IF */
+               REG_WR(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4, 0);
+
+               link_up = (bnx2x_link_test(bp) == 0);
                bnx2x_nic_unload(bp, UNLOAD_NORMAL);
                bnx2x_nic_load(bp, LOAD_DIAG);
                /* wait until link state is restored */
@@ -9586,6 +10491,10 @@ static void bnx2x_self_test(struct net_device *dev,
                        etest->flags |= ETH_TEST_FL_FAILED;
 
                bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+
+               /* restore input for TX port IF */
+               REG_WR(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4, val);
+
                bnx2x_nic_load(bp, LOAD_NORMAL);
                /* wait until link state is restored */
                bnx2x_wait_for_link(bp, link_up);
@@ -9735,6 +10644,36 @@ static const struct {
 #define IS_E1HMF_MODE_STAT(bp) \
                        (IS_E1HMF(bp) && !(bp->msglevel & BNX2X_MSG_STATS))
 
+static int bnx2x_get_sset_count(struct net_device *dev, int stringset)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       int i, num_stats;
+
+       switch(stringset) {
+       case ETH_SS_STATS:
+               if (is_multi(bp)) {
+                       num_stats = BNX2X_NUM_Q_STATS * bp->num_queues;
+                       if (!IS_E1HMF_MODE_STAT(bp))
+                               num_stats += BNX2X_NUM_STATS;
+               } else {
+                       if (IS_E1HMF_MODE_STAT(bp)) {
+                               num_stats = 0;
+                               for (i = 0; i < BNX2X_NUM_STATS; i++)
+                                       if (IS_FUNC_STAT(i))
+                                               num_stats++;
+                       } else
+                               num_stats = BNX2X_NUM_STATS;
+               }
+               return num_stats;
+
+       case ETH_SS_TEST:
+               return BNX2X_NUM_TESTS;
+
+       default:
+               return -EINVAL;
+       }
+}
+
 static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
        struct bnx2x *bp = netdev_priv(dev);
@@ -9772,28 +10711,6 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
        }
 }
 
-static int bnx2x_get_stats_count(struct net_device *dev)
-{
-       struct bnx2x *bp = netdev_priv(dev);
-       int i, num_stats;
-
-       if (is_multi(bp)) {
-               num_stats = BNX2X_NUM_Q_STATS * BNX2X_NUM_QUEUES(bp);
-               if (!IS_E1HMF_MODE_STAT(bp))
-                       num_stats += BNX2X_NUM_STATS;
-       } else {
-               if (IS_E1HMF_MODE_STAT(bp)) {
-                       num_stats = 0;
-                       for (i = 0; i < BNX2X_NUM_STATS; i++)
-                               if (IS_FUNC_STAT(i))
-                                       num_stats++;
-               } else
-                       num_stats = BNX2X_NUM_STATS;
-       }
-
-       return num_stats;
-}
-
 static void bnx2x_get_ethtool_stats(struct net_device *dev,
                                    struct ethtool_stats *stats, u64 *buf)
 {
@@ -9869,7 +10786,6 @@ static void bnx2x_get_ethtool_stats(struct net_device *dev,
 static int bnx2x_phys_id(struct net_device *dev, u32 data)
 {
        struct bnx2x *bp = netdev_priv(dev);
-       int port = BP_PORT(bp);
        int i;
 
        if (!netif_running(dev))
@@ -9883,13 +10799,10 @@ static int bnx2x_phys_id(struct net_device *dev, u32 data)
 
        for (i = 0; i < (data * 2); i++) {
                if ((i % 2) == 0)
-                       bnx2x_set_led(bp, port, LED_MODE_OPER, SPEED_1000,
-                                     bp->link_params.hw_led_mode,
-                                     bp->link_params.chip_id);
+                       bnx2x_set_led(&bp->link_params, LED_MODE_OPER,
+                                     SPEED_1000);
                else
-                       bnx2x_set_led(bp, port, LED_MODE_OFF, 0,
-                                     bp->link_params.hw_led_mode,
-                                     bp->link_params.chip_id);
+                       bnx2x_set_led(&bp->link_params, LED_MODE_OFF, 0);
 
                msleep_interruptible(500);
                if (signal_pending(current))
@@ -9897,24 +10810,24 @@ static int bnx2x_phys_id(struct net_device *dev, u32 data)
        }
 
        if (bp->link_vars.link_up)
-               bnx2x_set_led(bp, port, LED_MODE_OPER,
-                             bp->link_vars.line_speed,
-                             bp->link_params.hw_led_mode,
-                             bp->link_params.chip_id);
+               bnx2x_set_led(&bp->link_params, LED_MODE_OPER,
+                             bp->link_vars.line_speed);
 
        return 0;
 }
 
-static struct ethtool_ops bnx2x_ethtool_ops = {
+static const struct ethtool_ops bnx2x_ethtool_ops = {
        .get_settings           = bnx2x_get_settings,
        .set_settings           = bnx2x_set_settings,
        .get_drvinfo            = bnx2x_get_drvinfo,
+       .get_regs_len           = bnx2x_get_regs_len,
+       .get_regs               = bnx2x_get_regs,
        .get_wol                = bnx2x_get_wol,
        .set_wol                = bnx2x_set_wol,
        .get_msglevel           = bnx2x_get_msglevel,
        .set_msglevel           = bnx2x_set_msglevel,
        .nway_reset             = bnx2x_nway_reset,
-       .get_link               = ethtool_op_get_link,
+       .get_link               = bnx2x_get_link,
        .get_eeprom_len         = bnx2x_get_eeprom_len,
        .get_eeprom             = bnx2x_get_eeprom,
        .set_eeprom             = bnx2x_set_eeprom,
@@ -9934,11 +10847,10 @@ static struct ethtool_ops bnx2x_ethtool_ops = {
        .set_sg                 = ethtool_op_set_sg,
        .get_tso                = ethtool_op_get_tso,
        .set_tso                = bnx2x_set_tso,
-       .self_test_count        = bnx2x_self_test_count,
        .self_test              = bnx2x_self_test,
+       .get_sset_count         = bnx2x_get_sset_count,
        .get_strings            = bnx2x_get_strings,
        .phys_id                = bnx2x_phys_id,
-       .get_stats_count        = bnx2x_get_stats_count,
        .get_ethtool_stats      = bnx2x_get_ethtool_stats,
 };
 
@@ -10004,42 +10916,60 @@ static inline int bnx2x_has_rx_work(struct bnx2x_fastpath *fp)
 
 static int bnx2x_poll(struct napi_struct *napi, int budget)
 {
+       int work_done = 0;
        struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
                                                 napi);
        struct bnx2x *bp = fp->bp;
-       int work_done = 0;
 
+       while (1) {
 #ifdef BNX2X_STOP_ON_ERROR
-       if (unlikely(bp->panic))
-               goto poll_panic;
+               if (unlikely(bp->panic)) {
+                       napi_complete(napi);
+                       return 0;
+               }
 #endif
 
-       prefetch(fp->tx_buf_ring[TX_BD(fp->tx_pkt_cons)].skb);
-       prefetch(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb);
-       prefetch((char *)(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb) + 256);
-
-       bnx2x_update_fpsb_idx(fp);
+               if (bnx2x_has_tx_work(fp))
+                       bnx2x_tx_int(fp);
 
-       if (bnx2x_has_tx_work(fp))
-               bnx2x_tx_int(fp, budget);
+               if (bnx2x_has_rx_work(fp)) {
+                       work_done += bnx2x_rx_int(fp, budget - work_done);
 
-       if (bnx2x_has_rx_work(fp))
-               work_done = bnx2x_rx_int(fp, budget);
-       rmb(); /* BNX2X_HAS_WORK() reads the status block */
-
-       /* must not complete if we consumed full budget */
-       if ((work_done < budget) && !BNX2X_HAS_WORK(fp)) {
+                       /* must not complete if we consumed full budget */
+                       if (work_done >= budget)
+                               break;
+               }
 
-#ifdef BNX2X_STOP_ON_ERROR
-poll_panic:
-#endif
-               napi_complete(napi);
+               /* Fall out from the NAPI loop if needed */
+               if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+                       bnx2x_update_fpsb_idx(fp);
+               /* bnx2x_has_rx_work() reads the status block, thus we need
+                * to ensure that status block indices have been actually read
+                * (bnx2x_update_fpsb_idx) prior to this check
+                * (bnx2x_has_rx_work) so that we won't write the "newer"
+                * value of the status block to IGU (if there was a DMA right
+                * after bnx2x_has_rx_work and if there is no rmb, the memory
+                * reading (bnx2x_update_fpsb_idx) may be postponed to right
+                * before bnx2x_ack_sb). In this case there will never be
+                * another interrupt until there is another update of the
+                * status block, while there is still unhandled work.
+                */
+                       rmb();
 
-               bnx2x_ack_sb(bp, FP_SB_ID(fp), USTORM_ID,
-                            le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1);
-               bnx2x_ack_sb(bp, FP_SB_ID(fp), CSTORM_ID,
-                            le16_to_cpu(fp->fp_c_idx), IGU_INT_ENABLE, 1);
+                       if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+                               napi_complete(napi);
+                               /* Re-enable interrupts */
+                               bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID,
+                                            le16_to_cpu(fp->fp_c_idx),
+                                            IGU_INT_NOP, 1);
+                               bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID,
+                                            le16_to_cpu(fp->fp_u_idx),
+                                            IGU_INT_ENABLE, 1);
+                               break;
+                       }
+               }
        }
+
        return work_done;
 }
 
@@ -10052,10 +10982,11 @@ poll_panic:
  */
 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
                                   struct bnx2x_fastpath *fp,
-                                  struct eth_tx_bd **tx_bd, u16 hlen,
+                                  struct sw_tx_bd *tx_buf,
+                                  struct eth_tx_start_bd **tx_bd, u16 hlen,
                                   u16 bd_prod, int nbd)
 {
-       struct eth_tx_bd *h_tx_bd = *tx_bd;
+       struct eth_tx_start_bd *h_tx_bd = *tx_bd;
        struct eth_tx_bd *d_tx_bd;
        dma_addr_t mapping;
        int old_len = le16_to_cpu(h_tx_bd->nbytes);
@@ -10071,7 +11002,7 @@ static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
        /* now get a new data BD
         * (after the pbd) and fill it */
        bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
-       d_tx_bd = &fp->tx_desc_ring[bd_prod];
+       d_tx_bd = &fp->tx_desc_ring[bd_prod].reg_bd;
 
        mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
                           le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
@@ -10079,17 +11010,16 @@ static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
        d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
        d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
        d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
-       d_tx_bd->vlan = 0;
-       /* this marks the BD as one that has no individual mapping
-        * the FW ignores this flag in a BD not marked start
-        */
-       d_tx_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_SW_LSO;
+
+       /* this marks the BD as one that has no individual mapping */
+       tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
+
        DP(NETIF_MSG_TX_QUEUED,
           "TSO split data size is %d (%x:%x)\n",
           d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
 
-       /* update tx_bd for marking the last BD flag */
-       *tx_bd = d_tx_bd;
+       /* update tx_bd */
+       *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
 
        return bd_prod;
 }
@@ -10115,7 +11045,7 @@ static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
                rc = XMIT_PLAIN;
 
        else {
-               if (skb->protocol == ntohs(ETH_P_IPV6)) {
+               if (skb->protocol == htons(ETH_P_IPV6)) {
                        rc = XMIT_CSUM_V6;
                        if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                rc |= XMIT_CSUM_TCP;
@@ -10128,16 +11058,18 @@ static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
        }
 
        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-               rc |= XMIT_GSO_V4;
+               rc |= (XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP);
 
        else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-               rc |= XMIT_GSO_V6;
+               rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6);
 
        return rc;
 }
 
 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
-/* check if packet requires linearization (packet is too fragmented) */
+/* check if packet requires linearization (packet is too fragmented)
+   no need to check fragmentation if page size > 8K (there will be no
+   violation to FW restrictions) */
 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
                             u32 xmit_type)
 {
@@ -10196,7 +11128,6 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
                                wnd_sum -=
                                        skb_shinfo(skb)->frags[wnd_idx].size;
                        }
-
                } else {
                        /* in non-LSO too fragmented packet should always
                           be linearized */
@@ -10220,21 +11151,22 @@ exit_lbl:
  * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
  * netif_wake_queue()
  */
-static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct bnx2x *bp = netdev_priv(dev);
        struct bnx2x_fastpath *fp;
        struct netdev_queue *txq;
        struct sw_tx_bd *tx_buf;
-       struct eth_tx_bd *tx_bd;
+       struct eth_tx_start_bd *tx_start_bd;
+       struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
        struct eth_tx_parse_bd *pbd = NULL;
        u16 pkt_prod, bd_prod;
        int nbd, fp_index;
        dma_addr_t mapping;
        u32 xmit_type = bnx2x_xmit_type(bp, skb);
-       int vlan_off = (bp->e1hov ? 4 : 0);
        int i;
        u8 hlen = 0;
+       __le16 pkt_size = 0;
 
 #ifdef BNX2X_STOP_ON_ERROR
        if (unlikely(bp->panic))
@@ -10247,7 +11179,7 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        fp = &bp->fp[fp_index];
 
        if (unlikely(bnx2x_tx_avail(fp) < (skb_shinfo(skb)->nr_frags + 3))) {
-               fp->eth_q_stats.driver_xoff++,
+               fp->eth_q_stats.driver_xoff++;
                netif_tx_stop_queue(txq);
                BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
                return NETDEV_TX_BUSY;
@@ -10259,8 +11191,9 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
           ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
 
 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
-       /* First, check if we need to linearize the skb
-          (due to FW restrictions) */
+       /* First, check if we need to linearize the skb (due to FW
+          restrictions). No need to check fragmentation if page size > 8K
+          (there will be no violation to FW restrictions) */
        if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
                /* Statistics of linearization */
                bp->lin_cnt++;
@@ -10275,7 +11208,7 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /*
        Please read carefully. First we use one BD which we mark as start,
-       then for TSO or xsum we have a parsing info BD,
+       then we have a parsing info BD (used for TSO or xsum),
        and only then we have the rest of the TSO BDs.
        (don't forget to mark the last one as last,
        and to unmap only AFTER you write to the BD ...)
@@ -10287,47 +11220,45 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* get a tx_buf and first BD */
        tx_buf = &fp->tx_buf_ring[TX_BD(pkt_prod)];
-       tx_bd = &fp->tx_desc_ring[bd_prod];
+       tx_start_bd = &fp->tx_desc_ring[bd_prod].start_bd;
 
-       tx_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
-       tx_bd->general_data = (UNICAST_ADDRESS <<
-                              ETH_TX_BD_ETH_ADDR_TYPE_SHIFT);
+       tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
+       tx_start_bd->general_data = (UNICAST_ADDRESS <<
+                                    ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT);
        /* header nbd */
-       tx_bd->general_data |= (1 << ETH_TX_BD_HDR_NBDS_SHIFT);
+       tx_start_bd->general_data |= (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
 
        /* remember the first BD of the packet */
        tx_buf->first_bd = fp->tx_bd_prod;
        tx_buf->skb = skb;
+       tx_buf->flags = 0;
 
        DP(NETIF_MSG_TX_QUEUED,
           "sending pkt %u @%p  next_idx %u  bd %u @%p\n",
-          pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_bd);
+          pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_start_bd);
 
 #ifdef BCM_VLAN
        if ((bp->vlgrp != NULL) && vlan_tx_tag_present(skb) &&
            (bp->flags & HW_VLAN_TX_FLAG)) {
-               tx_bd->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
-               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG;
-               vlan_off += 4;
+               tx_start_bd->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
+               tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG;
        } else
 #endif
-               tx_bd->vlan = cpu_to_le16(pkt_prod);
+               tx_start_bd->vlan = cpu_to_le16(pkt_prod);
 
-       if (xmit_type) {
-               /* turn on parsing and get a BD */
-               bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
-               pbd = (void *)&fp->tx_desc_ring[bd_prod];
+       /* turn on parsing and get a BD */
+       bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
+       pbd = &fp->tx_desc_ring[bd_prod].parse_bd;
 
-               memset(pbd, 0, sizeof(struct eth_tx_parse_bd));
-       }
+       memset(pbd, 0, sizeof(struct eth_tx_parse_bd));
 
        if (xmit_type & XMIT_CSUM) {
-               hlen = (skb_network_header(skb) - skb->data + vlan_off) / 2;
+               hlen = (skb_network_header(skb) - skb->data) / 2;
 
                /* for now NS flag is not used in Linux */
-               pbd->global_data = (hlen |
-                                   ((skb->protocol == ntohs(ETH_P_8021Q)) <<
-                                    ETH_TX_PARSE_BD_LLC_SNAP_EN_SHIFT));
+               pbd->global_data =
+                       (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
+                                ETH_TX_PARSE_BD_LLC_SNAP_EN_SHIFT));
 
                pbd->ip_hlen = (skb_transport_header(skb) -
                                skb_network_header(skb)) / 2;
@@ -10335,15 +11266,16 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                hlen += pbd->ip_hlen + tcp_hdrlen(skb) / 2;
 
                pbd->total_hlen = cpu_to_le16(hlen);
-               hlen = hlen*2 - vlan_off;
+               hlen = hlen*2;
 
-               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_TCP_CSUM;
+               tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
 
                if (xmit_type & XMIT_CSUM_V4)
-                       tx_bd->bd_flags.as_bitfield |=
+                       tx_start_bd->bd_flags.as_bitfield |=
                                                ETH_TX_BD_FLAGS_IP_CSUM;
                else
-                       tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IPV6;
+                       tx_start_bd->bd_flags.as_bitfield |=
+                                               ETH_TX_BD_FLAGS_IPV6;
 
                if (xmit_type & XMIT_CSUM_TCP) {
                        pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
@@ -10351,13 +11283,11 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                } else {
                        s8 fix = SKB_CS_OFF(skb); /* signed! */
 
-                       pbd->global_data |= ETH_TX_PARSE_BD_CS_ANY_FLG;
-                       pbd->cs_offset = fix / 2;
+                       pbd->global_data |= ETH_TX_PARSE_BD_UDP_CS_FLG;
 
                        DP(NETIF_MSG_TX_QUEUED,
-                          "hlen %d  offset %d  fix %d  csum before fix %x\n",
-                          le16_to_cpu(pbd->total_hlen), pbd->cs_offset, fix,
-                          SKB_CS(skb));
+                          "hlen %d  fix %d  csum before fix %x\n",
+                          le16_to_cpu(pbd->total_hlen), fix, SKB_CS(skb));
 
                        /* HW bug: fixup the CSUM */
                        pbd->tcp_pseudo_csum =
@@ -10372,17 +11302,18 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        mapping = pci_map_single(bp->pdev, skb->data,
                                 skb_headlen(skb), PCI_DMA_TODEVICE);
 
-       tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
-       tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
-       nbd = skb_shinfo(skb)->nr_frags + ((pbd == NULL) ? 1 : 2);
-       tx_bd->nbd = cpu_to_le16(nbd);
-       tx_bd->nbytes = cpu_to_le16(skb_headlen(skb));
+       tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+       tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+       nbd = skb_shinfo(skb)->nr_frags + 2; /* start_bd + pbd + frags */
+       tx_start_bd->nbd = cpu_to_le16(nbd);
+       tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
+       pkt_size = tx_start_bd->nbytes;
 
        DP(NETIF_MSG_TX_QUEUED, "first bd @%p  addr (%x:%x)  nbd %d"
           "  nbytes %d  flags %x  vlan %x\n",
-          tx_bd, tx_bd->addr_hi, tx_bd->addr_lo, le16_to_cpu(tx_bd->nbd),
-          le16_to_cpu(tx_bd->nbytes), tx_bd->bd_flags.as_bitfield,
-          le16_to_cpu(tx_bd->vlan));
+          tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
+          le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
+          tx_start_bd->bd_flags.as_bitfield, le16_to_cpu(tx_start_bd->vlan));
 
        if (xmit_type & XMIT_GSO) {
 
@@ -10391,11 +11322,11 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                   skb->len, hlen, skb_headlen(skb),
                   skb_shinfo(skb)->gso_size);
 
-               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
+               tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
 
                if (unlikely(skb_headlen(skb) > hlen))
-                       bd_prod = bnx2x_tx_split(bp, fp, &tx_bd, hlen,
-                                                bd_prod, ++nbd);
+                       bd_prod = bnx2x_tx_split(bp, fp, tx_buf, &tx_start_bd,
+                                                hlen, bd_prod, ++nbd);
 
                pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
                pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
@@ -10416,33 +11347,31 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
                pbd->global_data |= ETH_TX_PARSE_BD_PSEUDO_CS_WITHOUT_LEN;
        }
+       tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
                bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
-               tx_bd = &fp->tx_desc_ring[bd_prod];
+               tx_data_bd = &fp->tx_desc_ring[bd_prod].reg_bd;
+               if (total_pkt_bd == NULL)
+                       total_pkt_bd = &fp->tx_desc_ring[bd_prod].reg_bd;
 
                mapping = pci_map_page(bp->pdev, frag->page, frag->page_offset,
                                       frag->size, PCI_DMA_TODEVICE);
 
-               tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
-               tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
-               tx_bd->nbytes = cpu_to_le16(frag->size);
-               tx_bd->vlan = cpu_to_le16(pkt_prod);
-               tx_bd->bd_flags.as_bitfield = 0;
+               tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+               tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+               tx_data_bd->nbytes = cpu_to_le16(frag->size);
+               le16_add_cpu(&pkt_size, frag->size);
 
                DP(NETIF_MSG_TX_QUEUED,
-                  "frag %d  bd @%p  addr (%x:%x)  nbytes %d  flags %x\n",
-                  i, tx_bd, tx_bd->addr_hi, tx_bd->addr_lo,
-                  le16_to_cpu(tx_bd->nbytes), tx_bd->bd_flags.as_bitfield);
+                  "frag %d  bd @%p  addr (%x:%x)  nbytes %d\n",
+                  i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
+                  le16_to_cpu(tx_data_bd->nbytes));
        }
 
-       /* now at last mark the BD as the last BD */
-       tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_END_BD;
-
-       DP(NETIF_MSG_TX_QUEUED, "last bd @%p  flags %x\n",
-          tx_bd, tx_bd->bd_flags.as_bitfield);
+       DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
 
        bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
 
@@ -10452,6 +11381,9 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (TX_BD_POFF(bd_prod) < nbd)
                nbd++;
 
+       if (total_pkt_bd != NULL)
+               total_pkt_bd->total_pkt_bytes = pkt_size;
+
        if (pbd)
                DP(NETIF_MSG_TX_QUEUED,
                   "PBD @%p  ip_data %x  ip_hlen %u  ip_id %u  lso_mss %u"
@@ -10471,23 +11403,19 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        wmb();
 
-       fp->hw_tx_prods->bds_prod =
-               cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + nbd);
-       mb(); /* FW restriction: must not reorder writing nbd and packets */
-       fp->hw_tx_prods->packets_prod =
-               cpu_to_le32(le32_to_cpu(fp->hw_tx_prods->packets_prod) + 1);
-       DOORBELL(bp, FP_IDX(fp), 0);
+       fp->tx_db.data.prod += nbd;
+       barrier();
+       DOORBELL(bp, fp->index, fp->tx_db.raw);
 
        mmiowb();
 
        fp->tx_bd_prod += nbd;
-       dev->trans_start = jiffies;
 
        if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) {
+               netif_tx_stop_queue(txq);
                /* We want bnx2x_tx_int to "see" the updated tx_bd_prod
                   if we put Tx into XOFF state. */
                smp_mb();
-               netif_tx_stop_queue(txq);
                fp->eth_q_stats.driver_xoff++;
                if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)
                        netif_tx_wake_queue(txq);
@@ -10523,7 +11451,7 @@ static int bnx2x_close(struct net_device *dev)
        return 0;
 }
 
-/* called with netif_tx_lock from set_multicast */
+/* called with netif_tx_lock from dev_mcast.c */
 static void bnx2x_set_rx_mode(struct net_device *dev)
 {
        struct bnx2x *bp = netdev_priv(dev);
@@ -10568,8 +11496,9 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
                                                        cpu_to_le16(port);
                                config->config_table[i].
                                        target_table_entry.flags = 0;
-                               config->config_table[i].
-                                       target_table_entry.client_id = 0;
+                               config->config_table[i].target_table_entry.
+                                       clients_bit_vector =
+                                               cpu_to_le32(1 << BP_L_ID(bp));
                                config->config_table[i].
                                        target_table_entry.vlan_id = 0;
 
@@ -10606,6 +11535,9 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
                        config->hdr.client_id = bp->fp->cl_id;
                        config->hdr.reserved1 = 0;
 
+                       bp->set_mac_pending++;
+                       smp_wmb();
+
                        bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
                                   U64_HI(bnx2x_sp_mapping(bp, mcast_config)),
                                   U64_LO(bnx2x_sp_mapping(bp, mcast_config)),
@@ -10655,63 +11587,86 @@ static int bnx2x_change_mac_addr(struct net_device *dev, void *p)
        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
        if (netif_running(dev)) {
                if (CHIP_IS_E1(bp))
-                       bnx2x_set_mac_addr_e1(bp, 1);
+                       bnx2x_set_eth_mac_addr_e1(bp, 1);
                else
-                       bnx2x_set_mac_addr_e1h(bp, 1);
+                       bnx2x_set_eth_mac_addr_e1h(bp, 1);
        }
 
        return 0;
 }
 
 /* called with rtnl_lock */
-static int bnx2x_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int bnx2x_mdio_read(struct net_device *netdev, int prtad,
+                          int devad, u16 addr)
 {
-       struct mii_ioctl_data *data = if_mii(ifr);
-       struct bnx2x *bp = netdev_priv(dev);
-       int port = BP_PORT(bp);
-       int err;
+       struct bnx2x *bp = netdev_priv(netdev);
+       u16 value;
+       int rc;
+       u32 phy_type = XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config);
+
+       DP(NETIF_MSG_LINK, "mdio_read: prtad 0x%x, devad 0x%x, addr 0x%x\n",
+          prtad, devad, addr);
 
-       switch (cmd) {
-       case SIOCGMIIPHY:
-               data->phy_id = bp->port.phy_addr;
+       if (prtad != bp->mdio.prtad) {
+               DP(NETIF_MSG_LINK, "prtad missmatch (cmd:0x%x != bp:0x%x)\n",
+                  prtad, bp->mdio.prtad);
+               return -EINVAL;
+       }
 
-               /* fallthrough */
+       /* The HW expects different devad if CL22 is used */
+       devad = (devad == MDIO_DEVAD_NONE) ? DEFAULT_PHY_DEV_ADDR : devad;
+
+       bnx2x_acquire_phy_lock(bp);
+       rc = bnx2x_cl45_read(bp, BP_PORT(bp), phy_type, prtad,
+                            devad, addr, &value);
+       bnx2x_release_phy_lock(bp);
+       DP(NETIF_MSG_LINK, "mdio_read_val 0x%x rc = 0x%x\n", value, rc);
+
+       if (!rc)
+               rc = value;
+       return rc;
+}
 
-       case SIOCGMIIREG: {
-               u16 mii_regval;
+/* called with rtnl_lock */
+static int bnx2x_mdio_write(struct net_device *netdev, int prtad, int devad,
+                           u16 addr, u16 value)
+{
+       struct bnx2x *bp = netdev_priv(netdev);
+       u32 ext_phy_type = XGXS_EXT_PHY_TYPE(bp->link_params.ext_phy_config);
+       int rc;
 
-               if (!netif_running(dev))
-                       return -EAGAIN;
+       DP(NETIF_MSG_LINK, "mdio_write: prtad 0x%x, devad 0x%x, addr 0x%x,"
+                          " value 0x%x\n", prtad, devad, addr, value);
 
-               mutex_lock(&bp->port.phy_mutex);
-               err = bnx2x_cl45_read(bp, port, 0, bp->port.phy_addr,
-                                     DEFAULT_PHY_DEV_ADDR,
-                                     (data->reg_num & 0x1f), &mii_regval);
-               data->val_out = mii_regval;
-               mutex_unlock(&bp->port.phy_mutex);
-               return err;
+       if (prtad != bp->mdio.prtad) {
+               DP(NETIF_MSG_LINK, "prtad missmatch (cmd:0x%x != bp:0x%x)\n",
+                  prtad, bp->mdio.prtad);
+               return -EINVAL;
        }
 
-       case SIOCSMIIREG:
-               if (!capable(CAP_NET_ADMIN))
-                       return -EPERM;
+       /* The HW expects different devad if CL22 is used */
+       devad = (devad == MDIO_DEVAD_NONE) ? DEFAULT_PHY_DEV_ADDR : devad;
 
-               if (!netif_running(dev))
-                       return -EAGAIN;
+       bnx2x_acquire_phy_lock(bp);
+       rc = bnx2x_cl45_write(bp, BP_PORT(bp), ext_phy_type, prtad,
+                             devad, addr, value);
+       bnx2x_release_phy_lock(bp);
+       return rc;
+}
 
-               mutex_lock(&bp->port.phy_mutex);
-               err = bnx2x_cl45_write(bp, port, 0, bp->port.phy_addr,
-                                      DEFAULT_PHY_DEV_ADDR,
-                                      (data->reg_num & 0x1f), data->val_in);
-               mutex_unlock(&bp->port.phy_mutex);
-               return err;
+/* called with rtnl_lock */
+static int bnx2x_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       struct mii_ioctl_data *mdio = if_mii(ifr);
 
-       default:
-               /* do nothing */
-               break;
-       }
+       DP(NETIF_MSG_LINK, "ioctl: phy id 0x%x, reg 0x%x, val_in 0x%x\n",
+          mdio->phy_id, mdio->reg_num, mdio->val_in);
+
+       if (!netif_running(dev))
+               return -EAGAIN;
 
-       return -EOPNOTSUPP;
+       return mdio_mii_ioctl(&bp->mdio, mdio, cmd);
 }
 
 /* called with rtnl_lock */
@@ -10789,7 +11744,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
        .ndo_open               = bnx2x_open,
        .ndo_stop               = bnx2x_close,
        .ndo_start_xmit         = bnx2x_start_xmit,
-       .ndo_set_multicast_list = bnx2x_set_rx_mode,
+       .ndo_set_multicast_list = bnx2x_set_rx_mode,
        .ndo_set_mac_address    = bnx2x_change_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = bnx2x_ioctl,
@@ -10803,7 +11758,6 @@ static const struct net_device_ops bnx2x_netdev_ops = {
 #endif
 };
 
-
 static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
                                    struct net_device *dev)
 {
@@ -10866,16 +11820,16 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
                goto err_out_release;
        }
 
-       if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) == 0) {
+       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
                bp->flags |= USING_DAC_FLAG;
-               if (pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK) != 0) {
+               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
                        printk(KERN_ERR PFX "pci_set_consistent_dma_mask"
                               " failed, aborting\n");
                        rc = -EIO;
                        goto err_out_release;
                }
 
-       } else if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) {
+       } else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
                printk(KERN_ERR PFX "System does not support DMA,"
                       " aborting\n");
                rc = -EIO;
@@ -10922,12 +11876,27 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
        dev->features |= NETIF_F_HW_CSUM;
        if (bp->flags & USING_DAC_FLAG)
                dev->features |= NETIF_F_HIGHDMA;
+       dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
+       dev->features |= NETIF_F_TSO6;
 #ifdef BCM_VLAN
        dev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
        bp->flags |= (HW_VLAN_RX_FLAG | HW_VLAN_TX_FLAG);
+
+       dev->vlan_features |= NETIF_F_SG;
+       dev->vlan_features |= NETIF_F_HW_CSUM;
+       if (bp->flags & USING_DAC_FLAG)
+               dev->vlan_features |= NETIF_F_HIGHDMA;
+       dev->vlan_features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
+       dev->vlan_features |= NETIF_F_TSO6;
 #endif
-       dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
-       dev->features |= NETIF_F_TSO6;
+
+       /* get_port_hwinfo() will set prtad and mmds properly */
+       bp->mdio.prtad = MDIO_PRTAD_NONE;
+       bp->mdio.mmds = 0;
+       bp->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+       bp->mdio.dev = dev;
+       bp->mdio.mdio_read = bnx2x_mdio_read;
+       bp->mdio.mdio_write = bnx2x_mdio_write;
 
        return 0;
 
@@ -10953,34 +11922,207 @@ err_out:
        return rc;
 }
 
-static int __devinit bnx2x_get_pcie_width(struct bnx2x *bp)
+static void __devinit bnx2x_get_pcie_width_speed(struct bnx2x *bp,
+                                                int *width, int *speed)
 {
        u32 val = REG_RD(bp, PCICFG_OFFSET + PCICFG_LINK_CONTROL);
 
-       val = (val & PCICFG_LINK_WIDTH) >> PCICFG_LINK_WIDTH_SHIFT;
-       return val;
+       *width = (val & PCICFG_LINK_WIDTH) >> PCICFG_LINK_WIDTH_SHIFT;
+
+       /* return value of 1=2.5GHz 2=5GHz */
+       *speed = (val & PCICFG_LINK_SPEED) >> PCICFG_LINK_SPEED_SHIFT;
 }
 
-/* return value of 1=2.5GHz 2=5GHz */
-static int __devinit bnx2x_get_pcie_speed(struct bnx2x *bp)
+static int __devinit bnx2x_check_firmware(struct bnx2x *bp)
 {
-       u32 val = REG_RD(bp, PCICFG_OFFSET + PCICFG_LINK_CONTROL);
+       const struct firmware *firmware = bp->firmware;
+       struct bnx2x_fw_file_hdr *fw_hdr;
+       struct bnx2x_fw_file_section *sections;
+       u32 offset, len, num_ops;
+       u16 *ops_offsets;
+       int i;
+       const u8 *fw_ver;
 
-       val = (val & PCICFG_LINK_SPEED) >> PCICFG_LINK_SPEED_SHIFT;
-       return val;
+       if (firmware->size < sizeof(struct bnx2x_fw_file_hdr))
+               return -EINVAL;
+
+       fw_hdr = (struct bnx2x_fw_file_hdr *)firmware->data;
+       sections = (struct bnx2x_fw_file_section *)fw_hdr;
+
+       /* Make sure none of the offsets and sizes make us read beyond
+        * the end of the firmware data */
+       for (i = 0; i < sizeof(*fw_hdr) / sizeof(*sections); i++) {
+               offset = be32_to_cpu(sections[i].offset);
+               len = be32_to_cpu(sections[i].len);
+               if (offset + len > firmware->size) {
+                       printk(KERN_ERR PFX "Section %d length is out of "
+                                           "bounds\n", i);
+                       return -EINVAL;
+               }
+       }
+
+       /* Likewise for the init_ops offsets */
+       offset = be32_to_cpu(fw_hdr->init_ops_offsets.offset);
+       ops_offsets = (u16 *)(firmware->data + offset);
+       num_ops = be32_to_cpu(fw_hdr->init_ops.len) / sizeof(struct raw_op);
+
+       for (i = 0; i < be32_to_cpu(fw_hdr->init_ops_offsets.len) / 2; i++) {
+               if (be16_to_cpu(ops_offsets[i]) > num_ops) {
+                       printk(KERN_ERR PFX "Section offset %d is out of "
+                                           "bounds\n", i);
+                       return -EINVAL;
+               }
+       }
+
+       /* Check FW version */
+       offset = be32_to_cpu(fw_hdr->fw_version.offset);
+       fw_ver = firmware->data + offset;
+       if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
+           (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
+           (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
+           (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
+               printk(KERN_ERR PFX "Bad FW version:%d.%d.%d.%d."
+                                   " Should be %d.%d.%d.%d\n",
+                      fw_ver[0], fw_ver[1], fw_ver[2],
+                      fw_ver[3], BCM_5710_FW_MAJOR_VERSION,
+                      BCM_5710_FW_MINOR_VERSION,
+                      BCM_5710_FW_REVISION_VERSION,
+                      BCM_5710_FW_ENGINEERING_VERSION);
+               return -EINVAL;
+       }
+
+       return 0;
 }
 
+static inline void be32_to_cpu_n(const u8 *_source, u8 *_target, u32 n)
+{
+       const __be32 *source = (const __be32 *)_source;
+       u32 *target = (u32 *)_target;
+       u32 i;
+
+       for (i = 0; i < n/4; i++)
+               target[i] = be32_to_cpu(source[i]);
+}
+
+/*
+   Ops array is stored in the following format:
+   {op(8bit), offset(24bit, big endian), data(32bit, big endian)}
+ */
+static inline void bnx2x_prep_ops(const u8 *_source, u8 *_target, u32 n)
+{
+       const __be32 *source = (const __be32 *)_source;
+       struct raw_op *target = (struct raw_op *)_target;
+       u32 i, j, tmp;
+
+       for (i = 0, j = 0; i < n/8; i++, j += 2) {
+               tmp = be32_to_cpu(source[j]);
+               target[i].op = (tmp >> 24) & 0xff;
+               target[i].offset =  tmp & 0xffffff;
+               target[i].raw_data = be32_to_cpu(source[j+1]);
+       }
+}
+
+static inline void be16_to_cpu_n(const u8 *_source, u8 *_target, u32 n)
+{
+       const __be16 *source = (const __be16 *)_source;
+       u16 *target = (u16 *)_target;
+       u32 i;
+
+       for (i = 0; i < n/2; i++)
+               target[i] = be16_to_cpu(source[i]);
+}
+
+#define BNX2X_ALLOC_AND_SET(arr, lbl, func) \
+       do { \
+               u32 len = be32_to_cpu(fw_hdr->arr.len); \
+               bp->arr = kmalloc(len, GFP_KERNEL); \
+               if (!bp->arr) { \
+                       printk(KERN_ERR PFX "Failed to allocate %d bytes " \
+                                           "for "#arr"\n", len); \
+                       goto lbl; \
+               } \
+               func(bp->firmware->data + be32_to_cpu(fw_hdr->arr.offset), \
+                    (u8 *)bp->arr, len); \
+       } while (0)
+
+static int __devinit bnx2x_init_firmware(struct bnx2x *bp, struct device *dev)
+{
+       const char *fw_file_name;
+       struct bnx2x_fw_file_hdr *fw_hdr;
+       int rc;
+
+       if (CHIP_IS_E1(bp))
+               fw_file_name = FW_FILE_NAME_E1;
+       else
+               fw_file_name = FW_FILE_NAME_E1H;
+
+       printk(KERN_INFO PFX "Loading %s\n", fw_file_name);
+
+       rc = request_firmware(&bp->firmware, fw_file_name, dev);
+       if (rc) {
+               printk(KERN_ERR PFX "Can't load firmware file %s\n",
+                      fw_file_name);
+               goto request_firmware_exit;
+       }
+
+       rc = bnx2x_check_firmware(bp);
+       if (rc) {
+               printk(KERN_ERR PFX "Corrupt firmware file %s\n", fw_file_name);
+               goto request_firmware_exit;
+       }
+
+       fw_hdr = (struct bnx2x_fw_file_hdr *)bp->firmware->data;
+
+       /* Initialize the pointers to the init arrays */
+       /* Blob */
+       BNX2X_ALLOC_AND_SET(init_data, request_firmware_exit, be32_to_cpu_n);
+
+       /* Opcodes */
+       BNX2X_ALLOC_AND_SET(init_ops, init_ops_alloc_err, bnx2x_prep_ops);
+
+       /* Offsets */
+       BNX2X_ALLOC_AND_SET(init_ops_offsets, init_offsets_alloc_err,
+                           be16_to_cpu_n);
+
+       /* STORMs firmware */
+       INIT_TSEM_INT_TABLE_DATA(bp) = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->tsem_int_table_data.offset);
+       INIT_TSEM_PRAM_DATA(bp)      = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->tsem_pram_data.offset);
+       INIT_USEM_INT_TABLE_DATA(bp) = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->usem_int_table_data.offset);
+       INIT_USEM_PRAM_DATA(bp)      = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->usem_pram_data.offset);
+       INIT_XSEM_INT_TABLE_DATA(bp) = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->xsem_int_table_data.offset);
+       INIT_XSEM_PRAM_DATA(bp)      = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->xsem_pram_data.offset);
+       INIT_CSEM_INT_TABLE_DATA(bp) = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->csem_int_table_data.offset);
+       INIT_CSEM_PRAM_DATA(bp)      = bp->firmware->data +
+                       be32_to_cpu(fw_hdr->csem_pram_data.offset);
+
+       return 0;
+
+init_offsets_alloc_err:
+       kfree(bp->init_ops);
+init_ops_alloc_err:
+       kfree(bp->init_data);
+request_firmware_exit:
+       release_firmware(bp->firmware);
+
+       return rc;
+}
+
+
 static int __devinit bnx2x_init_one(struct pci_dev *pdev,
                                    const struct pci_device_id *ent)
 {
-       static int version_printed;
        struct net_device *dev = NULL;
        struct bnx2x *bp;
+       int pcie_width, pcie_speed;
        int rc;
 
-       if (version_printed++ == 0)
-               printk(KERN_INFO "%s", version);
-
        /* dev zeroed in init_etherdev */
        dev = alloc_etherdev_mq(sizeof(*bp), MAX_CONTEXT);
        if (!dev) {
@@ -10991,31 +12133,39 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
        bp = netdev_priv(dev);
        bp->msglevel = debug;
 
+       pci_set_drvdata(pdev, dev);
+
        rc = bnx2x_init_dev(pdev, dev);
        if (rc < 0) {
                free_netdev(dev);
                return rc;
        }
 
-       pci_set_drvdata(pdev, dev);
-
        rc = bnx2x_init_bp(bp);
        if (rc)
                goto init_one_exit;
 
+       /* Set init arrays */
+       rc = bnx2x_init_firmware(bp, &pdev->dev);
+       if (rc) {
+               printk(KERN_ERR PFX "Error loading firmware\n");
+               goto init_one_exit;
+       }
+
        rc = register_netdev(dev);
        if (rc) {
                dev_err(&pdev->dev, "Cannot register net device\n");
                goto init_one_exit;
        }
 
+       bnx2x_get_pcie_width_speed(bp, &pcie_width, &pcie_speed);
        printk(KERN_INFO "%s: %s (%c%d) PCI-E x%d %s found at mem %lx,"
               " IRQ %d, ", dev->name, board_info[ent->driver_data].name,
               (CHIP_REV(bp) >> 12) + 'A', (CHIP_METAL(bp) >> 4),
-              bnx2x_get_pcie_width(bp),
-              (bnx2x_get_pcie_speed(bp) == 2) ? "5GHz (Gen2)" : "2.5GHz",
+              pcie_width, (pcie_speed == 2) ? "5GHz (Gen2)" : "2.5GHz",
               dev->base_addr, bp->pdev->irq);
        printk(KERN_CONT "node addr %pM\n", dev->dev_addr);
+
        return 0;
 
 init_one_exit:
@@ -11049,6 +12199,11 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 
        unregister_netdev(dev);
 
+       kfree(bp->init_ops_offsets);
+       kfree(bp->init_ops);
+       kfree(bp->init_data);
+       release_firmware(bp->firmware);
+
        if (bp->regview)
                iounmap(bp->regview);
 
@@ -11153,9 +12308,9 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
 
        /* Free SKBs, SGEs, TPA pool and driver internals */
        bnx2x_free_skbs(bp);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
-       for_each_rx_queue(bp, i)
+       for_each_queue(bp, i)
                netif_napi_del(&bnx2x_fp(bp, i, napi));
        bnx2x_free_mem(bp);
 
@@ -11214,6 +12369,11 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
 
        netif_device_detach(dev);
 
+       if (state == pci_channel_io_perm_failure) {
+               rtnl_unlock();
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
        if (netif_running(dev))
                bnx2x_eeh_nic_unload(bp);
 
@@ -11282,8 +12442,8 @@ static void bnx2x_io_resume(struct pci_dev *pdev)
 
 static struct pci_error_handlers bnx2x_err_handler = {
        .error_detected = bnx2x_io_error_detected,
-       .slot_reset = bnx2x_io_slot_reset,
-       .resume = bnx2x_io_resume,
+       .slot_reset     = bnx2x_io_slot_reset,
+       .resume         = bnx2x_io_resume,
 };
 
 static struct pci_driver bnx2x_pci_driver = {
@@ -11298,13 +12458,22 @@ static struct pci_driver bnx2x_pci_driver = {
 
 static int __init bnx2x_init(void)
 {
+       int ret;
+
+       printk(KERN_INFO "%s", version);
+
        bnx2x_wq = create_singlethread_workqueue("bnx2x");
        if (bnx2x_wq == NULL) {
                printk(KERN_ERR PFX "Cannot create workqueue\n");
                return -ENOMEM;
        }
 
-       return pci_register_driver(&bnx2x_pci_driver);
+       ret = pci_register_driver(&bnx2x_pci_driver);
+       if (ret) {
+               printk(KERN_ERR PFX "Cannot register driver\n");
+               destroy_workqueue(bnx2x_wq);
+       }
+       return ret;
 }
 
 static void __exit bnx2x_cleanup(void)
@@ -11317,3 +12486,287 @@ static void __exit bnx2x_cleanup(void)
 module_init(bnx2x_init);
 module_exit(bnx2x_cleanup);
 
+#ifdef BCM_CNIC
+
+/* count denotes the number of new completions we have seen */
+static void bnx2x_cnic_sp_post(struct bnx2x *bp, int count)
+{
+       struct eth_spe *spe;
+
+#ifdef BNX2X_STOP_ON_ERROR
+       if (unlikely(bp->panic))
+               return;
+#endif
+
+       spin_lock_bh(&bp->spq_lock);
+       bp->cnic_spq_pending -= count;
+
+       for (; bp->cnic_spq_pending < bp->cnic_eth_dev.max_kwqe_pending;
+            bp->cnic_spq_pending++) {
+
+               if (!bp->cnic_kwq_pending)
+                       break;
+
+               spe = bnx2x_sp_get_next(bp);
+               *spe = *bp->cnic_kwq_cons;
+
+               bp->cnic_kwq_pending--;
+
+               DP(NETIF_MSG_TIMER, "pending on SPQ %d, on KWQ %d count %d\n",
+                  bp->cnic_spq_pending, bp->cnic_kwq_pending, count);
+
+               if (bp->cnic_kwq_cons == bp->cnic_kwq_last)
+                       bp->cnic_kwq_cons = bp->cnic_kwq;
+               else
+                       bp->cnic_kwq_cons++;
+       }
+       bnx2x_sp_prod_update(bp);
+       spin_unlock_bh(&bp->spq_lock);
+}
+
+static int bnx2x_cnic_sp_queue(struct net_device *dev,
+                              struct kwqe_16 *kwqes[], u32 count)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       int i;
+
+#ifdef BNX2X_STOP_ON_ERROR
+       if (unlikely(bp->panic))
+               return -EIO;
+#endif
+
+       spin_lock_bh(&bp->spq_lock);
+
+       for (i = 0; i < count; i++) {
+               struct eth_spe *spe = (struct eth_spe *)kwqes[i];
+
+               if (bp->cnic_kwq_pending == MAX_SP_DESC_CNT)
+                       break;
+
+               *bp->cnic_kwq_prod = *spe;
+
+               bp->cnic_kwq_pending++;
+
+               DP(NETIF_MSG_TIMER, "L5 SPQE %x %x %x:%x pos %d\n",
+                  spe->hdr.conn_and_cmd_data, spe->hdr.type,
+                  spe->data.mac_config_addr.hi,
+                  spe->data.mac_config_addr.lo,
+                  bp->cnic_kwq_pending);
+
+               if (bp->cnic_kwq_prod == bp->cnic_kwq_last)
+                       bp->cnic_kwq_prod = bp->cnic_kwq;
+               else
+                       bp->cnic_kwq_prod++;
+       }
+
+       spin_unlock_bh(&bp->spq_lock);
+
+       if (bp->cnic_spq_pending < bp->cnic_eth_dev.max_kwqe_pending)
+               bnx2x_cnic_sp_post(bp, 0);
+
+       return i;
+}
+
+static int bnx2x_cnic_ctl_send(struct bnx2x *bp, struct cnic_ctl_info *ctl)
+{
+       struct cnic_ops *c_ops;
+       int rc = 0;
+
+       mutex_lock(&bp->cnic_mutex);
+       c_ops = bp->cnic_ops;
+       if (c_ops)
+               rc = c_ops->cnic_ctl(bp->cnic_data, ctl);
+       mutex_unlock(&bp->cnic_mutex);
+
+       return rc;
+}
+
+static int bnx2x_cnic_ctl_send_bh(struct bnx2x *bp, struct cnic_ctl_info *ctl)
+{
+       struct cnic_ops *c_ops;
+       int rc = 0;
+
+       rcu_read_lock();
+       c_ops = rcu_dereference(bp->cnic_ops);
+       if (c_ops)
+               rc = c_ops->cnic_ctl(bp->cnic_data, ctl);
+       rcu_read_unlock();
+
+       return rc;
+}
+
+/*
+ * for commands that have no data
+ */
+static int bnx2x_cnic_notify(struct bnx2x *bp, int cmd)
+{
+       struct cnic_ctl_info ctl = {0};
+
+       ctl.cmd = cmd;
+
+       return bnx2x_cnic_ctl_send(bp, &ctl);
+}
+
+static void bnx2x_cnic_cfc_comp(struct bnx2x *bp, int cid)
+{
+       struct cnic_ctl_info ctl;
+
+       /* first we tell CNIC and only then we count this as a completion */
+       ctl.cmd = CNIC_CTL_COMPLETION_CMD;
+       ctl.data.comp.cid = cid;
+
+       bnx2x_cnic_ctl_send_bh(bp, &ctl);
+       bnx2x_cnic_sp_post(bp, 1);
+}
+
+static int bnx2x_drv_ctl(struct net_device *dev, struct drv_ctl_info *ctl)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       int rc = 0;
+
+       switch (ctl->cmd) {
+       case DRV_CTL_CTXTBL_WR_CMD: {
+               u32 index = ctl->data.io.offset;
+               dma_addr_t addr = ctl->data.io.dma_addr;
+
+               bnx2x_ilt_wr(bp, index, addr);
+               break;
+       }
+
+       case DRV_CTL_COMPLETION_CMD: {
+               int count = ctl->data.comp.comp_count;
+
+               bnx2x_cnic_sp_post(bp, count);
+               break;
+       }
+
+       /* rtnl_lock is held.  */
+       case DRV_CTL_START_L2_CMD: {
+               u32 cli = ctl->data.ring.client_id;
+
+               bp->rx_mode_cl_mask |= (1 << cli);
+               bnx2x_set_storm_rx_mode(bp);
+               break;
+       }
+
+       /* rtnl_lock is held.  */
+       case DRV_CTL_STOP_L2_CMD: {
+               u32 cli = ctl->data.ring.client_id;
+
+               bp->rx_mode_cl_mask &= ~(1 << cli);
+               bnx2x_set_storm_rx_mode(bp);
+               break;
+       }
+
+       default:
+               BNX2X_ERR("unknown command %x\n", ctl->cmd);
+               rc = -EINVAL;
+       }
+
+       return rc;
+}
+
+static void bnx2x_setup_cnic_irq_info(struct bnx2x *bp)
+{
+       struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+       if (bp->flags & USING_MSIX_FLAG) {
+               cp->drv_state |= CNIC_DRV_STATE_USING_MSIX;
+               cp->irq_arr[0].irq_flags |= CNIC_IRQ_FL_MSIX;
+               cp->irq_arr[0].vector = bp->msix_table[1].vector;
+       } else {
+               cp->drv_state &= ~CNIC_DRV_STATE_USING_MSIX;
+               cp->irq_arr[0].irq_flags &= ~CNIC_IRQ_FL_MSIX;
+       }
+       cp->irq_arr[0].status_blk = bp->cnic_sb;
+       cp->irq_arr[0].status_blk_num = CNIC_SB_ID(bp);
+       cp->irq_arr[1].status_blk = bp->def_status_blk;
+       cp->irq_arr[1].status_blk_num = DEF_SB_ID;
+
+       cp->num_irq = 2;
+}
+
+static int bnx2x_register_cnic(struct net_device *dev, struct cnic_ops *ops,
+                              void *data)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+       if (ops == NULL)
+               return -EINVAL;
+
+       if (atomic_read(&bp->intr_sem) != 0)
+               return -EBUSY;
+
+       bp->cnic_kwq = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!bp->cnic_kwq)
+               return -ENOMEM;
+
+       bp->cnic_kwq_cons = bp->cnic_kwq;
+       bp->cnic_kwq_prod = bp->cnic_kwq;
+       bp->cnic_kwq_last = bp->cnic_kwq + MAX_SP_DESC_CNT;
+
+       bp->cnic_spq_pending = 0;
+       bp->cnic_kwq_pending = 0;
+
+       bp->cnic_data = data;
+
+       cp->num_irq = 0;
+       cp->drv_state = CNIC_DRV_STATE_REGD;
+
+       bnx2x_init_sb(bp, bp->cnic_sb, bp->cnic_sb_mapping, CNIC_SB_ID(bp));
+
+       bnx2x_setup_cnic_irq_info(bp);
+       bnx2x_set_iscsi_eth_mac_addr(bp, 1);
+       bp->cnic_flags |= BNX2X_CNIC_FLAG_MAC_SET;
+       rcu_assign_pointer(bp->cnic_ops, ops);
+
+       return 0;
+}
+
+static int bnx2x_unregister_cnic(struct net_device *dev)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+       mutex_lock(&bp->cnic_mutex);
+       if (bp->cnic_flags & BNX2X_CNIC_FLAG_MAC_SET) {
+               bp->cnic_flags &= ~BNX2X_CNIC_FLAG_MAC_SET;
+               bnx2x_set_iscsi_eth_mac_addr(bp, 0);
+       }
+       cp->drv_state = 0;
+       rcu_assign_pointer(bp->cnic_ops, NULL);
+       mutex_unlock(&bp->cnic_mutex);
+       synchronize_rcu();
+       kfree(bp->cnic_kwq);
+       bp->cnic_kwq = NULL;
+
+       return 0;
+}
+
+struct cnic_eth_dev *bnx2x_cnic_probe(struct net_device *dev)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+       cp->drv_owner = THIS_MODULE;
+       cp->chip_id = CHIP_ID(bp);
+       cp->pdev = bp->pdev;
+       cp->io_base = bp->regview;
+       cp->io_base2 = bp->doorbells;
+       cp->max_kwqe_pending = 8;
+       cp->ctx_blk_size = CNIC_CTX_PER_ILT * sizeof(union cdu_context);
+       cp->ctx_tbl_offset = FUNC_ILT_BASE(BP_FUNC(bp)) + 1;
+       cp->ctx_tbl_len = CNIC_ILT_LINES;
+       cp->starting_cid = BCM_CNIC_CID_START;
+       cp->drv_submit_kwqes_16 = bnx2x_cnic_sp_queue;
+       cp->drv_ctl = bnx2x_drv_ctl;
+       cp->drv_register_cnic = bnx2x_register_cnic;
+       cp->drv_unregister_cnic = bnx2x_unregister_cnic;
+
+       return cp;
+}
+EXPORT_SYMBOL(bnx2x_cnic_probe);
+
+#endif /* BCM_CNIC */
+