include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit...

[safe/jmp/linux-2.6] / drivers / ata / sata_mv.c
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c

index b3f35a6..71cc0d4 100644 (file)
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -1,10 +1,13 @@
  /*
   * sata_mv.c - Marvell SATA support
   *
- * Copyright 2008: Marvell Corporation, all rights reserved.
+ * Copyright 2008-2009: Marvell Corporation, all rights reserved.
   * Copyright 2005: EMC Corporation, all rights reserved.
   * Copyright 2005 Red Hat, Inc.  All rights reserved.
   *
+ * Originally written by Brett Russ.
+ * Extensive overhaul and enhancement by Mark Lord <mlord@pobox.com>.
+ *
   * Please ALWAYS copy linux-ide@vger.kernel.org on emails.
   *
   * This program is free software; you can redistribute it and/or modify
@@ -23,46 +26,28 @@
   */
  
  /*
-  sata_mv TODO list:
-
-  1) Needs a full errata audit for all chipsets.  I implemented most
-  of the errata workarounds found in the Marvell vendor driver, but
-  I distinctly remember a couple workarounds (one related to PCI-X)
-  are still needed.
-
-  2) Improve/fix IRQ and error handling sequences.
-
-  3) ATAPI support (Marvell claims the 60xx/70xx chips can do it).
-
-  4) Think about TCQ support here, and for libata in general
-  with controllers that suppport it via host-queuing hardware
-  (a software-only implementation could be a nightmare).
-
-  5) Investigate problems with PCI Message Signalled Interrupts (MSI).
-
-  6) Cache frequently-accessed registers in mv_port_priv to reduce overhead.
-
-  7) Fix/reenable hot plug/unplug (should happen as a side-effect of (2) above).
-
-  8) Develop a low-power-consumption strategy, and implement it.
-
-  9) [Experiment, low priority] See if ATAPI can be supported using
-  "unknown FIS" or "vendor-specific FIS" support, or something creative
-  like that.
-
-  10) [Experiment, low priority] Investigate interrupt coalescing.
-  Quite often, especially with PCI Message Signalled Interrupts (MSI),
-  the overhead reduced by interrupt mitigation is quite often not
-  worth the latency cost.
-
-  11) [Experiment, Marvell value added] Is it possible to use target
-  mode to cross-connect two Linux boxes with Marvell cards?  If so,
-  creating LibATA target mode support would be very interesting.
-
-  Target mode, for those without docs, is the ability to directly
-  connect two SATA controllers.
+ * sata_mv TODO list:
+ *
+ * --> Develop a low-power-consumption strategy, and implement it.
+ *
+ * --> Add sysfs attributes for per-chip / per-HC IRQ coalescing thresholds.
+ *
+ * --> [Experiment, Marvell value added] Is it possible to use target
+ *       mode to cross-connect two Linux boxes with Marvell cards?  If so,
+ *       creating LibATA target mode support would be very interesting.
+ *
+ *       Target mode, for those without docs, is the ability to directly
+ *       connect two SATA ports.
+ */
  
-*/
+/*
+ * 80x1-B2 errata PCI#11:
+ *
+ * Users of the 6041/6081 Rev.B2 chips (current is C0)
+ * should be careful to insert those cards only onto PCI-X bus #0,
+ * and only in device slots 0..7, not higher.  The chips may not
+ * work correctly otherwise (note: this is a pretty rare condition).
+ */
  
  #include <linux/kernel.h>
  #include <linux/module.h>
@@ -74,16 +59,39 @@
  #include <linux/dmapool.h>
  #include <linux/dma-mapping.h>
  #include <linux/device.h>
+#include <linux/clk.h>
  #include <linux/platform_device.h>
  #include <linux/ata_platform.h>
  #include <linux/mbus.h>
+#include <linux/bitops.h>
+#include <linux/gfp.h>
  #include <scsi/scsi_host.h>
  #include <scsi/scsi_cmnd.h>
  #include <scsi/scsi_device.h>
  #include <linux/libata.h>
  
  #define DRV_NAME       "sata_mv"
-#define DRV_VERSION    "1.20"
+#define DRV_VERSION    "1.28"
+
+/*
+ * Module options: all default to 0; S_IRUGO exposes them read-only in sysfs.
+ */
+
+static int msi;		/* parameter is only registered under CONFIG_PCI */
+#ifdef CONFIG_PCI
+module_param(msi, int, S_IRUGO);
+MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)");
+#endif
+
+static int irq_coalescing_io_count;	/* see MAX_COAL_IO_COUNT (255) */
+module_param(irq_coalescing_io_count, int, S_IRUGO);
+MODULE_PARM_DESC(irq_coalescing_io_count,
+                "IRQ coalescing I/O count threshold (0..255)");
+
+static int irq_coalescing_usecs; /* presumably scaled by COAL_CLOCKS_PER_USEC -- confirm */
+module_param(irq_coalescing_usecs, int, S_IRUGO);
+MODULE_PARM_DESC(irq_coalescing_usecs,
+                "IRQ coalescing time threshold in usecs");
  
  enum {
         /* BAR's are enumerated in terms of pci_resource_start() terms */
@@ -94,18 +102,37 @@ enum {
         MV_MAJOR_REG_AREA_SZ    = 0x10000,      /* 64KB */
         MV_MINOR_REG_AREA_SZ    = 0x2000,       /* 8KB */
  
+       /* For use with both IRQ coalescing methods ("all ports" or "per-HC") */
+       COAL_CLOCKS_PER_USEC    = 150,          /* for calculating COAL_TIMEs */
+       MAX_COAL_TIME_THRESHOLD = ((1 << 24) - 1), /* internal clocks count */
+       MAX_COAL_IO_COUNT       = 255,          /* completed I/O count */
+
         MV_PCI_REG_BASE         = 0,
-       MV_IRQ_COAL_REG_BASE    = 0x18000,      /* 6xxx part only */
-       MV_IRQ_COAL_CAUSE               = (MV_IRQ_COAL_REG_BASE + 0x08),
-       MV_IRQ_COAL_CAUSE_LO            = (MV_IRQ_COAL_REG_BASE + 0x88),
-       MV_IRQ_COAL_CAUSE_HI            = (MV_IRQ_COAL_REG_BASE + 0x8c),
-       MV_IRQ_COAL_THRESHOLD           = (MV_IRQ_COAL_REG_BASE + 0xcc),
-       MV_IRQ_COAL_TIME_THRESHOLD      = (MV_IRQ_COAL_REG_BASE + 0xd0),
-
-       MV_SATAHC0_REG_BASE     = 0x20000,
-       MV_FLASH_CTL            = 0x1046c,
-       MV_GPIO_PORT_CTL        = 0x104f0,
-       MV_RESET_CFG            = 0x180d8,
+
+       /*
+        * Per-chip ("all ports") interrupt coalescing feature.
+        * This is only for GEN_II / GEN_IIE hardware.
+        *
+        * Coalescing defers the interrupt until either the IO_THRESHOLD
+        * (count of completed I/Os) is met, or the TIME_THRESHOLD is met.
+        */
+       COAL_REG_BASE           = 0x18000,
+       IRQ_COAL_CAUSE          = (COAL_REG_BASE + 0x08),
+       ALL_PORTS_COAL_IRQ      = (1 << 4),     /* all ports irq event */
+
+       IRQ_COAL_IO_THRESHOLD   = (COAL_REG_BASE + 0xcc),
+       IRQ_COAL_TIME_THRESHOLD = (COAL_REG_BASE + 0xd0),
+
+       /*
+        * Registers for the (unused here) transaction coalescing feature:
+        */
+       TRAN_COAL_CAUSE_LO      = (COAL_REG_BASE + 0x88),
+       TRAN_COAL_CAUSE_HI      = (COAL_REG_BASE + 0x8c),
+
+       SATAHC0_REG_BASE        = 0x20000,
+       FLASH_CTL               = 0x1046c,
+       GPIO_PORT_CTL           = 0x104f0,
+       RESET_CFG               = 0x180d8,
  
         MV_PCI_REG_SZ           = MV_MAJOR_REG_AREA_SZ,
         MV_SATAHC_REG_SZ        = MV_MAJOR_REG_AREA_SZ,
@@ -132,14 +159,16 @@ enum {
  
         /* Host Flags */
         MV_FLAG_DUAL_HC         = (1 << 30),  /* two SATA Host Controllers */
-       MV_FLAG_IRQ_COALESCE    = (1 << 29),  /* IRQ coalescing capability */
-       /* SoC integrated controllers, no PCI interface */
-       MV_FLAG_SOC             = (1 << 28),
  
         MV_COMMON_FLAGS         = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
-                                 ATA_FLAG_MMIO | ATA_FLAG_NO_ATAPI |
-                                 ATA_FLAG_PIO_POLLING,
-       MV_6XXX_FLAGS           = MV_FLAG_IRQ_COALESCE,
+                                 ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING,
+
+       MV_GEN_I_FLAGS          = MV_COMMON_FLAGS | ATA_FLAG_NO_ATAPI,
+
+       MV_GEN_II_FLAGS         = MV_COMMON_FLAGS | ATA_FLAG_NCQ |
+                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA,
+
+       MV_GEN_IIE_FLAGS        = MV_GEN_II_FLAGS | ATA_FLAG_AN,
  
         CRQB_FLAG_READ          = (1 << 0),
         CRQB_TAG_SHIFT          = 1,
@@ -158,14 +187,18 @@ enum {
  
         /* PCI interface registers */
  
-       PCI_COMMAND_OFS         = 0xc00,
+       MV_PCI_COMMAND          = 0xc00,
+       MV_PCI_COMMAND_MWRCOM   = (1 << 4),     /* PCI Master Write Combining */
+       MV_PCI_COMMAND_MRDTRIG  = (1 << 7),     /* PCI Master Read Trigger */
  
-       PCI_MAIN_CMD_STS_OFS    = 0xd30,
+       PCI_MAIN_CMD_STS        = 0xd30,
         STOP_PCI_MASTER         = (1 << 2),
         PCI_MASTER_EMPTY        = (1 << 3),
         GLOB_SFT_RST            = (1 << 4),
  
         MV_PCI_MODE             = 0xd00,
+       MV_PCI_MODE_MASK        = 0x30,
+
         MV_PCI_EXP_ROM_BAR_CTL  = 0xd2c,
         MV_PCI_DISC_TIMER       = 0xd04,
         MV_PCI_MSI_TRIGGER      = 0xc38,
@@ -176,81 +209,105 @@ enum {
         MV_PCI_ERR_ATTRIBUTE    = 0x1d48,
         MV_PCI_ERR_COMMAND      = 0x1d50,
  
-       PCI_IRQ_CAUSE_OFS       = 0x1d58,
-       PCI_IRQ_MASK_OFS        = 0x1d5c,
+       PCI_IRQ_CAUSE           = 0x1d58,
+       PCI_IRQ_MASK            = 0x1d5c,
         PCI_UNMASK_ALL_IRQS     = 0x7fffff,     /* bits 22-0 */
  
-       PCIE_IRQ_CAUSE_OFS      = 0x1900,
-       PCIE_IRQ_MASK_OFS       = 0x1910,
+       PCIE_IRQ_CAUSE          = 0x1900,
+       PCIE_IRQ_MASK           = 0x1910,
         PCIE_UNMASK_ALL_IRQS    = 0x40a,        /* assorted bits */
  
-       HC_MAIN_IRQ_CAUSE_OFS   = 0x1d60,
-       HC_MAIN_IRQ_MASK_OFS    = 0x1d64,
-       HC_SOC_MAIN_IRQ_CAUSE_OFS = 0x20020,
-       HC_SOC_MAIN_IRQ_MASK_OFS = 0x20024,
-       ERR_IRQ                 = (1 << 0),     /* shift by port # */
-       DONE_IRQ                = (1 << 1),     /* shift by port # */
+       /* Host Controller Main Interrupt Cause/Mask registers (1 per-chip) */
+       PCI_HC_MAIN_IRQ_CAUSE   = 0x1d60,
+       PCI_HC_MAIN_IRQ_MASK    = 0x1d64,
+       SOC_HC_MAIN_IRQ_CAUSE   = 0x20020,
+       SOC_HC_MAIN_IRQ_MASK    = 0x20024,
+       ERR_IRQ                 = (1 << 0),     /* shift by (2 * port #) */
+       DONE_IRQ                = (1 << 1),     /* shift by (2 * port #) */
         HC0_IRQ_PEND            = 0x1ff,        /* bits 0-8 = HC0's ports */
         HC_SHIFT                = 9,            /* bits 9-17 = HC1's ports */
+       DONE_IRQ_0_3            = 0x000000aa,   /* DONE_IRQ ports 0,1,2,3 */
+       DONE_IRQ_4_7            = (DONE_IRQ_0_3 << HC_SHIFT),  /* 4,5,6,7 */
         PCI_ERR                 = (1 << 18),
-       TRAN_LO_DONE            = (1 << 19),    /* 6xxx: IRQ coalescing */
-       TRAN_HI_DONE            = (1 << 20),    /* 6xxx: IRQ coalescing */
-       PORTS_0_3_COAL_DONE     = (1 << 8),
-       PORTS_4_7_COAL_DONE     = (1 << 17),
-       PORTS_0_7_COAL_DONE     = (1 << 21),    /* 6xxx: IRQ coalescing */
+       TRAN_COAL_LO_DONE       = (1 << 19),    /* transaction coalescing */
+       TRAN_COAL_HI_DONE       = (1 << 20),    /* transaction coalescing */
+       PORTS_0_3_COAL_DONE     = (1 << 8),     /* HC0 IRQ coalescing */
+       PORTS_4_7_COAL_DONE     = (1 << 17),    /* HC1 IRQ coalescing */
+       ALL_PORTS_COAL_DONE     = (1 << 21),    /* GEN_II(E) IRQ coalescing */
         GPIO_INT                = (1 << 22),
         SELF_INT                = (1 << 23),
         TWSI_INT                = (1 << 24),
         HC_MAIN_RSVD            = (0x7f << 25), /* bits 31-25 */
         HC_MAIN_RSVD_5          = (0x1fff << 19), /* bits 31-19 */
         HC_MAIN_RSVD_SOC        = (0x3fffffb << 6),     /* bits 31-9, 7-6 */
-       HC_MAIN_MASKED_IRQS     = (TRAN_LO_DONE | TRAN_HI_DONE |
-                                  PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE |
-                                  PORTS_0_7_COAL_DONE | GPIO_INT | TWSI_INT |
-                                  HC_MAIN_RSVD),
-       HC_MAIN_MASKED_IRQS_5   = (PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE |
-                                  HC_MAIN_RSVD_5),
-       HC_MAIN_MASKED_IRQS_SOC = (PORTS_0_3_COAL_DONE | HC_MAIN_RSVD_SOC),
  
         /* SATAHC registers */
-       HC_CFG_OFS              = 0,
+       HC_CFG                  = 0x00,
  
-       HC_IRQ_CAUSE_OFS        = 0x14,
+       HC_IRQ_CAUSE            = 0x14,
         DMA_IRQ                 = (1 << 0),     /* shift by port # */
         HC_COAL_IRQ             = (1 << 4),     /* IRQ coalescing */
         DEV_IRQ                 = (1 << 8),     /* shift by port # */
  
+       /*
+        * Per-HC (Host-Controller) interrupt coalescing feature.
+        * This is present on all chip generations.
+        *
+        * Coalescing defers the interrupt until either the IO_THRESHOLD
+        * (count of completed I/Os) is met, or the TIME_THRESHOLD is met.
+        */
+       HC_IRQ_COAL_IO_THRESHOLD        = 0x000c,
+       HC_IRQ_COAL_TIME_THRESHOLD      = 0x0010,
+
+       SOC_LED_CTRL            = 0x2c,
+       SOC_LED_CTRL_BLINK      = (1 << 0),     /* Active LED blink */
+       SOC_LED_CTRL_ACT_PRESENCE = (1 << 2),   /* Multiplex dev presence */
+                                               /*  with dev activity LED */
+
         /* Shadow block registers */
-       SHD_BLK_OFS             = 0x100,
-       SHD_CTL_AST_OFS         = 0x20,         /* ofs from SHD_BLK_OFS */
+       SHD_BLK                 = 0x100,
+       SHD_CTL_AST             = 0x20,         /* ofs from SHD_BLK */
  
         /* SATA registers */
-       SATA_STATUS_OFS         = 0x300,  /* ctrl, err regs follow status */
-       SATA_ACTIVE_OFS         = 0x350,
-       SATA_FIS_IRQ_CAUSE_OFS  = 0x364,
+       SATA_STATUS             = 0x300,  /* ctrl, err regs follow status */
+       SATA_ACTIVE             = 0x350,
+       FIS_IRQ_CAUSE           = 0x364,
+       FIS_IRQ_CAUSE_AN        = (1 << 9),     /* async notification */
  
-       LTMODE_OFS              = 0x30c,
+       LTMODE                  = 0x30c,        /* requires read-after-write */
         LTMODE_BIT8             = (1 << 8),     /* unknown, but necessary */
  
-       PHY_MODE3               = 0x310,
-       PHY_MODE4               = 0x314,
         PHY_MODE2               = 0x330,
-       SATA_IFCTL_OFS          = 0x344,
-       SATA_IFSTAT_OFS         = 0x34c,
-       VENDOR_UNIQUE_FIS_OFS   = 0x35c,
+       PHY_MODE3               = 0x310,
+
+       PHY_MODE4               = 0x314,        /* requires read-after-write */
+       PHY_MODE4_CFG_MASK      = 0x00000003,   /* phy internal config field */
+       PHY_MODE4_CFG_VALUE     = 0x00000001,   /* phy internal config field */
+       PHY_MODE4_RSVD_ZEROS    = 0x5de3fffa,   /* Gen2e always write zeros */
+       PHY_MODE4_RSVD_ONES     = 0x00000005,   /* Gen2e always write ones */
+
+       SATA_IFCTL              = 0x344,
+       SATA_TESTCTL            = 0x348,
+       SATA_IFSTAT             = 0x34c,
+       VENDOR_UNIQUE_FIS       = 0x35c,
  
-       FIS_CFG_OFS             = 0x360,
-       FIS_CFG_SINGLE_SYNC     = (1 << 16),    /* SYNC on DMA activation */
+       FISCFG                  = 0x360,
+       FISCFG_WAIT_DEV_ERR     = (1 << 8),     /* wait for host on DevErr */
+       FISCFG_SINGLE_SYNC      = (1 << 16),    /* SYNC on DMA activation */
+
+       PHY_MODE9_GEN2          = 0x398,
+       PHY_MODE9_GEN1          = 0x39c,
+       PHYCFG_OFS              = 0x3a0,        /* only in 65n devices */
  
         MV5_PHY_MODE            = 0x74,
-       MV5_LT_MODE             = 0x30,
+       MV5_LTMODE              = 0x30,
         MV5_PHY_CTL             = 0x0C,
-       SATA_INTERFACE_CFG      = 0x050,
+       SATA_IFCFG              = 0x050,
  
         MV_M2_PREAMP_MASK       = 0x7e0,
  
         /* Port registers */
-       EDMA_CFG_OFS            = 0,
+       EDMA_CFG                = 0,
         EDMA_CFG_Q_DEPTH        = 0x1f,         /* max device queue depth */
         EDMA_CFG_NCQ            = (1 << 5),     /* for R/W FPDMA queued */
         EDMA_CFG_NCQ_GO_ON_ERR  = (1 << 14),    /* continue on error */
@@ -259,8 +316,8 @@ enum {
         EDMA_CFG_EDMA_FBS       = (1 << 16),    /* EDMA FIS-Based Switching */
         EDMA_CFG_FBS            = (1 << 26),    /* FIS-Based Switching */
  
-       EDMA_ERR_IRQ_CAUSE_OFS  = 0x8,
-       EDMA_ERR_IRQ_MASK_OFS   = 0xc,
+       EDMA_ERR_IRQ_CAUSE      = 0x8,
+       EDMA_ERR_IRQ_MASK       = 0xc,
         EDMA_ERR_D_PAR          = (1 << 0),     /* UDMA data parity err */
         EDMA_ERR_PRD_PAR        = (1 << 1),     /* UDMA PRD parity err */
         EDMA_ERR_DEV            = (1 << 2),     /* device error */
@@ -300,9 +357,7 @@ enum {
         EDMA_ERR_IRQ_TRANSIENT  = EDMA_ERR_LNK_CTRL_RX_0 |
                                   EDMA_ERR_LNK_CTRL_RX_1 |
                                   EDMA_ERR_LNK_CTRL_RX_3 |
-                                 EDMA_ERR_LNK_CTRL_TX |
-                                /* temporary, until we fix hotplug: */
-                                (EDMA_ERR_DEV_DCON | EDMA_ERR_DEV_CON),
+                                 EDMA_ERR_LNK_CTRL_TX,
  
         EDMA_EH_FREEZE          = EDMA_ERR_D_PAR |
                                   EDMA_ERR_PRD_PAR |
@@ -331,26 +386,36 @@ enum {
                                   EDMA_ERR_INTRL_PAR |
                                   EDMA_ERR_IORDY,
  
-       EDMA_REQ_Q_BASE_HI_OFS  = 0x10,
-       EDMA_REQ_Q_IN_PTR_OFS   = 0x14,         /* also contains BASE_LO */
+       EDMA_REQ_Q_BASE_HI      = 0x10,
+       EDMA_REQ_Q_IN_PTR       = 0x14,         /* also contains BASE_LO */
  
-       EDMA_REQ_Q_OUT_PTR_OFS  = 0x18,
+       EDMA_REQ_Q_OUT_PTR      = 0x18,
         EDMA_REQ_Q_PTR_SHIFT    = 5,
  
-       EDMA_RSP_Q_BASE_HI_OFS  = 0x1c,
-       EDMA_RSP_Q_IN_PTR_OFS   = 0x20,
-       EDMA_RSP_Q_OUT_PTR_OFS  = 0x24,         /* also contains BASE_LO */
+       EDMA_RSP_Q_BASE_HI      = 0x1c,
+       EDMA_RSP_Q_IN_PTR       = 0x20,
+       EDMA_RSP_Q_OUT_PTR      = 0x24,         /* also contains BASE_LO */
         EDMA_RSP_Q_PTR_SHIFT    = 3,
  
-       EDMA_CMD_OFS            = 0x28,         /* EDMA command register */
+       EDMA_CMD                = 0x28,         /* EDMA command register */
         EDMA_EN                 = (1 << 0),     /* enable EDMA */
         EDMA_DS                 = (1 << 1),     /* disable EDMA; self-negated */
-       ATA_RST                 = (1 << 2),     /* reset trans/link/phy */
+       EDMA_RESET              = (1 << 2),     /* reset eng/trans/link/phy */
+
+       EDMA_STATUS             = 0x30,         /* EDMA engine status */
+       EDMA_STATUS_CACHE_EMPTY = (1 << 6),     /* GenIIe command cache empty */
+       EDMA_STATUS_IDLE        = (1 << 7),     /* GenIIe EDMA enabled/idle */
  
         EDMA_IORDY_TMOUT        = 0x34,
         EDMA_ARB_CFG            = 0x38,
  
-       GEN_II_NCQ_MAX_SECTORS  = 256,          /* max sects/io on Gen2 w/NCQ */
+       EDMA_HALTCOND           = 0x60,         /* GenIIe halt conditions */
+       EDMA_UNKNOWN_RSVD       = 0x6C,         /* GenIIe unknown/reserved */
+
+       BMDMA_CMD               = 0x224,        /* bmdma command register */
+       BMDMA_STATUS            = 0x228,        /* bmdma status register */
+       BMDMA_PRD_LOW           = 0x22c,        /* bmdma PRD addr 31:0 */
+       BMDMA_PRD_HIGH          = 0x230,        /* bmdma PRD addr 63:32 */
  
         /* Host private flags (hp_flags) */
         MV_HP_FLAG_MSI          = (1 << 0),
@@ -358,21 +423,27 @@ enum {
         MV_HP_ERRATA_50XXB2     = (1 << 2),
         MV_HP_ERRATA_60X1B2     = (1 << 3),
         MV_HP_ERRATA_60X1C0     = (1 << 4),
-       MV_HP_ERRATA_XX42A0     = (1 << 5),
         MV_HP_GEN_I             = (1 << 6),     /* Generation I: 50xx */
         MV_HP_GEN_II            = (1 << 7),     /* Generation II: 60xx */
         MV_HP_GEN_IIE           = (1 << 8),     /* Generation IIE: 6042/7042 */
         MV_HP_PCIE              = (1 << 9),     /* PCIe bus/regs: 7042 */
+       MV_HP_CUT_THROUGH       = (1 << 10),    /* can use EDMA cut-through */
+       MV_HP_FLAG_SOC          = (1 << 11),    /* SystemOnChip, no PCI */
+       MV_HP_QUIRK_LED_BLINK_EN = (1 << 12),   /* is led blinking enabled? */
  
         /* Port private flags (pp_flags) */
         MV_PP_FLAG_EDMA_EN      = (1 << 0),     /* is EDMA engine enabled? */
         MV_PP_FLAG_NCQ_EN       = (1 << 1),     /* is EDMA set up for NCQ? */
+       MV_PP_FLAG_FBS_EN       = (1 << 2),     /* is EDMA set up for FBS? */
+       MV_PP_FLAG_DELAYED_EH   = (1 << 3),     /* delayed dev err handling */
+       MV_PP_FLAG_FAKE_ATA_BUSY = (1 << 4),    /* ignore initial ATA_DRDY */
  };
  
  #define IS_GEN_I(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_I)
  #define IS_GEN_II(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_II)
  #define IS_GEN_IIE(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_IIE)
-#define HAS_PCI(host) (!((host)->ports[0]->flags & MV_FLAG_SOC))
+#define IS_PCIE(hpriv) ((hpriv)->hp_flags & MV_HP_PCIE)
+#define IS_SOC(hpriv) ((hpriv)->hp_flags & MV_HP_FLAG_SOC)
  
  #define WINDOW_CTRL(i)         (0x20030 + ((i) << 4))
  #define WINDOW_BASE(i)         (0x20034 + ((i) << 4))
@@ -434,6 +505,18 @@ struct mv_sg {
         __le32                  reserved;
  };
  
+/*
+ * Local cache of a few frequently accessed port registers, filled in
+ * by mv_save_cached_regs() and kept in mv_port_priv.cached, so that
+ * slow register reads are avoided when switching EDMA <-> non-EDMA.
+ */
+struct mv_cached_regs {
+       u32                     fiscfg;		/* copy of port reg FISCFG */
+       u32                     ltmode;		/* copy of port reg LTMODE */
+       u32                     haltcond;	/* presumably EDMA_HALTCOND -- confirm */
+       u32                     unknown_rsvd;	/* presumably EDMA_UNKNOWN_RSVD -- confirm */
+};
+
  struct mv_port_priv {
         struct mv_crqb          *crqb;
         dma_addr_t              crqb_dma;
@@ -446,6 +529,8 @@ struct mv_port_priv {
         unsigned int            resp_idx;
  
         u32                     pp_flags;
+       struct mv_cached_regs   cached;
+       unsigned int            delayed_eh_pmp_map;
  };
  
  struct mv_port_signal {
@@ -455,15 +540,21 @@ struct mv_port_signal {
  
  struct mv_host_priv {
         u32                     hp_flags;
+       unsigned int            board_idx;
+       u32                     main_irq_mask;
         struct mv_port_signal   signal[8];
         const struct mv_hw_ops  *ops;
         int                     n_ports;
         void __iomem            *base;
-       void __iomem            *main_cause_reg_addr;
-       void __iomem            *main_mask_reg_addr;
-       u32                     irq_cause_ofs;
-       u32                     irq_mask_ofs;
+       void __iomem            *main_irq_cause_addr;
+       void __iomem            *main_irq_mask_addr;
+       u32                     irq_cause_offset;
+       u32                     irq_mask_offset;
         u32                     unmask_all_irqs;
+
+#if defined(CONFIG_HAVE_CLK)
+       struct clk              *clk;
+#endif
         /*
          * These consistent DMA memory pools give us guaranteed
          * alignment for hardware-accessed data structures,
@@ -486,12 +577,13 @@ struct mv_hw_ops {
         void (*reset_bus)(struct ata_host *host, void __iomem *mmio);
  };
  
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
  static int mv_port_start(struct ata_port *ap);
  static void mv_port_stop(struct ata_port *ap);
+static int mv_qc_defer(struct ata_queued_cmd *qc);
  static void mv_qc_prep(struct ata_queued_cmd *qc);
  static void mv_qc_prep_iie(struct ata_queued_cmd *qc);
  static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
@@ -528,18 +620,31 @@ static int mv_soc_reset_hc(struct mv_host_priv *hpriv,
  static void mv_soc_reset_flash(struct mv_host_priv *hpriv,
                                       void __iomem *mmio);
  static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio);
+static void mv_soc_65n_phy_errata(struct mv_host_priv *hpriv,
+                                 void __iomem *mmio, unsigned int port);
  static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio);
  static void mv_reset_channel(struct mv_host_priv *hpriv, void __iomem *mmio,
                              unsigned int port_no);
  static int mv_stop_edma(struct ata_port *ap);
  static int mv_stop_edma_engine(void __iomem *port_mmio);
-static void mv_edma_cfg(struct ata_port *ap, int want_ncq);
+static void mv_edma_cfg(struct ata_port *ap, int want_ncq, int want_edma);
  
  static void mv_pmp_select(struct ata_port *ap, int pmp);
  static int mv_pmp_hardreset(struct ata_link *link, unsigned int *class,
                                 unsigned long deadline);
  static int  mv_softreset(struct ata_link *link, unsigned int *class,
                                 unsigned long deadline);
+static void mv_pmp_error_handler(struct ata_port *ap);
+static void mv_process_crpb_entries(struct ata_port *ap,
+                                       struct mv_port_priv *pp);
+
+static void mv_sff_irq_clear(struct ata_port *ap);
+static int mv_check_atapi_dma(struct ata_queued_cmd *qc);
+static void mv_bmdma_setup(struct ata_queued_cmd *qc);
+static void mv_bmdma_start(struct ata_queued_cmd *qc);
+static void mv_bmdma_stop(struct ata_queued_cmd *qc);
+static u8   mv_bmdma_status(struct ata_port *ap);
+static u8 mv_sff_check_status(struct ata_port *ap);
  
  /* .sg_tablesize is (MV_MAX_SG_CT / 2) in the structures below
   * because we have to allow room for worst case splitting of
@@ -561,6 +666,9 @@ static struct scsi_host_template mv6_sht = {
  static struct ata_port_operations mv5_ops = {
         .inherits               = &ata_sff_port_ops,
  
+       .lost_interrupt         = ATA_OP_NULL,
+
+       .qc_defer               = mv_qc_defer,
         .qc_prep                = mv_qc_prep,
         .qc_issue               = mv_qc_issue,
  
@@ -579,7 +687,6 @@ static struct ata_port_operations mv5_ops = {
  
  static struct ata_port_operations mv6_ops = {
         .inherits               = &mv5_ops,
-       .qc_defer               = sata_pmp_qc_defer_cmd_switch,
         .dev_config             = mv6_dev_config,
         .scr_read               = mv_scr_read,
         .scr_write              = mv_scr_write,
@@ -587,72 +694,69 @@ static struct ata_port_operations mv6_ops = {
         .pmp_hardreset          = mv_pmp_hardreset,
         .pmp_softreset          = mv_softreset,
         .softreset              = mv_softreset,
-       .error_handler          = sata_pmp_error_handler,
+       .error_handler          = mv_pmp_error_handler,
+
+       .sff_check_status       = mv_sff_check_status,
+       .sff_irq_clear          = mv_sff_irq_clear,
+       .check_atapi_dma        = mv_check_atapi_dma,
+       .bmdma_setup            = mv_bmdma_setup,
+       .bmdma_start            = mv_bmdma_start,
+       .bmdma_stop             = mv_bmdma_stop,
+       .bmdma_status           = mv_bmdma_status,
  };
  
  static struct ata_port_operations mv_iie_ops = {
         .inherits               = &mv6_ops,
-       .qc_defer               = ata_std_qc_defer, /* FIS-based switching */
         .dev_config             = ATA_OP_NULL,
         .qc_prep                = mv_qc_prep_iie,
  };
  
  static const struct ata_port_info mv_port_info[] = {
         {  /* chip_504x */
-               .flags          = MV_COMMON_FLAGS,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_I_FLAGS,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv5_ops,
         },
         {  /* chip_508x */
-               .flags          = MV_COMMON_FLAGS | MV_FLAG_DUAL_HC,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_I_FLAGS | MV_FLAG_DUAL_HC,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv5_ops,
         },
         {  /* chip_5080 */
-               .flags          = MV_COMMON_FLAGS | MV_FLAG_DUAL_HC,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_I_FLAGS | MV_FLAG_DUAL_HC,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv5_ops,
         },
         {  /* chip_604x */
-               .flags          = MV_COMMON_FLAGS | MV_6XXX_FLAGS |
-                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA |
-                                 ATA_FLAG_NCQ,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_II_FLAGS,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv6_ops,
         },
         {  /* chip_608x */
-               .flags          = MV_COMMON_FLAGS | MV_6XXX_FLAGS |
-                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA |
-                                 ATA_FLAG_NCQ | MV_FLAG_DUAL_HC,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_II_FLAGS | MV_FLAG_DUAL_HC,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv6_ops,
         },
         {  /* chip_6042 */
-               .flags          = MV_COMMON_FLAGS | MV_6XXX_FLAGS |
-                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA |
-                                 ATA_FLAG_NCQ,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_IIE_FLAGS,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv_iie_ops,
         },
         {  /* chip_7042 */
-               .flags          = MV_COMMON_FLAGS | MV_6XXX_FLAGS |
-                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA |
-                                 ATA_FLAG_NCQ,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_IIE_FLAGS,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv_iie_ops,
         },
         {  /* chip_soc */
-               .flags          = MV_COMMON_FLAGS | MV_6XXX_FLAGS |
-                                 ATA_FLAG_PMP | ATA_FLAG_ACPI_SATA |
-                                 ATA_FLAG_NCQ | MV_FLAG_SOC,
-               .pio_mask       = 0x1f, /* pio0-4 */
+               .flags          = MV_GEN_IIE_FLAGS,
+               .pio_mask       = ATA_PIO4,
                 .udma_mask      = ATA_UDMA6,
                 .port_ops       = &mv_iie_ops,
         },
@@ -663,9 +767,10 @@ static const struct pci_device_id mv_pci_tbl[] = {
         { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
         { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
         { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
-       /* RocketRAID 1740/174x have different identifiers */
-       { PCI_VDEVICE(TTI, 0x1740), chip_508x },
-       { PCI_VDEVICE(TTI, 0x1742), chip_508x },
+       /* RocketRAID 1720/174x have different identifiers */
+       { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+       { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+       { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
  
         { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
         { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
@@ -715,6 +820,14 @@ static const struct mv_hw_ops mv_soc_ops = {
         .reset_bus              = mv_soc_reset_bus,
  };
  
+static const struct mv_hw_ops mv_soc_65n_ops = {
+       .phy_errata             = mv_soc_65n_phy_errata, /* 65n-only hook */
+       .enable_leds            = mv_soc_enable_leds,
+       .reset_hc               = mv_soc_reset_hc,
+       .reset_flash            = mv_soc_reset_flash,
+       .reset_bus              = mv_soc_reset_bus,
+};
+
  /*
   * Functions
   */
@@ -735,9 +848,27 @@ static inline unsigned int mv_hardport_from_port(unsigned int port)
         return port & MV_PORT_MASK;
  }
  
+/*
+ * Consolidate some rather tricky bit shift calculations.
+ * This is hot-path stuff, so not a function.
+ * Simple code, with two outputs, so a macro rather than an inline function.
+ *
+ * port is the sole input, in range 0..7; the macro evaluates it twice.
+ * shift is one output, for use with main_irq_cause / main_irq_mask registers.
+ * hardport is the other output, in range 0..3.
+ *
+ * port and hardport may be the same variable: port is read before hardport is set.
+ */
+#define MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport)   \
+{                                                              \
+       shift    = mv_hc_from_port(port) * HC_SHIFT;            \
+       hardport = mv_hardport_from_port(port);                 \
+       shift   += hardport * 2;                                \
+}
+
  static inline void __iomem *mv_hc_base(void __iomem *base, unsigned int hc)
  {
-       return (base + MV_SATAHC0_REG_BASE + (hc * MV_SATAHC_REG_SZ));
+       return (base + SATAHC0_REG_BASE + (hc * MV_SATAHC_REG_SZ));
  }
  
  static inline void __iomem *mv_hc_base_from_port(void __iomem *base,
@@ -777,6 +908,62 @@ static inline int mv_get_hc_count(unsigned long port_flags)
         return ((port_flags & MV_FLAG_DUAL_HC) ? 2 : 1);
  }
  
+/**
+ *      mv_save_cached_regs - (re-)initialize cached port registers
+ *      @ap: the port whose registers we are caching
+ *
+ *     Read FISCFG, LTMODE, EDMA_HALTCOND and EDMA_UNKNOWN_RSVD
+ *     into pp->cached, so that reading them over and over
+ *     again can be avoided on the hotter paths of this driver.
+ *     This saves a few microseconds each time we switch
+ *     to/from EDMA mode to perform (e.g.) a drive cache flush.
+ */
+static void mv_save_cached_regs(struct ata_port *ap)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+       struct mv_port_priv *pp = ap->private_data;
+
+       pp->cached.fiscfg = readl(port_mmio + FISCFG);
+       pp->cached.ltmode = readl(port_mmio + LTMODE);
+       pp->cached.haltcond = readl(port_mmio + EDMA_HALTCOND);
+       pp->cached.unknown_rsvd = readl(port_mmio + EDMA_UNKNOWN_RSVD);
+}
+
+/**
+ *      mv_write_cached_reg - write to a cached port register
+ *      @addr: hardware address of the register
+ *      @old: pointer to cached value of the register (updated to @new)
+ *      @new: new value for the register
+ *
+ *     Write a new value to a cached register and to its cache,
+ *     but only if the value is different from before.
+ */
+static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new)
+{
+       if (new != *old) {
+               unsigned long laddr;
+               *old = new;     /* cache is updated before the register */
+               /*
+                * Workaround for 88SX60x1-B2 FEr SATA#13:
+                * Read-after-write is needed to prevent generating 64-bit
+                * write cycles on the PCI bus for SATA interface registers
+                * at offsets ending in 0x4 or 0xc.
+                *
+                * Looks like a lot of fuss, but it avoids an unnecessary
+                * +1 usec read-after-write delay for unaffected registers.
+                */
+               laddr = (long)addr & 0xffff;    /* low 16 bits of the address */
+               if (laddr >= 0x300 && laddr <= 0x33c) {
+                       laddr &= 0x000f;        /* last hex digit of the offset */
+                       if (laddr == 0x4 || laddr == 0xc) {
+                               writelfl(new, addr); /* read after write */
+                               return;
+                       }
+               }
+               writel(new, addr); /* unaffected by the errata */
+       }
+}
+
  static void mv_set_edma_ptrs(void __iomem *port_mmio,
                              struct mv_host_priv *hpriv,
                              struct mv_port_priv *pp)
@@ -786,39 +973,160 @@ static void mv_set_edma_ptrs(void __iomem *port_mmio,
         /*
          * initialize request queue
          */
-       index = (pp->req_idx & MV_MAX_Q_DEPTH_MASK) << EDMA_REQ_Q_PTR_SHIFT;
+       pp->req_idx &= MV_MAX_Q_DEPTH_MASK;     /* paranoia */
+       index = pp->req_idx << EDMA_REQ_Q_PTR_SHIFT;
  
         WARN_ON(pp->crqb_dma & 0x3ff);
-       writel((pp->crqb_dma >> 16) >> 16, port_mmio + EDMA_REQ_Q_BASE_HI_OFS);
+       writel((pp->crqb_dma >> 16) >> 16, port_mmio + EDMA_REQ_Q_BASE_HI);
         writelfl((pp->crqb_dma & EDMA_REQ_Q_BASE_LO_MASK) | index,
-                port_mmio + EDMA_REQ_Q_IN_PTR_OFS);
-
-       if (hpriv->hp_flags & MV_HP_ERRATA_XX42A0)
-               writelfl((pp->crqb_dma & 0xffffffff) | index,
-                        port_mmio + EDMA_REQ_Q_OUT_PTR_OFS);
-       else
-               writelfl(index, port_mmio + EDMA_REQ_Q_OUT_PTR_OFS);
+                port_mmio + EDMA_REQ_Q_IN_PTR);
+       writelfl(index, port_mmio + EDMA_REQ_Q_OUT_PTR);
  
         /*
          * initialize response queue
          */
-       index = (pp->resp_idx & MV_MAX_Q_DEPTH_MASK) << EDMA_RSP_Q_PTR_SHIFT;
+       pp->resp_idx &= MV_MAX_Q_DEPTH_MASK;    /* paranoia */
+       index = pp->resp_idx << EDMA_RSP_Q_PTR_SHIFT;
  
         WARN_ON(pp->crpb_dma & 0xff);
-       writel((pp->crpb_dma >> 16) >> 16, port_mmio + EDMA_RSP_Q_BASE_HI_OFS);
+       writel((pp->crpb_dma >> 16) >> 16, port_mmio + EDMA_RSP_Q_BASE_HI);
+       writelfl(index, port_mmio + EDMA_RSP_Q_IN_PTR);
+       writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) | index,
+                port_mmio + EDMA_RSP_Q_OUT_PTR);
+}
  
-       if (hpriv->hp_flags & MV_HP_ERRATA_XX42A0)
-               writelfl((pp->crpb_dma & 0xffffffff) | index,
-                        port_mmio + EDMA_RSP_Q_IN_PTR_OFS);
-       else
-               writelfl(index, port_mmio + EDMA_RSP_Q_IN_PTR_OFS);
+static void mv_write_main_irq_mask(u32 mask, struct mv_host_priv *hpriv)
+{
+       /*
+        * When writing to the main_irq_mask in hardware,
+        * we must ensure exclusivity between the interrupt coalescing bits
+        * and the corresponding individual port DONE_IRQ bits.
+        *
+        * Note that this register is really an "IRQ enable" register,
+        * not an "IRQ mask" register as Marvell's naming might suggest.
+        */
+       if (mask & (ALL_PORTS_COAL_DONE | PORTS_0_3_COAL_DONE))
+               mask &= ~DONE_IRQ_0_3;  /* coalescing wins over DONE_IRQ_0_3 */
+       if (mask & (ALL_PORTS_COAL_DONE | PORTS_4_7_COAL_DONE))
+               mask &= ~DONE_IRQ_4_7;  /* coalescing wins over DONE_IRQ_4_7 */
+       writelfl(mask, hpriv->main_irq_mask_addr); /* hpriv->main_irq_mask is not modified here */
+}
  
-       writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) | index,
-                port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
+static void mv_set_main_irq_mask(struct ata_host *host,
+                                u32 disable_bits, u32 enable_bits)
+{
+       struct mv_host_priv *hpriv = host->private_data;
+       u32 old_mask, new_mask;
+
+       old_mask = hpriv->main_irq_mask;        /* software copy; no register read */
+       new_mask = (old_mask & ~disable_bits) | enable_bits; /* enable_bits take precedence */
+       if (new_mask != old_mask) {     /* skip the register write if nothing changed */
+               hpriv->main_irq_mask = new_mask;
+               mv_write_main_irq_mask(new_mask, hpriv);
+       }
+}
+
+static void mv_enable_port_irqs(struct ata_port *ap,
+                                    unsigned int port_bits)
+{
+       unsigned int shift, hardport, port = ap->port_no;
+       u32 disable_bits, enable_bits;
+
+       MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport);
+
+       disable_bits = (DONE_IRQ | ERR_IRQ) << shift;   /* drop both of this port's bits... */
+       enable_bits  = port_bits << shift;      /* ...then set only the requested ones */
+       mv_set_main_irq_mask(ap->host, disable_bits, enable_bits);
+}
+
+static void mv_clear_and_enable_port_irqs(struct ata_port *ap,
+                                         void __iomem *port_mmio,
+                                         unsigned int port_irqs)
+{
+       struct mv_host_priv *hpriv = ap->host->private_data;
+       int hardport = mv_hardport_from_port(ap->port_no);
+       void __iomem *hc_mmio = mv_hc_base_from_port(
+                               mv_host_base(ap->host), ap->port_no);
+       u32 hc_irq_cause;
+
+       /* clear EDMA event indicators, if any */
+       writelfl(0, port_mmio + EDMA_ERR_IRQ_CAUSE);
+
+       /* clear this port's pending DEV_IRQ/DMA_IRQ events in its HC */
+       hc_irq_cause = ~((DEV_IRQ | DMA_IRQ) << hardport);
+       writelfl(hc_irq_cause, hc_mmio + HC_IRQ_CAUSE);
+
+       /* clear FIS IRQ Cause (only written on GEN_IIE) */
+       if (IS_GEN_IIE(hpriv))
+               writelfl(0, port_mmio + FIS_IRQ_CAUSE);
+
+       mv_enable_port_irqs(ap, port_irqs);     /* finally update the main irq mask */
+}
+
+static void mv_set_irq_coalescing(struct ata_host *host,
+                                 unsigned int count, unsigned int usecs)
+{
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *mmio = hpriv->base, *hc_mmio;
+       u32 coal_enable = 0;
+       unsigned long flags;
+       unsigned int clks, is_dual_hc = hpriv->n_ports > MV_PORTS_PER_HC;
+       const u32 coal_disable = PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE |
+                                                       ALL_PORTS_COAL_DONE;
+
+       /* Disable IRQ coalescing if either threshold is zero */
+       if (!usecs || !count) {
+               clks = count = 0;
+       } else {
+               /* Respect maximum limits of the hardware */
+               clks = usecs * COAL_CLOCKS_PER_USEC;    /* time threshold is written in clocks */
+               if (clks > MAX_COAL_TIME_THRESHOLD)
+                       clks = MAX_COAL_TIME_THRESHOLD;
+               if (count > MAX_COAL_IO_COUNT)
+                       count = MAX_COAL_IO_COUNT;
+       }
+
+       spin_lock_irqsave(&host->lock, flags);
+       mv_set_main_irq_mask(host, coal_disable, 0);    /* all coalescing bits off while reprogramming */
+
+       if (is_dual_hc && !IS_GEN_I(hpriv)) {
+               /*
+                * GEN_II/GEN_IIE with dual host controllers:
+                * one set of global thresholds for the entire chip.
+                */
+               writel(clks,  mmio + IRQ_COAL_TIME_THRESHOLD);
+               writel(count, mmio + IRQ_COAL_IO_THRESHOLD);
+               /* clear leftover coal IRQ bit */
+               writel(~ALL_PORTS_COAL_IRQ, mmio + IRQ_COAL_CAUSE);
+               if (count)
+                       coal_enable = ALL_PORTS_COAL_DONE;
+               clks = count = 0; /* force clearing of regular regs below */
+       }
+
+       /*
+        * All chips: independent thresholds for each HC on the chip.
+        */
+       hc_mmio = mv_hc_base_from_port(mmio, 0);
+       writel(clks,  hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD);
+       writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD);
+       writel(~HC_COAL_IRQ, hc_mmio + HC_IRQ_CAUSE);
+       if (count)
+               coal_enable |= PORTS_0_3_COAL_DONE;
+       if (is_dual_hc) {
+               hc_mmio = mv_hc_base_from_port(mmio, MV_PORTS_PER_HC);
+               writel(clks,  hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD);
+               writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD);
+               writel(~HC_COAL_IRQ, hc_mmio + HC_IRQ_CAUSE);
+               if (count)
+                       coal_enable |= PORTS_4_7_COAL_DONE;
+       }
+
+       mv_set_main_irq_mask(host, 0, coal_enable);     /* coal_enable is 0 when coalescing is disabled */
+       spin_unlock_irqrestore(&host->lock, flags);
  }
  
  /**
- *      mv_start_dma - Enable eDMA engine
+ *      mv_start_edma - Enable eDMA engine
   *      @base: port base address
   *      @pp: port private data
   *
@@ -828,7 +1136,7 @@ static void mv_set_edma_ptrs(void __iomem *port_mmio,
   *      LOCKING:
   *      Inherited from caller.
   */
-static void mv_start_dma(struct ata_port *ap, void __iomem *port_mmio,
+static void mv_start_edma(struct ata_port *ap, void __iomem *port_mmio,
                          struct mv_port_priv *pp, u8 protocol)
  {
         int want_ncq = (protocol == ATA_PROT_NCQ);
@@ -840,34 +1148,40 @@ static void mv_start_dma(struct ata_port *ap, void __iomem *port_mmio,
         }
         if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN)) {
                 struct mv_host_priv *hpriv = ap->host->private_data;
-               int hardport = mv_hardport_from_port(ap->port_no);
-               void __iomem *hc_mmio = mv_hc_base_from_port(
-                                       mv_host_base(ap->host), hardport);
-               u32 hc_irq_cause, ipending;
-
-               /* clear EDMA event indicators, if any */
-               writelfl(0, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
-
-               /* clear EDMA interrupt indicator, if any */
-               hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS);
-               ipending = (DEV_IRQ | DMA_IRQ) << hardport;
-               if (hc_irq_cause & ipending) {
-                       writelfl(hc_irq_cause & ~ipending,
-                                hc_mmio + HC_IRQ_CAUSE_OFS);
-               }
-
-               mv_edma_cfg(ap, want_ncq);
  
-               /* clear FIS IRQ Cause */
-               writelfl(0, port_mmio + SATA_FIS_IRQ_CAUSE_OFS);
+               mv_edma_cfg(ap, want_ncq, 1);
  
                 mv_set_edma_ptrs(port_mmio, hpriv, pp);
+               mv_clear_and_enable_port_irqs(ap, port_mmio, DONE_IRQ|ERR_IRQ);
  
-               writelfl(EDMA_EN, port_mmio + EDMA_CMD_OFS);
+               writelfl(EDMA_EN, port_mmio + EDMA_CMD);
                 pp->pp_flags |= MV_PP_FLAG_EDMA_EN;
         }
  }
  
+static void mv_wait_for_edma_empty_idle(struct ata_port *ap)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+       const u32 empty_idle = (EDMA_STATUS_CACHE_EMPTY | EDMA_STATUS_IDLE);
+       const int per_loop = 5, timeout = (15 * 1000 / per_loop); /* usecs per poll; number of polls */
+       int i;
+
+       /*
+        * Wait for the EDMA engine to finish transactions in progress.
+        * No idea what a good "timeout" value might be, but measurements
+        * indicate that it often requires hundreds of microseconds
+        * with two drives in-use.  So we use the 15msec value above
+        * as a rough guess at what even more drives might require.
+        */
+       for (i = 0; i < timeout; ++i) { /* on timeout we just fall out; nothing is reported */
+               u32 edma_stat = readl(port_mmio + EDMA_STATUS);
+               if ((edma_stat & empty_idle) == empty_idle)
+                       break;  /* both status bits are set */
+               udelay(per_loop);
+       }
+       /* ata_port_printk(ap, KERN_INFO, "%s: %u+ usecs\n", __func__, i); */
+}
+
  /**
   *      mv_stop_edma_engine - Disable eDMA engine
   *      @port_mmio: io base address
@@ -880,11 +1194,11 @@ static int mv_stop_edma_engine(void __iomem *port_mmio)
         int i;
  
         /* Disable eDMA.  The disable bit auto clears. */
-       writelfl(EDMA_DS, port_mmio + EDMA_CMD_OFS);
+       writelfl(EDMA_DS, port_mmio + EDMA_CMD);
  
         /* Wait for the chip to confirm eDMA is off. */
         for (i = 10000; i > 0; i--) {
-               u32 reg = readl(port_mmio + EDMA_CMD_OFS);
+               u32 reg = readl(port_mmio + EDMA_CMD);
                 if (!(reg & EDMA_EN))
                         return 0;
                 udelay(10);
@@ -896,15 +1210,18 @@ static int mv_stop_edma(struct ata_port *ap)
  {
         void __iomem *port_mmio = mv_ap_base(ap);
         struct mv_port_priv *pp = ap->private_data;
+       int err = 0;
  
         if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
                 return 0;
         pp->pp_flags &= ~MV_PP_FLAG_EDMA_EN;
+       mv_wait_for_edma_empty_idle(ap);
         if (mv_stop_edma_engine(port_mmio)) {
                 ata_port_printk(ap, KERN_ERR, "Unable to stop eDMA\n");
-               return -EIO;
+               err = -EIO;
         }
-       return 0;
+       mv_edma_cfg(ap, 0, 0);
+       return err;
  }
  
  #ifdef ATA_DEBUG
@@ -991,10 +1308,10 @@ static unsigned int mv_scr_offset(unsigned int sc_reg_in)
         case SCR_STATUS:
         case SCR_CONTROL:
         case SCR_ERROR:
-               ofs = SATA_STATUS_OFS + (sc_reg_in * sizeof(u32));
+               ofs = SATA_STATUS + (sc_reg_in * sizeof(u32));
                 break;
         case SCR_ACTIVE:
-               ofs = SATA_ACTIVE_OFS;   /* active is not with the others */
+               ofs = SATA_ACTIVE;   /* active is not with the others */
                 break;
         default:
                 ofs = 0xffffffffU;
@@ -1003,23 +1320,41 @@ static unsigned int mv_scr_offset(unsigned int sc_reg_in)
         return ofs;
  }
  
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
  {
         unsigned int ofs = mv_scr_offset(sc_reg_in);
  
         if (ofs != 0xffffffffU) {
-               *val = readl(mv_ap_base(ap) + ofs);
+               *val = readl(mv_ap_base(link->ap) + ofs);
                 return 0;
         } else
                 return -EINVAL;
  }
  
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
  {
         unsigned int ofs = mv_scr_offset(sc_reg_in);
  
         if (ofs != 0xffffffffU) {
-               writelfl(val, mv_ap_base(ap) + ofs);
+               void __iomem *addr = mv_ap_base(link->ap) + ofs;
+               if (sc_reg_in == SCR_CONTROL) {
+                       /*
+                        * Workaround for 88SX60x1 FEr SATA#26:
+                        *
+                        * COMRESETs have to take care not to accidentally
+                        * put the drive to sleep when writing SCR_CONTROL.
+                        * Setting bits 12..15 prevents this problem.
+                        *
+                        * So if we see an outbound COMRESET, set those bits.
+                        * Ditto for the followup write that clears the reset.
+                        *
+                        * The proprietary driver does this for
+                        * all chip versions, and so do we.
+                        */
+                       if ((val & 0xf) == 1 || (readl(addr) & 0xf) == 1)
+                               val |= 0xf000;
+               }
+               writelfl(val, addr);
                 return 0;
         } else
                 return -EINVAL;
@@ -1032,47 +1367,196 @@ static void mv6_dev_config(struct ata_device *adev)
          *
          * Gen-II does not support NCQ over a port multiplier
          *  (no FIS-based switching).
-        *
-        * We don't have hob_nsect when doing NCQ commands on Gen-II.
-        * See mv_qc_prep() for more info.
          */
         if (adev->flags & ATA_DFLAG_NCQ) {
                 if (sata_pmp_attached(adev->link->ap)) {
                         adev->flags &= ~ATA_DFLAG_NCQ;
                         ata_dev_printk(adev, KERN_INFO,
                                 "NCQ disabled for command-based switching\n");
-               } else if (adev->max_sectors > GEN_II_NCQ_MAX_SECTORS) {
-                       adev->max_sectors = GEN_II_NCQ_MAX_SECTORS;
-                       ata_dev_printk(adev, KERN_INFO,
-                               "max_sectors limited to %u for NCQ\n",
-                               adev->max_sectors);
                 }
         }
  }
  
-static void mv_config_fbs(void __iomem *port_mmio, int enable_fbs)
+static int mv_qc_defer(struct ata_queued_cmd *qc)
  {
-       u32 old_fcfg, new_fcfg, old_ltmode, new_ltmode;
+       struct ata_link *link = qc->dev->link;
+       struct ata_port *ap = link->ap;
+       struct mv_port_priv *pp = ap->private_data;
+
         /*
-        * Various bit settings required for operation
-        * in FIS-based switching (fbs) mode on GenIIe:
+        * Don't allow new commands if we're in a delayed EH state
+        * for NCQ and/or FIS-based switching.
          */
-       old_fcfg   = readl(port_mmio + FIS_CFG_OFS);
-       old_ltmode = readl(port_mmio + LTMODE_OFS);
-       if (enable_fbs) {
-               new_fcfg   = old_fcfg   |  FIS_CFG_SINGLE_SYNC;
-               new_ltmode = old_ltmode |  LTMODE_BIT8;
-       } else { /* disable fbs */
-               new_fcfg   = old_fcfg   & ~FIS_CFG_SINGLE_SYNC;
-               new_ltmode = old_ltmode & ~LTMODE_BIT8;
+       if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
+               return ATA_DEFER_PORT;
+
+       /* PIO commands need an exclusive link: no other commands [DMA or PIO]
+        * can run concurrently.
+        * Set excl_link when we want to send a PIO command in DMA mode
+        * or a non-NCQ command in NCQ mode.
+        * When we receive a command from that link, and there are no
+        * outstanding commands, mark a flag to clear excl_link and let
+        * the command go through.
+        */
+       if (unlikely(ap->excl_link)) {
+               if (link == ap->excl_link) {
+                       if (ap->nr_active_links)
+                               return ATA_DEFER_PORT;
+                       qc->flags |= ATA_QCFLAG_CLEAR_EXCL;
+                       return 0;
+               } else
+                       return ATA_DEFER_PORT;
         }
-       if (new_fcfg != old_fcfg)
-               writelfl(new_fcfg, port_mmio + FIS_CFG_OFS);
-       if (new_ltmode != old_ltmode)
-               writelfl(new_ltmode, port_mmio + LTMODE_OFS);
+
+       /*
+        * If the port is completely idle, then allow the new qc.
+        */
+       if (ap->nr_active_links == 0)
+               return 0;
+
+       /*
+        * If the port is operating in host queuing mode (EDMA) with NCQ
+        * enabled, allow multiple NCQ commands.  EDMA also allows
+        * queueing multiple DMA commands but libata core currently
+        * doesn't allow it.
+        */
+       if ((pp->pp_flags & MV_PP_FLAG_EDMA_EN) &&
+           (pp->pp_flags & MV_PP_FLAG_NCQ_EN)) {
+               if (ata_is_ncq(qc->tf.protocol))
+                       return 0;
+               else {
+                       ap->excl_link = link;
+                       return ATA_DEFER_PORT;
+               }
+       }
+
+       return ATA_DEFER_PORT;
  }
  
-static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
+static void mv_config_fbs(struct ata_port *ap, int want_ncq, int want_fbs)
+{
+       struct mv_port_priv *pp = ap->private_data;
+       void __iomem *port_mmio;
+
+       u32 fiscfg,   *old_fiscfg   = &pp->cached.fiscfg;
+       u32 ltmode,   *old_ltmode   = &pp->cached.ltmode;
+       u32 haltcond, *old_haltcond = &pp->cached.haltcond;
+
+       ltmode   = *old_ltmode & ~LTMODE_BIT8;  /* non-FBS default, overridden below */
+       haltcond = *old_haltcond | EDMA_ERR_DEV;        /* default: EDMA_ERR_DEV is a halt condition */
+
+       if (want_fbs) {
+               fiscfg = *old_fiscfg | FISCFG_SINGLE_SYNC;
+               ltmode = *old_ltmode | LTMODE_BIT8;
+               if (want_ncq)
+                       haltcond &= ~EDMA_ERR_DEV; /* NOTE(review): FISCFG_WAIT_DEV_ERR is not cleared on this path */
+               else
+                       fiscfg |=  FISCFG_WAIT_DEV_ERR;
+       } else {
+               fiscfg = *old_fiscfg & ~(FISCFG_SINGLE_SYNC | FISCFG_WAIT_DEV_ERR);
+       }
+
+       port_mmio = mv_ap_base(ap);
+       mv_write_cached_reg(port_mmio + FISCFG, old_fiscfg, fiscfg);    /* each write is skipped if unchanged */
+       mv_write_cached_reg(port_mmio + LTMODE, old_ltmode, ltmode);
+       mv_write_cached_reg(port_mmio + EDMA_HALTCOND, old_haltcond, haltcond);
+}
+
+static void mv_60x1_errata_sata25(struct ata_port *ap, int want_ncq)
+{
+       struct mv_host_priv *hpriv = ap->host->private_data;
+       u32 old, new;
+
+       /* workaround for 88SX60x1 FEr SATA#25 (part 1) */
+       old = readl(hpriv->base + GPIO_PORT_CTL);       /* addressed from the host base, not the port base */
+       if (want_ncq)
+               new = old | (1 << 22);  /* bit 22 set when NCQ is wanted */
+       else
+               new = old & ~(1 << 22); /* and cleared otherwise */
+       if (new != old)         /* avoid a redundant register write */
+               writel(new, hpriv->base + GPIO_PORT_CTL);
+}
+
+/**
+ *     mv_bmdma_enable_iie - set a magic bit on GEN_IIE to allow bmdma
+ *     @ap: Port being initialized
+ *     @enable_bmdma: nonzero to set the bit, zero to clear it
+ *
+ *     There are two DMA modes on these chips:  basic DMA, and EDMA.
+ *     Bit-0 of the "EDMA RESERVED" register enables/disables use
+ *     of basic DMA on the GEN_IIE versions of the chips.
+ *
+ *     This bit survives EDMA resets, and must be set for basic DMA
+ *     to function, and should be cleared when EDMA is active.
+ */
+static void mv_bmdma_enable_iie(struct ata_port *ap, int enable_bmdma)
+{
+       struct mv_port_priv *pp = ap->private_data;
+       u32 new, *old = &pp->cached.unknown_rsvd;
+
+       if (enable_bmdma)
+               new = *old | 1;
+       else
+               new = *old & ~1;
+       mv_write_cached_reg(mv_ap_base(ap) + EDMA_UNKNOWN_RSVD, old, new); /* no write if unchanged */
+}
+
+/*
+ * SOC chips have an issue whereby the HDD LEDs don't always blink
+ * during I/O when NCQ is enabled. Enabling a special "LED blink" mode
+ * of the SOC takes care of it, generating a steady blink rate when
+ * any drive on the chip is active.
+ *
+ * Unfortunately, the blink mode is a global hardware setting for the SOC,
+ * so we must use it whenever at least one port on the SOC has NCQ enabled.
+ *
+ * We turn "LED blink" off when NCQ is not in use anywhere, because the normal
+ * LED operation works then, and provides better (more accurate) feedback.
+ *
+ * Note that this code assumes that an SOC never has more than one HC onboard.
+ */
+static void mv_soc_led_blink_enable(struct ata_port *ap)
+{
+       struct ata_host *host = ap->host;
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *hc_mmio;
+       u32 led_ctrl;
+
+       if (hpriv->hp_flags & MV_HP_QUIRK_LED_BLINK_EN)
+               return;         /* flag says blink mode is already on */
+       hpriv->hp_flags |= MV_HP_QUIRK_LED_BLINK_EN;
+       hc_mmio = mv_hc_base_from_port(mv_host_base(host), ap->port_no);
+       led_ctrl = readl(hc_mmio + SOC_LED_CTRL);
+       writel(led_ctrl | SOC_LED_CTRL_BLINK, hc_mmio + SOC_LED_CTRL);
+}
+
+static void mv_soc_led_blink_disable(struct ata_port *ap)
+{
+       struct ata_host *host = ap->host;
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *hc_mmio;
+       u32 led_ctrl;
+       unsigned int port;
+
+       if (!(hpriv->hp_flags & MV_HP_QUIRK_LED_BLINK_EN))
+               return;         /* flag says blink mode is already off */
+
+       /* disable led-blink only if no ports are using NCQ */
+       for (port = 0; port < hpriv->n_ports; port++) {
+               struct ata_port *this_ap = host->ports[port];
+               struct mv_port_priv *pp = this_ap->private_data;
+
+               if (pp->pp_flags & MV_PP_FLAG_NCQ_EN)
+                       return; /* some port still has NCQ enabled: leave blink on */
+       }
+
+       hpriv->hp_flags &= ~MV_HP_QUIRK_LED_BLINK_EN;
+       hc_mmio = mv_hc_base_from_port(mv_host_base(host), ap->port_no);
+       led_ctrl = readl(hc_mmio + SOC_LED_CTRL);
+       writel(led_ctrl & ~SOC_LED_CTRL_BLINK, hc_mmio + SOC_LED_CTRL);
+}
+
+static void mv_edma_cfg(struct ata_port *ap, int want_ncq, int want_edma)
  {
         u32 cfg;
         struct mv_port_priv *pp    = ap->private_data;
@@ -1081,34 +1565,59 @@ static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
  
         /* set up non-NCQ EDMA configuration */
         cfg = EDMA_CFG_Q_DEPTH;         /* always 0x1f for *all* chips */
+       pp->pp_flags &=
+         ~(MV_PP_FLAG_FBS_EN | MV_PP_FLAG_NCQ_EN | MV_PP_FLAG_FAKE_ATA_BUSY);
  
         if (IS_GEN_I(hpriv))
                 cfg |= (1 << 8);        /* enab config burst size mask */
  
-       else if (IS_GEN_II(hpriv))
+       else if (IS_GEN_II(hpriv)) {
                 cfg |= EDMA_CFG_RD_BRST_EXT | EDMA_CFG_WR_BUFF_LEN;
+               mv_60x1_errata_sata25(ap, want_ncq);
  
-       else if (IS_GEN_IIE(hpriv)) {
-               cfg |= (1 << 23);       /* do not mask PM field in rx'd FIS */
-               cfg |= (1 << 22);       /* enab 4-entry host queue cache */
-               cfg |= (1 << 18);       /* enab early completion */
-               cfg |= (1 << 17);       /* enab cut-through (dis stor&forwrd) */
+       } else if (IS_GEN_IIE(hpriv)) {
+               int want_fbs = sata_pmp_attached(ap);
+               /*
+                * Possible future enhancement:
+                *
+                * The chip can use FBS with non-NCQ, if we allow it,
+                * But first we need to have the error handling in place
+                * for this mode (datasheet section 7.3.15.4.2.3).
+                * So disallow non-NCQ FBS for now.
+                */
+               want_fbs &= want_ncq;
  
-               if (want_ncq && sata_pmp_attached(ap)) {
+               mv_config_fbs(ap, want_ncq, want_fbs);
+
+               if (want_fbs) {
+                       pp->pp_flags |= MV_PP_FLAG_FBS_EN;
                         cfg |= EDMA_CFG_EDMA_FBS; /* FIS-based switching */
-                       mv_config_fbs(port_mmio, 1);
-               } else {
-                       mv_config_fbs(port_mmio, 0);
+               }
+
+               cfg |= (1 << 23);       /* do not mask PM field in rx'd FIS */
+               if (want_edma) {
+                       cfg |= (1 << 22); /* enab 4-entry host queue cache */
+                       if (!IS_SOC(hpriv))
+                               cfg |= (1 << 18); /* enab early completion */
+               }
+               if (hpriv->hp_flags & MV_HP_CUT_THROUGH)
+                       cfg |= (1 << 17); /* enab cut-thru (dis stor&forwrd) */
+               mv_bmdma_enable_iie(ap, !want_edma);
+
+               if (IS_SOC(hpriv)) {
+                       if (want_ncq)
+                               mv_soc_led_blink_enable(ap);
+                       else
+                               mv_soc_led_blink_disable(ap);
                 }
         }
  
         if (want_ncq) {
                 cfg |= EDMA_CFG_NCQ;
                 pp->pp_flags |=  MV_PP_FLAG_NCQ_EN;
-       } else
-               pp->pp_flags &= ~MV_PP_FLAG_NCQ_EN;
+       }
  
-       writelfl(cfg, port_mmio + EDMA_CFG_OFS);
+       writelfl(cfg, port_mmio + EDMA_CFG);
  }
  
  static void mv_port_free_dma_mem(struct ata_port *ap)
@@ -1155,6 +1664,7 @@ static int mv_port_start(struct ata_port *ap)
         struct device *dev = ap->host->dev;
         struct mv_host_priv *hpriv = ap->host->private_data;
         struct mv_port_priv *pp;
+       unsigned long flags;
         int tag;
  
         pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
@@ -1172,6 +1682,9 @@ static int mv_port_start(struct ata_port *ap)
                 goto out_port_free_dma_mem;
         memset(pp->crpb, 0, MV_CRPB_Q_SZ);
  
+       /* 6041/6081 Rev. "C0" (and newer) are okay with async notify */
+       if (hpriv->hp_flags & MV_HP_ERRATA_60X1C0)
+               ap->flags |= ATA_FLAG_AN;
         /*
          * For GEN_I, there's no NCQ, so we only allocate a single sg_tbl.
          * For later hardware, we need one unique sg_tbl per NCQ tag.
@@ -1187,6 +1700,12 @@ static int mv_port_start(struct ata_port *ap)
                         pp->sg_tbl_dma[tag] = pp->sg_tbl_dma[0];
                 }
         }
+
+       spin_lock_irqsave(ap->lock, flags);
+       mv_save_cached_regs(ap);
+       mv_edma_cfg(ap, 0, 0);
+       spin_unlock_irqrestore(ap->lock, flags);
+
         return 0;
  
  out_port_free_dma_mem:
@@ -1205,7 +1724,12 @@ out_port_free_dma_mem:
   */
  static void mv_port_stop(struct ata_port *ap)
  {
+       unsigned long flags;
+
+       spin_lock_irqsave(ap->lock, flags);
         mv_stop_edma(ap);
+       mv_enable_port_irqs(ap, 0);
+       spin_unlock_irqrestore(ap->lock, flags);
         mv_port_free_dma_mem(ap);
  }
  
@@ -1234,12 +1758,13 @@ static void mv_fill_sg(struct ata_queued_cmd *qc)
                         u32 offset = addr & 0xffff;
                         u32 len = sg_len;
  
-                       if ((offset + sg_len > 0x10000))
+                       if (offset + len > 0x10000)
                                 len = 0x10000 - offset;
  
                         mv_sg->addr = cpu_to_le32(addr & 0xffffffff);
                         mv_sg->addr_hi = cpu_to_le32((addr >> 16) >> 16);
                         mv_sg->flags_size = cpu_to_le32(len & 0xffff);
+                       mv_sg->reserved = 0;
  
                         sg_len -= len;
                         addr += len;
@@ -1251,6 +1776,7 @@ static void mv_fill_sg(struct ata_queued_cmd *qc)
  
         if (likely(last_sg))
                 last_sg->flags_size |= cpu_to_le32(EPRD_FLAG_END_OF_TBL);
+       mb(); /* ensure data structure is visible to the chipset */
  }
  
  static void mv_crqb_pack_cmd(__le16 *cmdw, u8 data, u8 addr, unsigned last)
@@ -1261,6 +1787,180 @@ static void mv_crqb_pack_cmd(__le16 *cmdw, u8 data, u8 addr, unsigned last)
  }
  
  /**
+ *     mv_sff_irq_clear - Clear hardware interrupt after DMA.
+ *     @ap: Port associated with this ATA transaction.
+ *
+ *     We need this only for ATAPI bmdma transactions,
+ *     as otherwise we experience spurious interrupts
+ *     after libata-sff handles the bmdma interrupts.
+ */
+static void mv_sff_irq_clear(struct ata_port *ap)
+{
+       mv_clear_and_enable_port_irqs(ap, mv_ap_base(ap), ERR_IRQ);
+}
+
+/**
+ *     mv_check_atapi_dma - Filter ATAPI cmds which are unsuitable for DMA.
+ *     @qc: queued command to check for chipset/DMA compatibility.
+ *
+ *     The bmdma engines cannot handle speculative data sizes
+ *     (bytecount underflow/overflow).  So only allow DMA for
+ *     data transfer commands with known data sizes.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+static int mv_check_atapi_dma(struct ata_queued_cmd *qc)
+{
+       struct scsi_cmnd *scmd = qc->scsicmd;
+
+       if (scmd) {
+               switch (scmd->cmnd[0]) {
+               case READ_6:
+               case READ_10:
+               case READ_12:
+               case WRITE_6:
+               case WRITE_10:
+               case WRITE_12:
+               case GPCMD_READ_CD:
+               case GPCMD_SEND_DVD_STRUCTURE:
+               case GPCMD_SEND_CUE_SHEET:
+                       return 0; /* DMA is safe */
+               }
+       }
+       return -EOPNOTSUPP; /* use PIO instead */
+}
+
+/**
+ *     mv_bmdma_setup - Set up BMDMA transaction
+ *     @qc: queued command to prepare DMA for.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+static void mv_bmdma_setup(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       void __iomem *port_mmio = mv_ap_base(ap);
+       struct mv_port_priv *pp = ap->private_data;
+
+       mv_fill_sg(qc);
+
+       /* clear all DMA cmd bits */
+       writel(0, port_mmio + BMDMA_CMD);
+
+       /* load PRD table addr. */
+       writel((pp->sg_tbl_dma[qc->tag] >> 16) >> 16,
+               port_mmio + BMDMA_PRD_HIGH);
+       writelfl(pp->sg_tbl_dma[qc->tag],
+               port_mmio + BMDMA_PRD_LOW);
+
+       /* issue r/w command */
+       ap->ops->sff_exec_command(ap, &qc->tf);
+}
+
+/**
+ *     mv_bmdma_start - Start a BMDMA transaction
+ *     @qc: queued command to start DMA on.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+static void mv_bmdma_start(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       void __iomem *port_mmio = mv_ap_base(ap);
+       unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+       u32 cmd = (rw ? 0 : ATA_DMA_WR) | ATA_DMA_START;
+
+       /* start host DMA transaction */
+       writelfl(cmd, port_mmio + BMDMA_CMD);
+}
+
+/**
+ *     mv_bmdma_stop - Stop BMDMA transfer
+ *     @qc: queued command to stop DMA on.
+ *
+ *     Clears the ATA_DMA_START flag in the bmdma control register
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+static void mv_bmdma_stop(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       void __iomem *port_mmio = mv_ap_base(ap);
+       u32 cmd;
+
+       /* clear start/stop bit */
+       cmd = readl(port_mmio + BMDMA_CMD);
+       cmd &= ~ATA_DMA_START;
+       writelfl(cmd, port_mmio + BMDMA_CMD);
+
+       /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+       ata_sff_dma_pause(ap);
+}
+
+/**
+ *     mv_bmdma_status - Read BMDMA status
+ *     @ap: port for which to retrieve DMA status.
+ *
+ *     Read and return equivalent of the sff BMDMA status register.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+static u8 mv_bmdma_status(struct ata_port *ap)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+       u32 reg, status;
+
+       /*
+        * Other bits are valid only if ATA_DMA_ACTIVE==0,
+        * and the ATA_DMA_INTR bit doesn't exist.
+        */
+       reg = readl(port_mmio + BMDMA_STATUS);
+       if (reg & ATA_DMA_ACTIVE)
+               status = ATA_DMA_ACTIVE;
+       else
+               status = (reg & ATA_DMA_ERR) | ATA_DMA_INTR;
+       return status;
+}
+
+static void mv_rw_multi_errata_sata24(struct ata_queued_cmd *qc)
+{
+       struct ata_taskfile *tf = &qc->tf;
+       /*
+        * Workaround for 88SX60x1 FEr SATA#24.
+        *
+        * Chip may corrupt WRITEs if multi_count >= 4kB.
+        * Note that READs are unaffected.
+        *
+        * It's not clear if this erratum really means "4K bytes",
+        * or if it always happens for multi_count > 7
+        * regardless of device sector_size.
+        *
+        * So, for safety, any write with multi_count > 7
+        * gets converted here into a regular PIO write instead:
+        */
+       if ((tf->flags & ATA_TFLAG_WRITE) && is_multi_taskfile(tf)) {
+               if (qc->dev->multi_count > 7) {
+                       switch (tf->command) {
+                       case ATA_CMD_WRITE_MULTI:
+                               tf->command = ATA_CMD_PIO_WRITE;
+                               break;
+                       case ATA_CMD_WRITE_MULTI_FUA_EXT:
+                               tf->flags &= ~ATA_TFLAG_FUA; /* ugh */
+                               /* fall through */
+                       case ATA_CMD_WRITE_MULTI_EXT:
+                               tf->command = ATA_CMD_PIO_WRITE_EXT;
+                               break;
+                       }
+               }
+       }
+}
+
+/**
   *      mv_qc_prep - Host specific command preparation.
   *      @qc: queued command to prepare
   *
@@ -1277,24 +1977,31 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
         struct ata_port *ap = qc->ap;
         struct mv_port_priv *pp = ap->private_data;
         __le16 *cw;
-       struct ata_taskfile *tf;
+       struct ata_taskfile *tf = &qc->tf;
         u16 flags = 0;
         unsigned in_index;
  
-       if ((qc->tf.protocol != ATA_PROT_DMA) &&
-           (qc->tf.protocol != ATA_PROT_NCQ))
+       switch (tf->protocol) {
+       case ATA_PROT_DMA:
+       case ATA_PROT_NCQ:
+               break;  /* continue below */
+       case ATA_PROT_PIO:
+               mv_rw_multi_errata_sata24(qc);
                 return;
+       default:
+               return;
+       }
  
         /* Fill in command request block
          */
-       if (!(qc->tf.flags & ATA_TFLAG_WRITE))
+       if (!(tf->flags & ATA_TFLAG_WRITE))
                 flags |= CRQB_FLAG_READ;
         WARN_ON(MV_MAX_Q_DEPTH <= qc->tag);
         flags |= qc->tag << CRQB_TAG_SHIFT;
         flags |= (qc->dev->link->pmp & 0xf) << CRQB_PMP_SHIFT;
  
         /* get current queue index from software */
-       in_index = pp->req_idx & MV_MAX_Q_DEPTH_MASK;
+       in_index = pp->req_idx;
  
         pp->crqb[in_index].sg_addr =
                 cpu_to_le32(pp->sg_tbl_dma[qc->tag] & 0xffffffff);
@@ -1303,13 +2010,13 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
         pp->crqb[in_index].ctrl_flags = cpu_to_le16(flags);
  
         cw = &pp->crqb[in_index].ata_cmd[0];
-       tf = &qc->tf;
  
         /* Sadly, the CRQB cannot accomodate all registers--there are
          * only 11 bytes...so we must pick and choose required
          * registers based on the command.  So, we drop feature and
          * hob_feature for [RW] DMA commands, but they are needed for
-        * NCQ.  NCQ will drop hob_nsect.
+        * NCQ.  NCQ will drop hob_nsect, which is not needed there
+        * (nsect is used only for the tag; feat/hob_feat hold true nsect).
          */
         switch (tf->command) {
         case ATA_CMD_READ:
@@ -1368,16 +2075,16 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
         struct ata_port *ap = qc->ap;
         struct mv_port_priv *pp = ap->private_data;
         struct mv_crqb_iie *crqb;
-       struct ata_taskfile *tf;
+       struct ata_taskfile *tf = &qc->tf;
         unsigned in_index;
         u32 flags = 0;
  
-       if ((qc->tf.protocol != ATA_PROT_DMA) &&
-           (qc->tf.protocol != ATA_PROT_NCQ))
+       if ((tf->protocol != ATA_PROT_DMA) &&
+           (tf->protocol != ATA_PROT_NCQ))
                 return;
  
         /* Fill in Gen IIE command request block */
-       if (!(qc->tf.flags & ATA_TFLAG_WRITE))
+       if (!(tf->flags & ATA_TFLAG_WRITE))
                 flags |= CRQB_FLAG_READ;
  
         WARN_ON(MV_MAX_Q_DEPTH <= qc->tag);
@@ -1386,14 +2093,13 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
         flags |= (qc->dev->link->pmp & 0xf) << CRQB_PMP_SHIFT;
  
         /* get current queue index from software */
-       in_index = pp->req_idx & MV_MAX_Q_DEPTH_MASK;
+       in_index = pp->req_idx;
  
         crqb = (struct mv_crqb_iie *) &pp->crqb[in_index];
         crqb->addr = cpu_to_le32(pp->sg_tbl_dma[qc->tag] & 0xffffffff);
         crqb->addr_hi = cpu_to_le32((pp->sg_tbl_dma[qc->tag] >> 16) >> 16);
         crqb->flags = cpu_to_le32(flags);
  
-       tf = &qc->tf;
         crqb->ata_cmd[0] = cpu_to_le32(
                         (tf->command << 16) |
                         (tf->feature << 24)
@@ -1421,6 +2127,132 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
  }
  
  /**
+ *     mv_sff_check_status - fetch device status, if valid
+ *     @ap: ATA port to fetch status from
+ *
+ *     When using command issue via mv_qc_issue_fis(),
+ *     the initial ATA_BUSY state does not show up in the
+ *     ATA status (shadow) register.  This can confuse libata!
+ *
+ *     So we have a hook here to fake ATA_BUSY for that situation,
+ *     until the first time a BUSY, DRQ, or ERR bit is seen.
+ *
+ *     The rest of the time, it simply returns the ATA status register.
+ */
+static u8 mv_sff_check_status(struct ata_port *ap)
+{
+       u8 stat = ioread8(ap->ioaddr.status_addr);
+       struct mv_port_priv *pp = ap->private_data;
+
+       if (pp->pp_flags & MV_PP_FLAG_FAKE_ATA_BUSY) {
+               if (stat & (ATA_BUSY | ATA_DRQ | ATA_ERR))
+                       pp->pp_flags &= ~MV_PP_FLAG_FAKE_ATA_BUSY;
+               else
+                       stat = ATA_BUSY;
+       }
+       return stat;
+}
+
+/**
+ *     mv_send_fis - Send a FIS, using the "Vendor-Unique FIS" register
+ *     @fis: fis to be sent
+ *     @nwords: number of 32-bit words in the fis
+ */
+static unsigned int mv_send_fis(struct ata_port *ap, u32 *fis, int nwords)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+       u32 ifctl, old_ifctl, ifstat;
+       int i, timeout = 200, final_word = nwords - 1;
+
+       /* Initiate FIS transmission mode */
+       old_ifctl = readl(port_mmio + SATA_IFCTL);
+       ifctl = 0x100 | (old_ifctl & 0xf);
+       writelfl(ifctl, port_mmio + SATA_IFCTL);
+
+       /* Send all words of the FIS except for the final word */
+       for (i = 0; i < final_word; ++i)
+               writel(fis[i], port_mmio + VENDOR_UNIQUE_FIS);
+
+       /* Flag end-of-transmission, and then send the final word */
+       writelfl(ifctl | 0x200, port_mmio + SATA_IFCTL);
+       writelfl(fis[final_word], port_mmio + VENDOR_UNIQUE_FIS);
+
+       /*
+        * Wait for FIS transmission to complete.
+        * This typically takes just a single iteration.
+        */
+       do {
+               ifstat = readl(port_mmio + SATA_IFSTAT);
+       } while (!(ifstat & 0x1000) && --timeout);
+
+       /* Restore original port configuration */
+       writelfl(old_ifctl, port_mmio + SATA_IFCTL);
+
+       /* See if it worked */
+       if ((ifstat & 0x3000) != 0x1000) {
+               ata_port_printk(ap, KERN_WARNING,
+                               "%s transmission error, ifstat=%08x\n",
+                               __func__, ifstat);
+               return AC_ERR_OTHER;
+       }
+       return 0;
+}
+
+/**
+ *     mv_qc_issue_fis - Issue a command directly as a FIS
+ *     @qc: queued command to start
+ *
+ *     Note that the ATA shadow registers are not updated
+ *     after command issue, so the device will appear "READY"
+ *     if polled, even while it is BUSY processing the command.
+ *
+ *     So we use a status hook to fake ATA_BUSY until the drive changes state.
+ *
+ *     Note: we don't get updated shadow regs on *completion*
+ *     of non-data commands. So avoid sending them via this function,
+ *     as they will appear to have completed immediately.
+ *
+ *     GEN_IIE has special registers that we could get the result tf from,
+ *     but earlier chipsets do not.  For now, we ignore those registers.
+ */
+static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       struct mv_port_priv *pp = ap->private_data;
+       struct ata_link *link = qc->dev->link;
+       u32 fis[5];
+       int err = 0;
+
+       ata_tf_to_fis(&qc->tf, link->pmp, 1, (void *)fis);
+       err = mv_send_fis(ap, fis, ARRAY_SIZE(fis));
+       if (err)
+               return err;
+
+       switch (qc->tf.protocol) {
+       case ATAPI_PROT_PIO:
+               pp->pp_flags |= MV_PP_FLAG_FAKE_ATA_BUSY;
+               /* fall through */
+       case ATAPI_PROT_NODATA:
+               ap->hsm_task_state = HSM_ST_FIRST;
+               break;
+       case ATA_PROT_PIO:
+               pp->pp_flags |= MV_PP_FLAG_FAKE_ATA_BUSY;
+               if (qc->tf.flags & ATA_TFLAG_WRITE)
+                       ap->hsm_task_state = HSM_ST_FIRST;
+               else
+                       ap->hsm_task_state = HSM_ST;
+               break;
+       default:
+               ap->hsm_task_state = HSM_ST_LAST;
+               break;
+       }
+
+       if (qc->tf.flags & ATA_TFLAG_POLLING)
+               ata_pio_queue_task(ap, qc, 0);
+       return 0;
+}
+
+/**
   *      mv_qc_issue - Initiate a command to the host
   *      @qc: queued command to start
   *
@@ -1434,79 +2266,372 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
   */
  static unsigned int mv_qc_issue(struct ata_queued_cmd *qc)
  {
+       static int limit_warnings = 10;
         struct ata_port *ap = qc->ap;
         void __iomem *port_mmio = mv_ap_base(ap);
         struct mv_port_priv *pp = ap->private_data;
         u32 in_index;
+       unsigned int port_irqs;
  
-       if ((qc->tf.protocol != ATA_PROT_DMA) &&
-           (qc->tf.protocol != ATA_PROT_NCQ)) {
+       pp->pp_flags &= ~MV_PP_FLAG_FAKE_ATA_BUSY; /* paranoia */
+
+       switch (qc->tf.protocol) {
+       case ATA_PROT_DMA:
+       case ATA_PROT_NCQ:
+               mv_start_edma(ap, port_mmio, pp, qc->tf.protocol);
+               pp->req_idx = (pp->req_idx + 1) & MV_MAX_Q_DEPTH_MASK;
+               in_index = pp->req_idx << EDMA_REQ_Q_PTR_SHIFT;
+
+               /* Write the request in pointer to kick the EDMA to life */
+               writelfl((pp->crqb_dma & EDMA_REQ_Q_BASE_LO_MASK) | in_index,
+                                       port_mmio + EDMA_REQ_Q_IN_PTR);
+               return 0;
+
+       case ATA_PROT_PIO:
                 /*
-                * We're about to send a non-EDMA capable command to the
-                * port.  Turn off EDMA so there won't be problems accessing
-                * shadow block, etc registers.
+                * Errata SATA#16, SATA#24: warn if multiple DRQs expected.
+                *
+                * Someday, we might implement special polling workarounds
+                * for these, but it all seems rather unnecessary since we
+                * normally use only DMA for commands which transfer more
+                * than a single block of data.
+                *
+                * Much of the time, this could just work regardless.
+                * So for now, just log the incident, and allow the attempt.
                  */
+               if (limit_warnings > 0 && (qc->nbytes / qc->sect_size) > 1) {
+                       --limit_warnings;
+                       ata_link_printk(qc->dev->link, KERN_WARNING, DRV_NAME
+                                       ": attempting PIO w/multiple DRQ: "
+                                       "this may fail due to h/w errata\n");
+               }
+               /* fall through */
+       case ATA_PROT_NODATA:
+       case ATAPI_PROT_PIO:
+       case ATAPI_PROT_NODATA:
+               if (ap->flags & ATA_FLAG_PIO_POLLING)
+                       qc->tf.flags |= ATA_TFLAG_POLLING;
+               break;
+       }
+
+       if (qc->tf.flags & ATA_TFLAG_POLLING)
+               port_irqs = ERR_IRQ;    /* mask device interrupt when polling */
+       else
+               port_irqs = ERR_IRQ | DONE_IRQ; /* unmask all interrupts */
+
+       /*
+        * We're about to send a non-EDMA capable command to the
+        * port.  Turn off EDMA so there won't be problems accessing
+        * shadow block, etc registers.
+        */
+       mv_stop_edma(ap);
+       mv_clear_and_enable_port_irqs(ap, mv_ap_base(ap), port_irqs);
+       mv_pmp_select(ap, qc->dev->link->pmp);
+
+       if (qc->tf.command == ATA_CMD_READ_LOG_EXT) {
+               struct mv_host_priv *hpriv = ap->host->private_data;
+               /*
+                * Workaround for 88SX60x1 FEr SATA#25 (part 2).
+                *
+                * After any NCQ error, the READ_LOG_EXT command
+                * from libata-eh *must* use mv_qc_issue_fis().
+                * Otherwise it might fail, due to chip errata.
+                *
+                * Rather than special-case it, we'll just *always*
+                * use this method here for READ_LOG_EXT, making for
+                * easier testing.
+                */
+               if (IS_GEN_II(hpriv))
+                       return mv_qc_issue_fis(qc);
+       }
+       return ata_sff_qc_issue(qc);
+}
+
+static struct ata_queued_cmd *mv_get_active_qc(struct ata_port *ap)
+{
+       struct mv_port_priv *pp = ap->private_data;
+       struct ata_queued_cmd *qc;
+
+       if (pp->pp_flags & MV_PP_FLAG_NCQ_EN)
+               return NULL;
+       qc = ata_qc_from_tag(ap, ap->link.active_tag);
+       if (qc) {
+               if (qc->tf.flags & ATA_TFLAG_POLLING)
+                       qc = NULL;
+               else if (!(qc->flags & ATA_QCFLAG_ACTIVE))
+                       qc = NULL;
+       }
+       return qc;
+}
+
+static void mv_pmp_error_handler(struct ata_port *ap)
+{
+       unsigned int pmp, pmp_map;
+       struct mv_port_priv *pp = ap->private_data;
+
+       if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH) {
+               /*
+                * Perform NCQ error analysis on failed PMPs
+                * before we freeze the port entirely.
+                *
+                * The failed PMPs are marked earlier by mv_pmp_eh_prep().
+                */
+               pmp_map = pp->delayed_eh_pmp_map;
+               pp->pp_flags &= ~MV_PP_FLAG_DELAYED_EH;
+               for (pmp = 0; pmp_map != 0; pmp++) {
+                       unsigned int this_pmp = (1 << pmp);
+                       if (pmp_map & this_pmp) {
+                               struct ata_link *link = &ap->pmp_link[pmp];
+                               pmp_map &= ~this_pmp;
+                               ata_eh_analyze_ncq_error(link);
+                       }
+               }
+               ata_port_freeze(ap);
+       }
+       sata_pmp_error_handler(ap);
+}
+
+static unsigned int mv_get_err_pmp_map(struct ata_port *ap)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+
+       return readl(port_mmio + SATA_TESTCTL) >> 16;
+}
+
+static void mv_pmp_eh_prep(struct ata_port *ap, unsigned int pmp_map)
+{
+       struct ata_eh_info *ehi;
+       unsigned int pmp;
+
+       /*
+        * Initialize EH info for PMPs which saw device errors
+        */
+       ehi = &ap->link.eh_info;
+       for (pmp = 0; pmp_map != 0; pmp++) {
+               unsigned int this_pmp = (1 << pmp);
+               if (pmp_map & this_pmp) {
+                       struct ata_link *link = &ap->pmp_link[pmp];
+
+                       pmp_map &= ~this_pmp;
+                       ehi = &link->eh_info;
+                       ata_ehi_clear_desc(ehi);
+                       ata_ehi_push_desc(ehi, "dev err");
+                       ehi->err_mask |= AC_ERR_DEV;
+                       ehi->action |= ATA_EH_RESET;
+                       ata_link_abort(link);
+               }
+       }
+}
+
+static int mv_req_q_empty(struct ata_port *ap)
+{
+       void __iomem *port_mmio = mv_ap_base(ap);
+       u32 in_ptr, out_ptr;
+
+       in_ptr  = (readl(port_mmio + EDMA_REQ_Q_IN_PTR)
+                       >> EDMA_REQ_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK;
+       out_ptr = (readl(port_mmio + EDMA_REQ_Q_OUT_PTR)
+                       >> EDMA_REQ_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK;
+       return (in_ptr == out_ptr);     /* 1 == queue_is_empty */
+}
+
+static int mv_handle_fbs_ncq_dev_err(struct ata_port *ap)
+{
+       struct mv_port_priv *pp = ap->private_data;
+       int failed_links;
+       unsigned int old_map, new_map;
+
+       /*
+        * Device error during FBS+NCQ operation:
+        *
+        * Set a port flag to prevent further I/O being enqueued.
+        * Leave the EDMA running to drain outstanding commands from this port.
+        * Perform the post-mortem/EH only when all responses are complete.
+        * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.2).
+        */
+       if (!(pp->pp_flags & MV_PP_FLAG_DELAYED_EH)) {
+               pp->pp_flags |= MV_PP_FLAG_DELAYED_EH;
+               pp->delayed_eh_pmp_map = 0;
+       }
+       old_map = pp->delayed_eh_pmp_map;
+       new_map = old_map | mv_get_err_pmp_map(ap);
+
+       if (old_map != new_map) {
+               pp->delayed_eh_pmp_map = new_map;
+               mv_pmp_eh_prep(ap, new_map & ~old_map);
+       }
+       failed_links = hweight16(new_map);
+
+       ata_port_printk(ap, KERN_INFO, "%s: pmp_map=%04x qc_map=%04x "
+                       "failed_links=%d nr_active_links=%d\n",
+                       __func__, pp->delayed_eh_pmp_map,
+                       ap->qc_active, failed_links,
+                       ap->nr_active_links);
+
+       if (ap->nr_active_links <= failed_links && mv_req_q_empty(ap)) {
+               mv_process_crpb_entries(ap, pp);
                 mv_stop_edma(ap);
-               mv_pmp_select(ap, qc->dev->link->pmp);
-               return ata_sff_qc_issue(qc);
+               mv_eh_freeze(ap);
+               ata_port_printk(ap, KERN_INFO, "%s: done\n", __func__);
+               return 1;       /* handled */
         }
+       ata_port_printk(ap, KERN_INFO, "%s: waiting\n", __func__);
+       return 1;       /* handled */
+}
+
+static int mv_handle_fbs_non_ncq_dev_err(struct ata_port *ap)
+{
+       /*
+        * Possible future enhancement:
+        *
+        * FBS+non-NCQ operation is not yet implemented.
+        * See related notes in mv_edma_cfg().
+        *
+        * Device error during FBS+non-NCQ operation:
+        *
+        * We need to snapshot the shadow registers for each failed command.
+        * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.3).
+        */
+       return 0;       /* not handled */
+}
  
-       mv_start_dma(ap, port_mmio, pp, qc->tf.protocol);
+static int mv_handle_dev_err(struct ata_port *ap, u32 edma_err_cause)
+{
+       struct mv_port_priv *pp = ap->private_data;
  
-       pp->req_idx++;
+       if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
+               return 0;       /* EDMA was not active: not handled */
+       if (!(pp->pp_flags & MV_PP_FLAG_FBS_EN))
+               return 0;       /* FBS was not active: not handled */
  
-       in_index = (pp->req_idx & MV_MAX_Q_DEPTH_MASK) << EDMA_REQ_Q_PTR_SHIFT;
+       if (!(edma_err_cause & EDMA_ERR_DEV))
+               return 0;       /* non DEV error: not handled */
+       edma_err_cause &= ~EDMA_ERR_IRQ_TRANSIENT;
+       if (edma_err_cause & ~(EDMA_ERR_DEV | EDMA_ERR_SELF_DIS))
+               return 0;       /* other problems: not handled */
  
-       /* and write the request in pointer to kick the EDMA to life */
-       writelfl((pp->crqb_dma & EDMA_REQ_Q_BASE_LO_MASK) | in_index,
-                port_mmio + EDMA_REQ_Q_IN_PTR_OFS);
+       if (pp->pp_flags & MV_PP_FLAG_NCQ_EN) {
+               /*
+                * EDMA should NOT have self-disabled for this case.
+                * If it did, then something is wrong elsewhere,
+                * and we cannot handle it here.
+                */
+               if (edma_err_cause & EDMA_ERR_SELF_DIS) {
+                       ata_port_printk(ap, KERN_WARNING,
+                               "%s: err_cause=0x%x pp_flags=0x%x\n",
+                               __func__, edma_err_cause, pp->pp_flags);
+                       return 0; /* not handled */
+               }
+               return mv_handle_fbs_ncq_dev_err(ap);
+       } else {
+               /*
+                * EDMA should have self-disabled for this case.
+                * If it did not, then something is wrong elsewhere,
+                * and we cannot handle it here.
+                */
+               if (!(edma_err_cause & EDMA_ERR_SELF_DIS)) {
+                       ata_port_printk(ap, KERN_WARNING,
+                               "%s: err_cause=0x%x pp_flags=0x%x\n",
+                               __func__, edma_err_cause, pp->pp_flags);
+                       return 0; /* not handled */
+               }
+               return mv_handle_fbs_non_ncq_dev_err(ap);
+       }
+       return 0;       /* not handled */
+}
  
-       return 0;
+static void mv_unexpected_intr(struct ata_port *ap, int edma_was_enabled)
+{
+       struct ata_eh_info *ehi = &ap->link.eh_info;
+       char *when = "idle";
+
+       ata_ehi_clear_desc(ehi);
+       if (ap->flags & ATA_FLAG_DISABLED) {
+               when = "disabled";
+       } else if (edma_was_enabled) {
+               when = "EDMA enabled";
+       } else {
+               struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
+               if (qc && (qc->tf.flags & ATA_TFLAG_POLLING))
+                       when = "polling";
+       }
+       ata_ehi_push_desc(ehi, "unexpected device interrupt while %s", when);
+       ehi->err_mask |= AC_ERR_OTHER;
+       ehi->action   |= ATA_EH_RESET;
+       ata_port_freeze(ap);
  }
  
  /**
   *      mv_err_intr - Handle error interrupts on the port
   *      @ap: ATA channel to manipulate
- *      @reset_allowed: bool: 0 == don't trigger from reset here
   *
- *      In most cases, just clear the interrupt and move on.  However,
- *      some cases require an eDMA reset, which also performs a COMRESET.
- *      The SERR case requires a clear of pending errors in the SATA
- *      SERROR register.  Finally, if the port disabled DMA,
- *      update our cached copy to match.
+ *      Most cases require a full reset of the chip's state machine,
+ *      which also performs a COMRESET.
+ *      Also, if the port disabled DMA, update our cached copy to match.
   *
   *      LOCKING:
   *      Inherited from caller.
   */
-static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
+static void mv_err_intr(struct ata_port *ap)
  {
         void __iomem *port_mmio = mv_ap_base(ap);
         u32 edma_err_cause, eh_freeze_mask, serr = 0;
+       u32 fis_cause = 0;
         struct mv_port_priv *pp = ap->private_data;
         struct mv_host_priv *hpriv = ap->host->private_data;
-       unsigned int edma_enabled = (pp->pp_flags & MV_PP_FLAG_EDMA_EN);
         unsigned int action = 0, err_mask = 0;
         struct ata_eh_info *ehi = &ap->link.eh_info;
+       struct ata_queued_cmd *qc;
+       int abort = 0;
  
-       ata_ehi_clear_desc(ehi);
+       /*
+        * Read and clear the SError and err_cause bits.
+        * For GenIIe, if EDMA_ERR_TRANS_IRQ_7 is set, we also must read/clear
+        * the FIS_IRQ_CAUSE register before clearing edma_err_cause.
+        */
+       sata_scr_read(&ap->link, SCR_ERROR, &serr);
+       sata_scr_write_flush(&ap->link, SCR_ERROR, serr);
  
-       if (!edma_enabled) {
-               /* just a guess: do we need to do this? should we
-                * expand this, and do it in all cases?
-                */
-               sata_scr_read(&ap->link, SCR_ERROR, &serr);
-               sata_scr_write_flush(&ap->link, SCR_ERROR, serr);
+       edma_err_cause = readl(port_mmio + EDMA_ERR_IRQ_CAUSE);
+       if (IS_GEN_IIE(hpriv) && (edma_err_cause & EDMA_ERR_TRANS_IRQ_7)) {
+               fis_cause = readl(port_mmio + FIS_IRQ_CAUSE);
+               writelfl(~fis_cause, port_mmio + FIS_IRQ_CAUSE);
         }
+       writelfl(~edma_err_cause, port_mmio + EDMA_ERR_IRQ_CAUSE);
  
-       edma_err_cause = readl(port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
-
-       ata_ehi_push_desc(ehi, "edma_err_cause=%08x", edma_err_cause);
+       if (edma_err_cause & EDMA_ERR_DEV) {
+               /*
+                * Device errors during FIS-based switching operation
+                * require special handling.
+                */
+               if (mv_handle_dev_err(ap, edma_err_cause))
+                       return;
+       }
  
+       qc = mv_get_active_qc(ap);
+       ata_ehi_clear_desc(ehi);
+       ata_ehi_push_desc(ehi, "edma_err_cause=%08x pp_flags=%08x",
+                         edma_err_cause, pp->pp_flags);
+
+       if (IS_GEN_IIE(hpriv) && (edma_err_cause & EDMA_ERR_TRANS_IRQ_7)) {
+               ata_ehi_push_desc(ehi, "fis_cause=%08x", fis_cause);
+               if (fis_cause & FIS_IRQ_CAUSE_AN) {
+                       u32 ec = edma_err_cause &
+                              ~(EDMA_ERR_TRANS_IRQ_7 | EDMA_ERR_IRQ_TRANSIENT);
+                       sata_async_notification(ap);
+                       if (!ec)
+                               return; /* Just an AN; no need for the nukes */
+                       ata_ehi_push_desc(ehi, "SDB notify");
+               }
+       }
         /*
          * All generations share these EDMA error cause bits:
          */
-       if (edma_err_cause & EDMA_ERR_DEV)
+       if (edma_err_cause & EDMA_ERR_DEV) {
                 err_mask |= AC_ERR_DEV;
+               action |= ATA_EH_RESET;
+               ata_ehi_push_desc(ehi, "dev error");
+       }
         if (edma_err_cause & (EDMA_ERR_D_PAR | EDMA_ERR_PRD_PAR |
                         EDMA_ERR_CRQB_PAR | EDMA_ERR_CRPB_PAR |
                         EDMA_ERR_INTRL_PAR)) {
@@ -1538,16 +2663,12 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
                         ata_ehi_push_desc(ehi, "EDMA self-disable");
                 }
                 if (edma_err_cause & EDMA_ERR_SERR) {
-                       sata_scr_read(&ap->link, SCR_ERROR, &serr);
-                       sata_scr_write_flush(&ap->link, SCR_ERROR, serr);
-                       err_mask = AC_ERR_ATA_BUS;
+                       ata_ehi_push_desc(ehi, "SError=%08x", serr);
+                       err_mask |= AC_ERR_ATA_BUS;
                         action |= ATA_EH_RESET;
                 }
         }
  
-       /* Clear EDMA now that SERR cleanup done */
-       writelfl(~edma_err_cause, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
-
         if (!err_mask) {
                 err_mask = AC_ERR_OTHER;
                 action |= ATA_EH_RESET;
@@ -1561,185 +2682,213 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
         else
                 ehi->err_mask |= err_mask;
  
-       if (edma_err_cause & eh_freeze_mask)
+       if (err_mask == AC_ERR_DEV) {
+               /*
+                * Cannot do ata_port_freeze() here,
+                * because it would kill PIO access,
+                * which is needed for further diagnosis.
+                */
+               mv_eh_freeze(ap);
+               abort = 1;
+       } else if (edma_err_cause & eh_freeze_mask) {
+               /*
+                * Note to self: ata_port_freeze() calls ata_port_abort()
+                */
                 ata_port_freeze(ap);
-       else
-               ata_port_abort(ap);
+       } else {
+               abort = 1;
+       }
+
+       if (abort) {
+               if (qc)
+                       ata_link_abort(qc->dev->link);
+               else
+                       ata_port_abort(ap);
+       }
  }
  
-static void mv_intr_pio(struct ata_port *ap)
+static void mv_process_crpb_response(struct ata_port *ap,
+               struct mv_crpb *response, unsigned int tag, int ncq_enabled)
  {
-       struct ata_queued_cmd *qc;
-       u8 ata_status;
-
-       /* ignore spurious intr if drive still BUSY */
-       ata_status = readb(ap->ioaddr.status_addr);
-       if (unlikely(ata_status & ATA_BUSY))
-               return;
-
-       /* get active ATA command */
-       qc = ata_qc_from_tag(ap, ap->link.active_tag);
-       if (unlikely(!qc))                      /* no active tag */
-               return;
-       if (qc->tf.flags & ATA_TFLAG_POLLING)   /* polling; we don't own qc */
-               return;
+       struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
  
-       /* and finally, complete the ATA command */
-       qc->err_mask |= ac_err_mask(ata_status);
-       ata_qc_complete(qc);
+       if (qc) {
+               u8 ata_status;
+               u16 edma_status = le16_to_cpu(response->flags);
+               /*
+                * edma_status from a response queue entry:
+                *   LSB is from EDMA_ERR_IRQ_CAUSE (non-NCQ only).
+                *   MSB is saved ATA status from command completion.
+                */
+               if (!ncq_enabled) {
+                       u8 err_cause = edma_status & 0xff & ~EDMA_ERR_DEV;
+                       if (err_cause) {
+                               /*
+                                * Error will be seen/handled by mv_err_intr().
+                                * So do nothing at all here.
+                                */
+                               return;
+                       }
+               }
+               ata_status = edma_status >> CRPB_FLAG_STATUS_SHIFT;
+               if (!ac_err_mask(ata_status))
+                       ata_qc_complete(qc);
+               /* else: leave it for mv_err_intr() */
+       } else {
+               ata_port_printk(ap, KERN_ERR, "%s: no qc for tag=%d\n",
+                               __func__, tag);
+       }
  }
  
-static void mv_intr_edma(struct ata_port *ap)
+static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp)
  {
         void __iomem *port_mmio = mv_ap_base(ap);
         struct mv_host_priv *hpriv = ap->host->private_data;
-       struct mv_port_priv *pp = ap->private_data;
-       struct ata_queued_cmd *qc;
-       u32 out_index, in_index;
+       u32 in_index;
         bool work_done = false;
+       int ncq_enabled = (pp->pp_flags & MV_PP_FLAG_NCQ_EN);
  
-       /* get h/w response queue pointer */
-       in_index = (readl(port_mmio + EDMA_RSP_Q_IN_PTR_OFS)
+       /* Get the hardware queue position index */
+       in_index = (readl(port_mmio + EDMA_RSP_Q_IN_PTR)
                         >> EDMA_RSP_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK;
  
-       while (1) {
-               u16 status;
+       /* Process new responses received since the last time we looked */
+       while (in_index != pp->resp_idx) {
                 unsigned int tag;
+               struct mv_crpb *response = &pp->crpb[pp->resp_idx];
  
-               /* get s/w response queue last-read pointer, and compare */
-               out_index = pp->resp_idx & MV_MAX_Q_DEPTH_MASK;
-               if (in_index == out_index)
-                       break;
+               pp->resp_idx = (pp->resp_idx + 1) & MV_MAX_Q_DEPTH_MASK;
  
-               /* 50xx: get active ATA command */
-               if (IS_GEN_I(hpriv))
+               if (IS_GEN_I(hpriv)) {
+                       /* 50xx: no NCQ, only one command active at a time */
                         tag = ap->link.active_tag;
-
-               /* Gen II/IIE: get active ATA command via tag, to enable
-                * support for queueing.  this works transparently for
-                * queued and non-queued modes.
-                */
-               else
-                       tag = le16_to_cpu(pp->crpb[out_index].id) & 0x1f;
-
-               qc = ata_qc_from_tag(ap, tag);
-
-               /* For non-NCQ mode, the lower 8 bits of status
-                * are from EDMA_ERR_IRQ_CAUSE_OFS,
-                * which should be zero if all went well.
-                */
-               status = le16_to_cpu(pp->crpb[out_index].flags);
-               if ((status & 0xff) && !(pp->pp_flags & MV_PP_FLAG_NCQ_EN)) {
-                       mv_err_intr(ap, qc);
-                       return;
-               }
-
-               /* and finally, complete the ATA command */
-               if (qc) {
-                       qc->err_mask |=
-                               ac_err_mask(status >> CRPB_FLAG_STATUS_SHIFT);
-                       ata_qc_complete(qc);
+               } else {
+                       /* Gen II/IIE: get command tag from CRPB entry */
+                       tag = le16_to_cpu(response->id) & 0x1f;
                 }
-
-               /* advance software response queue pointer, to
-                * indicate (after the loop completes) to hardware
-                * that we have consumed a response queue entry.
-                */
+               mv_process_crpb_response(ap, response, tag, ncq_enabled);
                 work_done = true;
-               pp->resp_idx++;
         }
  
         /* Update the software queue position index in hardware */
         if (work_done)
                 writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) |
-                        (out_index << EDMA_RSP_Q_PTR_SHIFT),
-                        port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
+                        (pp->resp_idx << EDMA_RSP_Q_PTR_SHIFT),
+                        port_mmio + EDMA_RSP_Q_OUT_PTR);
+}
+
+static void mv_port_intr(struct ata_port *ap, u32 port_cause)
+{
+       struct mv_port_priv *pp;
+       int edma_was_enabled;
+
+       if (ap->flags & ATA_FLAG_DISABLED) {
+               mv_unexpected_intr(ap, 0);
+               return;
+       }
+       /*
+        * Grab a snapshot of the EDMA_EN flag setting,
+        * so that we have a consistent view for this port,
+        * even if one of the routines we call changes it.
+        */
+       pp = ap->private_data;
+       edma_was_enabled = (pp->pp_flags & MV_PP_FLAG_EDMA_EN);
+       /*
+        * Process completed CRPB response(s) before other events.
+        */
+       if (edma_was_enabled && (port_cause & DONE_IRQ)) {
+               mv_process_crpb_entries(ap, pp);
+               if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
+                       mv_handle_fbs_ncq_dev_err(ap);
+       }
+       /*
+        * Handle chip-reported errors, or continue on to handle PIO.
+        */
+       if (unlikely(port_cause & ERR_IRQ)) {
+               mv_err_intr(ap);
+       } else if (!edma_was_enabled) {
+               struct ata_queued_cmd *qc = mv_get_active_qc(ap);
+               if (qc)
+                       ata_sff_host_intr(ap, qc);
+               else
+                       mv_unexpected_intr(ap, edma_was_enabled);
+       }
  }
  
  /**
   *      mv_host_intr - Handle all interrupts on the given host controller
   *      @host: host specific structure
- *      @relevant: port error bits relevant to this host controller
- *      @hc: which host controller we're to look at
- *
- *      Read then write clear the HC interrupt status then walk each
- *      port connected to the HC and see if it needs servicing.  Port
- *      success ints are reported in the HC interrupt status reg, the
- *      port error ints are reported in the higher level main
- *      interrupt status register and thus are passed in via the
- *      'relevant' argument.
+ *      @main_irq_cause: Main interrupt cause register for the chip.
   *
   *      LOCKING:
   *      Inherited from caller.
   */
-static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc)
+static int mv_host_intr(struct ata_host *host, u32 main_irq_cause)
  {
         struct mv_host_priv *hpriv = host->private_data;
-       void __iomem *mmio = hpriv->base;
-       void __iomem *hc_mmio = mv_hc_base(mmio, hc);
-       u32 hc_irq_cause;
-       int port, port0, last_port;
-
-       if (hc == 0)
-               port0 = 0;
-       else
-               port0 = MV_PORTS_PER_HC;
-
-       if (HAS_PCI(host))
-               last_port = port0 + MV_PORTS_PER_HC;
-       else
-               last_port = port0 + hpriv->n_ports;
-       /* we'll need the HC success int register in most cases */
-       hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS);
-       if (!hc_irq_cause)
-               return;
-
-       writelfl(~hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS);
+       void __iomem *mmio = hpriv->base, *hc_mmio;
+       unsigned int handled = 0, port;
  
-       VPRINTK("ENTER, hc%u relevant=0x%08x HC IRQ cause=0x%08x\n",
-               hc, relevant, hc_irq_cause);
+       /* If asserted, clear the "all ports" IRQ coalescing bit */
+       if (main_irq_cause & ALL_PORTS_COAL_DONE)
+               writel(~ALL_PORTS_COAL_IRQ, mmio + IRQ_COAL_CAUSE);
  
-       for (port = port0; port < last_port; port++) {
+       for (port = 0; port < hpriv->n_ports; port++) {
                 struct ata_port *ap = host->ports[port];
-               struct mv_port_priv *pp;
-               int have_err_bits, hardport, shift;
-
-               if ((!ap) || (ap->flags & ATA_FLAG_DISABLED))
-                       continue;
-
-               pp = ap->private_data;
+               unsigned int p, shift, hardport, port_cause;
  
-               shift = port << 1;              /* (port * 2) */
-               if (port >= MV_PORTS_PER_HC)
-                       shift++;        /* skip bit 8 in the HC Main IRQ reg */
-
-               have_err_bits = ((ERR_IRQ << shift) & relevant);
-
-               if (unlikely(have_err_bits)) {
-                       struct ata_queued_cmd *qc;
-
-                       qc = ata_qc_from_tag(ap, ap->link.active_tag);
-                       if (qc && (qc->tf.flags & ATA_TFLAG_POLLING))
+               MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport);
+               /*
+                * Each hc within the host has its own hc_irq_cause register,
+                * where the interrupting ports' bits get ack'd.
+                */
+               if (hardport == 0) {    /* first port on this hc ? */
+                       u32 hc_cause = (main_irq_cause >> shift) & HC0_IRQ_PEND;
+                       u32 port_mask, ack_irqs;
+                       /*
+                        * Skip this entire hc if nothing pending for any ports
+                        */
+                       if (!hc_cause) {
+                               port += MV_PORTS_PER_HC - 1;
                                 continue;
-
-                       mv_err_intr(ap, qc);
-                       continue;
-               }
-
-               hardport = mv_hardport_from_port(port); /* range 0..3 */
-
-               if (pp->pp_flags & MV_PP_FLAG_EDMA_EN) {
-                       if ((DMA_IRQ << hardport) & hc_irq_cause)
-                               mv_intr_edma(ap);
-               } else {
-                       if ((DEV_IRQ << hardport) & hc_irq_cause)
-                               mv_intr_pio(ap);
+                       }
+                       /*
+                        * We don't need/want to read the hc_irq_cause register,
+                        * because doing so hurts performance, and
+                        * main_irq_cause already gives us everything we need.
+                        *
+                        * But we do have to *write* to the hc_irq_cause to ack
+                        * the ports that we are handling this time through.
+                        *
+                        * This requires that we create a bitmap for those
+                        * ports which interrupted us, and use that bitmap
+                        * to ack (only) those ports via hc_irq_cause.
+                        */
+                       ack_irqs = 0;
+                       if (hc_cause & PORTS_0_3_COAL_DONE)
+                               ack_irqs = HC_COAL_IRQ;
+                       for (p = 0; p < MV_PORTS_PER_HC; ++p) {
+                               if ((port + p) >= hpriv->n_ports)
+                                       break;
+                               port_mask = (DONE_IRQ | ERR_IRQ) << (p * 2);
+                               if (hc_cause & port_mask)
+                                       ack_irqs |= (DMA_IRQ | DEV_IRQ) << p;
+                       }
+                       hc_mmio = mv_hc_base_from_port(mmio, port);
+                       writelfl(~ack_irqs, hc_mmio + HC_IRQ_CAUSE);
+                       handled = 1;
                 }
+               /*
+                * Handle interrupts signalled for this port:
+                */
+               port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ);
+               if (port_cause)
+                       mv_port_intr(ap, port_cause);
         }
-       VPRINTK("EXIT\n");
+       return handled;
  }
  
-static void mv_pci_error(struct ata_host *host, void __iomem *mmio)
+static int mv_pci_error(struct ata_host *host, void __iomem *mmio)
  {
         struct mv_host_priv *hpriv = host->private_data;
         struct ata_port *ap;
@@ -1748,7 +2897,7 @@ static void mv_pci_error(struct ata_host *host, void __iomem *mmio)
         unsigned int i, err_mask, printed = 0;
         u32 err_cause;
  
-       err_cause = readl(mmio + hpriv->irq_cause_ofs);
+       err_cause = readl(mmio + hpriv->irq_cause_offset);
  
         dev_printk(KERN_ERR, host->dev, "PCI ERROR; PCI IRQ cause=0x%08x\n",
                    err_cause);
@@ -1756,7 +2905,7 @@ static void mv_pci_error(struct ata_host *host, void __iomem *mmio)
         DPRINTK("All regs @ PCI error\n");
         mv_dump_all_regs(mmio, -1, to_pci_dev(host->dev));
  
-       writelfl(0, mmio + hpriv->irq_cause_ofs);
+       writelfl(0, mmio + hpriv->irq_cause_offset);
  
         for (i = 0; i < host->n_ports; i++) {
                 ap = host->ports[i];
@@ -1777,6 +2926,7 @@ static void mv_pci_error(struct ata_host *host, void __iomem *mmio)
                         ata_port_freeze(ap);
                 }
         }
+       return 1;       /* handled */
  }
  
  /**
@@ -1797,38 +2947,35 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance)
  {
         struct ata_host *host = dev_instance;
         struct mv_host_priv *hpriv = host->private_data;
-       unsigned int hc, handled = 0, n_hcs;
-       void __iomem *mmio = hpriv->base;
-       u32 main_cause, main_mask;
+       unsigned int handled = 0;
+       int using_msi = hpriv->hp_flags & MV_HP_FLAG_MSI;
+       u32 main_irq_cause, pending_irqs;
  
         spin_lock(&host->lock);
-       main_cause = readl(hpriv->main_cause_reg_addr);
-       main_mask  = readl(hpriv->main_mask_reg_addr);
+
+       /* for MSI:  block new interrupts while in here */
+       if (using_msi)
+               mv_write_main_irq_mask(0, hpriv);
+
+       main_irq_cause = readl(hpriv->main_irq_cause_addr);
+       pending_irqs   = main_irq_cause & hpriv->main_irq_mask;
         /*
          * Deal with cases where we either have nothing pending, or have read
          * a bogus register value which can indicate HW removal or PCI fault.
          */
-       if (!(main_cause & main_mask) || (main_cause == 0xffffffffU))
-               goto out_unlock;
-
-       n_hcs = mv_get_hc_count(host->ports[0]->flags);
-
-       if (unlikely((main_cause & PCI_ERR) && HAS_PCI(host))) {
-               mv_pci_error(host, mmio);
-               handled = 1;
-               goto out_unlock;        /* skip all other HC irq handling */
+       if (pending_irqs && main_irq_cause != 0xffffffffU) {
+               if (unlikely((pending_irqs & PCI_ERR) && !IS_SOC(hpriv)))
+                       handled = mv_pci_error(host, hpriv->base);
+               else
+                       handled = mv_host_intr(host, pending_irqs);
         }
  
-       for (hc = 0; hc < n_hcs; hc++) {
-               u32 relevant = main_cause & (HC0_IRQ_PEND << (hc * HC_SHIFT));
-               if (relevant) {
-                       mv_host_intr(host, relevant, hc);
-                       handled = 1;
-               }
-       }
+       /* for MSI: unmask; interrupt cause bits will retrigger now */
+       if (using_msi)
+               mv_write_main_irq_mask(hpriv->main_irq_mask, hpriv);
  
-out_unlock:
         spin_unlock(&host->lock);
+
         return IRQ_RETVAL(handled);
  }
  
@@ -1849,11 +2996,11 @@ static unsigned int mv5_scr_offset(unsigned int sc_reg_in)
         return ofs;
  }
  
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
  {
-       struct mv_host_priv *hpriv = ap->host->private_data;
+       struct mv_host_priv *hpriv = link->ap->host->private_data;
         void __iomem *mmio = hpriv->base;
-       void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+       void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
         unsigned int ofs = mv5_scr_offset(sc_reg_in);
  
         if (ofs != 0xffffffffU) {
@@ -1863,11 +3010,11 @@ static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
                 return -EINVAL;
  }
  
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
  {
-       struct mv_host_priv *hpriv = ap->host->private_data;
+       struct mv_host_priv *hpriv = link->ap->host->private_data;
         void __iomem *mmio = hpriv->base;
-       void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+       void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
         unsigned int ofs = mv5_scr_offset(sc_reg_in);
  
         if (ofs != 0xffffffffU) {
@@ -1895,7 +3042,7 @@ static void mv5_reset_bus(struct ata_host *host, void __iomem *mmio)
  
  static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
  {
-       writel(0x0fcfffff, mmio + MV_FLASH_CTL);
+       writel(0x0fcfffff, mmio + FLASH_CTL);
  }
  
  static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx,
@@ -1914,7 +3061,7 @@ static void mv5_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
  {
         u32 tmp;
  
-       writel(0, mmio + MV_GPIO_PORT_CTL);
+       writel(0, mmio + GPIO_PORT_CTL);
  
         /* FIXME: handle MV_HP_ERRATA_50XXB2 errata */
  
@@ -1932,9 +3079,9 @@ static void mv5_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
         int fix_apm_sq = (hpriv->hp_flags & MV_HP_ERRATA_50XXB0);
  
         if (fix_apm_sq) {
-               tmp = readl(phy_mmio + MV5_LT_MODE);
+               tmp = readl(phy_mmio + MV5_LTMODE);
                 tmp |= (1 << 19);
-               writel(tmp, phy_mmio + MV5_LT_MODE);
+               writel(tmp, phy_mmio + MV5_LTMODE);
  
                 tmp = readl(phy_mmio + MV5_PHY_CTL);
                 tmp &= ~0x3;
@@ -1957,15 +3104,10 @@ static void mv5_reset_hc_port(struct mv_host_priv *hpriv, void __iomem *mmio,
  {
         void __iomem *port_mmio = mv_port_base(mmio, port);
  
-       /*
-        * The datasheet warns against setting ATA_RST when EDMA is active
-        * (but doesn't say what the problem might be).  So we first try
-        * to disable the EDMA engine before doing the ATA_RST operation.
-        */
         mv_reset_channel(hpriv, mmio, port);
  
         ZERO(0x028);    /* command */
-       writel(0x11f, port_mmio + EDMA_CFG_OFS);
+       writel(0x11f, port_mmio + EDMA_CFG);
         ZERO(0x004);    /* timer */
         ZERO(0x008);    /* irq err cause */
         ZERO(0x00c);    /* irq err mask */
@@ -2029,10 +3171,9 @@ static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio)
         ZERO(MV_PCI_DISC_TIMER);
         ZERO(MV_PCI_MSI_TRIGGER);
         writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT);
-       ZERO(HC_MAIN_IRQ_MASK_OFS);
         ZERO(MV_PCI_SERR_MASK);
-       ZERO(hpriv->irq_cause_ofs);
-       ZERO(hpriv->irq_mask_ofs);
+       ZERO(hpriv->irq_cause_offset);
+       ZERO(hpriv->irq_mask_offset);
         ZERO(MV_PCI_ERR_LOW_ADDRESS);
         ZERO(MV_PCI_ERR_HIGH_ADDRESS);
         ZERO(MV_PCI_ERR_ATTRIBUTE);
@@ -2046,10 +3187,10 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
  
         mv5_reset_flash(hpriv, mmio);
  
-       tmp = readl(mmio + MV_GPIO_PORT_CTL);
+       tmp = readl(mmio + GPIO_PORT_CTL);
         tmp &= 0x3;
         tmp |= (1 << 5) | (1 << 6);
-       writel(tmp, mmio + MV_GPIO_PORT_CTL);
+       writel(tmp, mmio + GPIO_PORT_CTL);
  }
  
  /**
@@ -2064,7 +3205,7 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
  static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
                         unsigned int n_hc)
  {
-       void __iomem *reg = mmio + PCI_MAIN_CMD_STS_OFS;
+       void __iomem *reg = mmio + PCI_MAIN_CMD_STS;
         int i, rc = 0;
         u32 t;
  
@@ -2112,13 +3253,6 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
                 printk(KERN_ERR DRV_NAME ": can't clear global reset\n");
                 rc = 1;
         }
-       /*
-        * Temporary: wait 3 seconds before port-probing can happen,
-        * so that we don't miss finding sleepy SilXXXX port-multipliers.
-        * This can go away once hotplug is fully/correctly implemented.
-        */
-       if (rc == 0)
-               msleep(3000);
  done:
         return rc;
  }
@@ -2129,7 +3263,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
         void __iomem *port_mmio;
         u32 tmp;
  
-       tmp = readl(mmio + MV_RESET_CFG);
+       tmp = readl(mmio + RESET_CFG);
         if ((tmp & (1 << 0)) == 0) {
                 hpriv->signal[idx].amps = 0x7 << 8;
                 hpriv->signal[idx].pre = 0x1 << 5;
@@ -2145,7 +3279,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
  
  static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
  {
-       writel(0x00000060, mmio + MV_GPIO_PORT_CTL);
+       writel(0x00000060, mmio + GPIO_PORT_CTL);
  }
  
  static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
@@ -2158,7 +3292,7 @@ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
                 hp_flags & (MV_HP_ERRATA_60X1B2 | MV_HP_ERRATA_60X1C0);
         int fix_phy_mode4 =
                 hp_flags & (MV_HP_ERRATA_60X1B2 | MV_HP_ERRATA_60X1C0);
-       u32 m2, tmp;
+       u32 m2, m3;
  
         if (fix_phy_mode2) {
                 m2 = readl(port_mmio + PHY_MODE2);
@@ -2175,28 +3309,37 @@ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
                 udelay(200);
         }
  
-       /* who knows what this magic does */
-       tmp = readl(port_mmio + PHY_MODE3);
-       tmp &= ~0x7F800000;
-       tmp |= 0x2A800000;
-       writel(tmp, port_mmio + PHY_MODE3);
-
-       if (fix_phy_mode4) {
-               u32 m4;
-
-               m4 = readl(port_mmio + PHY_MODE4);
-
-               if (hp_flags & MV_HP_ERRATA_60X1B2)
-                       tmp = readl(port_mmio + PHY_MODE3);
+       /*
+        * Gen-II/IIe PHY_MODE3 errata RM#2:
+        * Achieves better receiver noise performance than the h/w default:
+        */
+       m3 = readl(port_mmio + PHY_MODE3);
+       m3 = (m3 & 0x1f) | (0x5555601 << 5);
  
-               /* workaround for errata FEr SATA#10 (part 1) */
-               m4 = (m4 & ~(1 << 1)) | (1 << 0);
+       /* Guideline 88F5182 (GL# SATA-S11) */
+       if (IS_SOC(hpriv))
+               m3 &= ~0x1c;
  
+       if (fix_phy_mode4) {
+               u32 m4 = readl(port_mmio + PHY_MODE4);
+               /*
+                * Enforce reserved-bit restrictions on GenIIe devices only.
+                * For earlier chipsets, force only the internal config field
+                *  (workaround for errata FEr SATA#10 part 1).
+                */
+               if (IS_GEN_IIE(hpriv))
+                       m4 = (m4 & ~PHY_MODE4_RSVD_ZEROS) | PHY_MODE4_RSVD_ONES;
+               else
+                       m4 = (m4 & ~PHY_MODE4_CFG_MASK) | PHY_MODE4_CFG_VALUE;
                 writel(m4, port_mmio + PHY_MODE4);
-
-               if (hp_flags & MV_HP_ERRATA_60X1B2)
-                       writel(tmp, port_mmio + PHY_MODE3);
         }
+       /*
+        * Workaround for 60x1-B2 errata SATA#13:
+        * Any write to PHY_MODE4 (above) may corrupt PHY_MODE3,
+        * so we must always rewrite PHY_MODE3 after PHY_MODE4.
+        * Or ensure we use writelfl() when writing PHY_MODE4.
+        */
+       writel(m3, port_mmio + PHY_MODE3);
  
         /* Revert values of pre-emphasis and signal amps to the saved ones */
         m2 = readl(port_mmio + PHY_MODE2);
@@ -2243,15 +3386,10 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
  {
         void __iomem *port_mmio = mv_port_base(mmio, port);
  
-       /*
-        * The datasheet warns against setting ATA_RST when EDMA is active
-        * (but doesn't say what the problem might be).  So we first try
-        * to disable the EDMA engine before doing the ATA_RST operation.
-        */
         mv_reset_channel(hpriv, mmio, port);
  
         ZERO(0x028);            /* command */
-       writel(0x101f, port_mmio + EDMA_CFG_OFS);
+       writel(0x101f, port_mmio + EDMA_CFG);
         ZERO(0x004);            /* timer */
         ZERO(0x008);            /* irq err cause */
         ZERO(0x00c);            /* irq err mask */
@@ -2262,7 +3400,7 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
         ZERO(0x024);            /* respq outp */
         ZERO(0x020);            /* respq inp */
         ZERO(0x02c);            /* test control */
-       writel(0xbc, port_mmio + EDMA_IORDY_TMOUT);
+       writel(0x800, port_mmio + EDMA_IORDY_TMOUT);
  }
  
  #undef ZERO
@@ -2305,40 +3443,88 @@ static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio)
         return;
  }
  
-static void mv_setup_ifctl(void __iomem *port_mmio, int want_gen2i)
+static void mv_soc_65n_phy_errata(struct mv_host_priv *hpriv,
+                                 void __iomem *mmio, unsigned int port)
+{
+       void __iomem *port_mmio = mv_port_base(mmio, port);
+       u32     reg;
+
+       reg = readl(port_mmio + PHY_MODE3);
+       reg &= ~(0x3 << 27);    /* SELMUPF (bits 28:27) to 1 */
+       reg |= (0x1 << 27);
+       reg &= ~(0x3 << 29);    /* SELMUPI (bits 30:29) to 1 */
+       reg |= (0x1 << 29);
+       writel(reg, port_mmio + PHY_MODE3);
+
+       reg = readl(port_mmio + PHY_MODE4);
+       reg &= ~0x1;    /* SATU_OD8 (bit 0) to 0, reserved bit 16 must be set */
+       reg |= (0x1 << 16);
+       writel(reg, port_mmio + PHY_MODE4);
+
+       reg = readl(port_mmio + PHY_MODE9_GEN2);
+       reg &= ~0xf;    /* TXAMP[3:0] (bits 3:0) to 8 */
+       reg |= 0x8;
+       reg &= ~(0x1 << 14);    /* TXAMP[4] (bit 14) to 0 */
+       writel(reg, port_mmio + PHY_MODE9_GEN2);
+
+       reg = readl(port_mmio + PHY_MODE9_GEN1);
+       reg &= ~0xf;    /* TXAMP[3:0] (bits 3:0) to 8 */
+       reg |= 0x8;
+       reg &= ~(0x1 << 14);    /* TXAMP[4] (bit 14) to 0 */
+       writel(reg, port_mmio + PHY_MODE9_GEN1);
+}
+
+/**
+ *     soc_is_65n - check if the SoC is a 65 nano device
+ *
+ *     Detect the type of the SoC by reading the PHYCFG_OFS register.
+ *     This register exists only in the 65 nano devices, where it should
+ *     contain a non-zero value; reading it from older devices returns 0.
+ */
+static bool soc_is_65n(struct mv_host_priv *hpriv)
+{
+       void __iomem *port0_mmio = mv_port_base(hpriv->base, 0);
+
+       if (readl(port0_mmio + PHYCFG_OFS))
+               return true;
+       return false;
+}
+
+static void mv_setup_ifcfg(void __iomem *port_mmio, int want_gen2i)
  {
-       u32 ifctl = readl(port_mmio + SATA_INTERFACE_CFG);
+       u32 ifcfg = readl(port_mmio + SATA_IFCFG);
  
-       ifctl = (ifctl & 0xf7f) | 0x9b1000;     /* from chip spec */
+       ifcfg = (ifcfg & 0xf7f) | 0x9b1000;     /* from chip spec */
         if (want_gen2i)
-               ifctl |= (1 << 7);              /* enable gen2i speed */
-       writelfl(ifctl, port_mmio + SATA_INTERFACE_CFG);
+               ifcfg |= (1 << 7);              /* enable gen2i speed */
+       writelfl(ifcfg, port_mmio + SATA_IFCFG);
  }
  
-/*
- * Caller must ensure that EDMA is not active,
- * by first doing mv_stop_edma() where needed.
- */
  static void mv_reset_channel(struct mv_host_priv *hpriv, void __iomem *mmio,
                              unsigned int port_no)
  {
         void __iomem *port_mmio = mv_port_base(mmio, port_no);
  
+       /*
+        * The datasheet warns against setting EDMA_RESET when EDMA is active
+        * (but doesn't say what the problem might be).  So we first try
+        * to disable the EDMA engine before doing the EDMA_RESET operation.
+        */
         mv_stop_edma_engine(port_mmio);
-       writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS);
+       writelfl(EDMA_RESET, port_mmio + EDMA_CMD);
  
         if (!IS_GEN_I(hpriv)) {
-               /* Enable 3.0gb/s link speed */
-               mv_setup_ifctl(port_mmio, 1);
+               /* Enable 3.0gb/s link speed: this survives EDMA_RESET */
+               mv_setup_ifcfg(port_mmio, 1);
         }
         /*
-        * Strobing ATA_RST here causes a hard reset of the SATA transport,
+        * Strobing EDMA_RESET here causes a hard reset of the SATA transport,
          * link, and physical layers.  It resets all SATA interface registers
-        * (except for SATA_INTERFACE_CFG), and issues a COMRESET to the dev.
+        * (except for SATA_IFCFG), and issues a COMRESET to the dev.
          */
-       writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS);
+       writelfl(EDMA_RESET, port_mmio + EDMA_CMD);
         udelay(25);     /* allow reset propagation */
-       writelfl(0, port_mmio + EDMA_CMD_OFS);
+       writelfl(0, port_mmio + EDMA_CMD);
  
         hpriv->ops->phy_errata(hpriv, mmio, port_no);
  
@@ -2350,12 +3536,12 @@ static void mv_pmp_select(struct ata_port *ap, int pmp)
  {
         if (sata_pmp_supported(ap)) {
                 void __iomem *port_mmio = mv_ap_base(ap);
-               u32 reg = readl(port_mmio + SATA_IFCTL_OFS);
+               u32 reg = readl(port_mmio + SATA_IFCTL);
                 int old = reg & 0xf;
  
                 if (old != pmp) {
                         reg = (reg & ~0xf) | pmp;
-                       writelfl(reg, port_mmio + SATA_IFCTL_OFS);
+                       writelfl(reg, port_mmio + SATA_IFCTL);
                 }
         }
  }
@@ -2387,6 +3573,8 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
  
         mv_reset_channel(hpriv, mmio, ap->port_no);
         pp->pp_flags &= ~MV_PP_FLAG_EDMA_EN;
+       pp->pp_flags &=
+         ~(MV_PP_FLAG_FBS_EN | MV_PP_FLAG_NCQ_EN | MV_PP_FLAG_FAKE_ATA_BUSY);
  
         /* Workaround for errata FEr SATA#10 (part 2) */
         do {
@@ -2395,69 +3583,46 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
  
                 rc = sata_link_hardreset(link, timing, deadline + extra,
                                          &online, NULL);
+               rc = online ? -EAGAIN : rc;
                 if (rc)
                         return rc;
                 sata_scr_read(link, SCR_STATUS, &sstatus);
                 if (!IS_GEN_I(hpriv) && ++attempts >= 5 && sstatus == 0x121) {
                         /* Force 1.5gb/s link speed and try again */
-                       mv_setup_ifctl(mv_ap_base(ap), 0);
+                       mv_setup_ifcfg(mv_ap_base(ap), 0);
                         if (time_after(jiffies + HZ, deadline))
                                 extra = HZ; /* only extend it once, max */
                 }
         } while (sstatus != 0x0 && sstatus != 0x113 && sstatus != 0x123);
+       mv_save_cached_regs(ap);
+       mv_edma_cfg(ap, 0, 0);
  
         return rc;
  }
  
  static void mv_eh_freeze(struct ata_port *ap)
  {
-       struct mv_host_priv *hpriv = ap->host->private_data;
-       unsigned int hc = (ap->port_no > 3) ? 1 : 0;
-       unsigned int shift;
-       u32 main_mask;
-
-       /* FIXME: handle coalescing completion events properly */
-
-       shift = ap->port_no * 2;
-       if (hc > 0)
-               shift++;
-
-       /* disable assertion of portN err, done events */
-       main_mask = readl(hpriv->main_mask_reg_addr);
-       main_mask &= ~((DONE_IRQ | ERR_IRQ) << shift);
-       writelfl(main_mask, hpriv->main_mask_reg_addr);
+       mv_stop_edma(ap);
+       mv_enable_port_irqs(ap, 0);
  }
  
  static void mv_eh_thaw(struct ata_port *ap)
  {
         struct mv_host_priv *hpriv = ap->host->private_data;
-       void __iomem *mmio = hpriv->base;
-       unsigned int hc = (ap->port_no > 3) ? 1 : 0;
-       void __iomem *hc_mmio = mv_hc_base(mmio, hc);
+       unsigned int port = ap->port_no;
+       unsigned int hardport = mv_hardport_from_port(port);
+       void __iomem *hc_mmio = mv_hc_base_from_port(hpriv->base, port);
         void __iomem *port_mmio = mv_ap_base(ap);
-       unsigned int shift, hc_port_no = ap->port_no;
-       u32 main_mask, hc_irq_cause;
-
-       /* FIXME: handle coalescing completion events properly */
-
-       shift = ap->port_no * 2;
-       if (hc > 0) {
-               shift++;
-               hc_port_no -= 4;
-       }
+       u32 hc_irq_cause;
  
         /* clear EDMA errors on this port */
-       writel(0, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
+       writel(0, port_mmio + EDMA_ERR_IRQ_CAUSE);
  
         /* clear pending irq events */
-       hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS);
-       hc_irq_cause &= ~((DEV_IRQ | DMA_IRQ) << hc_port_no);
-       writel(hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS);
+       hc_irq_cause = ~((DEV_IRQ | DMA_IRQ) << hardport);
+       writelfl(hc_irq_cause, hc_mmio + HC_IRQ_CAUSE);
  
-       /* enable assertion of portN err, done events */
-       main_mask = readl(hpriv->main_mask_reg_addr);
-       main_mask |= ((DONE_IRQ | ERR_IRQ) << shift);
-       writelfl(main_mask, hpriv->main_mask_reg_addr);
+       mv_enable_port_irqs(ap, ERR_IRQ);
  }
  
  /**
@@ -2474,8 +3639,7 @@ static void mv_eh_thaw(struct ata_port *ap)
   */
  static void mv_port_init(struct ata_ioports *port,  void __iomem *port_mmio)
  {
-       void __iomem *shd_base = port_mmio + SHD_BLK_OFS;
-       unsigned serr_ofs;
+       void __iomem *serr, *shd_base = port_mmio + SHD_BLK;
  
         /* PIO related setup
          */
@@ -2490,23 +3654,63 @@ static void mv_port_init(struct ata_ioports *port,  void __iomem *port_mmio)
         port->status_addr =
                 port->command_addr = shd_base + (sizeof(u32) * ATA_REG_STATUS);
         /* special case: control/altstatus doesn't have ATA_REG_ address */
-       port->altstatus_addr = port->ctl_addr = shd_base + SHD_CTL_AST_OFS;
+       port->altstatus_addr = port->ctl_addr = shd_base + SHD_CTL_AST;
  
         /* unused: */
         port->cmd_addr = port->bmdma_addr = port->scr_addr = NULL;
  
         /* Clear any currently outstanding port interrupt conditions */
-       serr_ofs = mv_scr_offset(SCR_ERROR);
-       writelfl(readl(port_mmio + serr_ofs), port_mmio + serr_ofs);
-       writelfl(0, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
+       serr = port_mmio + mv_scr_offset(SCR_ERROR);
+       writelfl(readl(serr), serr);
+       writelfl(0, port_mmio + EDMA_ERR_IRQ_CAUSE);
  
         /* unmask all non-transient EDMA error interrupts */
-       writelfl(~EDMA_ERR_IRQ_TRANSIENT, port_mmio + EDMA_ERR_IRQ_MASK_OFS);
+       writelfl(~EDMA_ERR_IRQ_TRANSIENT, port_mmio + EDMA_ERR_IRQ_MASK);
  
         VPRINTK("EDMA cfg=0x%08x EDMA IRQ err cause/mask=0x%08x/0x%08x\n",
-               readl(port_mmio + EDMA_CFG_OFS),
-               readl(port_mmio + EDMA_ERR_IRQ_CAUSE_OFS),
-               readl(port_mmio + EDMA_ERR_IRQ_MASK_OFS));
+               readl(port_mmio + EDMA_CFG),
+               readl(port_mmio + EDMA_ERR_IRQ_CAUSE),
+               readl(port_mmio + EDMA_ERR_IRQ_MASK));
+}
+
+static unsigned int mv_in_pcix_mode(struct ata_host *host)
+{
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *mmio = hpriv->base;
+       u32 reg;
+
+       if (IS_SOC(hpriv) || !IS_PCIE(hpriv))
+               return 0;       /* not PCI-X capable */
+       reg = readl(mmio + MV_PCI_MODE);
+       if ((reg & MV_PCI_MODE_MASK) == 0)
+               return 0;       /* conventional PCI mode */
+       return 1;       /* chip is in PCI-X mode */
+}
+
+static int mv_pci_cut_through_okay(struct ata_host *host)
+{
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *mmio = hpriv->base;
+       u32 reg;
+
+       if (!mv_in_pcix_mode(host)) {
+               reg = readl(mmio + MV_PCI_COMMAND);
+               if (reg & MV_PCI_COMMAND_MRDTRIG)
+                       return 0; /* not okay */
+       }
+       return 1; /* okay */
+}
+
+static void mv_60x1b2_errata_pci7(struct ata_host *host)
+{
+       struct mv_host_priv *hpriv = host->private_data;
+       void __iomem *mmio = hpriv->base;
+
+       /* workaround for 60x1-B2 errata PCI#7 */
+       if (mv_in_pcix_mode(host)) {
+               u32 reg = readl(mmio + MV_PCI_COMMAND);
+               writelfl(reg & ~MV_PCI_COMMAND_MWRCOM, mmio + MV_PCI_COMMAND);
+       }
  }
  
  static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
@@ -2562,6 +3766,7 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
  
                 switch (pdev->revision) {
                 case 0x7:
+                       mv_60x1b2_errata_pci7(host);
                         hp_flags |= MV_HP_ERRATA_60X1B2;
                         break;
                 case 0x9:
@@ -2576,7 +3781,7 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
                 break;
  
         case chip_7042:
-               hp_flags |= MV_HP_PCIE;
+               hp_flags |= MV_HP_PCIE | MV_HP_CUT_THROUGH;
                 if (pdev->vendor == PCI_VENDOR_ID_TTI &&
                     (pdev->device == 0x2300 || pdev->device == 0x2310))
                 {
@@ -2606,15 +3811,15 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
                                 " and avoid the final two gigabytes on"
                                 " all RocketRAID BIOS initialized drives.\n");
                 }
+               /* drop through */
         case chip_6042:
                 hpriv->ops = &mv6xxx_ops;
                 hp_flags |= MV_HP_GEN_IIE;
+               if (board_idx == chip_6042 && mv_pci_cut_through_okay(host))
+                       hp_flags |= MV_HP_CUT_THROUGH;
  
                 switch (pdev->revision) {
-               case 0x0:
-                       hp_flags |= MV_HP_ERRATA_XX42A0;
-                       break;
-               case 0x1:
+               case 0x2: /* Rev.B0: the first/only public release */
                         hp_flags |= MV_HP_ERRATA_60X1C0;
                         break;
                 default:
@@ -2625,8 +3830,12 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
                 }
                 break;
         case chip_soc:
-               hpriv->ops = &mv_soc_ops;
-               hp_flags |= MV_HP_ERRATA_60X1C0;
+               if (soc_is_65n(hpriv))
+                       hpriv->ops = &mv_soc_65n_ops;
+               else
+                       hpriv->ops = &mv_soc_ops;
+               hp_flags |= MV_HP_FLAG_SOC | MV_HP_GEN_IIE |
+                       MV_HP_ERRATA_60X1C0;
                 break;
  
         default:
@@ -2637,12 +3846,12 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
  
         hpriv->hp_flags = hp_flags;
         if (hp_flags & MV_HP_PCIE) {
-               hpriv->irq_cause_ofs    = PCIE_IRQ_CAUSE_OFS;
-               hpriv->irq_mask_ofs     = PCIE_IRQ_MASK_OFS;
+               hpriv->irq_cause_offset = PCIE_IRQ_CAUSE;
+               hpriv->irq_mask_offset  = PCIE_IRQ_MASK;
                 hpriv->unmask_all_irqs  = PCIE_UNMASK_ALL_IRQS;
         } else {
-               hpriv->irq_cause_ofs    = PCI_IRQ_CAUSE_OFS;
-               hpriv->irq_mask_ofs     = PCI_IRQ_MASK_OFS;
+               hpriv->irq_cause_offset = PCI_IRQ_CAUSE;
+               hpriv->irq_mask_offset  = PCI_IRQ_MASK;
                 hpriv->unmask_all_irqs  = PCI_UNMASK_ALL_IRQS;
         }
  
@@ -2652,7 +3861,6 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
  /**
   *      mv_init_host - Perform some early initialization of the host.
   *     @host: ATA host to initialize
- *      @board_idx: controller index
   *
   *      If possible, do an early global reset of the host.  Then do
   *      our port init and clear/unmask all/relevant host interrupts.
@@ -2660,31 +3868,35 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
   *      LOCKING:
   *      Inherited from caller.
   */
-static int mv_init_host(struct ata_host *host, unsigned int board_idx)
+static int mv_init_host(struct ata_host *host)
  {
         int rc = 0, n_hc, port, hc;
         struct mv_host_priv *hpriv = host->private_data;
         void __iomem *mmio = hpriv->base;
  
-       rc = mv_chip_id(host, board_idx);
+       rc = mv_chip_id(host, hpriv->board_idx);
         if (rc)
                 goto done;
  
-       if (HAS_PCI(host)) {
-               hpriv->main_cause_reg_addr = mmio + HC_MAIN_IRQ_CAUSE_OFS;
-               hpriv->main_mask_reg_addr  = mmio + HC_MAIN_IRQ_MASK_OFS;
+       if (IS_SOC(hpriv)) {
+               hpriv->main_irq_cause_addr = mmio + SOC_HC_MAIN_IRQ_CAUSE;
+               hpriv->main_irq_mask_addr  = mmio + SOC_HC_MAIN_IRQ_MASK;
         } else {
-               hpriv->main_cause_reg_addr = mmio + HC_SOC_MAIN_IRQ_CAUSE_OFS;
-               hpriv->main_mask_reg_addr  = mmio + HC_SOC_MAIN_IRQ_MASK_OFS;
+               hpriv->main_irq_cause_addr = mmio + PCI_HC_MAIN_IRQ_CAUSE;
+               hpriv->main_irq_mask_addr  = mmio + PCI_HC_MAIN_IRQ_MASK;
         }
  
+       /* initialize shadow irq mask with register's value */
+       hpriv->main_irq_mask = readl(hpriv->main_irq_mask_addr);
+
         /* global interrupt mask: 0 == mask everything */
-       writel(0, hpriv->main_mask_reg_addr);
+       mv_set_main_irq_mask(host, ~0, 0);
  
         n_hc = mv_get_hc_count(host->ports[0]->flags);
  
         for (port = 0; port < host->n_ports; port++)
-               hpriv->ops->read_preamp(hpriv, port, mmio);
+               if (hpriv->ops->read_preamp)
+                       hpriv->ops->read_preamp(hpriv, port, mmio);
  
         rc = hpriv->ops->reset_hc(hpriv, mmio, n_hc);
         if (rc)
@@ -2699,14 +3911,6 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx)
                 void __iomem *port_mmio = mv_port_base(mmio, port);
  
                 mv_port_init(&ap->ioaddr, port_mmio);
-
-#ifdef CONFIG_PCI
-               if (HAS_PCI(host)) {
-                       unsigned int offset = port_mmio - mmio;
-                       ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio");
-                       ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port");
-               }
-#endif
         }
  
         for (hc = 0; hc < n_hc; hc++) {
@@ -2714,39 +3918,28 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx)
  
                 VPRINTK("HC%i: HC config=0x%08x HC IRQ cause "
                         "(before clear)=0x%08x\n", hc,
-                       readl(hc_mmio + HC_CFG_OFS),
-                       readl(hc_mmio + HC_IRQ_CAUSE_OFS));
+                       readl(hc_mmio + HC_CFG),
+                       readl(hc_mmio + HC_IRQ_CAUSE));
  
                 /* Clear any currently outstanding hc interrupt conditions */
-               writelfl(0, hc_mmio + HC_IRQ_CAUSE_OFS);
+               writelfl(0, hc_mmio + HC_IRQ_CAUSE);
         }
  
-       if (HAS_PCI(host)) {
+       if (!IS_SOC(hpriv)) {
                 /* Clear any currently outstanding host interrupt conditions */
-               writelfl(0, mmio + hpriv->irq_cause_ofs);
+               writelfl(0, mmio + hpriv->irq_cause_offset);
  
                 /* and unmask interrupt generation for host regs */
-               writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs);
-               if (IS_GEN_I(hpriv))
-                       writelfl(~HC_MAIN_MASKED_IRQS_5,
-                                hpriv->main_mask_reg_addr);
-               else
-                       writelfl(~HC_MAIN_MASKED_IRQS,
-                                hpriv->main_mask_reg_addr);
-
-               VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x "
-                       "PCI int cause/mask=0x%08x/0x%08x\n",
-                       readl(hpriv->main_cause_reg_addr),
-                       readl(hpriv->main_mask_reg_addr),
-                       readl(mmio + hpriv->irq_cause_ofs),
-                       readl(mmio + hpriv->irq_mask_ofs));
-       } else {
-               writelfl(~HC_MAIN_MASKED_IRQS_SOC,
-                        hpriv->main_mask_reg_addr);
-               VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x\n",
-                       readl(hpriv->main_cause_reg_addr),
-                       readl(hpriv->main_mask_reg_addr));
+               writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_offset);
         }
+
+       /*
+        * enable only global host interrupts for now.
+        * The per-port interrupts get done later as ports are set up.
+        */
+       mv_set_main_irq_mask(host, 0, PCI_ERR);
+       mv_set_irq_coalescing(host, irq_coalescing_io_count,
+                                   irq_coalescing_usecs);
  done:
         return rc;
  }
@@ -2840,11 +4033,20 @@ static int mv_platform_probe(struct platform_device *pdev)
                 return -ENOMEM;
         host->private_data = hpriv;
         hpriv->n_ports = n_ports;
+       hpriv->board_idx = chip_soc;
  
         host->iomap = NULL;
         hpriv->base = devm_ioremap(&pdev->dev, res->start,
-                                  res->end - res->start + 1);
-       hpriv->base -= MV_SATAHC0_REG_BASE;
+                                  resource_size(res));
+       hpriv->base -= SATAHC0_REG_BASE;
+
+#if defined(CONFIG_HAVE_CLK)
+       hpriv->clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(hpriv->clk))
+               dev_notice(&pdev->dev, "cannot get clkdev\n");
+       else
+               clk_enable(hpriv->clk);
+#endif
  
         /*
          * (Re-)program MBUS remapping windows if we are asked to.
@@ -2854,12 +4056,12 @@ static int mv_platform_probe(struct platform_device *pdev)
  
         rc = mv_create_dma_pools(hpriv, &pdev->dev);
         if (rc)
-               return rc;
+               goto err;
  
         /* initialize adapter */
-       rc = mv_init_host(host, chip_soc);
+       rc = mv_init_host(host);
         if (rc)
-               return rc;
+               goto err;
  
         dev_printk(KERN_INFO, &pdev->dev,
                    "slots %u ports %d\n", (unsigned)MV_MAX_Q_DEPTH,
@@ -2867,6 +4069,15 @@ static int mv_platform_probe(struct platform_device *pdev)
  
         return ata_host_activate(host, platform_get_irq(pdev, 0), mv_interrupt,
                                  IRQF_SHARED, &mv6_sht);
+err:
+#if defined(CONFIG_HAVE_CLK)
+       if (!IS_ERR(hpriv->clk)) {
+               clk_disable(hpriv->clk);
+               clk_put(hpriv->clk);
+       }
+#endif
+
+       return rc;
  }
  
  /*
@@ -2881,14 +4092,66 @@ static int __devexit mv_platform_remove(struct platform_device *pdev)
  {
         struct device *dev = &pdev->dev;
         struct ata_host *host = dev_get_drvdata(dev);
-
+#if defined(CONFIG_HAVE_CLK)
+       struct mv_host_priv *hpriv = host->private_data;
+#endif
         ata_host_detach(host);
+
+#if defined(CONFIG_HAVE_CLK)
+       if (!IS_ERR(hpriv->clk)) {
+               clk_disable(hpriv->clk);
+               clk_put(hpriv->clk);
+       }
+#endif
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int mv_platform_suspend(struct platform_device *pdev, pm_message_t state)
+{
+       struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       if (host)
+               return ata_host_suspend(host, state);
+       else
+               return 0;
+}
+
+static int mv_platform_resume(struct platform_device *pdev)
+{
+       struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       int ret;
+
+       if (host) {
+               struct mv_host_priv *hpriv = host->private_data;
+               const struct mv_sata_platform_data *mv_platform_data = \
+                       pdev->dev.platform_data;
+               /*
+                * (Re-)program MBUS remapping windows if we are asked to.
+                */
+               if (mv_platform_data->dram != NULL)
+                       mv_conf_mbus_windows(hpriv, mv_platform_data->dram);
+
+               /* initialize adapter */
+               ret = mv_init_host(host);
+               if (ret) {
+                       printk(KERN_ERR DRV_NAME ": Error during HW init\n");
+                       return ret;
+               }
+               ata_host_resume(host);
+       }
+
         return 0;
  }
+#else
+#define mv_platform_suspend NULL
+#define mv_platform_resume NULL
+#endif
  
  static struct platform_driver mv_platform_driver = {
         .probe                  = mv_platform_probe,
         .remove                 = __devexit_p(mv_platform_remove),
+       .suspend                = mv_platform_suspend,
+       .resume                 = mv_platform_resume,
         .driver                 = {
                                    .name = DRV_NAME,
                                    .owner = THIS_MODULE,
@@ -2899,6 +4162,9 @@ static struct platform_driver mv_platform_driver = {
  #ifdef CONFIG_PCI
  static int mv_pci_init_one(struct pci_dev *pdev,
                            const struct pci_device_id *ent);
+#ifdef CONFIG_PM
+static int mv_pci_device_resume(struct pci_dev *pdev);
+#endif
  
  
  static struct pci_driver mv_pci_driver = {
@@ -2906,23 +4172,22 @@ static struct pci_driver mv_pci_driver = {
         .id_table               = mv_pci_tbl,
         .probe                  = mv_pci_init_one,
         .remove                 = ata_pci_remove_one,
-};
-
-/*
- * module options
- */
-static int msi;              /* Use PCI msi; either zero (off, default) or non-zero */
+#ifdef CONFIG_PM
+       .suspend                = ata_pci_device_suspend,
+       .resume                 = mv_pci_device_resume,
+#endif
  
+};
  
  /* move to PCI layer or libata core? */
  static int pci_go_64(struct pci_dev *pdev)
  {
         int rc;
  
-       if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
-               rc = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                 if (rc) {
-                       rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+                       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                         if (rc) {
                                 dev_printk(KERN_ERR, &pdev->dev,
                                            "64-bit DMA enable failed\n");
@@ -2930,13 +4195,13 @@ static int pci_go_64(struct pci_dev *pdev)
                         }
                 }
         } else {
-               rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                 if (rc) {
                         dev_printk(KERN_ERR, &pdev->dev,
                                    "32-bit DMA enable failed\n");
                         return rc;
                 }
-               rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                 if (rc) {
                         dev_printk(KERN_ERR, &pdev->dev,
                                    "32-bit consistent DMA enable failed\n");
@@ -3005,7 +4270,7 @@ static int mv_pci_init_one(struct pci_dev *pdev,
         const struct ata_port_info *ppi[] = { &mv_port_info[board_idx], NULL };
         struct ata_host *host;
         struct mv_host_priv *hpriv;
-       int n_ports, rc;
+       int n_ports, port, rc;
  
         if (!printed_version++)
                 dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
@@ -3019,6 +4284,7 @@ static int mv_pci_init_one(struct pci_dev *pdev,
                 return -ENOMEM;
         host->private_data = hpriv;
         hpriv->n_ports = n_ports;
+       hpriv->board_idx = board_idx;
  
         /* acquire resources */
         rc = pcim_enable_device(pdev);
@@ -3041,14 +4307,23 @@ static int mv_pci_init_one(struct pci_dev *pdev,
         if (rc)
                 return rc;
  
+       for (port = 0; port < host->n_ports; port++) {
+               struct ata_port *ap = host->ports[port];
+               void __iomem *port_mmio = mv_port_base(hpriv->base, port);
+               unsigned int offset = port_mmio - hpriv->base;
+
+               ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio");
+               ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port");
+       }
+
         /* initialize adapter */
-       rc = mv_init_host(host, board_idx);
+       rc = mv_init_host(host);
         if (rc)
                 return rc;
  
-       /* Enable interrupts */
-       if (msi && pci_enable_msi(pdev))
-               pci_intx(pdev, 1);
+       /* Enable message-switched interrupts, if requested */
+       if (msi && pci_enable_msi(pdev) == 0)
+               hpriv->hp_flags |= MV_HP_FLAG_MSI;
  
         mv_dump_pci_cfg(pdev, 0x68);
         mv_print_info(host);
@@ -3058,6 +4333,27 @@ static int mv_pci_init_one(struct pci_dev *pdev,
         return ata_host_activate(host, pdev->irq, mv_interrupt, IRQF_SHARED,
                                  IS_GEN_I(hpriv) ? &mv5_sht : &mv6_sht);
  }
+
+#ifdef CONFIG_PM
+static int mv_pci_device_resume(struct pci_dev *pdev)
+{
+       struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       int rc;
+
+       rc = ata_pci_device_do_resume(pdev);
+       if (rc)
+               return rc;
+
+       /* initialize adapter */
+       rc = mv_init_host(host);
+       if (rc)
+               return rc;
+
+       ata_host_resume(host);
+
+       return 0;
+}
+#endif
  #endif
  
  static int mv_platform_probe(struct platform_device *pdev);
@@ -3095,10 +4391,5 @@ MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
  MODULE_VERSION(DRV_VERSION);
  MODULE_ALIAS("platform:" DRV_NAME);
  
-#ifdef CONFIG_PCI
-module_param(msi, int, 0444);
-MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)");
-#endif
-
  module_init(mv_init);
  module_exit(mv_exit);