Merge branches 'misc', 'eeepc-laptop' and 'bugzilla-14445' into release
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 09991c8..d4560d9 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,4 +1,5 @@
 #include "amd64_edac.h"
+#include <asm/k8.h>
 
 static struct edac_pci_ctl_info *amd64_ctl_pci;
 
@@ -14,8 +15,65 @@ module_param(ecc_enable_override, int, 0644);
 
 /* Lookup table for all possible MC control instances */
 struct amd64_pvt;
-static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
-static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
+static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
+static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES];
+
+/*
+ * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
+ * for DDR2 DRAM mapping.
+ */
+u32 revf_quad_ddr2_shift[] = {
+       0,      /* 0000b NULL DIMM (128MB) */
+       28,     /* 0001b 256MB */
+       29,     /* 0010b 512MB */
+       29,     /* 0011b 512MB */
+       29,     /* 0100b 512MB */
+       30,     /* 0101b 1GB */
+       30,     /* 0110b 1GB */
+       31,     /* 0111b 2GB */
+       31,     /* 1000b 2GB */
+       32,     /* 1001b 4GB */
+       32,     /* 1010b 4GB */
+       33,     /* 1011b 8GB */
+       0,      /* 1100b future */
+       0,      /* 1101b future */
+       0,      /* 1110b future */
+       0       /* 1111b future */
+};
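+
+/*
+ * Example of reading the table: a DBAM nibble of 0101b indexes a shift of 30
+ * above, i.e. a chip select size of 1 << 30 bytes == 1GB.
+ */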
+
+/*
+ * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
+ * bandwidth to a valid bit pattern. The 'set' operation finds the matching
+ * bandwidth or the next higher one.
+ *
+ * FIXME: Produce a better mapping/linearisation.
+ */
+
+struct scrubrate scrubrates[] = {
+       { 0x01, 1600000000UL},
+       { 0x02, 800000000UL},
+       { 0x03, 400000000UL},
+       { 0x04, 200000000UL},
+       { 0x05, 100000000UL},
+       { 0x06, 50000000UL},
+       { 0x07, 25000000UL},
+       { 0x08, 12284069UL},
+       { 0x09, 6274509UL},
+       { 0x0A, 3121951UL},
+       { 0x0B, 1560975UL},
+       { 0x0C, 781440UL},
+       { 0x0D, 390720UL},
+       { 0x0E, 195300UL},
+       { 0x0F, 97650UL},
+       { 0x10, 48854UL},
+       { 0x11, 24427UL},
+       { 0x12, 12213UL},
+       { 0x13, 6101UL},
+       { 0x14, 3051UL},
+       { 0x15, 1523UL},
+       { 0x16, 761UL},
+       { 0x00, 0UL},        /* scrubbing off */
+};
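+
+/*
+ * Note: each successive bit pattern roughly halves the available scrubbing
+ * bandwidth (in bytes/sec), and the 0x00 pattern turns scrubbing off.
+ */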
 
 /*
  * Memory scrubber control interface. For K8, memory scrubbing is handled by
@@ -131,7 +189,10 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
 /* Map from a CSROW entry to the mask entry that operates on it */
 static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
 {
-       return csrow >> (pvt->num_dcsm >> 3);
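+       /*
+        * Rev E and earlier have one DCS mask register per chip select; rev F
+        * and later share one mask between each pair of chip selects, hence
+        * the divide by two.
+        */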
+       if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F)
+               return csrow;
+       else
+               return csrow >> 1;
 }
 
 /* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
@@ -221,29 +282,26 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
        intlv_en = pvt->dram_IntlvEn[0];
 
        if (intlv_en == 0) {
-               for (node_id = 0; ) {
+               for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) {
                        if (amd64_base_limit_match(pvt, sys_addr, node_id))
-                               break;
-
-                       if (++node_id >= DRAM_REG_COUNT)
-                               goto err_no_match;
+                               goto found;
                }
-               goto found;
+               goto err_no_match;
        }
 
-       if (unlikely((intlv_en != (0x01 << 8)) &&
-                    (intlv_en != (0x03 << 8)) &&
-                    (intlv_en != (0x07 << 8)))) {
+       if (unlikely((intlv_en != 0x01) &&
+                    (intlv_en != 0x03) &&
+                    (intlv_en != 0x07))) {
                amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
                             "IntlvEn field of DRAM Base Register for node 0: "
-                            "This probably indicates a BIOS bug.\n", intlv_en);
+                            "this probably indicates a BIOS bug.\n", intlv_en);
                return NULL;
        }
 
        bits = (((u32) sys_addr) >> 12) & intlv_en;
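+       /*
+        * Illustrative example: with intlv_en == 0x07, i.e. an 8-node
+        * interleave, sys_addr bits 14:12 select the node, and the loop below
+        * searches for the DRAM register whose IntlvSel field matches them.
+        */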
 
        for (node_id = 0; ; ) {
-               if ((pvt->dram_limit[node_id] & intlv_en) == bits)
+               if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits)
                        break;  /* intlv_sel field matches */
 
                if (++node_id >= DRAM_REG_COUNT)
@@ -253,10 +311,10 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
        /* sanity test for sys_addr */
        if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
                amd64_printk(KERN_WARNING,
-                         "%s(): sys_addr 0x%lx falls outside base/limit "
-                         "address range for node %d with node interleaving "
-                         "enabled.\n", __func__, (unsigned long)sys_addr,
-                         node_id);
+                            "%s(): sys_addr 0x%llx falls outside base/limit "
+                            "address range for node %d with node interleaving "
+                            "enabled.\n",
+                            __func__, sys_addr, node_id);
                return NULL;
        }
 
@@ -319,7 +377,7 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
         * base/mask register pair, test the condition shown near the start of
         * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
         */
-       for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+       for (csrow = 0; csrow < pvt->cs_count; csrow++) {
 
                /* This DRAM chip select is disabled on this node */
                if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
@@ -676,7 +734,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
        u64 base, mask;
 
        pvt = mci->pvt_info;
-       BUG_ON((csrow < 0) || (csrow >= CHIPSELECT_COUNT));
+       BUG_ON((csrow < 0) || (csrow >= pvt->cs_count));
 
        base = base_from_dct_base(pvt, csrow);
        mask = mask_from_dct_mask(pvt, csrow);
@@ -692,7 +750,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
  * specific.
  */
 static u64 extract_error_address(struct mem_ctl_info *mci,
-                                struct amd64_error_info_regs *info)
+                                struct err_regs *info)
 {
        struct amd64_pvt *pvt = mci->pvt_info;
 
@@ -753,13 +811,13 @@ static void amd64_cpu_display_info(struct amd64_pvt *pvt)
 static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
 {
        int bit;
-       enum dev_type edac_cap = EDAC_NONE;
+       enum dev_type edac_cap = EDAC_FLAG_NONE;
 
        bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F)
                ? 19
                : 17;
 
-       if (pvt->dclr0 >> BIT(bit))
+       if (pvt->dclr0 & BIT(bit))
                edac_cap = EDAC_FLAG_SECDED;
 
        return edac_cap;
@@ -867,6 +925,8 @@ static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
                        goto err_reg;
        }
 
+       return;
+
 err_reg:
        debugf0("Error reading F2x%03x.\n", reg);
 }
@@ -902,35 +962,27 @@ err_reg:
  */
 static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
 {
-       if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+       if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) {
+               pvt->dcsb_base          = REV_E_DCSB_BASE_BITS;
+               pvt->dcsm_mask          = REV_E_DCSM_MASK_BITS;
+               pvt->dcs_mask_notused   = REV_E_DCS_NOTUSED_BITS;
+               pvt->dcs_shift          = REV_E_DCS_SHIFT;
+               pvt->cs_count           = 8;
+               pvt->num_dcsm           = 8;
+       } else {
                pvt->dcsb_base          = REV_F_F1Xh_DCSB_BASE_BITS;
                pvt->dcsm_mask          = REV_F_F1Xh_DCSM_MASK_BITS;
                pvt->dcs_mask_notused   = REV_F_F1Xh_DCS_NOTUSED_BITS;
                pvt->dcs_shift          = REV_F_F1Xh_DCS_SHIFT;
 
-               switch (boot_cpu_data.x86) {
-               case 0xf:
-                       pvt->num_dcsm = REV_F_DCSM_COUNT;
-                       break;
-
-               case 0x10:
-                       pvt->num_dcsm = F10_DCSM_COUNT;
-                       break;
-
-               case 0x11:
-                       pvt->num_dcsm = F11_DCSM_COUNT;
-                       break;
-
-               default:
-                       amd64_printk(KERN_ERR, "Unsupported family!\n");
-                       break;
+               if (boot_cpu_data.x86 == 0x11) {
+                       pvt->cs_count = 4;
+                       pvt->num_dcsm = 2;
+               } else {
+                       pvt->cs_count = 8;
+                       pvt->num_dcsm = 4;
                }
-       } else {
-               pvt->dcsb_base          = REV_E_DCSB_BASE_BITS;
-               pvt->dcsm_mask          = REV_E_DCSM_MASK_BITS;
-               pvt->dcs_mask_notused   = REV_E_DCS_NOTUSED_BITS;
-               pvt->dcs_shift          = REV_E_DCS_SHIFT;
-               pvt->num_dcsm           = REV_E_DCSM_COUNT;
        }
 }
 
@@ -943,7 +995,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
 
        amd64_set_dct_base_and_mask(pvt);
 
-       for (cs = 0; cs < CHIPSELECT_COUNT; cs++) {
+       for (cs = 0; cs < pvt->cs_count; cs++) {
                reg = K8_DCSB0 + (cs * 4);
                err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
                                                &pvt->dcsb0[cs]);
@@ -969,7 +1021,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
        }
 
        for (cs = 0; cs < pvt->num_dcsm; cs++) {
-               reg = K8_DCSB0 + (cs * 4);
+               reg = K8_DCSM0 + (cs * 4);
                err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
                                        &pvt->dcsm0[cs]);
                if (unlikely(err))
@@ -1046,7 +1098,7 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
 
 /* extract the ERROR ADDRESS for the K8 CPUs */
 static u64 k8_get_error_address(struct mem_ctl_info *mci,
-                               struct amd64_error_info_regs *info)
+                               struct err_regs *info)
 {
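+       /*
+        * Per the masks below, NBEAH[7:0] supplies error address bits 39:32
+        * and NBEAL bits 31:2; the lowest two bits are discarded.
+        */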
        return (((u64) (info->nbeah & 0xff)) << 32) +
                        (info->nbeal & ~0x03);
@@ -1089,7 +1141,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 }
 
 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
-                                       struct amd64_error_info_regs *info,
+                                       struct err_regs *info,
                                        u64 SystemAddress)
 {
        struct mem_ctl_info *src_mci;
@@ -1098,8 +1150,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
        u32 page, offset;
 
        /* Extract the syndrome parts and form a 16-bit syndrome */
-       syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
-       syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+       syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
+       syndrome |= LOW_SYNDROME(info->nbsh);
 
        /* CHIPKILL enabled */
        if (info->nbcfg & K8_NBCFG_CHIPKILL) {
@@ -1133,7 +1185,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
         * different from the node that detected the error.
         */
        src_mci = find_mc_by_sys_addr(mci, SystemAddress);
-       if (src_mci) {
+       if (!src_mci) {
                amd64_mc_printk(mci, KERN_ERR,
                             "failed to map error address 0x%lx to a node\n",
                             (unsigned long)SystemAddress);
@@ -1195,7 +1247,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
  */
 static int f10_early_channel_count(struct amd64_pvt *pvt)
 {
+       int dbams[] = { DBAM0, DBAM1 };
        int err = 0, channels = 0;
+       int i, j;
        u32 dbam;
 
        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
@@ -1228,47 +1282,20 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
         * is more than just one DIMM present in unganged mode. Need to check
         * both controllers since DIMMs can be placed in either one.
         */
-       channels = 0;
-       err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam);
-       if (err)
-               goto err_reg;
-
-       if (DBAM_DIMM(0, dbam) > 0)
-               channels++;
-       if (DBAM_DIMM(1, dbam) > 0)
-               channels++;
-       if (DBAM_DIMM(2, dbam) > 0)
-               channels++;
-       if (DBAM_DIMM(3, dbam) > 0)
-               channels++;
-
-       /* If more than 2 DIMMs are present, then we have 2 channels */
-       if (channels > 2)
-               channels = 2;
-       else if (channels == 0) {
-               /* No DIMMs on DCT0, so look at DCT1 */
-               err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
+       for (i = 0; i < ARRAY_SIZE(dbams); i++) {
+               err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam);
                if (err)
                        goto err_reg;
 
-               if (DBAM_DIMM(0, dbam) > 0)
-                       channels++;
-               if (DBAM_DIMM(1, dbam) > 0)
-                       channels++;
-               if (DBAM_DIMM(2, dbam) > 0)
-                       channels++;
-               if (DBAM_DIMM(3, dbam) > 0)
-                       channels++;
-
-               if (channels > 2)
-                       channels = 2;
+               for (j = 0; j < 4; j++) {
+                       if (DBAM_DIMM(j, dbam) > 0) {
+                               channels++;
+                               break;
+                       }
+               }
        }
 
-       /* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
-       if (channels == 0)
-               channels = 1;
-
-       debugf0("DIMM count= %d\n", channels);
+       debugf0("MCT channel count: %d\n", channels);
 
        return channels;
 
@@ -1308,7 +1335,7 @@ static void amd64_teardown(struct amd64_pvt *pvt)
 }
 
 static u64 f10_get_error_address(struct mem_ctl_info *mci,
-                       struct amd64_error_info_regs *info)
+                       struct err_regs *info)
 {
        return (((u64) (info->nbeah & 0xffff)) << 32) +
                        (info->nbeal & ~0x01);
@@ -1341,8 +1368,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 
        pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
 
-       pvt->dram_base[dram] = (((((u64) high_base & 0x000000FF) << 32) |
-                               ((u64) low_base & 0xFFFF0000))) << 8;
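+       /*
+        * Base bits 47:40 come from high_base[7:0] and bits 39:24 from
+        * low_base[31:16]; all lower bits of the base are zero.
+        */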
+       pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
+                              (((u64)low_base  & 0xFFFF0000) << 8);
 
        low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
        high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
@@ -1363,9 +1390,9 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
         * Extract address values and form a LIMIT address. Limit is the HIGHEST
         * memory location of the region, so low 24 bits need to be all ones.
         */
-       low_limit |= 0x0000FFFF;
-       pvt->dram_limit[dram] =
-               ((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF);
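+       /*
+        * Bits 47:40 come from high_limit[7:0] and bits 39:24 from
+        * low_limit[31:16], mirroring the base calculation above.
+        */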
+       pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
+                               (((u64) low_limit & 0xFFFF0000) << 8) |
+                               0x00FFFFFF;
 }
 
 static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
@@ -1531,7 +1558,7 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
 
        debugf1("InputAddr=0x%x  channelselect=%d\n", in_addr, cs);
 
-       for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+       for (csrow = 0; csrow < pvt->cs_count; csrow++) {
 
                cs_base = amd64_get_dct_base(pvt, cs, csrow);
                if (!(cs_base & K8_DCSB_CS_ENABLE))
@@ -1685,7 +1712,7 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
  * The @sys_addr is usually an error address received from the hardware.
  */
 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
-                                    struct amd64_error_info_regs *info,
+                                    struct err_regs *info,
                                     u64 sys_addr)
 {
        struct amd64_pvt *pvt = mci->pvt_info;
@@ -1698,8 +1725,8 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
        if (csrow >= 0) {
                error_address_to_page_and_offset(sys_addr, &page, &offset);
 
-               syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
-               syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+               syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
+               syndrome |= LOW_SYNDROME(info->nbsh);
 
                /*
                 * Is CHIPKILL on? If so, then we can attempt to use the
@@ -2042,7 +2069,7 @@ static int get_channel_from_ecc_syndrome(unsigned short syndrome)
  *     - 0: if no valid error is indicated
  */
 static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
-                                    struct amd64_error_info_regs *regs)
+                                    struct err_regs *regs)
 {
        struct amd64_pvt *pvt;
        struct pci_dev *misc_f3_ctl;
@@ -2091,10 +2118,10 @@ err_reg:
  *     - 0: if no error is found
  */
 static int amd64_get_error_info(struct mem_ctl_info *mci,
-                               struct amd64_error_info_regs *info)
+                               struct err_regs *info)
 {
        struct amd64_pvt *pvt;
-       struct amd64_error_info_regs regs;
+       struct err_regs regs;
 
        pvt = mci->pvt_info;
 
@@ -2149,48 +2176,12 @@ static int amd64_get_error_info(struct mem_ctl_info *mci,
        return 1;
 }
 
-static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
-                                        struct amd64_error_info_regs *info)
-{
-       u32 err_code;
-       u32 ec_tt;              /* error code transaction type (2b) */
-       u32 ec_ll;              /* error code cache level (2b) */
-
-       err_code = EXTRACT_ERROR_CODE(info->nbsl);
-       ec_ll = EXTRACT_LL_CODE(err_code);
-       ec_tt = EXTRACT_TT_CODE(err_code);
-
-       amd64_mc_printk(mci, KERN_ERR,
-                    "GART TLB event: transaction type(%s), "
-                    "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
-}
-
-static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
-                                     struct amd64_error_info_regs *info)
-{
-       u32 err_code;
-       u32 ec_rrrr;            /* error code memory transaction (4b) */
-       u32 ec_tt;              /* error code transaction type (2b) */
-       u32 ec_ll;              /* error code cache level (2b) */
-
-       err_code = EXTRACT_ERROR_CODE(info->nbsl);
-       ec_ll = EXTRACT_LL_CODE(err_code);
-       ec_tt = EXTRACT_TT_CODE(err_code);
-       ec_rrrr = EXTRACT_RRRR_CODE(err_code);
-
-       amd64_mc_printk(mci, KERN_ERR,
-                    "cache hierarchy error: memory transaction type(%s), "
-                    "transaction type(%s), cache level(%s)\n",
-                    rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
-}
-
-
 /*
  * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
  * ADDRESS and process.
  */
 static void amd64_handle_ce(struct mem_ctl_info *mci,
-                           struct amd64_error_info_regs *info)
+                           struct err_regs *info)
 {
        struct amd64_pvt *pvt = mci->pvt_info;
        u64 SystemAddress;
@@ -2213,7 +2204,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci,
 
 /* Handle any Un-correctable Errors (UEs) */
 static void amd64_handle_ue(struct mem_ctl_info *mci,
-                           struct amd64_error_info_regs *info)
+                           struct err_regs *info)
 {
        int csrow;
        u64 SystemAddress;
@@ -2258,59 +2249,24 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
        }
 }
 
-static void amd64_decode_bus_error(struct mem_ctl_info *mci,
-                                  struct amd64_error_info_regs *info)
+static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
+                                           struct err_regs *info)
 {
-       u32 err_code, ext_ec;
-       u32 ec_pp;              /* error code participating processor (2p) */
-       u32 ec_to;              /* error code timed out (1b) */
-       u32 ec_rrrr;            /* error code memory transaction (4b) */
-       u32 ec_ii;              /* error code memory or I/O (2b) */
-       u32 ec_ll;              /* error code cache level (2b) */
-
-       ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
-       err_code = EXTRACT_ERROR_CODE(info->nbsl);
+       u32 ec  = ERROR_CODE(info->nbsl);
+       u32 xec = EXT_ERROR_CODE(info->nbsl);
+       int ecc_type = (info->nbsh >> 13) & 0x3;
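+       /*
+        * nbsh[14:13] encodes the ECC error type: 10b flags a correctable
+        * error, 01b an uncorrectable one (cf. the K8_NBSH_CECC/K8_NBSH_UECC
+        * tests this replaces), and 00b means no ECC information.
+        */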
 
-       ec_ll = EXTRACT_LL_CODE(err_code);
-       ec_ii = EXTRACT_II_CODE(err_code);
-       ec_rrrr = EXTRACT_RRRR_CODE(err_code);
-       ec_to = EXTRACT_TO_CODE(err_code);
-       ec_pp = EXTRACT_PP_CODE(err_code);
-
-       amd64_mc_printk(mci, KERN_ERR,
-               "BUS ERROR:\n"
-               "  time-out(%s) mem or i/o(%s)\n"
-               "  participating processor(%s)\n"
-               "  memory transaction type(%s)\n"
-               "  cache level(%s) Error Found by: %s\n",
-               to_msgs[ec_to],
-               ii_msgs[ec_ii],
-               pp_msgs[ec_pp],
-               rrrr_msgs[ec_rrrr],
-               ll_msgs[ec_ll],
-               (info->nbsh & K8_NBSH_ERR_SCRUBER) ?
-                       "Scrubber" : "Normal Operation");
-
-       /* If this was an 'observed' error, early out */
-       if (ec_pp == K8_NBSL_PP_OBS)
-               return;         /* We aren't the node involved */
-
-       /* Parse out the extended error code for ECC events */
-       switch (ext_ec) {
-       /* F10 changed to one Extended ECC error code */
-       case F10_NBSL_EXT_ERR_RES:              /* Reserved field */
-       case F10_NBSL_EXT_ERR_ECC:              /* F10 ECC ext err code */
-               break;
+       /* Bail out early if this was an 'observed' error */
+       if (PP(ec) == K8_NBSL_PP_OBS)
+               return;
 
-       default:
-               amd64_mc_printk(mci, KERN_ERR, "NOT ECC: no special error "
-                                              "handling for this error\n");
+       /* Only handle ECC errors */
+       if (xec && xec != F10_NBSL_EXT_ERR_ECC)
                return;
-       }
 
-       if (info->nbsh & K8_NBSH_CECC)
+       if (ecc_type == 2)
                amd64_handle_ce(mci, info);
-       else if (info->nbsh & K8_NBSH_UECC)
+       else if (ecc_type == 1)
                amd64_handle_ue(mci, info);
 
        /*
@@ -2321,138 +2277,941 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
         * catastrophic.
         */
        if (info->nbsh & K8_NBSH_OVERFLOW)
-               edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR
-                                         "Error Overflow set");
+               edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR " Error Overflow");
 }
 
-int amd64_process_error_info(struct mem_ctl_info *mci,
-                            struct amd64_error_info_regs *info,
-                            int handle_errors)
+void amd64_decode_bus_error(int node_id, struct err_regs *regs)
 {
-       struct amd64_pvt *pvt;
-       struct amd64_error_info_regs *regs;
-       u32 err_code, ext_ec;
-       int gart_tlb_error = 0;
+       struct mem_ctl_info *mci = mci_lookup[node_id];
 
-       pvt = mci->pvt_info;
+       __amd64_decode_bus_error(mci, regs);
+
+       /*
+        * Check the UE bit of the NB status high register, if set generate some
+        * logs. If NOT a GART error, then process the event as a NO-INFO event.
+        * If it was a GART error, skip that process.
+        *
+        * FIXME: this should go somewhere else, if at all.
+        */
+       if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
+               edac_mc_handle_ue_no_info(mci, "UE bit is set");
+}
+
+/*
+ * The main polling 'check' function, called FROM the edac core to perform the
+ * error checking and if an error is encountered, error processing.
+ */
+static void amd64_check(struct mem_ctl_info *mci)
+{
+       struct err_regs regs;
+
+       if (amd64_get_error_info(mci, &regs)) {
+               struct amd64_pvt *pvt = mci->pvt_info;
+               amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
+       }
+}
+
+/*
+ * Input:
+ *     1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
+ *     2) AMD Family index value
+ *
+ * Output:
+ *     Upon return of 0, the following are filled in:
+ *
+ *             struct pvt->addr_f1_ctl
+ *             struct pvt->misc_f3_ctl
+ *
+ *     Filled in with the related device functions of 'dram_f2_ctl'.
+ *     These devices are "reserved" via the pci_get_device()
+ *
+ *     Upon return of 1 (error status):
+ *
+ *             Nothing reserved
+ */
+static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
+{
+       const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
+
+       /* Reserve the ADDRESS MAP Device */
+       pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+                                                   amd64_dev->addr_f1_ctl,
+                                                   pvt->dram_f2_ctl);
 
-       /* If caller doesn't want us to process the error, return */
-       if (!handle_errors)
+       if (!pvt->addr_f1_ctl) {
+               amd64_printk(KERN_ERR, "error address map device not found: "
+                            "vendor %x device 0x%x (broken BIOS?)\n",
+                            PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
                return 1;
+       }
 
-       regs = info;
-
-       debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
-       debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
-               pvt->mc_node_id, regs->nbeah, regs->nbeal);
-       debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
-               regs->nbsh, regs->nbsl);
-       debugf1("  Valid Error=%s Overflow=%s\n",
-               (regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
-       debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
-               (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_ERR_ENABLE) ?
-                       "True" : "False");
-       debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
-               (regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_PCC) ?
-                       "True" : "False");
-       debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
-               (regs->nbsh & K8_NBSH_CECC) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_UECC) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
-                       "True" : "False");
-       debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
-               (regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
+       /* Reserve the MISC Device */
+       pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+                                                   amd64_dev->misc_f3_ctl,
+                                                   pvt->dram_f2_ctl);
+
+       if (!pvt->misc_f3_ctl) {
+               pci_dev_put(pvt->addr_f1_ctl);
+               pvt->addr_f1_ctl = NULL;
+
+               amd64_printk(KERN_ERR, "error miscellaneous device not found: "
+                            "vendor %x device 0x%x (broken BIOS?)\n",
+                            PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
+               return 1;
+       }
+
+       debugf1("    Addr Map device PCI Bus ID:\t%s\n",
+               pci_name(pvt->addr_f1_ctl));
+       debugf1("    DRAM MEM-CTL PCI Bus ID:\t%s\n",
+               pci_name(pvt->dram_f2_ctl));
+       debugf1("    Misc device PCI Bus ID:\t%s\n",
+               pci_name(pvt->misc_f3_ctl));
+
+       return 0;
+}
 
+static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
+{
+       pci_dev_put(pvt->addr_f1_ctl);
+       pci_dev_put(pvt->misc_f3_ctl);
+}
 
-       err_code = EXTRACT_ERROR_CODE(regs->nbsl);
+/*
+ * Retrieve the hardware registers of the memory controller (this includes the
+ * 'Address Map' and 'Misc' device regs)
+ */
+static void amd64_read_mc_registers(struct amd64_pvt *pvt)
+{
+       u64 msr_val;
+       int dram, err = 0;
 
-       /* Determine which error type:
-        *      1) GART errors - non-fatal, developmental events
-        *      2) MEMORY errors
-        *      3) BUS errors
-        *      4) Unknown error
+       /*
+        * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
+        * those are Read-As-Zero
         */
-       if (TEST_TLB_ERROR(err_code)) {
+       rdmsrl(MSR_K8_TOP_MEM1, msr_val);
+       pvt->top_mem = msr_val >> 23;
+       debugf0("  TOP_MEM=0x%08llx\n", pvt->top_mem);
+
+       /* check first whether TOP_MEM2 is enabled */
+       rdmsrl(MSR_K8_SYSCFG, msr_val);
+       if (msr_val & (1U << 21)) {
+               rdmsrl(MSR_K8_TOP_MEM2, msr_val);
+               pvt->top_mem2 = msr_val >> 23;
+               debugf0("  TOP_MEM2=0x%08llx\n", pvt->top_mem2);
+       } else
+               debugf0("  TOP_MEM2 disabled.\n");
+
+       amd64_cpu_display_info(pvt);
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
+       if (err)
+               goto err_reg;
+
+       if (pvt->ops->read_dram_ctl_register)
+               pvt->ops->read_dram_ctl_register(pvt);
+
+       for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
                /*
-                * GART errors are intended to help graphics driver developers
-                * to detect bad GART PTEs. It is recommended by AMD to disable
-                * GART table walk error reporting by default[1] (currently
-                * being disabled in mce_cpu_quirks()) and according to the
-                * comment in mce_cpu_quirks(), such GART errors can be
-                * incorrectly triggered. We may see these errors anyway and
-                * unless requested by the user, they won't be reported.
-                *
-                * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
-                *     AMD NPT family 0Fh processors
+                * Call CPU specific READ function to get the DRAM Base and
+                * Limit values from the DCT.
                 */
-               if (report_gart_errors == 0)
-                       return 1;
+               pvt->ops->read_dram_base_limit(pvt, dram);
 
                /*
-                * Only if GART error reporting is requested should we generate
-                * any logs.
+                * Only print out debug info on rows with both R and W enabled.
+                * In normal processing, the compiler should optimize this
+                * whole 'if' debug output block away.
                 */
-               gart_tlb_error = 1;
-
-               debugf1("GART TLB error\n");
-               amd64_decode_gart_tlb_error(mci, info);
-       } else if (TEST_MEM_ERROR(err_code)) {
-               debugf1("Memory/Cache error\n");
-               amd64_decode_mem_cache_error(mci, info);
-       } else if (TEST_BUS_ERROR(err_code)) {
-               debugf1("Bus (Link/DRAM) error\n");
-               amd64_decode_bus_error(mci, info);
-       } else {
-               /* shouldn't reach here! */
-               amd64_mc_printk(mci, KERN_WARNING,
-                            "%s(): unknown MCE error 0x%x\n", __func__,
-                            err_code);
+               if (pvt->dram_rw_en[dram] != 0) {
+                       debugf1("  DRAM_BASE[%d]: 0x%8.08x-%8.08x "
+                               "DRAM_LIMIT:  0x%8.08x-%8.08x\n",
+                               dram,
+                               (u32)(pvt->dram_base[dram] >> 32),
+                               (u32)(pvt->dram_base[dram] & 0xFFFFFFFF),
+                               (u32)(pvt->dram_limit[dram] >> 32),
+                               (u32)(pvt->dram_limit[dram] & 0xFFFFFFFF));
+                       debugf1("        IntlvEn=%s %s %s "
+                               "IntlvSel=%d DstNode=%d\n",
+                               pvt->dram_IntlvEn[dram] ?
+                                       "Enabled" : "Disabled",
+                               (pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
+                               (pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
+                               pvt->dram_IntlvSel[dram],
+                               pvt->dram_DstNode[dram]);
+               }
        }
 
-       ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl);
-       amd64_mc_printk(mci, KERN_ERR,
-               "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
+       amd64_read_dct_base_mask(pvt);
 
-       if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
-                       ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
-                       (ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
-                       EXTRACT_LDT_LINK(info->nbsh)) {
+       err = pci_read_config_dword(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
+       if (err)
+               goto err_reg;
 
-               amd64_mc_printk(mci, KERN_ERR,
-                       "Error on hypertransport link: %s\n",
-                       htlink_msgs[
-                       EXTRACT_LDT_LINK(info->nbsh)]);
+       amd64_read_dbam_reg(pvt);
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl,
+                               F10_ONLINE_SPARE, &pvt->online_spare);
+       if (err)
+               goto err_reg;
+
+       err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+       if (err)
+               goto err_reg;
+
+       err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
+       if (err)
+               goto err_reg;
+
+       if (!dct_ganging_enabled(pvt)) {
+               err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1,
+                                               &pvt->dclr1);
+               if (err)
+                       goto err_reg;
+
+               err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_1,
+                                               &pvt->dchr1);
+               if (err)
+                       goto err_reg;
        }
 
+       amd64_dump_misc_regs(pvt);
+
+       return;
+
+err_reg:
+       debugf0("Reading an MC register failed\n");
+}
+
+/*
+ * NOTE: CPU Revision Dependent code
+ *
+ * Input:
+ *     @csrow_nr ChipSelect Row Number (0..pvt->cs_count-1)
+ *     k8 private pointer to -->
+ *                     DRAM Bank Address mapping register
+ *                     node_id
+ *                     DCL register where dual_channel_active is
+ *
+ * The DBAM register consists of 4 sets of 4 bits each, defined as:
+ *
+ * Bits:       CSROWs
+ * 0-3         CSROWs 0 and 1
+ * 4-7         CSROWs 2 and 3
+ * 8-11                CSROWs 4 and 5
+ * 12-15       CSROWs 6 and 7
+ *
+ * Values range from 0 to 15.
+ * The meaning of the values depends on CPU revision and dual-channel state;
+ * see the relevant BKDG for more info.
+ *
+ * The memory controller provides for a total of only 8 CSROWs in its current
+ * architecture. Each "pair" of CSROWs normally represents just one DIMM in
+ * single channel or two (2) DIMMs in dual channel mode.
+ *
+ * The following code logic collapses the various tables for CSROW based on CPU
+ * revision.
+ *
+ * Returns:
+ *     The number of PAGE_SIZE pages that the specified CSROW number
+ *     encompasses
+ *
+ */
+static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
+{
+       u32 dram_map, nr_pages;
+
        /*
-        * Check the UE bit of the NB status high register, if set generate some
-        * logs. If NOT a GART error, then process the event as a NO-INFO event.
-        * If it was a GART error, skip that process.
+        * The math on this doesn't look right on the surface because x/2*4 can
+        * be simplified to x*2, but this expression makes use of the fact that
+        * it is integer math, where 1/2 = 0. This intermediate value becomes
+        * the number of bits to shift the DBAM register to extract the proper
+        * CSROW field.
+        */
+       dram_map = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
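+       /*
+        * Worked example: csrow_nr == 5 yields (5/2)*4 == 8, extracting DBAM
+        * bits 11:8 -- the nibble covering CSROWs 4 and 5 per the table above.
+        */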
+
+       nr_pages = pvt->ops->dbam_map_to_pages(pvt, dram_map);
+
+       /*
+        * If dual channel then double the memory size of single channel.
+        * Channel count is 1 or 2
         */
-       if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
-               amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
-               if (!gart_tlb_error)
-                       edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
+       nr_pages <<= (pvt->channel_count - 1);
+
+       debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, dram_map);
+       debugf0("    nr_pages= %u  channel-count = %d\n",
+               nr_pages, pvt->channel_count);
+
+       return nr_pages;
+}
+
+/*
+ * Initialize the array of csrow attribute instances, based on the values
+ * from pci config hardware registers.
+ */
+static int amd64_init_csrows(struct mem_ctl_info *mci)
+{
+       struct csrow_info *csrow;
+       struct amd64_pvt *pvt;
+       u64 input_addr_min, input_addr_max, sys_addr;
+       int i, err = 0, empty = 1;
+
+       pvt = mci->pvt_info;
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
+       if (err)
+               debugf0("Reading K8_NBCFG failed\n");
+
+       debugf0("NBCFG= 0x%x  CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
+               (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+               (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
+               );
+
+       for (i = 0; i < pvt->cs_count; i++) {
+               csrow = &mci->csrows[i];
+
+               if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
+                       debugf1("----CSROW %d EMPTY for node %d\n", i,
+                               pvt->mc_node_id);
+                       continue;
+               }
+
+               debugf1("----CSROW %d VALID for MC node %d\n",
+                       i, pvt->mc_node_id);
+
+               empty = 0;
+               csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
+               find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
+               sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
+               csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
+               sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
+               csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
+               csrow->page_mask = ~mask_from_dct_mask(pvt, i);
+               /* 8 bytes of resolution */
+
+               csrow->mtype = amd64_determine_memory_type(pvt);
+
+               debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
+               debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
+                       (unsigned long)input_addr_min,
+                       (unsigned long)input_addr_max);
+               debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
+                       (unsigned long)sys_addr, csrow->page_mask);
+               debugf1("    nr_pages: %u  first_page: 0x%lx "
+                       "last_page: 0x%lx\n",
+                       (unsigned)csrow->nr_pages,
+                       csrow->first_page, csrow->last_page);
+
+               /*
+                * determine whether CHIPKILL or JUST ECC or NO ECC is operating
+                */
+               if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
+                       csrow->edac_mode =
+                           (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
+                           EDAC_S4ECD4ED : EDAC_SECDED;
+               else
+                       csrow->edac_mode = EDAC_NONE;
        }
 
-       if (regs->nbsh & K8_NBSH_PCC)
-               amd64_mc_printk(mci, KERN_CRIT,
-                       "PCC (processor context corrupt) set\n");
+       return empty;
+}
 
-       return 1;
+/*
+ * Only if 'ecc_enable_override' is set AND the BIOS had ECC disabled do we
+ * enable it here.
+ */
+static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+{
+       struct amd64_pvt *pvt = mci->pvt_info;
+       const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+       int cpu, idx = 0, err = 0;
+       struct msr msrs[cpumask_weight(cpumask)];
+       u32 value;
+       u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+       if (!ecc_enable_override)
+               return;
+
+       memset(msrs, 0, sizeof(msrs));
+
+       amd64_printk(KERN_WARNING,
+               "'ecc_enable_override' parameter is active, "
+               "enabling AMD ECC hardware now: CAUTION\n");
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+       if (err)
+               debugf0("Reading K8_NBCTL failed\n");
+
+       /* turn on UECCn and CECCEn bits */
+       pvt->old_nbctl = value & mask;
+       pvt->nbctl_mcgctl_saved = 1;
+
+       value |= mask;
+       pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+       rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       for_each_cpu(cpu, cpumask) {
+               if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
+                       set_bit(idx, &pvt->old_mcgctl);
+
+               msrs[idx].l |= K8_MSR_MCGCTL_NBE;
+               idx++;
+       }
+       wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+       if (err)
+               debugf0("Reading K8_NBCFG failed\n");
+
+       debugf0("NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+               (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+               (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+       if (!(value & K8_NBCFG_ECC_ENABLE)) {
+               amd64_printk(KERN_WARNING,
+                       "This node reports that DRAM ECC is "
+                       "currently Disabled; ENABLING now\n");
+
+               /* Attempt to turn on DRAM ECC Enable */
+               value |= K8_NBCFG_ECC_ENABLE;
+               pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+
+               err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+               if (err)
+                       debugf0("Reading K8_NBCFG failed\n");
+
+               if (!(value & K8_NBCFG_ECC_ENABLE)) {
+                       amd64_printk(KERN_WARNING,
+                               "Hardware rejects Enabling DRAM ECC checking\n"
+                               "Check memory DIMM configuration\n");
+               } else {
+                       amd64_printk(KERN_DEBUG,
+                               "Hardware accepted DRAM ECC Enable\n");
+               }
+       }
+       debugf0("NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+               (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+               (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+       pvt->ctl_error_info.nbcfg = value;
+}
+
+static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+{
+       const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+       int cpu, idx = 0, err = 0;
+       struct msr msrs[cpumask_weight(cpumask)];
+       u32 value;
+       u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+       if (!pvt->nbctl_mcgctl_saved)
+               return;
+
+       memset(msrs, 0, sizeof(msrs));
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+       if (err)
+               debugf0("Reading K8_NBCTL failed\n");
+       value &= ~mask;
+       value |= pvt->old_nbctl;
+
+       /* restore the NB Enable MCGCTL bit */
+       pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+       rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       for_each_cpu(cpu, cpumask) {
+               msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
+               if (test_bit(idx, &pvt->old_mcgctl))
+                       msrs[idx].l |= K8_MSR_MCGCTL_NBE;
+               idx++;
+       }
+
+       wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+}
+
+/* get all cores on this DCT */
+static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               if (amd_get_nb_id(cpu) == nid)
+                       cpumask_set_cpu(cpu, mask);
+}
+
+/* check MCG_CTL on all the cpus on this node */
+static bool amd64_nb_mce_bank_enabled_on_node(int nid)
+{
+       cpumask_t mask;
+       struct msr *msrs;
+       int cpu, nbe, idx = 0;
+       bool ret = false;
+
+       cpumask_clear(&mask);
+
+       get_cpus_on_this_dct_cpumask(&mask, nid);
+
+       msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
+       if (!msrs) {
+               amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
+                            __func__);
+               return false;
+       }
+
+       rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
+
+       for_each_cpu(cpu, &mask) {
+               nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
+
+               debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+                       cpu, msrs[idx].q,
+                       (nbe ? "enabled" : "disabled"));
+
+               if (!nbe)
+                       goto out;
+
+               idx++;
+       }
+       ret = true;
+
+out:
+       kfree(msrs);
+       return ret;
+}
+
+/*
+ * EDAC requires that the BIOS have ECC enabled before taking over the
+ * processing of ECC errors, because only the BIOS can properly initialize
+ * the memory system completely. A command line option allows one to
+ * force-enable hardware ECC later in amd64_enable_ecc_error_reporting().
+ */
+static const char *ecc_warning =
+       "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
+       " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
+       " Also, use of the override can cause unknown side effects.\n";
+
+static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+{
+       u32 value;
+       int err = 0;
+       u8 ecc_enabled = 0;
+       bool nb_mce_en = false;
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+       if (err)
+               debugf0("Reading K8_NBCFG failed\n");
+
+       ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
+       if (!ecc_enabled)
+               amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
+                            "is currently disabled, set F3x%x[22] (%s).\n",
+                            K8_NBCFG, pci_name(pvt->misc_f3_ctl));
+       else
+               amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
+
+       nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
+       if (!nb_mce_en)
+               amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
+                            "0x%08x[4] on node %d to enable.\n",
+                            MSR_IA32_MCG_CTL, pvt->mc_node_id);
+
+       if (!ecc_enabled || !nb_mce_en) {
+               if (!ecc_enable_override) {
+                       amd64_printk(KERN_WARNING, "%s", ecc_warning);
+                       return -ENODEV;
+               }
+       } else
+               /* CLEAR the override, since BIOS controlled it */
+               ecc_enable_override = 0;
+
+       return 0;
+}
+
+struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
+                                         ARRAY_SIZE(amd64_inj_attrs) +
+                                         1];
+
+struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
+
+static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+{
+       unsigned int i = 0, j = 0;
+
+       for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
+               sysfs_attrs[i] = amd64_dbg_attrs[i];
+
+       for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
+               sysfs_attrs[i] = amd64_inj_attrs[j];
+
+       sysfs_attrs[i] = terminator;
+
+       mci->mc_driver_sysfs_attributes = sysfs_attrs;
+}
+
+static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+{
+       struct amd64_pvt *pvt = mci->pvt_info;
+
+       mci->mtype_cap          = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
+       mci->edac_ctl_cap       = EDAC_FLAG_NONE;
+
+       if (pvt->nbcap & K8_NBCAP_SECDED)
+               mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+
+       if (pvt->nbcap & K8_NBCAP_CHIPKILL)
+               mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+
+       mci->edac_cap           = amd64_determine_edac_cap(pvt);
+       mci->mod_name           = EDAC_MOD_STR;
+       mci->mod_ver            = EDAC_AMD64_VERSION;
+       mci->ctl_name           = get_amd_family_name(pvt->mc_type_index);
+       mci->dev_name           = pci_name(pvt->dram_f2_ctl);
+       mci->ctl_page_to_phys   = NULL;
+
+       /* IMPORTANT: Set the polling 'check' function in this module */
+       mci->edac_check         = amd64_check;
+
+       /* memory scrubber interface */
+       mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
+       mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+}
+
+/*
+ * Init stuff for this DRAM Controller device.
+ *
+ * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
+ * Space feature MUST be enabled on ALL Processors prior to actually reading
+ * from the ECS registers, since the loading of the module can occur on any
+ * 'core' and cores don't 'see' the other processors' ECS data when ECS is
+ * NOT enabled on them. Our solution is to first enable ECS access in this
+ * routine on all processors, gather some data in an amd64_pvt structure and
+ * later come back in a finish-setup function to perform the final
+ * initialization. See amd64_init_2nd_stage() for that.
+ */
+static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
+                                   int mc_type_index)
+{
+       struct amd64_pvt *pvt = NULL;
+       int err = 0, ret;
+
+       ret = -ENOMEM;
+       pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+       if (!pvt)
+               goto err_exit;
+
+       pvt->mc_node_id = get_node_id(dram_f2_ctl);
+
+       pvt->dram_f2_ctl        = dram_f2_ctl;
+       pvt->ext_model          = boot_cpu_data.x86_model >> 4;
+       pvt->mc_type_index      = mc_type_index;
+       pvt->ops                = family_ops(mc_type_index);
+       pvt->old_mcgctl         = 0;
+
+       /*
+        * We have the dram_f2_ctl device as an argument, now go reserve its
+        * sibling devices from the PCI system.
+        */
+       ret = -ENODEV;
+       err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
+       if (err)
+               goto err_free;
+
+       ret = -EINVAL;
+       err = amd64_check_ecc_enabled(pvt);
+       if (err)
+               goto err_put;
+
+       /*
+        * Key operation here: setup of HW prior to performing ops on it. Some
+        * setup is required to access ECS data. After this is performed, the
+        * 'teardown' function must be called upon error and normal exit paths.
+        */
+       if (boot_cpu_data.x86 >= 0x10)
+               amd64_setup(pvt);
+
+       /*
+        * Save the pointer to the private data for use in 2nd initialization
+        * stage
+        */
+       pvt_lookup[pvt->mc_node_id] = pvt;
+
+       return 0;
+
+err_put:
+       amd64_free_mc_sibling_devices(pvt);
+
+err_free:
+       kfree(pvt);
+
+err_exit:
+       return ret;
 }
-EXPORT_SYMBOL_GPL(amd64_process_error_info);
 
+/*
+ * This is the finishing stage of the init code. It needs to be performed after
+ * all MCs' hardware has been prepped for accessing extended config space.
+ */
+static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
+{
+       int node_id = pvt->mc_node_id;
+       struct mem_ctl_info *mci;
+       int ret, err = 0;
+
+       amd64_read_mc_registers(pvt);
+
+       ret = -ENODEV;
+       if (pvt->ops->probe_valid_hardware) {
+               err = pvt->ops->probe_valid_hardware(pvt);
+               if (err)
+                       goto err_exit;
+       }
+
+       /*
+        * We need to determine how many memory channels there are. Then use
+        * that information for calculating the size of the dynamic instance
+        * tables in the 'mci' structure
+        */
+       pvt->channel_count = pvt->ops->early_channel_count(pvt);
+       if (pvt->channel_count < 0)
+               goto err_exit;
+
+       ret = -ENOMEM;
+       mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id);
+       if (!mci)
+               goto err_exit;
+
+       mci->pvt_info = pvt;
+
+       mci->dev = &pvt->dram_f2_ctl->dev;
+       amd64_setup_mci_misc_attributes(mci);
+
+       if (amd64_init_csrows(mci))
+               mci->edac_cap = EDAC_FLAG_NONE;
+
+       amd64_enable_ecc_error_reporting(mci);
+       amd64_set_mc_sysfs_attributes(mci);
+
+       ret = -ENODEV;
+       if (edac_mc_add_mc(mci)) {
+               debugf1("failed edac_mc_add_mc()\n");
+               goto err_add_mc;
+       }
+
+       mci_lookup[node_id] = mci;
+       pvt_lookup[node_id] = NULL;
+
+       /* register stuff with EDAC MCE */
+       if (report_gart_errors)
+               amd_report_gart_errors(true);
+
+       amd_register_ecc_decoder(amd64_decode_bus_error);
+
+       return 0;
+
+err_add_mc:
+       edac_mc_free(mci);
+
+err_exit:
+       debugf0("failure to init 2nd stage: ret=%d\n", ret);
+
+       amd64_restore_ecc_error_reporting(pvt);
+
+       if (boot_cpu_data.x86 > 0xf)
+               amd64_teardown(pvt);
+
+       amd64_free_mc_sibling_devices(pvt);
+
+       kfree(pvt_lookup[pvt->mc_node_id]);
+       pvt_lookup[node_id] = NULL;
+
+       return ret;
+}
+
+
+static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
+                                const struct pci_device_id *mc_type)
+{
+       int ret = 0;
+
+       debugf0("(MC node=%d,mc_type='%s')\n", get_node_id(pdev),
+               get_amd_family_name(mc_type->driver_data));
+
+       ret = pci_enable_device(pdev);
+       if (ret < 0)
+               ret = -EIO;
+       else
+               ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
+
+       if (ret < 0)
+               debugf0("ret=%d\n", ret);
+
+       return ret;
+}
+
+static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct amd64_pvt *pvt;
+
+       /* Remove from EDAC CORE tracking list */
+       mci = edac_mc_del_mc(&pdev->dev);
+       if (!mci)
+               return;
+
+       pvt = mci->pvt_info;
+
+       amd64_restore_ecc_error_reporting(pvt);
+
+       if (boot_cpu_data.x86 > 0xf)
+               amd64_teardown(pvt);
+
+       amd64_free_mc_sibling_devices(pvt);
+
+       mci_lookup[pvt->mc_node_id] = NULL;
+       mci->pvt_info = NULL;
+
+       kfree(pvt);
+
+       /* unregister from EDAC MCE */
+       amd_report_gart_errors(false);
+       amd_unregister_ecc_decoder(amd64_decode_bus_error);
+
+       /* Free the EDAC CORE resources */
+       edac_mc_free(mci);
+}
+
+/*
+ * This table is part of the interface for loading drivers for PCI devices. The
+ * PCI core identifies what devices are on a system during boot, and then
+ * consults this table to see if this driver handles a given device it found.
+ */
+static const struct pci_device_id amd64_pci_table[] __devinitdata = {
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = K8_CPUS
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = F10_CPUS
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = F11_CPUS
+       },
+       {0, }
+};
+MODULE_DEVICE_TABLE(pci, amd64_pci_table);
+
+static struct pci_driver amd64_pci_driver = {
+       .name           = EDAC_MOD_STR,
+       .probe          = amd64_init_one_instance,
+       .remove         = __devexit_p(amd64_remove_one_instance),
+       .id_table       = amd64_pci_table,
+};
+
+static void amd64_setup_pci_device(void)
+{
+       struct mem_ctl_info *mci;
+       struct amd64_pvt *pvt;
+
+       if (amd64_ctl_pci)
+               return;
+
+       mci = mci_lookup[0];
+       if (mci) {
+
+               pvt = mci->pvt_info;
+               amd64_ctl_pci =
+                       edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
+                                                   EDAC_MOD_STR);
+
+               if (!amd64_ctl_pci) {
+                       pr_warning("%s(): Unable to create PCI control\n",
+                                  __func__);
+
+                       pr_warning("%s(): PCI error report via EDAC not set\n",
+                                  __func__);
+               }
+       }
+}
+
+static int __init amd64_edac_init(void)
+{
+       int nb, err = -ENODEV;
+
+       edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
+
+       opstate_init();
+
+       if (cache_k8_northbridges() < 0)
+               return err;
+
+       err = pci_register_driver(&amd64_pci_driver);
+       if (err)
+               return err;
+
+       /*
+        * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
+        * amd64_pvt structs. These will be used in the 2nd stage init function
+        * to finish initialization of the MC instances.
+        */
+       for (nb = 0; nb < num_k8_northbridges; nb++) {
+               if (!pvt_lookup[nb])
+                       continue;
+
+               err = amd64_init_2nd_stage(pvt_lookup[nb]);
+               if (err)
+                       goto err_2nd_stage;
+       }
+
+       amd64_setup_pci_device();
+
+       return 0;
+
+err_2nd_stage:
+       debugf0("2nd stage failed\n");
+
+       pci_unregister_driver(&amd64_pci_driver);
+
+       return err;
+}
+
+static void __exit amd64_edac_exit(void)
+{
+       if (amd64_ctl_pci)
+               edac_pci_release_generic_ctl(amd64_ctl_pci);
+
+       pci_unregister_driver(&amd64_pci_driver);
+}
+
+module_init(amd64_edac_init);
+module_exit(amd64_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
+               "Dave Peterson, Thayne Harbaugh");
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
+               EDAC_AMD64_VERSION);
 
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");