ixgbevf: Tell network stack to stop tx when the VF detects PF reset
[safe/jmp/linux-2.6] / drivers / edac / edac_mce_amd.c
index b30a830..8fc91a0 100644 (file)
@@ -228,6 +228,69 @@ wrong_ic_mce:
        pr_warning("Corrupted IC MCE info?\n");
 }
 
+/*
+ * Decode a Bus Unit machine check from the bank status value.
+ *
+ * The low 16 bits are used as the error code and bits 19:16 as the
+ * extended error code. Every combination that is not matched below is
+ * reported as corrupted.
+ */
+static void amd_decode_bu_mce(u64 mc2_status)
+{
+       u32 ec = mc2_status & 0xffff;
+       u32 xec = (mc2_status >> 16) & 0xf;
+       u8 r4 = (ec >> 4) & 0xf;
+
+       pr_emerg(" Bus Unit Error");
+
+       switch (xec) {
+       case 0x1:
+               pr_cont(" in the write data buffers.\n");
+               return;
+
+       case 0x3:
+               pr_cont(" in the victim data buffers.\n");
+               return;
+
+       case 0x2:
+               /* Only a memory error is a valid match for this code. */
+               if (!MEM_ERROR(ec))
+                       break;
+
+               pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
+               return;
+
+       case 0x0:
+               if (TLB_ERROR(ec)) {
+                       pr_cont(": %s error in a Page Descriptor Cache or "
+                               "Guest TLB.\n", TT_MSG(ec));
+                       return;
+               }
+
+               if (BUS_ERROR(ec)) {
+                       pr_cont(": %s/ECC error in data read from NB: %s.\n",
+                               RRRR_MSG(ec), PP_MSG(ec));
+                       return;
+               }
+
+               if (!MEM_ERROR(ec))
+                       break;
+
+               /* Memory error: bits 7:4 of the error code pick the message. */
+               if (r4 >= 0x7) {
+                       pr_cont(": %s error during data copyback.\n",
+                               RRRR_MSG(ec));
+                       return;
+               }
+
+               if (r4 <= 0x1) {
+                       pr_cont(": %s parity/ECC error during data "
+                               "access from L2.\n", RRRR_MSG(ec));
+                       return;
+               }
+               break;
+
+       default:
+               break;
+       }
+
+       /* Nothing matched: the status does not carry a known BU signature. */
+       pr_warning("Corrupted BU MCE info?\n");
+}
+
+/*
+ * Decode a Load Store unit machine check from the bank status value.
+ *
+ * The low 16 bits are used as the error code and bits 19:16 as the
+ * extended error code. The only signature decoded here is extended code 0
+ * with BUS_ERROR(ec) set and bits 7:4 of the error code equal to 0x3 or
+ * 0x4. Anything else, including a non-zero extended code, is reported as
+ * corrupted instead of being dropped silently after the initial
+ * " Load Store Error" fragment has been printed.
+ */
+static void amd_decode_ls_mce(u64 mc3_status)
+{
+       u32 ec  = mc3_status & 0xffff;
+       u32 xec = (mc3_status >> 16) & 0xf;
+       u8 rrrr = (ec >> 4) & 0xf;
+
+       pr_emerg(" Load Store Error");
+
+       /* No non-zero extended code is decoded here; flag it. */
+       if (xec != 0x0)
+               goto wrong_ls_mce;
+
+       if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
+               goto wrong_ls_mce;
+
+       pr_cont(" during %s.\n", RRRR_MSG(ec));
+       return;
+
+wrong_ls_mce:
+       pr_warning("Corrupted LS MCE info?\n");
+}
+
 void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
 {
        u32 ec  = ERROR_CODE(regs->nbsl);
@@ -236,6 +299,12 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
        if (!handle_errors)
                return;
 
+       /*
+        * GART TLB error reporting is disabled by default. Bail out early.
+        */
+       if (TLB_ERROR(ec) && !report_gart_errors)
+               return;
+
        pr_emerg(" Northbridge Error, node %d", node_id);
 
        /*
@@ -243,14 +312,13 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
         * value encoding has changed so interpret those differently
         */
        if ((boot_cpu_data.x86 == 0x10) &&
-           (boot_cpu_data.x86_model > 8)) {
+           (boot_cpu_data.x86_model > 7)) {
                if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
                        pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
        } else {
-               pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
+               pr_cont(", core: %d\n", fls((regs->nbsh & 0xf) - 1));
        }
 
-
        pr_emerg("%s.\n", EXT_ERR_MSG(xec));
 
        if (BUS_ERROR(ec) && nb_bus_decoder)
@@ -258,24 +326,18 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
 }
 EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
 
+/*
+ * Decode an FR bank machine check.
+ *
+ * Only one signature is recognised, so the whole low 16 bits of the
+ * status are compared at once: 0x0f0f is reported as a CPU watchdog timer
+ * expiry and any other value is flagged as corrupted.
+ */
+static void amd_decode_fr_mce(u64 mc5_status)
+{
+       u64 signature = mc5_status & 0xffff;
+
+       if (signature != 0x0f0f) {
+               pr_warning("Corrupted FR MCE info?\n");
+               return;
+       }
+
+       pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
+}
+
 static inline void amd_decode_err_code(unsigned int ec)
 {
        if (TLB_ERROR(ec)) {
-               /*
-                * GART errors are intended to help graphics driver developers
-                * to detect bad GART PTEs. It is recommended by AMD to disable
-                * GART table walk error reporting by default[1] (currently
-                * being disabled in mce_cpu_quirks()) and according to the
-                * comment in mce_cpu_quirks(), such GART errors can be
-                * incorrectly triggered. We may see these errors anyway and
-                * unless requested by the user, they won't be reported.
-                *
-                * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
-                *     AMD NPT family 0Fh processors
-                */
-               if (!report_gart_errors)
-                       return;
-
                pr_emerg(" Transaction: %s, Cache Level %s\n",
                         TT_MSG(ec), LL_MSG(ec));
        } else if (MEM_ERROR(ec)) {
@@ -290,8 +352,10 @@ static inline void amd_decode_err_code(unsigned int ec)
                pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
 }
 
-void decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+                          void *data)
 {
+       struct mce *m = (struct mce *)data;
        struct err_regs regs;
        int node, ecc;
 
@@ -320,19 +384,62 @@ void decode_mce(struct mce *m)
                amd_decode_ic_mce(m->status);
                break;
 
+       case 2:
+               amd_decode_bu_mce(m->status);
+               break;
+
+       case 3:
+               amd_decode_ls_mce(m->status);
+               break;
+
        case 4:
                regs.nbsl  = (u32) m->status;
                regs.nbsh  = (u32)(m->status >> 32);
                regs.nbeal = (u32) m->addr;
                regs.nbeah = (u32)(m->addr >> 32);
-               node       = per_cpu(cpu_llc_id, m->extcpu);
+               node       = amd_get_nb_id(m->extcpu);
 
                amd_decode_nb_mce(node, &regs, 1);
                break;
 
+       case 5:
+               amd_decode_fr_mce(m->status);
+               break;
+
        default:
                break;
        }
 
        amd_decode_err_code(m->status & 0xffff);
+
+       return NOTIFY_STOP;
 }
+
+/*
+ * Notifier block whose callback is amd_decode_mce(); it is the object that
+ * gets registered on and unregistered from x86_mce_decoder_chain.
+ */
+static struct notifier_block amd_mce_dec_nb = {
+       .notifier_call  = amd_decode_mce,
+};
+
+/*
+ * Hook the AMD MCE decoder into x86_mce_decoder_chain.
+ *
+ * Registration happens only when the boot CPU is an AMD part of family
+ * 0xf or later; on any other CPU this is a no-op. Always returns 0.
+ */
+static int __init mce_amd_init(void)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+               return 0;
+
+       /* Families older than 0xf are not decoded. */
+       if (boot_cpu_data.x86 < 0xf)
+               return 0;
+
+       atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+
+       return 0;
+}
+early_initcall(mce_amd_init);
+
+#ifdef MODULE
+/*
+ * Module exit hook (only built when MODULE is defined): removes
+ * amd_mce_dec_nb from x86_mce_decoder_chain.
+ */
+static void __exit mce_amd_exit(void)
+{
+       atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+}
+
+MODULE_DESCRIPTION("AMD MCE decoder");
+MODULE_ALIAS("edac-mce-amd");
+MODULE_LICENSE("GPL");
+module_exit(mce_amd_exit);
+#endif