x86, mce: implement new status bits

author Andi Kleen <andi@firstfloor.org>

Wed, 27 May 2009 19:56:57 +0000 (21:56 +0200)

committer H. Peter Anvin <hpa@zytor.com>

Wed, 3 Jun 2009 21:45:34 +0000 (14:45 -0700)
author Andi Kleen <andi@firstfloor.org>
Wed, 27 May 2009 19:56:57 +0000 (21:56 +0200)
committer H. Peter Anvin <hpa@zytor.com>
Wed, 3 Jun 2009 21:45:34 +0000 (14:45 -0700)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h

index ba1f889..afd3cdf 100644 (file)
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -15,6 +15,7 @@
  #define MCG_EXT_CNT_MASK       0xff0000     /* Number of Extended registers */
  #define MCG_EXT_CNT_SHIFT      16
  #define MCG_EXT_CNT(c)         (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P              (1ULL<<24)   /* MCA recovery/new status bits */
  
  #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
  #define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
@@ -27,6 +28,15 @@
  #define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
  #define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
  #define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
+#define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
+#define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF  0     /* segment offset */
+#define MCM_ADDR_LINEAR  1     /* linear address */
+#define MCM_ADDR_PHYS   2      /* physical address */
+#define MCM_ADDR_MEM    3      /* memory address */
+#define MCM_ADDR_GENERIC 7     /* generic */
  
  /* Fields are zero when not available */
  struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h

index f126b4a..54dcb8f 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -2,9 +2,14 @@
  
  enum severity_level {
         MCE_NO_SEVERITY,
+       MCE_KEEP_SEVERITY,
         MCE_SOME_SEVERITY,
+       MCE_AO_SEVERITY,
         MCE_UC_SEVERITY,
+       MCE_AR_SEVERITY,
         MCE_PANIC_SEVERITY,
  };
  
  int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c

index c189e89..4f4d2ca 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -19,43 +19,117 @@
   * first. Since there are quite a lot of combinations test the bits in a
   * table-driven way. The rules are simply processed in order, first
   * match wins.
+ *
+ * Note this is only used for machine check exceptions, the corrected
+ * errors use much simpler rules. The exceptions still check for the corrected
+ * errors, but only to leave them alone for the CMCI handler (except for
+ * panic situations)
   */
  
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
  static struct severity {
         u64 mask;
         u64 result;
         unsigned char sev;
         unsigned char mcgmask;
         unsigned char mcgres;
+       unsigned char ser;
+       unsigned char context;
         char *msg;
  } severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
  #define SEV(s) .sev = MCE_ ## s ## _SEVERITY
  #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
  #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
  #define MCGMASK(x, res, s, m, r...) \
         { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+       { .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
         BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
         BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
         BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
-       MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"),
+       /* When MCIP is not set something is very confused */
+       MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+       /* Neither return nor error IP -- no chance to recover -> PANIC */
+       MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+               "Neither restart nor error IP"),
+       MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+               KERNEL),
+       BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+       MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+            "Spurious not enabled", SER),
+
+       /* ignore OVER for UCNA */
+       MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+            "Uncorrected no action required", SER),
+       MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+            "Illegal combination (UCNA with AR=1)", SER),
+       MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+       /* AR add known MCACODs here */
+       MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+            "Action required with lost events", SER),
+       MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+            "Action required; unknown MCACOD", SER),
+
+       /* known AO MCACODs: */
+       MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+            "Action optional: memory scrubbing error", SER),
+       MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+            "Action optional: last level cache writeback error", SER),
+
+       MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+            "Action optional unknown MCACOD", SER),
+       MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+            "Action optional with lost events", SER),
         BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
         BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
         BITSET(0, SOME, "No match")     /* always matches. keep at end */
  };
  
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+       if (m->mcgstatus & MCG_STATUS_EIPV)
+               return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+       /* Unknown, assume kernel */
+       return IN_KERNEL;
+}
+
  int mce_severity(struct mce *a, int tolerant, char **msg)
  {
+       enum context ctx = error_context(a);
         struct severity *s;
+
         for (s = severities;; s++) {
                 if ((a->status & s->mask) != s->result)
                         continue;
                 if ((a->mcgstatus & s->mcgmask) != s->mcgres)
                         continue;
-               if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) &&
-                       tolerant < 1)
-                       return MCE_PANIC_SEVERITY;
+               if (s->ser == SER_REQUIRED && !mce_ser)
+                       continue;
+               if (s->ser == NO_SER && mce_ser)
+                       continue;
+               if (s->context && ctx != s->context)
+                       continue;
                 if (msg)
                         *msg = s->msg;
+               if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+                       if (panic_on_oops || tolerant < 1)
+                               return MCE_PANIC_SEVERITY;
+               }
                 return s->sev;
         }
  }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c

index ff9c732..f051a78 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int                    rip_msr;
  static int                     mce_bootlog = -1;
  static int                     monarch_timeout = -1;
  static int                     mce_panic_timeout;
+int                            mce_ser;
  
  static char                    trigger[128];
  static char                    *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
   * Those are just logged through /dev/mcelog.
   *
   * This is executed in standard interrupt context.
+ *
+ * Note: spec recommends to panic for fatal unsignalled
+ * errors here. However this would be quite problematic --
+ * we would need to reimplement the Monarch handling and
+ * it would mess up the exclusion between exception handler
+ * and poll handler -- so we skip this for now.
+ * These cases should not happen anyways, or only when the CPU
+ * is already totally confused. In this case it's likely it will
+ * not fully execute the machine check handler either.
   */
  void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
  {
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                         continue;
  
                 /*
-                * Uncorrected events are handled by the exception handler
-                * when it is enabled. But when the exception is disabled log
-                * everything.
+                * Uncorrected or signalled events are handled by the exception
+                * handler when it is enabled, so don't process those here.
                  *
                  * TBD do the same check for MCI_STATUS_EN here?
                  */
-               if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+               if (!(flags & MCP_UC) &&
+                   (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
                         continue;
  
                 if (m.status & MCI_STATUS_MISCV)
@@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         barrier();
  
         /*
+        * When there is no restart IP we must always kill or panic.
+        */
+       if (!(m.mcgstatus & MCG_STATUS_RIPV))
+               kill_it = 1;
+
+       /*
          * Go through all the banks in exclusion of the other CPUs.
          * This way we don't report duplicated events on shared banks
          * because the first one to see it will clear it.
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                         continue;
  
                 /*
-                * Non uncorrected errors are handled by machine_check_poll
-                * Leave them alone, unless this panics.
+                * Non uncorrected or non signaled errors are handled by
+                * machine_check_poll. Leave them alone, unless this panics.
                  */
-               if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
+               if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+                       !no_way_out)
                         continue;
  
                 /*
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                  */
                 add_taint(TAINT_MACHINE_CHECK);
  
-               __set_bit(i, toclear);
+               severity = mce_severity(&m, tolerant, NULL);
  
-               if (m.status & MCI_STATUS_EN) {
-                       /*
-                        * If this error was uncorrectable and there was
-                        * an overflow, we're in trouble.  If no overflow,
-                        * we might get away with just killing a task.
-                        */
-                       if (m.status & MCI_STATUS_UC)
-                               kill_it = 1;
-               } else {
+               /*
+                * When the machine check is one for the corrected-error
+                * handler, don't touch it unless we're panicking.
+                */
+               if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+                       continue;
+               __set_bit(i, toclear);
+               if (severity == MCE_NO_SEVERITY) {
                         /*
                          * Machine check event was not enabled. Clear, but
                          * ignore.
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                         continue;
                 }
  
+               /*
+                * Kill on action required.
+                */
+               if (severity == MCE_AR_SEVERITY)
+                       kill_it = 1;
+
                 if (m.status & MCI_STATUS_MISCV)
                         m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
                 if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                 mce_get_rip(&m, regs);
                 mce_log(&m);
  
-               severity = mce_severity(&m, tolerant, NULL);
                 if (severity > worst) {
                         *final = m;
                         worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
          * one task, do that.  If the user has set the tolerance very
          * high, don't try to do anything at all.
          */
-       if (kill_it && tolerant < 3) {
-               int user_space = 0;
-
-               /*
-                * If the EIPV bit is set, it means the saved IP is the
-                * instruction which caused the MCE.
-                */
-               if (m.mcgstatus & MCG_STATUS_EIPV)
-                       user_space = final->ip && (final->cs & 3);
  
-               /*
-                * If we know that the error was in user space, send a
-                * SIGBUS.  Otherwise, panic if tolerance is low.
-                *
-                * force_sig() takes an awful lot of locks and has a slight
-                * risk of deadlocking.
-                */
-               if (user_space) {
-                       force_sig(SIGBUS, current);
-               } else if (panic_on_oops || tolerant < 2) {
-                       mce_panic("Uncorrected machine check", final, msg);
-               }
-       }
+       if (kill_it && tolerant < 3)
+               force_sig(SIGBUS, current);
  
         /* notify userspace ASAP */
         set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
         if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
                 rip_msr = MSR_IA32_MCG_EIP;
  
+       if (cap & MCG_SER_P)
+               mce_ser = 1;
+
         return 0;
  }
author	Andi Kleen <andi@firstfloor.org>
	Wed, 27 May 2009 19:56:57 +0000 (21:56 +0200)
committer	H. Peter Anvin <hpa@zytor.com>
	Wed, 3 Jun 2009 21:45:34 +0000 (14:45 -0700)
arch/x86/include/asm/mce.h		patch \| blob \| history
arch/x86/kernel/cpu/mcheck/mce-internal.h		patch \| blob \| history
arch/x86/kernel/cpu/mcheck/mce-severity.c		patch \| blob \| history
arch/x86/kernel/cpu/mcheck/mce.c		patch \| blob \| history