Merge branch 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux...
[safe/jmp/linux-2.6] / arch / x86 / mm / fault.c
index c403526..f4cee90 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>             /* max_low_pfn                  */
 #include <linux/kprobes.h>             /* __kprobes, ...               */
 #include <linux/mmiotrace.h>           /* kmmio_handler, ...           */
-#include <linux/perf_counter.h>                /* perf_swcounter_event         */
+#include <linux/perf_event.h>          /* perf_sw_event                */
 
 #include <asm/traps.h>                 /* dotraplinkage, ...           */
 #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
        info.si_errno   = 0;
        info.si_code    = si_code;
        info.si_addr    = (void __user *)address;
+       info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
        force_sig_info(si_signo, &info, tsk);
 }
@@ -285,26 +286,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
                tsk->thread.screen_bitmap |= 1 << bit;
 }
 
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
 {
-       __typeof__(pte_val(__pte(0))) page;
+       return pfn < max_low_pfn;
+}
 
-       page = read_cr3();
-       page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+       pgd_t *base = __va(read_cr3());
+       pgd_t *pgd = &base[pgd_index(address)];
+       pmd_t *pmd;
+       pte_t *pte;
 
 #ifdef CONFIG_X86_PAE
-       printk("*pdpt = %016Lx ", page);
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && page & _PAGE_PRESENT) {
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-                                                       & (PTRS_PER_PMD - 1)];
-               printk(KERN_CONT "*pde = %016Lx ", page);
-               page &= ~_PAGE_NX;
-       }
-#else
-       printk("*pde = %08lx ", page);
+       printk("*pdpt = %016Lx ", pgd_val(*pgd));
+       if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+               goto out;
 #endif
+       pmd = pmd_offset(pud_offset(pgd, address), address);
+       printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
 
        /*
         * We must not directly access the pte in the highpte
@@ -312,16 +312,12 @@ static void dump_pagetable(unsigned long address)
         * And let's rather not kmap-atomic the pte, just in case
         * it's allocated already:
         */
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && (page & _PAGE_PRESENT)
-           && !(page & _PAGE_PSE)) {
-
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-                                                       & (PTRS_PER_PTE - 1)];
-               printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-       }
+       if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+               goto out;
 
+       pte = pte_offset_kernel(pmd, address);
+       printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+out:
        printk("\n");
 }
 
@@ -426,10 +422,11 @@ static noinline int vmalloc_fault(unsigned long address)
 }
 
 static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR 
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -449,16 +446,12 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-       pgd_t *pgd;
+       pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+       pgd_t *pgd = base + pgd_index(address);
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
-       pgd = (pgd_t *)read_cr3();
-
-       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
-       pgd += pgd_index(address);
        if (bad_address(pgd))
                goto bad;
 
@@ -696,7 +689,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
@@ -798,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+         unsigned int fault)
 {
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
+       int code = BUS_ADRERR;
 
        up_read(&mm->mmap_sem);
 
@@ -817,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
        tsk->thread.error_code  = error_code;
        tsk->thread.trap_no     = 14;
 
-       force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+       if (fault & VM_FAULT_HWPOISON) {
+               printk(KERN_ERR
+       "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+                       tsk->comm, tsk->pid, address);
+               code = BUS_MCEERR_AR;
+       }
+#endif
+       force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
@@ -827,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
        if (fault & VM_FAULT_OOM) {
                out_of_memory(regs, error_code, address);
        } else {
-               if (fault & VM_FAULT_SIGBUS)
-                       do_sigbus(regs, error_code, address);
+               if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+                       do_sigbus(regs, error_code, address, fault);
                else
                        BUG();
        }
@@ -1025,7 +1028,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);
 
-       perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
        /*
         * If we're in an interrupt, have no user context or are running
@@ -1113,7 +1116,7 @@ good_area:
         * make sure we exit gracefully rather than endlessly redo
         * the fault:
         */
-       fault = handle_mm_fault(mm, vma, address, write);
+       fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mm_fault_error(regs, error_code, address, fault);
@@ -1122,11 +1125,11 @@ good_area:
 
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
-               perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                                     regs, address);
        } else {
                tsk->min_flt++;
-               perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                                     regs, address);
        }