kcore: register module area in generic way
[safe/jmp/linux-2.6] / arch / x86 / mm / fault.c
index 5ec7ae3..82728f2 100644 (file)
 #include <linux/bootmem.h>             /* max_low_pfn                  */
 #include <linux/kprobes.h>             /* __kprobes, ...               */
 #include <linux/mmiotrace.h>           /* kmmio_handler, ...           */
+#include <linux/perf_event.h>          /* perf_sw_event                */
 
 #include <asm/traps.h>                 /* dotraplinkage, ...           */
 #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
+#include <asm/kmemcheck.h>             /* kmemcheck_*(), ...           */
 
 /*
  * Page fault error code bits:
@@ -283,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
                tsk->thread.screen_bitmap |= 1 << bit;
 }
 
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
 {
-       __typeof__(pte_val(__pte(0))) page;
+       return pfn < max_low_pfn;
+}
 
-       page = read_cr3();
-       page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+       pgd_t *base = __va(read_cr3());
+       pgd_t *pgd = &base[pgd_index(address)];
+       pmd_t *pmd;
+       pte_t *pte;
 
 #ifdef CONFIG_X86_PAE
-       printk("*pdpt = %016Lx ", page);
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && page & _PAGE_PRESENT) {
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-                                                       & (PTRS_PER_PMD - 1)];
-               printk(KERN_CONT "*pde = %016Lx ", page);
-               page &= ~_PAGE_NX;
-       }
-#else
-       printk("*pde = %08lx ", page);
+       printk("*pdpt = %016Lx ", pgd_val(*pgd));
+       if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+               goto out;
 #endif
+       pmd = pmd_offset(pud_offset(pgd, address), address);
+       printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
 
        /*
         * We must not directly access the pte in the highpte
@@ -310,16 +311,12 @@ static void dump_pagetable(unsigned long address)
         * And let's rather not kmap-atomic the pte, just in case
         * it's allocated already:
         */
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && (page & _PAGE_PRESENT)
-           && !(page & _PAGE_PSE)) {
-
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-                                                       & (PTRS_PER_PTE - 1)];
-               printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-       }
+       if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+               goto out;
 
+       pte = pte_offset_kernel(pmd, address);
+       printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+out:
        printk("\n");
 }
 
@@ -424,10 +421,11 @@ static noinline int vmalloc_fault(unsigned long address)
 }
 
 static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR 
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -447,16 +445,12 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-       pgd_t *pgd;
+       pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+       pgd_t *pgd = base + pgd_index(address);
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
-       pgd = (pgd_t *)read_cr3();
-
-       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
-       pgd += pgd_index(address);
        if (bad_address(pgd))
                goto bad;
 
@@ -694,7 +688,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
@@ -950,11 +944,17 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        tsk = current;
        mm = tsk->mm;
 
-       prefetchw(&mm->mmap_sem);
-
        /* Get the faulting address: */
        address = read_cr2();
 
+       /*
+        * Detect and handle instructions that would cause a page fault for
+        * both a tracked kernel page and a userspace page.
+        */
+       if (kmemcheck_active(regs))
+               kmemcheck_hide(regs);
+       prefetchw(&mm->mmap_sem);
+
        if (unlikely(kmmio_fault(regs, address)))
                return;
 
@@ -972,9 +972,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
         * protection error (error_code & 9) == 0.
         */
        if (unlikely(fault_in_kernel_space(address))) {
-               if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-                   vmalloc_fault(address) >= 0)
-                       return;
+               if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+                       if (vmalloc_fault(address) >= 0)
+                               return;
+
+                       if (kmemcheck_fault(regs, address, error_code))
+                               return;
+               }
 
                /* Can handle a stale RO->RW TLB: */
                if (spurious_fault(error_code, address))
@@ -1013,6 +1017,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);
 
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
@@ -1099,17 +1105,22 @@ good_area:
         * make sure we exit gracefully rather than endlessly redo
         * the fault:
         */
-       fault = handle_mm_fault(mm, vma, address, write);
+       fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mm_fault_error(regs, error_code, address, fault);
                return;
        }
 
-       if (fault & VM_FAULT_MAJOR)
+       if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
-       else
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+                                    regs, address);
+       } else {
                tsk->min_flt++;
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+                                    regs, address);
+       }
 
        check_v8086_mode(regs, address, tsk);