powerpc: Convert pmac_pic_lock to raw_spinlock
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8660986..26fb6b9 100644
@@ -29,7 +29,9 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/perf_event.h>
 
+#include <asm/firmware.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
@@ -38,7 +40,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
-
+#include <mm/mmu_decl.h>
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -169,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
                die("Weird page fault", regs, SIGSEGV);
        }
 
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
         * kernel and should generate an OOPS.  Unfortunately, in the case of an
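The hunk above feeds the generic PERF_COUNT_SW_PAGE_FAULTS software event from the powerpc fault handler. Below is a minimal userspace sketch (not part of the patch) of reading that counter back through the perf_event_open() syscall; glibc provides no wrapper, so the small one here is our own, and error handling is mostly omitted for brevity.

/*
 * Sketch only: count this task's page faults via the software event
 * the fault handler above now emits.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count = 0;
	size_t i, len = 1 << 20;
	char *buf;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_PAGE_FAULTS;	/* the event emitted above */
	attr.disabled = 1;

	fd = perf_event_open(&attr, 0 /* this task */, -1 /* any cpu */, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	buf = malloc(len);
	for (i = 0; i < len; i += 4096)		/* touch fresh pages -> faults */
		buf[i] = 1;
	free(buf);
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("page faults: %llu\n", count);
	close(fd);
	return 0;
}
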
@@ -242,6 +246,12 @@ good_area:
                goto bad_area;
 #endif /* CONFIG_6xx */
 #if defined(CONFIG_8xx)
+       /* The 8xx sometimes needs to load an invalid/non-present TLB entry.
+        * These must be invalidated separately, as the Linux mm doesn't.
+        */
+       if (error_code & 0x40000000) /* no translation? */
+               _tlbil_va(address, 0, 0, 0);
+
         /* The MPC8xx seems to always set 0x80000000, which is
          * "undefined".  Of those that can be set, this is the only
          * one which seems bad.
@@ -252,45 +262,33 @@ good_area:
 #endif /* CONFIG_8xx */
 
        if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-               /* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+               /* A protection fault on exec goes straight to failure on
+                * hash-based MMUs, as they either don't support per-page
+                * execute permission, or if they do, it's already handled
+                * at the hash level. This test would probably have to be
+                * removed if we change the way this works to make hash
+                * processors use the same I/D cache coherency mechanism
+                * as embedded.
+                */
                if (error_code & DSISR_PROTFAULT)
                        goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
                /*
                 * Allow execution from readable areas if the MMU does not
                 * provide separate controls over reading and executing.
+                *
+                * Note: this code used not to be enabled for 4xx/BookE.
+                * It is now, as I/D cache coherency for these is done at
+                * set_pte_at() time, and I see no reason why the test
+                * below wouldn't be valid on those processors. This -may-
+                * break programs compiled with a really old ABI, though.
                 */
                if (!(vma->vm_flags & VM_EXEC) &&
                    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
                     !(vma->vm_flags & (VM_READ | VM_WRITE))))
                        goto bad_area;
-#else
-               pte_t *ptep;
-               pmd_t *pmdp;
-
-               /* Since 4xx/Book-E supports per-page execute permission,
-                * we lazily flush dcache to icache. */
-               ptep = NULL;
-               if (get_pteptr(mm, address, &ptep, &pmdp)) {
-                       spinlock_t *ptl = pte_lockptr(mm, pmdp);
-                       spin_lock(ptl);
-                       if (pte_present(*ptep)) {
-                               struct page *page = pte_page(*ptep);
-
-                               if (!test_bit(PG_arch_1, &page->flags)) {
-                                       flush_dcache_icache_page(page);
-                                       set_bit(PG_arch_1, &page->flags);
-                               }
-                               pte_update(ptep, 0, _PAGE_HWEXEC |
-                                          _PAGE_ACCESSED);
-                               _tlbie(address, mm->context.id);
-                               pte_unmap_unlock(ptep, ptl);
-                               up_read(&mm->mmap_sem);
-                               return 0;
-                       }
-                       pte_unmap_unlock(ptep, ptl);
-               }
-#endif
        /* a write */
        } else if (is_write) {
                if (!(vma->vm_flags & VM_WRITE))
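The VM_EXEC/CPU_FTR_NOEXECUTE test in the hunk above is what ultimately refuses an instruction fetch from a mapping created without execute permission. The sketch below (not part of the patch) shows that situation from userspace, assuming a no-execute-capable CPU; the powerpc blr opcode and the SIGSEGV trampoline are purely illustrative, and details such as function-descriptor ABIs are glossed over.

/*
 * Sketch only: jump into a mapping created without PROT_EXEC. Where
 * the MMU enforces execute permission, the instruction fetch faults
 * and the handler above sends SIGSEGV; on MMUs without separate
 * execute control the call may simply succeed instead.
 */
#include <signal.h>
#include <setjmp.h>
#include <string.h>
#include <stdio.h>
#include <sys/mman.h>

static sigjmp_buf env;

static void on_segv(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	unsigned int blr = 0x4e800020;		/* powerpc "return" instruction */
	void (*fn)(void);
	void *page;

	signal(SIGSEGV, on_segv);

	page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,	/* note: no PROT_EXEC */
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (page == MAP_FAILED)
		return 1;
	memcpy(page, &blr, sizeof(blr));

	fn = (void (*)(void))page;
	if (sigsetjmp(env, 1) == 0) {
		fn();				/* instruction fetch from a !VM_EXEC vma */
		puts("executed from a non-exec mapping");
	} else {
		puts("SIGSEGV: exec from a non-exec mapping was refused");
	}
	return 0;
}

The success path of this demo corresponds to the "allow execution from readable areas" case in the comment above.
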
@@ -310,7 +308,7 @@ good_area:
         * the fault.
         */
  survive:
-       ret = handle_mm_fault(mm, vma, address, is_write);
+       ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
        if (unlikely(ret & VM_FAULT_ERROR)) {
                if (ret & VM_FAULT_OOM)
                        goto out_of_memory;
@@ -318,10 +316,22 @@ good_area:
                        goto do_sigbus;
                BUG();
        }
-       if (ret & VM_FAULT_MAJOR)
+       if (ret & VM_FAULT_MAJOR) {
                current->maj_flt++;
-       else
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+                                    regs, address);
+#ifdef CONFIG_PPC_SMLPAR
+               if (firmware_has_feature(FW_FEATURE_CMO)) {
+                       preempt_disable();
+                       get_lppaca()->page_ins += (1 << PAGE_FACTOR);
+                       preempt_enable();
+               }
+#endif
+       } else {
                current->min_flt++;
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+                                    regs, address);
+       }
        up_read(&mm->mmap_sem);
        return 0;
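
The maj_flt/min_flt counters bumped in the last hunk are the same numbers userspace sees through getrusage(), and the new PERF_COUNT_SW_PAGE_FAULTS_MAJ/_MIN events can be read with the same perf_event_open() pattern as the earlier sketch, just with a different attr.config. A minimal getrusage() example (again, not part of the patch):

/*
 * Sketch only: the per-task fault counters updated above are reported
 * back to userspace as ru_majflt/ru_minflt.
 */
#include <sys/resource.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	struct rusage ru;
	size_t i, len = 4 << 20;
	char *buf = malloc(len);

	for (i = 0; i < len; i += 4096)		/* fresh anonymous pages: minor faults */
		buf[i] = 1;
	free(buf);

	if (getrusage(RUSAGE_SELF, &ru) == 0)
		printf("major: %ld  minor: %ld\n", ru.ru_majflt, ru.ru_minflt);
	return 0;
}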