perf_counter: x86: Fix call-chain support to use NMI-safe methods
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Mon, 15 Jun 2009 11:07:24 +0000 (13:07 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 15 Jun 2009 13:57:53 +0000 (15:57 +0200)
__copy_from_user_inatomic() isn't NMI safe: it can trigger the page
fault handler, which is another trap, and the fault's return path
invokes IRET, which also closes the NMI context.

Therefore, use a GUP-based approach to copy the stack frames over.
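
For context (not part of this patch), the user-space call chain is
walked frame by frame via the saved frame pointer, roughly along these
lines; the struct layout, constants and local names below are
illustrative approximations of the surrounding perf_counter.c code:

	struct stack_frame {
		const void __user	*next_frame;	/* caller's saved frame pointer */
		unsigned long		return_address;	/* caller's return address */
	};

	struct stack_frame frame;
	const void __user *fp = (void __user *)regs->bp;

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		frame.next_frame     = NULL;
		frame.return_address = 0;

		/* copy_stack_frame() now goes through copy_from_user_nmi() */
		if (!copy_stack_frame(fp, &frame))
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}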

We tried an alternative solution as well: we used a forward-ported
version of Mathieu Desnoyers's "NMI safe INT3 and Page Fault" patch
that modifies the exception return path to use an open-coded IRET with
explicit stack unrolling and TF checking.

This didn't work, as it interfered with faulting user-space
instructions, causing them not to restart properly, which corrupted
user-space registers.

Solving that would probably involve disassembling those instructions
and backtracing the RIP. But even without that, the code was deemed
to add too much complexity to the already non-trivial x86 entry
assembly code, so instead we went for this GUP-based method that does
a software walk of the pagetables.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/perf_counter.c

index 6d5e7cf..e8c68a5 100644
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -1617,20 +1618,48 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
        entry->kernel = entry->nr - nr;
 }
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
+       unsigned long offset, addr = (unsigned long)from;
+       int type = in_nmi() ? KM_NMI : KM_IRQ0;
+       unsigned long size, len = 0;
+       struct page *page;
+       void *map;
        int ret;
 
-       if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-               return 0;
+       do {
+               ret = __get_user_pages_fast(addr, 1, 0, &page);
+               if (!ret)
+                       break;
 
-       ret = 1;
-       pagefault_disable();
-       if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-               ret = 0;
-       pagefault_enable();
+               offset = addr & (PAGE_SIZE - 1);
+               size = min(PAGE_SIZE - offset, n - len);
 
-       return ret;
+               map = kmap_atomic(page, type);
+               memcpy(to, map+offset, size);
+               kunmap_atomic(map, type);
+               put_page(page);
+
+               len  += size;
+               to   += size;
+               addr += size;
+
+       } while (len < n);
+
+       return len;
+}
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+       unsigned long bytes;
+
+       bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+
+       return bytes == sizeof(*frame);
 }
 
 static void
@@ -1643,7 +1672,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
        if (!user_mode(regs))
                regs = task_pt_regs(current);
 
-       fp   = (void __user *)regs->bp;
+       fp = (void __user *)regs->bp;
 
        callchain_store(entry, regs->ip);