hw-breakpoints: Modify breakpoints without unregistering them
[safe/jmp/linux-2.6] / arch / x86 / kernel / ptrace.c
index cabdabc..7079dda 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/ds.h>
 #include <asm/hw_breakpoint.h>
 
-#include <trace/syscall.h>
-
 #include "tls.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
 enum x86_regset {
        REGSET_GENERAL,
        REGSET_FP,
@@ -49,6 +52,118 @@ enum x86_regset {
        REGSET_IOPERM32,
 };
 
+struct pt_regs_offset {
+       const char *name;
+       int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+       REG_OFFSET_NAME(r15),
+       REG_OFFSET_NAME(r14),
+       REG_OFFSET_NAME(r13),
+       REG_OFFSET_NAME(r12),
+       REG_OFFSET_NAME(r11),
+       REG_OFFSET_NAME(r10),
+       REG_OFFSET_NAME(r9),
+       REG_OFFSET_NAME(r8),
+#endif
+       REG_OFFSET_NAME(bx),
+       REG_OFFSET_NAME(cx),
+       REG_OFFSET_NAME(dx),
+       REG_OFFSET_NAME(si),
+       REG_OFFSET_NAME(di),
+       REG_OFFSET_NAME(bp),
+       REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+       REG_OFFSET_NAME(ds),
+       REG_OFFSET_NAME(es),
+       REG_OFFSET_NAME(fs),
+       REG_OFFSET_NAME(gs),
+#endif
+       REG_OFFSET_NAME(orig_ax),
+       REG_OFFSET_NAME(ip),
+       REG_OFFSET_NAME(cs),
+       REG_OFFSET_NAME(flags),
+       REG_OFFSET_NAME(sp),
+       REG_OFFSET_NAME(ss),
+       REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:      the name of a register, as listed in regoffset_table
+ *
+ * Returns the offset of the named register within struct pt_regs, found by
+ * a linear scan of regoffset_table. If no entry matches, returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+       const struct pt_regs_offset *roff;
+       for (roff = regoffset_table; roff->name != NULL; roff++)
+               if (!strcmp(roff->name, name))
+                       return roff->offset;
+       return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:    the offset of a register in struct pt_regs.
+ *
+ * Returns the name of the register whose regoffset_table entry has exactly
+ * this @offset. If no entry matches, returns NULL.
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+       const struct pt_regs_offset *roff;
+       for (roff = regoffset_table; roff->name != NULL; roff++)
+               if (roff->offset == offset)
+                       return roff->name;
+       return NULL;
+}
+
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+       [0] = offsetof(struct pt_regs, ax),
+       [1] = offsetof(struct pt_regs, dx),
+       [2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+       [0] = offsetof(struct pt_regs, di),
+       [1] = offsetof(struct pt_regs, si),
+       [2] = offsetof(struct pt_regs, dx),
+       [3] = offsetof(struct pt_regs, cx),
+       [4] = offsetof(struct pt_regs, r8),
+       [5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - get Nth argument at function call
+ * @regs:      pt_regs which contains registers at function entry.
+ * @n:         argument number.
+ *
+ * regs_get_argument_nth() returns @n th argument of a function call.
+ * Since usually the kernel stack will be changed right after function entry,
+ * you must use this at function entry. If the @n th entry is NOT in the
+ * kernel stack or pt_regs, this returns 0.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+       if (n < ARRAY_SIZE(arg_offs_table))
+               return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+       else {
+               /*
+                * The typical case: arg n is on the stack.
+                * (Note: stack[0] = return address, so skip it)
+                */
+               n -= ARRAY_SIZE(arg_offs_table);
+               return regs_get_kernel_stack_nth(regs, 1 + n);
+       }
+}
+
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
@@ -311,16 +426,6 @@ static int putreg(struct task_struct *child,
                return set_flags(child, value);
 
 #ifdef CONFIG_X86_64
-       /*
-        * Orig_ax is really just a flag with small positive and
-        * negative values, so make sure to always sign-extend it
-        * from 32 bits so that it works correctly regardless of
-        * whether we come from a 32-bit environment or not.
-        */
-       case offsetof(struct user_regs_struct, orig_ax):
-               value = (long) (s32) value;
-               break;
-
        case offsetof(struct user_regs_struct,fs_base):
                if (value >= TASK_SIZE_OF(child))
                        return -EIO;
@@ -450,39 +555,72 @@ static int genregs_set(struct task_struct *target,
        return ret;
 }
 
+static void ptrace_triggered(struct perf_event *bp, int nmi,
+                            struct perf_sample_data *data,
+                            struct pt_regs *regs)
+{
+       int i;
+       struct thread_struct *thread = &(current->thread);
+
+       /*
+        * Store in the virtual DR6 register the fact that the breakpoint
+        * was hit so the thread's debugger will see it.
+        */
+       for (i = 0; i < HBP_NUM; i++) {
+               if (thread->ptrace_bps[i] == bp)
+                       break;
+       }
+
+       thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
 /*
- * Decode the length and type bits for a particular breakpoint as
- * stored in debug register 7.  Return the "enabled" status.
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
  */
-static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
-               unsigned *type)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 {
-       int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+       int i;
+       int dr7 = 0;
+       struct arch_hw_breakpoint *info;
 
-       *len = (bp_info & 0xc) | 0x40;
-       *type = (bp_info & 0x3) | 0x80;
-       return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+       for (i = 0; i < HBP_NUM; i++) {
+               if (bp[i] && !bp[i]->attr.disabled) {
+                       info = counter_arch_bp(bp[i]);
+                       dr7 |= encode_dr7(i, info->len, info->type);
+               }
+       }
+
+       return dr7;
 }
 
-static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+static int
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+                        struct task_struct *tsk, int disabled)
 {
-       struct thread_struct *thread = &(current->thread);
-       int i;
+       int err;
+       int gen_len, gen_type;
+       struct perf_event_attr attr;
 
        /*
-        * Store in the virtual DR6 register the fact that the breakpoint
-        * was hit so the thread's debugger will see it.
+        * We should have at least an inactive breakpoint at this
+        * slot. If not, the user is writing dr7 without having
+        * written the address register first.
         */
-       for (i = 0; i < hbp_kernel_pos; i++)
-               /*
-                * We will check bp->info.address against the address stored in
-                * thread's hbp structure and not debugreg[i]. This is to ensure
-                * that the corresponding bit for 'i' in DR7 register is enabled
-                */
-               if (bp->info.address == thread->hbp[i]->info.address)
-                       break;
+       if (!bp)
+               return -EINVAL;
 
-       thread->debugreg6 |= (DR_TRAP0 << i);
+       err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+       if (err)
+               return err;
+
+       attr = bp->attr;
+       attr.bp_len = gen_len;
+       attr.bp_type = gen_type;
+       attr.disabled = disabled;
+
+       return modify_user_hw_breakpoint(bp, &attr);
 }
 
 /*
@@ -491,13 +629,14 @@ static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
        struct thread_struct *thread = &(tsk->thread);
-       unsigned long old_dr7 = thread->debugreg7;
+       unsigned long old_dr7;
        int i, orig_ret = 0, rc = 0;
        int enabled, second_pass = 0;
        unsigned len, type;
-       struct hw_breakpoint *bp;
+       struct perf_event *bp;
 
        data &= ~DR_CONTROL_RESERVED;
+       old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
        /*
         * Loop through all the hardware breakpoints, making the
@@ -505,11 +644,12 @@ restore:
         */
        for (i = 0; i < HBP_NUM; i++) {
                enabled = decode_dr7(data, i, &len, &type);
-               bp = thread->hbp[i];
+               bp = thread->ptrace_bps[i];
 
                if (!enabled) {
                        if (bp) {
-                               /* Don't unregister the breakpoints right-away,
+                               /*
+                                * Don't unregister the breakpoints right-away,
                                 * unless all register_user_hw_breakpoint()
                                 * requests have succeeded. This prevents
                                 * any window of opportunity for debug
@@ -517,25 +657,16 @@ restore:
                                 */
                                if (!second_pass)
                                        continue;
-                               unregister_user_hw_breakpoint(tsk, bp);
-                               kfree(bp);
+
+                               rc = ptrace_modify_breakpoint(bp, len, type,
+                                                             tsk, 1);
+                               if (rc)
+                                       break;
                        }
                        continue;
                }
-               if (!bp) {
-                       rc = -ENOMEM;
-                       bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-                       if (bp) {
-                               bp->info.address = thread->debugreg[i];
-                               bp->triggered = ptrace_triggered;
-                               bp->info.len = len;
-                               bp->info.type = type;
-                               rc = register_user_hw_breakpoint(tsk, bp);
-                               if (rc)
-                                       kfree(bp);
-                       }
-               } else
-                       rc = modify_user_hw_breakpoint(tsk, bp);
+
+               rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
                if (rc)
                        break;
        }
@@ -562,15 +693,69 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
        struct thread_struct *thread = &(tsk->thread);
        unsigned long val = 0;
 
-       if (n < HBP_NUM)
-               val = thread->debugreg[n];
-       else if (n == 6)
+       if (n < HBP_NUM) {
+               struct perf_event *bp;
+               bp = thread->ptrace_bps[n];
+               if (!bp)
+                       return 0;
+               val = bp->hw.info.address;
+       } else if (n == 6) {
                val = thread->debugreg6;
-       else if (n == 7)
-               val = thread->debugreg7;
+        } else if (n == 7) {
+               val = ptrace_get_dr7(thread->ptrace_bps);
+       }
        return val;
 }
 
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+                                     unsigned long addr)
+{
+       struct perf_event *bp;
+       struct thread_struct *t = &tsk->thread;
+       struct perf_event_attr attr;
+
+       if (!t->ptrace_bps[nr]) {
+               hw_breakpoint_init(&attr);
+               /*
+                * Put stub len and type to register (reserve) an inactive but
+                * correct bp
+                */
+               attr.bp_addr = addr;
+               attr.bp_len = HW_BREAKPOINT_LEN_1;
+               attr.bp_type = HW_BREAKPOINT_W;
+               attr.disabled = 1;
+
+               bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+
+               /*
+                * CHECKME: the previous code returned -EIO if the addr wasn't
+                * a valid task virtual addr. The new one will return -EINVAL in
+                * this case.
+                * -EINVAL may be what we want for in-kernel breakpoints users,
+                * but -EIO looks better for ptrace, since we refuse a register
+                * writing for the user. And anyway this is the previous
+                * behaviour.
+                */
+               if (IS_ERR(bp))
+                       return PTR_ERR(bp);
+
+               t->ptrace_bps[nr] = bp;
+       } else {
+               int err;
+
+               bp = t->ptrace_bps[nr];
+
+               attr = bp->attr;
+               attr.bp_addr = addr;
+               err = modify_user_hw_breakpoint(bp, &attr);
+               if (err)
+                       return err;
+       }
+
+
+       return 0;
+}
+
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
  */
@@ -584,19 +769,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
                return -EIO;
 
        if (n == 6) {
-               tsk->thread.debugreg6 = val;
+               thread->debugreg6 = val;
                goto ret_path;
        }
        if (n < HBP_NUM) {
-               if (thread->hbp[n]) {
-                       if (arch_check_va_in_userspace(val,
-                                       thread->hbp[n]->info.len) == 0) {
-                               rc = -EIO;
-                               goto ret_path;
-                       }
-                       thread->hbp[n]->info.address = val;
-               }
-               thread->debugreg[n] = val;
+               rc = ptrace_set_breakpoint_addr(tsk, n, val);
+               if (rc)
+                       return rc;
        }
        /* All that's left is DR7 */
        if (n == 7)
@@ -1176,10 +1355,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 
        case offsetof(struct user32, regs.orig_eax):
                /*
-                * Sign-extend the value so that orig_eax = -1
-                * causes (long)orig_ax < 0 tests to fire correctly.
+                * A 32-bit debugger setting orig_eax means to restore
+                * the state of the task restarting a 32-bit syscall.
+                * Make sure we interpret the -ERESTART* codes correctly
+                * in case the task is not actually still sitting at the
+                * exit from a 32-bit syscall with TS_COMPAT still set.
                 */
-               regs->orig_ax = (long) (s32) value;
+               regs->orig_ax = value;
+               if (syscall_get_nr(child, regs) >= 0)
+                       task_thread_info(child)->status |= TS_COMPAT;
                break;
 
        case offsetof(struct user32, regs.eflags):
@@ -1548,8 +1732,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
            tracehook_report_syscall_entry(regs))
                ret = -1L;
 
-       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
-               ftrace_syscall_enter(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_enter(regs, regs->orig_ax);
 
        if (unlikely(current->audit_context)) {
                if (IS_IA32)
@@ -1574,8 +1758,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
        if (unlikely(current->audit_context))
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
-       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
-               ftrace_syscall_exit(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_exit(regs, regs->ax);
 
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                tracehook_report_syscall_exit(regs, 0);