Merge branches 'x86/acpi', 'x86/apic', 'x86/cpudetect', 'x86/headers', 'x86/paravirt...
[safe/jmp/linux-2.6] / arch / x86 / kernel / ptrace.c
index 5098049..d2f7cd5 100644 (file)
@@ -2,6 +2,9 @@
 /*
  * Pentium III FXSR, SSE support
  *     Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * BTS tracing
+ *     Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
  */
 
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
+#include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/proto.h>
+#include <asm/ds.h>
+
+#include "tls.h"
+
+enum x86_regset {
+       REGSET_GENERAL,
+       REGSET_FP,
+       REGSET_XFP,
+       REGSET_IOPERM64 = REGSET_XFP,
+       REGSET_TLS,
+       REGSET_IOPERM32,
+};
 
 /*
  * does not yet catch signals sent when the child dies.
@@ -54,13 +72,10 @@ static inline bool invalid_selector(u16 value)
 
 #define FLAG_MASK              FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
        BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
-       regno >>= 2;
-       if (regno > FS)
-               --regno;
-       return &regs->bx + regno;
+       return &regs->bx + (regno >> 2);
 }
 
 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -72,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
        if (offset != offsetof(struct user_regs_struct, gs))
                retval = *pt_regs_access(task_pt_regs(task), offset);
        else {
-               retval = task->thread.gs;
                if (task == current)
-                       savesegment(gs, retval);
+                       retval = get_user_gs(task_pt_regs(task));
+               else
+                       retval = task_user_gs(task);
        }
        return retval;
 }
@@ -88,16 +104,30 @@ static int set_segment_reg(struct task_struct *task,
        if (invalid_selector(value))
                return -EIO;
 
-       if (offset != offsetof(struct user_regs_struct, gs))
+       /*
+        * For %cs and %ss we cannot permit a null selector.
+        * We can permit a bogus selector as long as it has USER_RPL.
+        * Null selectors are fine for other segment registers, but
+        * we will never get back to user mode with invalid %cs or %ss
+        * and will take the trap in iret instead.  Much code relies
+        * on user_mode() to distinguish a user trap frame (which can
+        * safely use invalid selectors) from a kernel trap frame.
+        */
+       switch (offset) {
+       case offsetof(struct user_regs_struct, cs):
+       case offsetof(struct user_regs_struct, ss):
+               if (unlikely(value == 0))
+                       return -EIO;
+
+       default:
                *pt_regs_access(task_pt_regs(task), offset) = value;
-       else {
-               task->thread.gs = value;
+               break;
+
+       case offsetof(struct user_regs_struct, gs):
                if (task == current)
-                       /*
-                        * The user-mode %gs is not affected by
-                        * kernel entry, so we must update the CPU.
-                        */
-                       loadsegment(gs, value);
+                       set_user_gs(task_pt_regs(task), value);
+               else
+                       task_user_gs(task) = value;
        }
 
        return 0;
@@ -212,17 +242,21 @@ static int set_segment_reg(struct task_struct *task,
                 * Can't actually change these in 64-bit mode.
                 */
        case offsetof(struct user_regs_struct,cs):
+               if (unlikely(value == 0))
+                       return -EIO;
 #ifdef CONFIG_IA32_EMULATION
                if (test_tsk_thread_flag(task, TIF_IA32))
                        task_pt_regs(task)->cs = value;
-               break;
 #endif
+               break;
        case offsetof(struct user_regs_struct,ss):
+               if (unlikely(value == 0))
+                       return -EIO;
 #ifdef CONFIG_IA32_EMULATION
                if (test_tsk_thread_flag(task, TIF_IA32))
                        task_pt_regs(task)->ss = value;
-               break;
 #endif
+               break;
        }
 
        return 0;
@@ -287,6 +321,16 @@ static int putreg(struct task_struct *child,
                return set_flags(child, value);
 
 #ifdef CONFIG_X86_64
+       /*
+        * Orig_ax is really just a flag with small positive and
+        * negative values, so make sure to always sign-extend it
+        * from 32 bits so that it works correctly regardless of
+        * whether we come from a 32-bit environment or not.
+        */
+       case offsetof(struct user_regs_struct, orig_ax):
+               value = (long) (s32) value;
+               break;
+
        case offsetof(struct user_regs_struct,fs_base):
                if (value >= TASK_SIZE_OF(child))
                        return -EIO;
@@ -363,6 +407,59 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
        return *pt_regs_access(task_pt_regs(task), offset);
 }
 
+static int genregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
+{
+       if (kbuf) {
+               unsigned long *k = kbuf;
+               while (count > 0) {
+                       *k++ = getreg(target, pos);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               unsigned long __user *u = ubuf;
+               while (count > 0) {
+                       if (__put_user(getreg(target, pos), u++))
+                               return -EFAULT;
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+
+       return 0;
+}
+
+static int genregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       int ret = 0;
+       if (kbuf) {
+               const unsigned long *k = kbuf;
+               while (count > 0 && !ret) {
+                       ret = putreg(target, pos, *k++);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               const unsigned long  __user *u = ubuf;
+               while (count > 0 && !ret) {
+                       unsigned long word;
+                       ret = __get_user(word, u++);
+                       if (ret)
+                               break;
+                       ret = putreg(target, pos, word);
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+       return ret;
+}
+
 /*
  * This function is trivial and will be inlined by the compiler.
  * Having it separates the implementation details of debug
@@ -456,6 +553,286 @@ static int ptrace_set_debugreg(struct task_struct *child,
 }
 
 /*
+ * These access the current or another (stopped) task's io permission
+ * bitmap for debugging or core dump.
+ */
+static int ioperm_active(struct task_struct *target,
+                        const struct user_regset *regset)
+{
+       return target->thread.io_bitmap_max / regset->size;
+}
+
+static int ioperm_get(struct task_struct *target,
+                     const struct user_regset *regset,
+                     unsigned int pos, unsigned int count,
+                     void *kbuf, void __user *ubuf)
+{
+       if (!target->thread.io_bitmap_ptr)
+               return -ENXIO;
+
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                  target->thread.io_bitmap_ptr,
+                                  0, IO_BITMAP_BYTES);
+}
+
+#ifdef CONFIG_X86_PTRACE_BTS
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
+                                 struct bts_struct __user *out)
+{
+       const struct bts_trace *trace;
+       struct bts_struct bts;
+       const unsigned char *at;
+       int error;
+
+       trace = ds_read_bts(child->bts);
+       if (!trace)
+               return -EPERM;
+
+       at = trace->ds.top - ((index + 1) * trace->ds.size);
+       if ((void *)at < trace->ds.begin)
+               at += (trace->ds.n * trace->ds.size);
+
+       if (!trace->read)
+               return -EOPNOTSUPP;
+
+       error = trace->read(child->bts, at, &bts);
+       if (error < 0)
+               return error;
+
+       if (copy_to_user(out, &bts, sizeof(bts)))
+               return -EFAULT;
+
+       return sizeof(bts);
+}
+
+static int ptrace_bts_drain(struct task_struct *child,
+                           long size,
+                           struct bts_struct __user *out)
+{
+       const struct bts_trace *trace;
+       const unsigned char *at;
+       int error, drained = 0;
+
+       trace = ds_read_bts(child->bts);
+       if (!trace)
+               return -EPERM;
+
+       if (!trace->read)
+               return -EOPNOTSUPP;
+
+       if (size < (trace->ds.top - trace->ds.begin))
+               return -EIO;
+
+       for (at = trace->ds.begin; (void *)at < trace->ds.top;
+            out++, drained++, at += trace->ds.size) {
+               struct bts_struct bts;
+               int error;
+
+               error = trace->read(child->bts, at, &bts);
+               if (error < 0)
+                       return error;
+
+               if (copy_to_user(out, &bts, sizeof(bts)))
+                       return -EFAULT;
+       }
+
+       memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
+
+       error = ds_reset_bts(child->bts);
+       if (error < 0)
+               return error;
+
+       return drained;
+}
+
+static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
+{
+       child->bts_buffer = alloc_locked_buffer(size);
+       if (!child->bts_buffer)
+               return -ENOMEM;
+
+       child->bts_size = size;
+
+       return 0;
+}
+
+static void ptrace_bts_free_buffer(struct task_struct *child)
+{
+       free_locked_buffer(child->bts_buffer, child->bts_size);
+       child->bts_buffer = NULL;
+       child->bts_size = 0;
+}
+
+static int ptrace_bts_config(struct task_struct *child,
+                            long cfg_size,
+                            const struct ptrace_bts_config __user *ucfg)
+{
+       struct ptrace_bts_config cfg;
+       unsigned int flags = 0;
+
+       if (cfg_size < sizeof(cfg))
+               return -EIO;
+
+       if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
+               return -EFAULT;
+
+       if (child->bts) {
+               ds_release_bts(child->bts);
+               child->bts = NULL;
+       }
+
+       if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+               if (!cfg.signal)
+                       return -EINVAL;
+
+               return -EOPNOTSUPP;
+
+               child->thread.bts_ovfl_signal = cfg.signal;
+       }
+
+       if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
+           (cfg.size != child->bts_size)) {
+               int error;
+
+               ptrace_bts_free_buffer(child);
+
+               error = ptrace_bts_allocate_buffer(child, cfg.size);
+               if (error < 0)
+                       return error;
+       }
+
+       if (cfg.flags & PTRACE_BTS_O_TRACE)
+               flags |= BTS_USER;
+
+       if (cfg.flags & PTRACE_BTS_O_SCHED)
+               flags |= BTS_TIMESTAMPS;
+
+       child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
+                                   /* ovfl = */ NULL, /* th = */ (size_t)-1,
+                                   flags);
+       if (IS_ERR(child->bts)) {
+               int error = PTR_ERR(child->bts);
+
+               ptrace_bts_free_buffer(child);
+               child->bts = NULL;
+
+               return error;
+       }
+
+       return sizeof(cfg);
+}
+
+static int ptrace_bts_status(struct task_struct *child,
+                            long cfg_size,
+                            struct ptrace_bts_config __user *ucfg)
+{
+       const struct bts_trace *trace;
+       struct ptrace_bts_config cfg;
+
+       if (cfg_size < sizeof(cfg))
+               return -EIO;
+
+       trace = ds_read_bts(child->bts);
+       if (!trace)
+               return -EPERM;
+
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.size = trace->ds.end - trace->ds.begin;
+       cfg.signal = child->thread.bts_ovfl_signal;
+       cfg.bts_size = sizeof(struct bts_struct);
+
+       if (cfg.signal)
+               cfg.flags |= PTRACE_BTS_O_SIGNAL;
+
+       if (trace->ds.flags & BTS_USER)
+               cfg.flags |= PTRACE_BTS_O_TRACE;
+
+       if (trace->ds.flags & BTS_TIMESTAMPS)
+               cfg.flags |= PTRACE_BTS_O_SCHED;
+
+       if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
+               return -EFAULT;
+
+       return sizeof(cfg);
+}
+
+static int ptrace_bts_clear(struct task_struct *child)
+{
+       const struct bts_trace *trace;
+
+       trace = ds_read_bts(child->bts);
+       if (!trace)
+               return -EPERM;
+
+       memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
+
+       return ds_reset_bts(child->bts);
+}
+
+static int ptrace_bts_size(struct task_struct *child)
+{
+       const struct bts_trace *trace;
+
+       trace = ds_read_bts(child->bts);
+       if (!trace)
+               return -EPERM;
+
+       return (trace->ds.top - trace->ds.begin) / trace->ds.size;
+}
+
+static void ptrace_bts_fork(struct task_struct *tsk)
+{
+       tsk->bts = NULL;
+       tsk->bts_buffer = NULL;
+       tsk->bts_size = 0;
+       tsk->thread.bts_ovfl_signal = 0;
+}
+
+static void ptrace_bts_untrace(struct task_struct *child)
+{
+       if (unlikely(child->bts)) {
+               ds_release_bts(child->bts);
+               child->bts = NULL;
+
+               /* We cannot update total_vm and locked_vm since
+                  child's mm is already gone. But we can reclaim the
+                  memory. */
+               kfree(child->bts_buffer);
+               child->bts_buffer = NULL;
+               child->bts_size = 0;
+       }
+}
+
+static void ptrace_bts_detach(struct task_struct *child)
+{
+       /*
+        * Ptrace_detach() races with ptrace_untrace() in case
+        * the child dies and is reaped by another thread.
+        *
+        * We only do the memory accounting at this point and
+        * leave the buffer deallocation and the bts tracer
+        * release to ptrace_bts_untrace() which will be called
+        * later on with tasklist_lock held.
+        */
+       release_locked_buffer(child->bts_buffer, child->bts_size);
+}
+#else
+static inline void ptrace_bts_fork(struct task_struct *tsk) {}
+static inline void ptrace_bts_detach(struct task_struct *child) {}
+static inline void ptrace_bts_untrace(struct task_struct *child) {}
+#endif /* CONFIG_X86_PTRACE_BTS */
+
+void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
+{
+       ptrace_bts_fork(child);
+}
+
+void x86_ptrace_untrace(struct task_struct *child)
+{
+       ptrace_bts_untrace(child);
+}
+
+/*
  * Called by kernel/ptrace.c when detaching..
  *
  * Make sure the single step bit is not set.
@@ -466,20 +843,19 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
+       ptrace_bts_detach(child);
 }
 
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset_view user_x86_32_view; /* Initialized below. */
+#endif
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-       int i, ret;
+       int ret;
        unsigned long __user *datap = (unsigned long __user *)data;
 
        switch (request) {
-       /* when I and D space are separate, these will need to be fixed. */
-       case PTRACE_PEEKTEXT: /* read word at location addr. */
-       case PTRACE_PEEKDATA:
-               ret = generic_ptrace_peekdata(child, addr, data);
-               break;
-
        /* read the word at location addr in the USER area. */
        case PTRACE_PEEKUSR: {
                unsigned long tmp;
@@ -501,12 +877,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
        }
 
-       /* when I and D space are separate, this will have to be fixed. */
-       case PTRACE_POKETEXT: /* write the word at location addr. */
-       case PTRACE_POKEDATA:
-               ret = generic_ptrace_pokedata(child, addr, data);
-               break;
-
        case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
                ret = -EIO;
                if ((addr & (sizeof(data) - 1)) || addr < 0 ||
@@ -523,82 +893,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                }
                break;
 
-       case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-               if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
-                       __put_user(getreg(child, i), datap);
-                       datap++;
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-               unsigned long tmp;
-               if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
-                       __get_user(tmp, datap);
-                       putreg(child, i, tmp);
-                       datap++;
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-               if (!access_ok(VERIFY_WRITE, datap,
-                              sizeof(struct user_i387_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               ret = 0;
-               if (!tsk_used_math(child))
-                       init_fpu(child);
-               get_fpregs((struct user_i387_struct __user *)data, child);
-               break;
-       }
-
-       case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-               if (!access_ok(VERIFY_READ, datap,
-                              sizeof(struct user_i387_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               set_stopped_child_used_math(child);
-               set_fpregs(child, (struct user_i387_struct __user *)data);
-               ret = 0;
-               break;
-       }
+       case PTRACE_GETREGS:    /* Get all gp regs from the child. */
+               return copy_regset_to_user(child,
+                                          task_user_regset_view(current),
+                                          REGSET_GENERAL,
+                                          0, sizeof(struct user_regs_struct),
+                                          datap);
+
+       case PTRACE_SETREGS:    /* Set all gp regs in the child. */
+               return copy_regset_from_user(child,
+                                            task_user_regset_view(current),
+                                            REGSET_GENERAL,
+                                            0, sizeof(struct user_regs_struct),
+                                            datap);
+
+       case PTRACE_GETFPREGS:  /* Get the child FPU state. */
+               return copy_regset_to_user(child,
+                                          task_user_regset_view(current),
+                                          REGSET_FP,
+                                          0, sizeof(struct user_i387_struct),
+                                          datap);
+
+       case PTRACE_SETFPREGS:  /* Set the child FPU state. */
+               return copy_regset_from_user(child,
+                                            task_user_regset_view(current),
+                                            REGSET_FP,
+                                            0, sizeof(struct user_i387_struct),
+                                            datap);
 
 #ifdef CONFIG_X86_32
-       case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
-               if (!access_ok(VERIFY_WRITE, datap,
-                              sizeof(struct user_fxsr_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               if (!tsk_used_math(child))
-                       init_fpu(child);
-               ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
-               break;
-       }
-
-       case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
-               if (!access_ok(VERIFY_READ, datap,
-                              sizeof(struct user_fxsr_struct))) {
-                       ret = -EIO;
-                       break;
-               }
-               set_stopped_child_used_math(child);
-               ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
-               break;
-       }
+       case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
+               return copy_regset_to_user(child, &user_x86_32_view,
+                                          REGSET_XFP,
+                                          0, sizeof(struct user_fxsr_struct),
+                                          datap) ? -EIO : 0;
+
+       case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
+               return copy_regset_from_user(child, &user_x86_32_view,
+                                            REGSET_XFP,
+                                            0, sizeof(struct user_fxsr_struct),
+                                            datap) ? -EIO : 0;
 #endif
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -626,6 +960,39 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 #endif
 
+       /*
+        * These bits need more cooking - not enabled yet:
+        */
+#ifdef CONFIG_X86_PTRACE_BTS
+       case PTRACE_BTS_CONFIG:
+               ret = ptrace_bts_config
+                       (child, data, (struct ptrace_bts_config __user *)addr);
+               break;
+
+       case PTRACE_BTS_STATUS:
+               ret = ptrace_bts_status
+                       (child, data, (struct ptrace_bts_config __user *)addr);
+               break;
+
+       case PTRACE_BTS_SIZE:
+               ret = ptrace_bts_size(child);
+               break;
+
+       case PTRACE_BTS_GET:
+               ret = ptrace_bts_read_record
+                       (child, data, (struct bts_struct __user *) addr);
+               break;
+
+       case PTRACE_BTS_CLEAR:
+               ret = ptrace_bts_clear(child);
+               break;
+
+       case PTRACE_BTS_DRAIN:
+               ret = ptrace_bts_drain
+                       (child, data, (struct bts_struct __user *) addr);
+               break;
+#endif /* CONFIG_X86_PTRACE_BTS */
+
        default:
                ret = ptrace_request(child, request, addr, data);
                break;
@@ -634,164 +1001,461 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        return ret;
 }
 
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_IA32_EMULATION
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <asm/ia32.h>
+#include <asm/user32.h>
+
+#define R32(l,q)                                                       \
+       case offsetof(struct user32, regs.l):                           \
+               regs->q = value; break
 
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+#define SEG32(rs)                                                      \
+       case offsetof(struct user32, regs.rs):                          \
+               return set_segment_reg(child,                           \
+                                      offsetof(struct user_regs_struct, rs), \
+                                      value);                          \
+               break
+
+static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 {
-       struct siginfo info;
+       struct pt_regs *regs = task_pt_regs(child);
+
+       switch (regno) {
+
+       SEG32(cs);
+       SEG32(ds);
+       SEG32(es);
+       SEG32(fs);
+       SEG32(gs);
+       SEG32(ss);
+
+       R32(ebx, bx);
+       R32(ecx, cx);
+       R32(edx, dx);
+       R32(edi, di);
+       R32(esi, si);
+       R32(ebp, bp);
+       R32(eax, ax);
+       R32(eip, ip);
+       R32(esp, sp);
+
+       case offsetof(struct user32, regs.orig_eax):
+               /*
+                * Sign-extend the value so that orig_eax = -1
+                * causes (long)orig_ax < 0 tests to fire correctly.
+                */
+               regs->orig_ax = (long) (s32) value;
+               break;
 
-       tsk->thread.trap_no = 1;
-       tsk->thread.error_code = error_code;
+       case offsetof(struct user32, regs.eflags):
+               return set_flags(child, value);
 
-       memset(&info, 0, sizeof(info));
-       info.si_signo = SIGTRAP;
-       info.si_code = TRAP_BRKPT;
+       case offsetof(struct user32, u_debugreg[0]) ...
+               offsetof(struct user32, u_debugreg[7]):
+               regno -= offsetof(struct user32, u_debugreg[0]);
+               return ptrace_set_debugreg(child, regno / 4, value);
 
-       /* User-mode ip? */
-       info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
+       default:
+               if (regno > sizeof(struct user32) || (regno & 3))
+                       return -EIO;
 
-       /* Send us the fake SIGTRAP */
-       force_sig_info(SIGTRAP, &info, tsk);
+               /*
+                * Other dummy fields in the virtual user structure
+                * are ignored
+                */
+               break;
+       }
+       return 0;
 }
 
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-__attribute__((regparm(3)))
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
+#undef R32
+#undef SEG32
+
+#define R32(l,q)                                                       \
+       case offsetof(struct user32, regs.l):                           \
+               *val = regs->q; break
+
+#define SEG32(rs)                                                      \
+       case offsetof(struct user32, regs.rs):                          \
+               *val = get_segment_reg(child,                           \
+                                      offsetof(struct user_regs_struct, rs)); \
+               break
+
+static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 {
-       int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
-       /*
-        * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
-        * interception
-        */
-       int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
-       int ret = 0;
+       struct pt_regs *regs = task_pt_regs(child);
+
+       switch (regno) {
+
+       SEG32(ds);
+       SEG32(es);
+       SEG32(fs);
+       SEG32(gs);
+
+       R32(cs, cs);
+       R32(ss, ss);
+       R32(ebx, bx);
+       R32(ecx, cx);
+       R32(edx, dx);
+       R32(edi, di);
+       R32(esi, si);
+       R32(ebp, bp);
+       R32(eax, ax);
+       R32(orig_eax, orig_ax);
+       R32(eip, ip);
+       R32(esp, sp);
+
+       case offsetof(struct user32, regs.eflags):
+               *val = get_flags(child);
+               break;
 
-       /* do the secure computing check first */
-       if (!entryexit)
-               secure_computing(regs->orig_ax);
+       case offsetof(struct user32, u_debugreg[0]) ...
+               offsetof(struct user32, u_debugreg[7]):
+               regno -= offsetof(struct user32, u_debugreg[0]);
+               *val = ptrace_get_debugreg(child, regno / 4);
+               break;
 
-       if (unlikely(current->audit_context)) {
-               if (entryexit)
-                       audit_syscall_exit(AUDITSC_RESULT(regs->ax),
-                                               regs->ax);
-               /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
-                * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
-                * not used, entry.S will call us only on syscall exit, not
-                * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
-                * calling send_sigtrap() on syscall entry.
-                *
-                * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
-                * is_singlestep is false, despite his name, so we will still do
-                * the correct thing.
+       default:
+               if (regno > sizeof(struct user32) || (regno & 3))
+                       return -EIO;
+
+               /*
+                * Other dummy fields in the virtual user structure
+                * are ignored
                 */
-               else if (is_singlestep)
-                       goto out;
+               *val = 0;
+               break;
        }
+       return 0;
+}
 
-       if (!(current->ptrace & PT_PTRACED))
-               goto out;
+#undef R32
+#undef SEG32
 
-       /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
-        * and then is resumed with SYSEMU_SINGLESTEP, it will come in
-        * here. We have to check this and return */
-       if (is_sysemu && entryexit)
-               return 0;
+static int genregs32_get(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        void *kbuf, void __user *ubuf)
+{
+       if (kbuf) {
+               compat_ulong_t *k = kbuf;
+               while (count > 0) {
+                       getreg32(target, pos, k++);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               compat_ulong_t __user *u = ubuf;
+               while (count > 0) {
+                       compat_ulong_t word;
+                       getreg32(target, pos, &word);
+                       if (__put_user(word, u++))
+                               return -EFAULT;
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
 
-       /* Fake a debug trap */
-       if (is_singlestep)
-               send_sigtrap(current, regs, 0);
+       return 0;
+}
 
-       if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
-               goto out;
+static int genregs32_set(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf)
+{
+       int ret = 0;
+       if (kbuf) {
+               const compat_ulong_t *k = kbuf;
+               while (count > 0 && !ret) {
+                       ret = putreg32(target, pos, *k++);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               const compat_ulong_t __user *u = ubuf;
+               while (count > 0 && !ret) {
+                       compat_ulong_t word;
+                       ret = __get_user(word, u++);
+                       if (ret)
+                               break;
+                       ret = putreg32(target, pos, word);
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+       return ret;
+}
 
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       /* Note that the debugger could change the result of test_thread_flag!*/
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+                       compat_ulong_t caddr, compat_ulong_t cdata)
+{
+       unsigned long addr = caddr;
+       unsigned long data = cdata;
+       void __user *datap = compat_ptr(data);
+       int ret;
+       __u32 val;
 
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
+       switch (request) {
+       case PTRACE_PEEKUSR:
+               ret = getreg32(child, addr, &val);
+               if (ret == 0)
+                       ret = put_user(val, (__u32 __user *)datap);
+               break;
+
+       case PTRACE_POKEUSR:
+               ret = putreg32(child, addr, data);
+               break;
+
+       case PTRACE_GETREGS:    /* Get all gp regs from the child. */
+               return copy_regset_to_user(child, &user_x86_32_view,
+                                          REGSET_GENERAL,
+                                          0, sizeof(struct user_regs_struct32),
+                                          datap);
+
+       case PTRACE_SETREGS:    /* Set all gp regs in the child. */
+               return copy_regset_from_user(child, &user_x86_32_view,
+                                            REGSET_GENERAL, 0,
+                                            sizeof(struct user_regs_struct32),
+                                            datap);
+
+       case PTRACE_GETFPREGS:  /* Get the child FPU state. */
+               return copy_regset_to_user(child, &user_x86_32_view,
+                                          REGSET_FP, 0,
+                                          sizeof(struct user_i387_ia32_struct),
+                                          datap);
+
+       case PTRACE_SETFPREGS:  /* Set the child FPU state. */
+               return copy_regset_from_user(
+                       child, &user_x86_32_view, REGSET_FP,
+                       0, sizeof(struct user_i387_ia32_struct), datap);
+
+       case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
+               return copy_regset_to_user(child, &user_x86_32_view,
+                                          REGSET_XFP, 0,
+                                          sizeof(struct user32_fxsr_struct),
+                                          datap);
+
+       case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
+               return copy_regset_from_user(child, &user_x86_32_view,
+                                            REGSET_XFP, 0,
+                                            sizeof(struct user32_fxsr_struct),
+                                            datap);
+
+       case PTRACE_GET_THREAD_AREA:
+       case PTRACE_SET_THREAD_AREA:
+#ifdef CONFIG_X86_PTRACE_BTS
+       case PTRACE_BTS_CONFIG:
+       case PTRACE_BTS_STATUS:
+       case PTRACE_BTS_SIZE:
+       case PTRACE_BTS_GET:
+       case PTRACE_BTS_CLEAR:
+       case PTRACE_BTS_DRAIN:
+#endif /* CONFIG_X86_PTRACE_BTS */
+               return arch_ptrace(child, request, addr, data);
+
+       default:
+               return compat_ptrace_request(child, request, addr, data);
        }
-       ret = is_sysemu;
-out:
-       if (unlikely(current->audit_context) && !entryexit)
-               audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
-                                   regs->bx, regs->cx, regs->dx, regs->si);
-       if (ret == 0)
-               return 0;
 
-       regs->orig_ax = -1; /* force skip of syscall restarting */
-       if (unlikely(current->audit_context))
-               audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
-       return 1;
+       return ret;
 }
 
-#else  /* CONFIG_X86_64 */
+#endif /* CONFIG_IA32_EMULATION */
+
+#ifdef CONFIG_X86_64
+
+static const struct user_regset x86_64_regsets[] = {
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n = sizeof(struct user_regs_struct) / sizeof(long),
+               .size = sizeof(long), .align = sizeof(long),
+               .get = genregs_get, .set = genregs_set
+       },
+       [REGSET_FP] = {
+               .core_note_type = NT_PRFPREG,
+               .n = sizeof(struct user_i387_struct) / sizeof(long),
+               .size = sizeof(long), .align = sizeof(long),
+               .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+       },
+       [REGSET_IOPERM64] = {
+               .core_note_type = NT_386_IOPERM,
+               .n = IO_BITMAP_LONGS,
+               .size = sizeof(long), .align = sizeof(long),
+               .active = ioperm_active, .get = ioperm_get
+       },
+};
+
+static const struct user_regset_view user_x86_64_view = {
+       .name = "x86_64", .e_machine = EM_X86_64,
+       .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
+};
+
+#else  /* CONFIG_X86_32 */
+
+#define user_regs_struct32     user_regs_struct
+#define genregs32_get          genregs_get
+#define genregs32_set          genregs_set
+
+#define user_i387_ia32_struct  user_i387_struct
+#define user32_fxsr_struct     user_fxsr_struct
+
+#endif /* CONFIG_X86_64 */
 
-static void syscall_trace(struct pt_regs *regs)
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset x86_32_regsets[] = {
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n = sizeof(struct user_regs_struct32) / sizeof(u32),
+               .size = sizeof(u32), .align = sizeof(u32),
+               .get = genregs32_get, .set = genregs32_set
+       },
+       [REGSET_FP] = {
+               .core_note_type = NT_PRFPREG,
+               .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
+               .size = sizeof(u32), .align = sizeof(u32),
+               .active = fpregs_active, .get = fpregs_get, .set = fpregs_set
+       },
+       [REGSET_XFP] = {
+               .core_note_type = NT_PRXFPREG,
+               .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
+               .size = sizeof(u32), .align = sizeof(u32),
+               .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+       },
+       [REGSET_TLS] = {
+               .core_note_type = NT_386_TLS,
+               .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
+               .size = sizeof(struct user_desc),
+               .align = sizeof(struct user_desc),
+               .active = regset_tls_active,
+               .get = regset_tls_get, .set = regset_tls_set
+       },
+       [REGSET_IOPERM32] = {
+               .core_note_type = NT_386_IOPERM,
+               .n = IO_BITMAP_BYTES / sizeof(u32),
+               .size = sizeof(u32), .align = sizeof(u32),
+               .active = ioperm_active, .get = ioperm_get
+       },
+};
+
+static const struct user_regset_view user_x86_32_view = {
+       .name = "i386", .e_machine = EM_386,
+       .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 {
+#ifdef CONFIG_IA32_EMULATION
+       if (test_tsk_thread_flag(task, TIF_IA32))
+#endif
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+               return &user_x86_32_view;
+#endif
+#ifdef CONFIG_X86_64
+       return &user_x86_64_view;
+#endif
+}
+
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+                                        int error_code, int si_code)
+{
+       struct siginfo info;
 
-#if 0
-       printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-              current->comm,
-              regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-              current_thread_info()->flags, current->ptrace);
+       tsk->thread.trap_no = 1;
+       tsk->thread.error_code = error_code;
+
+       memset(&info, 0, sizeof(info));
+       info.si_signo = SIGTRAP;
+       info.si_code = si_code;
+
+       /* User-mode ip? */
+       info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
+
+       /* Send us the fake SIGTRAP */
+       force_sig_info(SIGTRAP, &info, tsk);
+}
+
+
+#ifdef CONFIG_X86_32
+# define IS_IA32       1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32       test_thread_flag(TIF_IA32)
+#else
+# define IS_IA32       0
 #endif
 
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-                               ? 0x80 : 0));
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
+{
+       long ret = 0;
+
        /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
+        * If we stepped into a sysenter/syscall insn, it trapped in
+        * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+        * If user-mode had set TF itself, then it's still clear from
+        * do_debug() and we need to set it again to restore the user
+        * state.  If we entered on the slow path, TF was already set.
         */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-}
+       if (test_thread_flag(TIF_SINGLESTEP))
+               regs->flags |= X86_EFLAGS_TF;
 
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
-{
        /* do the secure computing check first */
        secure_computing(regs->orig_ax);
 
-       if (test_thread_flag(TIF_SYSCALL_TRACE)
-           && (current->ptrace & PT_PTRACED))
-               syscall_trace(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               ret = -1L;
+
+       if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+           tracehook_report_syscall_entry(regs))
+               ret = -1L;
 
        if (unlikely(current->audit_context)) {
-               if (test_thread_flag(TIF_IA32)) {
+               if (IS_IA32)
                        audit_syscall_entry(AUDIT_ARCH_I386,
                                            regs->orig_ax,
                                            regs->bx, regs->cx,
                                            regs->dx, regs->si);
-               } else {
+#ifdef CONFIG_X86_64
+               else
                        audit_syscall_entry(AUDIT_ARCH_X86_64,
                                            regs->orig_ax,
                                            regs->di, regs->si,
                                            regs->dx, regs->r10);
-               }
+#endif
        }
+
+       return ret ?: regs->orig_ax;
 }
 
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
        if (unlikely(current->audit_context))
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
-       if ((test_thread_flag(TIF_SYSCALL_TRACE)
-            || test_thread_flag(TIF_SINGLESTEP))
-           && (current->ptrace & PT_PTRACED))
-               syscall_trace(regs);
-}
+       if (test_thread_flag(TIF_SYSCALL_TRACE))
+               tracehook_report_syscall_exit(regs, 0);
 
-#endif /* CONFIG_X86_32 */
+       /*
+        * If TIF_SYSCALL_EMU is set, we only get here because of
+        * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+        * We already reported this syscall instruction in
+        * syscall_trace_enter(), so don't do any more now.
+        */
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               return;
+
+       /*
+        * If we are single-stepping, synthesize a trap to follow the
+        * system call instruction.
+        */
+       if (test_thread_flag(TIF_SINGLESTEP) &&
+           tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
+               send_sigtrap(current, regs, 0, TRAP_BRKPT);
+}