sysctl: remove "struct file *" argument of ->proc_handler
[safe/jmp/linux-2.6] / arch / x86 / kernel / ptrace.c
index 702c33e..7b058a2 100644 (file)
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "tls.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
 enum x86_regset {
        REGSET_GENERAL,
        REGSET_FP,
        REGSET_XFP,
+       /*
+        * Shares the REGSET_XFP index; x86_64_regsets[] uses this name
+        * for its NT_386_IOPERM entry.
+        */
+       REGSET_IOPERM64 = REGSET_XFP,
        REGSET_TLS,
+       /* Index of the NT_386_IOPERM entry in x86_32_regsets[]. */
+       REGSET_IOPERM32,
 };
 
 /*
@@ -69,13 +76,10 @@ static inline bool invalid_selector(u16 value)
 
 #define FLAG_MASK              FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+/*
+ * Return the address of the pt_regs element selected by regno: regno is
+ * shifted right by two (i.e. divided by 4) and used as an element index
+ * starting at regs->bx.  The BUILD_BUG_ON() ensures bx is the first
+ * member of struct pt_regs.
+ */
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
        BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
-       regno >>= 2;
-       if (regno > FS)
-               --regno;
-       return &regs->bx + regno;
+       return &regs->bx + (regno >> 2);
 }
 
 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -87,9 +91,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
        if (offset != offsetof(struct user_regs_struct, gs))
                retval = *pt_regs_access(task_pt_regs(task), offset);
        else {
-               retval = task->thread.gs;
                if (task == current)
-                       savesegment(gs, retval);
+                       retval = get_user_gs(task_pt_regs(task));
+               else
+                       retval = task_user_gs(task);
        }
        return retval;
 }
@@ -123,13 +128,10 @@ static int set_segment_reg(struct task_struct *task,
                break;
 
        case offsetof(struct user_regs_struct, gs):
-               task->thread.gs = value;
                if (task == current)
-                       /*
-                        * The user-mode %gs is not affected by
-                        * kernel entry, so we must update the CPU.
-                        */
-                       loadsegment(gs, value);
+                       set_user_gs(task_pt_regs(task), value);
+               else
+                       task_user_gs(task) = value;
        }
 
        return 0;
@@ -270,7 +272,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
        if (test_tsk_thread_flag(task, TIF_IA32))
                return IA32_PAGE_OFFSET - 3;
 #endif
-       return TASK_SIZE64 - 7;
+       return TASK_SIZE_MAX - 7;
 }
 
 #endif /* CONFIG_X86_32 */
@@ -544,183 +546,223 @@ static int ptrace_set_debugreg(struct task_struct *child,
        return 0;
 }
 
-static int ptrace_bts_get_size(struct task_struct *child)
+/*
+ * These access the current or another (stopped) task's io permission
+ * bitmap for debugging or core dump.
+ *
+ * ioperm_active() is the .active callback of the NT_386_IOPERM regsets;
+ * it returns thread.io_bitmap_max divided by the regset's element size.
+ */
+static int ioperm_active(struct task_struct *target,
+                        const struct user_regset *regset)
 {
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
-
-       return ds_get_bts_index((void *)child->thread.ds_area_msr);
+       return target->thread.io_bitmap_max / regset->size;
 }
 
-static int ptrace_bts_read_record(struct task_struct *child,
-                                 long index,
-                                 struct bts_struct __user *out)
+/*
+ * .get callback of the NT_386_IOPERM regsets.
+ *
+ * Fails with -ENXIO when the target has no io bitmap
+ * (thread.io_bitmap_ptr is NULL).  Otherwise passes IO_BITMAP_BYTES of
+ * the bitmap to user_regset_copyout() and returns its result.
+ */
+static int ioperm_get(struct task_struct *target,
+                     const struct user_regset *regset,
+                     unsigned int pos, unsigned int count,
+                     void *kbuf, void __user *ubuf)
 {
-       struct bts_struct ret;
-       int retval;
-       int bts_end;
-       int bts_index;
-
-       if (!child->thread.ds_area_msr)
+       if (!target->thread.io_bitmap_ptr)
                return -ENXIO;
 
-       if (index < 0)
-               return -EINVAL;
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                  target->thread.io_bitmap_ptr,
+                                  0, IO_BITMAP_BYTES);
+}
 
-       bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
-       if (bts_end <= index)
-               return -EINVAL;
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+       /* The branch trace handle, set from ds_request_bts_task(). */
+       struct bts_tracer       *tracer;
 
-       /* translate the ptrace bts index into the ds bts index */
-       bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
-       bts_index -= (index + 1);
-       if (bts_index < 0)
-               bts_index += bts_end;
+       /* The kzalloc()ed buffer storing the branch trace; size in bytes. */
+       void                    *buffer;
+       unsigned int            size;
 
-       retval = ds_read_bts((void *)child->thread.ds_area_msr,
-                            bts_index, &ret);
-       if (retval < 0)
-               return retval;
+       /* The mm that paid for the above buffer; held via get_task_mm(). */
+       struct mm_struct        *mm;
 
-       if (copy_to_user(out, &ret, sizeof(ret)))
-               return -EFAULT;
+       /* The task this context belongs to; held via get_task_struct(). */
+       struct task_struct      *task;
 
-       return sizeof(ret);
-}
+       /* The signal to send on a bts buffer overflow. */
+       unsigned int            bts_ovfl_signal;
+
+       /* The work struct used by free_bts_context() to destroy a context. */
+       struct work_struct      work;
+};
 
-static int ptrace_bts_write_record(struct task_struct *child,
-                                  const struct bts_struct *in)
+/*
+ * Allocate a zeroed trace buffer of @size bytes for @context.
+ *
+ * The size is first charged through account_locked_memory() against
+ * current->mm.  On success the buffer, its size and a get_task_mm()
+ * reference on current's mm are stored in @context so that
+ * free_bts_buffer() can refund and drop them later.
+ *
+ * Returns 0 on success, the negative error from
+ * account_locked_memory(), or -ENOMEM if kzalloc() fails (the charge
+ * is refunded in that case).
+ */
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
 {
-       int retval;
+       void *buffer;
+       int err;
 
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
+       err = account_locked_memory(current->mm, current->signal->rlim, size);
+       if (err < 0)
+               return err;
+
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer) {
+               /* err is >= 0 here; report the failed allocation. */
+               err = -ENOMEM;
+               goto out_refund;
+       }
 
-       retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
-       if (retval)
-               return retval;
+       context->buffer = buffer;
+       context->size = size;
+       context->mm = get_task_mm(current);
 
-       return sizeof(*in);
+       return 0;
+
+ out_refund:
+       refund_locked_memory(current->mm, size);
+       return err;
 }
 
-static int ptrace_bts_clear(struct task_struct *child)
+/*
+ * Release the trace buffer of @context, if any.
+ *
+ * Does nothing when context->buffer is NULL.  Otherwise frees the
+ * buffer, refunds context->size to context->mm, drops the mm reference
+ * with mmput(), and resets buffer, size and mm in the context.
+ */
+static inline void free_bts_buffer(struct bts_context *context)
 {
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
+       if (!context->buffer)
+               return;
+
+       kfree(context->buffer);
+       context->buffer = NULL;
 
-       return ds_clear((void *)child->thread.ds_area_msr);
+       refund_locked_memory(context->mm, context->size);
+       context->size = 0;
+
+       mmput(context->mm);
+       context->mm = NULL;
 }
 
-static int ptrace_bts_drain(struct task_struct *child,
-                           long size,
-                           struct bts_struct __user *out)
+/*
+ * Work handler queued by free_bts_context().
+ *
+ * Recovers the bts_context from its embedded work member, releases the
+ * tracer, drops the task reference, frees the trace buffer and finally
+ * frees the context itself.
+ */
+static void free_bts_context_work(struct work_struct *w)
 {
-       int end, i;
-       void *ds = (void *)child->thread.ds_area_msr;
+       struct bts_context *context;
 
-       if (!ds)
-               return -ENXIO;
+       context = container_of(w, struct bts_context, work);
 
-       end = ds_get_bts_index(ds);
-       if (end <= 0)
-               return end;
-
-       if (size < (end * sizeof(struct bts_struct)))
-               return -EIO;
+       ds_release_bts(context->tracer);
+       put_task_struct(context->task);
+       free_bts_buffer(context);
+       kfree(context);
+}
 
-       for (i = 0; i < end; i++, out++) {
-               struct bts_struct ret;
-               int retval;
+/*
+ * Destroy @context asynchronously: initialise its work member with
+ * free_bts_context_work() and queue it with schedule_work().  The
+ * context is freed by that handler.
+ */
+static inline void free_bts_context(struct bts_context *context)
+{
+       INIT_WORK(&context->work, free_bts_context_work);
+       schedule_work(&context->work);
+}
 
-               retval = ds_read_bts(ds, i, &ret);
-               if (retval < 0)
-                       return retval;
+/*
+ * Allocate a zeroed bts_context for @task.
+ *
+ * On success the context and the task are linked to each other
+ * (context->task, task->bts) and a task_struct reference is taken.
+ * Returns the context, or NULL if the allocation failed.
+ */
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+       struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+       if (context) {
+               context->task = task;
+               task->bts = context;
 
-               if (copy_to_user(out, &ret, sizeof(ret)))
-                       return -EFAULT;
+               get_task_struct(task);
        }
 
-       ds_clear(ds);
-
-       return end;
+       return context;
 }
 
-static int ptrace_bts_realloc(struct task_struct *child,
-                             int size, int reduce_size)
+/*
+ * Copy one branch trace record of @child to user space.
+ *
+ * @index counts backwards from trace->ds.top: the record address is
+ * top - (index + 1) * ds.size, moved up by ds.n * ds.size when that
+ * falls below ds.begin.
+ *
+ * Returns sizeof(struct bts_struct) on success, -ESRCH when the child
+ * has no bts context or ds_read_bts() returns NULL, -EOPNOTSUPP when
+ * the trace has no read method, the negative error from trace->read(),
+ * or -EFAULT when the copy to @out fails.
+ *
+ * NOTE(review): @index is not range-checked against ds.n in this
+ * function - confirm callers or trace->read() bound it.
+ */
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
+                                 struct bts_struct __user *out)
 {
-       unsigned long rlim, vm;
-       int ret, old_size;
+       struct bts_context *context;
+       const struct bts_trace *trace;
+       struct bts_struct bts;
+       const unsigned char *at;
+       int error;
 
-       if (size < 0)
-               return -EINVAL;
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
 
-       old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-       if (old_size < 0)
-               return old_size;
+       trace = ds_read_bts(context->tracer);
+       if (!trace)
+               return -ESRCH;
 
-       ret = ds_free((void **)&child->thread.ds_area_msr);
-       if (ret < 0)
-               goto out;
+       at = trace->ds.top - ((index + 1) * trace->ds.size);
+       if ((void *)at < trace->ds.begin)
+               at += (trace->ds.n * trace->ds.size);
 
-       size >>= PAGE_SHIFT;
-       old_size >>= PAGE_SHIFT;
+       if (!trace->read)
+               return -EOPNOTSUPP;
 
-       current->mm->total_vm  -= old_size;
-       current->mm->locked_vm -= old_size;
+       error = trace->read(context->tracer, at, &bts);
+       if (error < 0)
+               return error;
 
-       if (size == 0)
-               goto out;
+       if (copy_to_user(out, &bts, sizeof(bts)))
+               return -EFAULT;
 
-       rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-       vm = current->mm->total_vm  + size;
-       if (rlim < vm) {
-               ret = -ENOMEM;
+       return sizeof(bts);
+}
 
-               if (!reduce_size)
-                       goto out;
+/*
+ * Copy all records between trace->ds.begin and trace->ds.top to @out,
+ * then zero the whole buffer and reset the tracer.
+ *
+ * Returns the number of records copied, -ESRCH when the child has no
+ * bts context or ds_read_bts() returns NULL, -EOPNOTSUPP when the
+ * trace has no read method, -EIO when @size is smaller than the byte
+ * span ds.top - ds.begin, the negative error from trace->read() or
+ * ds_reset_bts(), or -EFAULT when a copy to user space fails.
+ *
+ * NOTE(review): @size is compared against the raw span in units of
+ * ds.size, while each iteration writes sizeof(struct bts_struct) to
+ * @out - confirm the two record sizes agree.
+ */
+static int ptrace_bts_drain(struct task_struct *child,
+                           long size,
+                           struct bts_struct __user *out)
+{
+       struct bts_context *context;
+       const struct bts_trace *trace;
+       const unsigned char *at;
+       int error, drained = 0;
 
-               size = rlim - current->mm->total_vm;
-               if (size <= 0)
-                       goto out;
-       }
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
 
-       rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-       vm = current->mm->locked_vm  + size;
-       if (rlim < vm) {
-               ret = -ENOMEM;
+       trace = ds_read_bts(context->tracer);
+       if (!trace)
+               return -ESRCH;
 
-               if (!reduce_size)
-                       goto out;
+       if (!trace->read)
+               return -EOPNOTSUPP;
 
-               size = rlim - current->mm->locked_vm;
-               if (size <= 0)
-                       goto out;
-       }
+       if (size < (trace->ds.top - trace->ds.begin))
+               return -EIO;
 
-       ret = ds_allocate((void **)&child->thread.ds_area_msr,
-                         size << PAGE_SHIFT);
-       if (ret < 0)
-               goto out;
+       for (at = trace->ds.begin; (void *)at < trace->ds.top;
+            out++, drained++, at += trace->ds.size) {
+               struct bts_struct bts;
 
-       current->mm->total_vm  += size;
-       current->mm->locked_vm += size;
+               error = trace->read(context->tracer, at, &bts);
+               if (error < 0)
+                       return error;
 
-out:
-       if (child->thread.ds_area_msr)
-               set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-       else
-               clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+               if (copy_to_user(out, &bts, sizeof(bts)))
+                       return -EFAULT;
+       }
 
-       return ret;
+       memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
+
+       error = ds_reset_bts(context->tracer);
+       if (error < 0)
+               return error;
+
+       return drained;
 }
 
 static int ptrace_bts_config(struct task_struct *child,
                             long cfg_size,
                             const struct ptrace_bts_config __user *ucfg)
 {
+       struct bts_context *context;
        struct ptrace_bts_config cfg;
-       int bts_size, ret = 0;
-       void *ds;
+       unsigned int flags = 0;
 
        if (cfg_size < sizeof(cfg))
                return -EIO;
@@ -728,87 +770,87 @@ static int ptrace_bts_config(struct task_struct *child,
        if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
                return -EFAULT;
 
-       if ((int)cfg.size < 0)
-               return -EINVAL;
+       context = child->bts;
+       if (!context)
+               context = alloc_bts_context(child);
+       if (!context)
+               return -ENOMEM;
+
+       if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+               if (!cfg.signal)
+                       return -EINVAL;
 
-       bts_size = 0;
-       ds = (void *)child->thread.ds_area_msr;
-       if (ds) {
-               bts_size = ds_get_bts_size(ds);
-               if (bts_size < 0)
-                       return bts_size;
+               return -EOPNOTSUPP;
+               context->bts_ovfl_signal = cfg.signal;
        }
-       cfg.size = PAGE_ALIGN(cfg.size);
 
-       if (bts_size != cfg.size) {
-               ret = ptrace_bts_realloc(child, cfg.size,
-                                        cfg.flags & PTRACE_BTS_O_CUT_SIZE);
-               if (ret < 0)
-                       goto errout;
+       ds_release_bts(context->tracer);
+       context->tracer = NULL;
 
-               ds = (void *)child->thread.ds_area_msr;
-       }
+       if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+               int err;
 
-       if (cfg.flags & PTRACE_BTS_O_SIGNAL)
-               ret = ds_set_overflow(ds, DS_O_SIGNAL);
-       else
-               ret = ds_set_overflow(ds, DS_O_WRAP);
-       if (ret < 0)
-               goto errout;
+               free_bts_buffer(context);
+               if (!cfg.size)
+                       return 0;
+
+               err = alloc_bts_buffer(context, cfg.size);
+               if (err < 0)
+                       return err;
+       }
 
        if (cfg.flags & PTRACE_BTS_O_TRACE)
-               child->thread.debugctlmsr |= ds_debugctl_mask();
-       else
-               child->thread.debugctlmsr &= ~ds_debugctl_mask();
+               flags |= BTS_USER;
 
        if (cfg.flags & PTRACE_BTS_O_SCHED)
-               set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-       else
-               clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+               flags |= BTS_TIMESTAMPS;
 
-       ret = sizeof(cfg);
+       context->tracer =
+               ds_request_bts_task(child, context->buffer, context->size,
+                                   NULL, (size_t)-1, flags);
+       if (unlikely(IS_ERR(context->tracer))) {
+               int error = PTR_ERR(context->tracer);
 
-out:
-       if (child->thread.debugctlmsr)
-               set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-       else
-               clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-
-       return ret;
+               free_bts_buffer(context);
+               context->tracer = NULL;
+               return error;
+       }
 
-errout:
-       child->thread.debugctlmsr &= ~ds_debugctl_mask();
-       clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-       goto out;
+       return sizeof(cfg);
 }
 
 static int ptrace_bts_status(struct task_struct *child,
                             long cfg_size,
                             struct ptrace_bts_config __user *ucfg)
 {
-       void *ds = (void *)child->thread.ds_area_msr;
+       struct bts_context *context;
+       const struct bts_trace *trace;
        struct ptrace_bts_config cfg;
 
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
        if (cfg_size < sizeof(cfg))
                return -EIO;
 
-       memset(&cfg, 0, sizeof(cfg));
-
-       if (ds) {
-               cfg.size = ds_get_bts_size(ds);
+       trace = ds_read_bts(context->tracer);
+       if (!trace)
+               return -ESRCH;
 
-               if (ds_get_overflow(ds) == DS_O_SIGNAL)
-                       cfg.flags |= PTRACE_BTS_O_SIGNAL;
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.size        = trace->ds.end - trace->ds.begin;
+       cfg.signal      = context->bts_ovfl_signal;
+       cfg.bts_size    = sizeof(struct bts_struct);
 
-               if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-                   child->thread.debugctlmsr & ds_debugctl_mask())
-                       cfg.flags |= PTRACE_BTS_O_TRACE;
+       if (cfg.signal)
+               cfg.flags |= PTRACE_BTS_O_SIGNAL;
 
-               if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-                       cfg.flags |= PTRACE_BTS_O_SCHED;
-       }
+       if (trace->ds.flags & BTS_USER)
+               cfg.flags |= PTRACE_BTS_O_TRACE;
 
-       cfg.bts_size = sizeof(struct bts_struct);
+       if (trace->ds.flags & BTS_TIMESTAMPS)
+               cfg.flags |= PTRACE_BTS_O_SCHED;
 
        if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
                return -EFAULT;
@@ -816,17 +858,53 @@ static int ptrace_bts_status(struct task_struct *child,
        return sizeof(cfg);
 }
 
-void ptrace_bts_take_timestamp(struct task_struct *tsk,
-                              enum bts_qualifier qualifier)
+/*
+ * Discard the branch trace of @child: zero the whole buffer
+ * (ds.n records of ds.size bytes) and reset the tracer.
+ *
+ * Returns the result of ds_reset_bts(), or -ESRCH when the child has
+ * no bts context or ds_read_bts() returns NULL.
+ */
+static int ptrace_bts_clear(struct task_struct *child)
 {
-       struct bts_struct rec = {
-               .qualifier = qualifier,
-               .variant.jiffies = jiffies_64
-       };
+       struct bts_context *context;
+       const struct bts_trace *trace;
+
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
+       if (!trace)
+               return -ESRCH;
 
-       ptrace_bts_write_record(tsk, &rec);
+       memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
+
+       return ds_reset_bts(context->tracer);
 }
 
+/*
+ * Return the number of records currently in @child's branch trace:
+ * the byte span ds.top - ds.begin divided by the record size ds.size.
+ *
+ * Returns -ESRCH when the child has no bts context or ds_read_bts()
+ * returns NULL.
+ */
+static int ptrace_bts_size(struct task_struct *child)
+{
+       struct bts_context *context;
+       const struct bts_trace *trace;
+
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
+       if (!trace)
+               return -ESRCH;
+
+       return (trace->ds.top - trace->ds.begin) / trace->ds.size;
+}
+
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ *
+ * If the child has a bts context, hand it to free_bts_context() for
+ * deferred destruction and clear child->bts.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
+{
+       if (unlikely(child->bts)) {
+               free_bts_context(child->bts);
+               child->bts = NULL;
+       }
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -838,13 +916,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-       if (child->thread.ds_area_msr) {
-               ptrace_bts_realloc(child, 0, 0);
-               child->thread.debugctlmsr &= ~ds_debugctl_mask();
-               if (!child->thread.debugctlmsr)
-                       clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-               clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-       }
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -927,13 +998,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                return copy_regset_to_user(child, &user_x86_32_view,
                                           REGSET_XFP,
                                           0, sizeof(struct user_fxsr_struct),
-                                          datap);
+                                          datap) ? -EIO : 0;
 
        case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
                return copy_regset_from_user(child, &user_x86_32_view,
                                             REGSET_XFP,
                                             0, sizeof(struct user_fxsr_struct),
-                                            datap);
+                                            datap) ? -EIO : 0;
 #endif
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -961,6 +1032,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 #endif
 
+       /*
+        * These bits need more cooking - not enabled yet:
+        */
+#ifdef CONFIG_X86_PTRACE_BTS
        case PTRACE_BTS_CONFIG:
                ret = ptrace_bts_config
                        (child, data, (struct ptrace_bts_config __user *)addr);
@@ -972,7 +1047,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 
        case PTRACE_BTS_SIZE:
-               ret = ptrace_bts_get_size(child);
+               ret = ptrace_bts_size(child);
                break;
 
        case PTRACE_BTS_GET:
@@ -988,6 +1063,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                ret = ptrace_bts_drain
                        (child, data, (struct bts_struct __user *) addr);
                break;
+#endif /* CONFIG_X86_PTRACE_BTS */
 
        default:
                ret = ptrace_request(child, request, addr, data);
@@ -1035,10 +1111,22 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
        R32(esi, si);
        R32(ebp, bp);
        R32(eax, ax);
-       R32(orig_eax, orig_ax);
        R32(eip, ip);
        R32(esp, sp);
 
+       case offsetof(struct user32, regs.orig_eax):
+               /*
+                * A 32-bit debugger setting orig_eax means to restore
+                * the state of the task restarting a 32-bit syscall.
+                * Make sure we interpret the -ERESTART* codes correctly
+                * in case the task is not actually still sitting at the
+                * exit from a 32-bit syscall with TS_COMPAT still set.
+                */
+               regs->orig_ax = value;
+               if (syscall_get_nr(child, regs) >= 0)
+                       task_thread_info(child)->status |= TS_COMPAT;
+               break;
+
        case offsetof(struct user32, regs.eflags):
                return set_flags(child, value);
 
@@ -1160,7 +1248,7 @@ static int genregs32_set(struct task_struct *target,
        if (kbuf) {
                const compat_ulong_t *k = kbuf;
                while (count > 0 && !ret) {
-                       ret = putreg(target, pos, *k++);
+                       ret = putreg32(target, pos, *k++);
                        count -= sizeof(*k);
                        pos += sizeof(*k);
                }
@@ -1171,7 +1259,7 @@ static int genregs32_set(struct task_struct *target,
                        ret = __get_user(word, u++);
                        if (ret)
                                break;
-                       ret = putreg(target, pos, word);
+                       ret = putreg32(target, pos, word);
                        count -= sizeof(*u);
                        pos += sizeof(*u);
                }
@@ -1179,95 +1267,16 @@ static int genregs32_set(struct task_struct *target,
        return ret;
 }
 
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
-{
-       siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
-       compat_siginfo_t __user *si32 = compat_ptr(data);
-       siginfo_t ssi;
-       int ret;
-
-       if (request == PTRACE_SETSIGINFO) {
-               memset(&ssi, 0, sizeof(siginfo_t));
-               ret = copy_siginfo_from_user32(&ssi, si32);
-               if (ret)
-                       return ret;
-               if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
-                       return -EFAULT;
-       }
-       ret = sys_ptrace(request, pid, addr, (unsigned long)si);
-       if (ret)
-               return ret;
-       if (request == PTRACE_GETSIGINFO) {
-               if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
-                       return -EFAULT;
-               ret = copy_siginfo_to_user32(si32, &ssi);
-       }
-       return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+                       compat_ulong_t caddr, compat_ulong_t cdata)
 {
-       struct task_struct *child;
-       struct pt_regs *childregs;
+       unsigned long addr = caddr;
+       unsigned long data = cdata;
        void __user *datap = compat_ptr(data);
        int ret;
        __u32 val;
 
        switch (request) {
-       case PTRACE_TRACEME:
-       case PTRACE_ATTACH:
-       case PTRACE_KILL:
-       case PTRACE_CONT:
-       case PTRACE_SINGLESTEP:
-       case PTRACE_SINGLEBLOCK:
-       case PTRACE_DETACH:
-       case PTRACE_SYSCALL:
-       case PTRACE_OLDSETOPTIONS:
-       case PTRACE_SETOPTIONS:
-       case PTRACE_SET_THREAD_AREA:
-       case PTRACE_GET_THREAD_AREA:
-       case PTRACE_BTS_CONFIG:
-       case PTRACE_BTS_STATUS:
-       case PTRACE_BTS_SIZE:
-       case PTRACE_BTS_GET:
-       case PTRACE_BTS_CLEAR:
-       case PTRACE_BTS_DRAIN:
-               return sys_ptrace(request, pid, addr, data);
-
-       default:
-               return -EINVAL;
-
-       case PTRACE_PEEKTEXT:
-       case PTRACE_PEEKDATA:
-       case PTRACE_POKEDATA:
-       case PTRACE_POKETEXT:
-       case PTRACE_POKEUSR:
-       case PTRACE_PEEKUSR:
-       case PTRACE_GETREGS:
-       case PTRACE_SETREGS:
-       case PTRACE_SETFPREGS:
-       case PTRACE_GETFPREGS:
-       case PTRACE_SETFPXREGS:
-       case PTRACE_GETFPXREGS:
-       case PTRACE_GETEVENTMSG:
-               break;
-
-       case PTRACE_SETSIGINFO:
-       case PTRACE_GETSIGINFO:
-               return ptrace32_siginfo(request, pid, addr, data);
-       }
-
-       child = ptrace_get_task_struct(pid);
-       if (IS_ERR(child))
-               return PTR_ERR(child);
-
-       ret = ptrace_check_attach(child, request == PTRACE_KILL);
-       if (ret < 0)
-               goto out;
-
-       childregs = task_pt_regs(child);
-
-       switch (request) {
        case PTRACE_PEEKUSR:
                ret = getreg32(child, addr, &val);
                if (ret == 0)
@@ -1313,12 +1322,22 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
                                             sizeof(struct user32_fxsr_struct),
                                             datap);
 
+       case PTRACE_GET_THREAD_AREA:
+       case PTRACE_SET_THREAD_AREA:
+#ifdef CONFIG_X86_PTRACE_BTS
+       case PTRACE_BTS_CONFIG:
+       case PTRACE_BTS_STATUS:
+       case PTRACE_BTS_SIZE:
+       case PTRACE_BTS_GET:
+       case PTRACE_BTS_CLEAR:
+       case PTRACE_BTS_DRAIN:
+#endif /* CONFIG_X86_PTRACE_BTS */
+               return arch_ptrace(child, request, addr, data);
+
        default:
                return compat_ptrace_request(child, request, addr, data);
        }
 
- out:
-       put_task_struct(child);
        return ret;
 }
 
@@ -1339,6 +1358,12 @@ static const struct user_regset x86_64_regsets[] = {
                .size = sizeof(long), .align = sizeof(long),
                .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
        },
+       [REGSET_IOPERM64] = {
+               .core_note_type = NT_386_IOPERM,
+               .n = IO_BITMAP_LONGS,
+               .size = sizeof(long), .align = sizeof(long),
+               .active = ioperm_active, .get = ioperm_get
+       },
 };
 
 static const struct user_regset_view user_x86_64_view = {
@@ -1352,6 +1377,9 @@ static const struct user_regset_view user_x86_64_view = {
 #define genregs32_get          genregs_get
 #define genregs32_set          genregs_set
 
+#define user_i387_ia32_struct  user_i387_struct
+#define user32_fxsr_struct     user_fxsr_struct
+
 #endif /* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1364,13 +1392,13 @@ static const struct user_regset x86_32_regsets[] = {
        },
        [REGSET_FP] = {
                .core_note_type = NT_PRFPREG,
-               .n = sizeof(struct user_i387_struct) / sizeof(u32),
+               .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
                .size = sizeof(u32), .align = sizeof(u32),
                .active = fpregs_active, .get = fpregs_get, .set = fpregs_set
        },
        [REGSET_XFP] = {
                .core_note_type = NT_PRXFPREG,
-               .n = sizeof(struct user_i387_struct) / sizeof(u32),
+               .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
                .size = sizeof(u32), .align = sizeof(u32),
                .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
        },
@@ -1382,6 +1410,12 @@ static const struct user_regset x86_32_regsets[] = {
                .active = regset_tls_active,
                .get = regset_tls_get, .set = regset_tls_set
        },
+       [REGSET_IOPERM32] = {
+               .core_note_type = NT_386_IOPERM,
+               .n = IO_BITMAP_BYTES / sizeof(u32),
+               .size = sizeof(u32), .align = sizeof(u32),
+               .active = ioperm_active, .get = ioperm_get
+       },
 };
 
 static const struct user_regset_view user_x86_32_view = {
@@ -1403,9 +1437,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+                                        int error_code, int si_code)
 {
        struct siginfo info;
 
@@ -1414,7 +1447,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 
        memset(&info, 0, sizeof(info));
        info.si_signo = SIGTRAP;
-       info.si_code = TRAP_BRKPT;
+       info.si_code = si_code;
 
        /* User-mode ip? */
        info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
@@ -1423,144 +1456,89 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
        force_sig_info(SIGTRAP, &info, tsk);
 }
 
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-__attribute__((regparm(3)))
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
-       int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
-       /*
-        * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
-        * interception
-        */
-       int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
-       int ret = 0;
 
-       /* do the secure computing check first */
-       if (!entryexit)
-               secure_computing(regs->orig_ax);
-
-       if (unlikely(current->audit_context)) {
-               if (entryexit)
-                       audit_syscall_exit(AUDITSC_RESULT(regs->ax),
-                                               regs->ax);
-               /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
-                * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
-                * not used, entry.S will call us only on syscall exit, not
-                * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
-                * calling send_sigtrap() on syscall entry.
-                *
-                * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
-                * is_singlestep is false, despite his name, so we will still do
-                * the correct thing.
-                */
-               else if (is_singlestep)
-                       goto out;
-       }
-
-       if (!(current->ptrace & PT_PTRACED))
-               goto out;
-
-       /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
-        * and then is resumed with SYSEMU_SINGLESTEP, it will come in
-        * here. We have to check this and return */
-       if (is_sysemu && entryexit)
-               return 0;
-
-       /* Fake a debug trap */
-       if (is_singlestep)
-               send_sigtrap(current, regs, 0);
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
-               goto out;
-
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       /* Note that the debugger could change the result of test_thread_flag!*/
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-       ret = is_sysemu;
-out:
-       if (unlikely(current->audit_context) && !entryexit)
-               audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
-                                   regs->bx, regs->cx, regs->dx, regs->si);
-       if (ret == 0)
-               return 0;
-
-       regs->orig_ax = -1; /* force skip of syscall restarting */
-       if (unlikely(current->audit_context))
-               audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
-       return 1;
-}
-
-#else  /* CONFIG_X86_64 */
+#ifdef CONFIG_X86_32
+# define IS_IA32       1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32       is_compat_task()
+#else
+# define IS_IA32       0
+#endif
 
-static void syscall_trace(struct pt_regs *regs)
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
 {
+       long ret = 0;
 
-#if 0
-       printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-              current->comm,
-              regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-              current_thread_info()->flags, current->ptrace);
-#endif
-
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-                               ? 0x80 : 0));
        /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
+        * If we stepped into a sysenter/syscall insn, it trapped in
+        * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+        * If user-mode had set TF itself, then it's still clear from
+        * do_debug() and we need to set it again to restore the user
+        * state.  If we entered on the slow path, TF was already set.
         */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-}
+       if (test_thread_flag(TIF_SINGLESTEP))
+               regs->flags |= X86_EFLAGS_TF;
 
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
-{
        /* do the secure computing check first */
        secure_computing(regs->orig_ax);
 
-       if (test_thread_flag(TIF_SYSCALL_TRACE)
-           && (current->ptrace & PT_PTRACED))
-               syscall_trace(regs);
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               ret = -1L;
+
+       if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+           tracehook_report_syscall_entry(regs))
+               ret = -1L;
+
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_enter(regs, regs->orig_ax);
 
        if (unlikely(current->audit_context)) {
-               if (test_thread_flag(TIF_IA32)) {
+               if (IS_IA32)
                        audit_syscall_entry(AUDIT_ARCH_I386,
                                            regs->orig_ax,
                                            regs->bx, regs->cx,
                                            regs->dx, regs->si);
-               } else {
+#ifdef CONFIG_X86_64
+               else
                        audit_syscall_entry(AUDIT_ARCH_X86_64,
                                            regs->orig_ax,
                                            regs->di, regs->si,
                                            regs->dx, regs->r10);
-               }
+#endif
        }
+
+       return ret ?: regs->orig_ax;
 }
 
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
        if (unlikely(current->audit_context))
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
-       if ((test_thread_flag(TIF_SYSCALL_TRACE)
-            || test_thread_flag(TIF_SINGLESTEP))
-           && (current->ptrace & PT_PTRACED))
-               syscall_trace(regs);
-}
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_exit(regs, regs->ax);
 
-#endif /* CONFIG_X86_32 */
+       if (test_thread_flag(TIF_SYSCALL_TRACE))
+               tracehook_report_syscall_exit(regs, 0);
+
+       /*
+        * If TIF_SYSCALL_EMU is set, we only get here because of
+        * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+        * We already reported this syscall instruction in
+        * syscall_trace_enter(), so don't do any more now.
+        */
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               return;
+
+       /*
+        * If we are single-stepping, synthesize a trap to follow the
+        * system call instruction.
+        */
+       if (test_thread_flag(TIF_SINGLESTEP) &&
+           tracehook_consider_fatal_signal(current, SIGTRAP))
+               send_sigtrap(current, regs, 0, TRAP_BRKPT);
+}