X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkernel%2Fptrace.c;h=a503b1fd04e51048c25fca9b675add81cac37304;hb=318ae2edc3b29216abd8a2510f3f80b764f06858;hp=3399c1be79b8b730649b4b11cb46d8c40d652f6c;hpb=099cd6e9dac84baafdef00c3955ee68e71282f86;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3399c1b..a503b1f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -2,6 +2,9 @@ /* * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 + * + * BTS tracing + * Markus Metzger , Dec 2007 */ #include @@ -10,11 +13,17 @@ #include #include #include +#include +#include #include +#include #include #include #include #include +#include +#include +#include #include #include @@ -26,6 +35,111 @@ #include #include #include +#include +#include + +#include "tls.h" + +#define CREATE_TRACE_POINTS +#include + +enum x86_regset { + REGSET_GENERAL, + REGSET_FP, + REGSET_XFP, + REGSET_IOPERM64 = REGSET_XFP, + REGSET_XSTATE, + REGSET_TLS, + REGSET_IOPERM32, +}; + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; /* * does not yet catch signals sent when the child dies. @@ -54,13 +168,10 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) @@ -72,9 +183,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { - retval = task->thread.gs; if (task == current) - savesegment(gs, retval); + retval = get_user_gs(task_pt_regs(task)); + else + retval = task_user_gs(task); } return retval; } @@ -88,26 +200,35 @@ static int set_segment_reg(struct task_struct *task, if (invalid_selector(value)) return -EIO; - if (offset != offsetof(struct user_regs_struct, gs)) + /* + * For %cs and %ss we cannot permit a null selector. + * We can permit a bogus selector as long as it has USER_RPL. + * Null selectors are fine for other segment registers, but + * we will never get back to user mode with invalid %cs or %ss + * and will take the trap in iret instead. Much code relies + * on user_mode() to distinguish a user trap frame (which can + * safely use invalid selectors) from a kernel trap frame. + */ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ss): + if (unlikely(value == 0)) + return -EIO; + + default: *pt_regs_access(task_pt_regs(task), offset) = value; - else { - task->thread.gs = value; + break; + + case offsetof(struct user_regs_struct, gs): if (task == current) - /* - * The user-mode %gs is not affected by - * kernel entry, so we must update the CPU. - */ - loadsegment(gs, value); + set_user_gs(task_pt_regs(task), value); + else + task_user_gs(task) = value; } return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -212,12 +333,16 @@ static int set_segment_reg(struct task_struct *task, * Can't actually change these in 64-bit mode. */ case offsetof(struct user_regs_struct,cs): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->cs = value; #endif break; case offsetof(struct user_regs_struct,ss): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->ss = value; @@ -228,15 +353,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE64 - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -363,99 +479,652 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) return *pt_regs_access(task_pt_regs(task), offset); } -/* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. - */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; + if (kbuf) { + unsigned long *k = kbuf; + while (count >= sizeof(*k)) { + *k++ = getreg(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + unsigned long __user *u = ubuf; + while (count >= sizeof(*u)) { + if (__put_user(getreg(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } } + return 0; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + if (kbuf) { + const unsigned long *k = kbuf; + while (count >= sizeof(*k) && !ret) { + ret = putreg(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + while (count >= sizeof(*u) && !ret) { + unsigned long word; + ret = __get_user(word, u++); + if (ret) + break; + ret = putreg(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return ret; +} + +static void ptrace_triggered(struct perf_event *bp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { int i; + struct thread_struct *thread = &(current->thread); - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) + break; + } - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; +/* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + +static int +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk, int disabled) +{ + int err; + int gen_len, gen_type; + struct perf_event_attr attr; + + /* + * We should have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return -EINVAL; + + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return err; + + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = disabled; + + return modify_user_hw_breakpoint(bp, &attr); +} + +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct perf_event *bp; + + data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->ptrace_bps[i]; + + if (!enabled) { + if (bp) { + /* + * Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + + rc = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (rc) + break; + } + continue; + } + + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} + +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { + val = thread->debugreg6; + } else if (n == 7) { + val = thread->ptrace_dr7; + } + return val; +} - case 7: +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + struct perf_event_attr attr; + + if (!t->ptrace_bps[nr]) { + hw_breakpoint_init(&attr); /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) + * Put stub len and type to register (reserve) an inactive but + * correct bp */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + + /* + * CHECKME: the previous code returned -EIO if the addr wasn't + * a valid task virtual addr. The new one will return -EINVAL in + * this case. + * -EINVAL may be what we want for in-kernel breakpoints users, + * but -EIO looks better for ptrace, since we refuse a register + * writing for the user. And anyway this is the previous + * behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + } else { + int err; + + bp = t->ptrace_bps[nr]; + + attr = bp->attr; + attr.bp_addr = addr; + err = modify_user_hw_breakpoint(bp, &attr); + if (err) + return err; + } + + + return 0; +} + +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + thread->debugreg6 = val; + goto ret_path; + } + if (n < HBP_NUM) { + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } + /* All that's left is DR7 */ + if (n == 7) { + rc = ptrace_write_dr7(tsk, val); + if (!rc) + thread->ptrace_dr7 = val; + } + +ret_path: + return rc; +} + +/* + * These access the current or another (stopped) task's io permission + * bitmap for debugging or core dump. + */ +static int ioperm_active(struct task_struct *target, + const struct user_regset *regset) +{ + return target->thread.io_bitmap_max / regset->size; +} + +static int ioperm_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (!target->thread.io_bitmap_ptr) + return -ENXIO; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.io_bitmap_ptr, + 0, IO_BITMAP_BYTES); +} + +#ifdef CONFIG_X86_PTRACE_BTS +/* + * A branch trace store context. + * + * Contexts may only be installed by ptrace_bts_config() and only for + * ptraced tasks. + * + * Contexts are destroyed when the tracee is detached from the tracer. + * The actual destruction work requires interrupts enabled, so the + * work is deferred and will be scheduled during __ptrace_unlink(). + * + * Contexts hold an additional task_struct reference on the traced + * task, as well as a reference on the tracer's mm. + * + * Ptrace already holds a task_struct for the duration of ptrace operations, + * but since destruction is deferred, it may be executed after both + * tracer and tracee exited. + */ +struct bts_context { + /* The branch trace handle. */ + struct bts_tracer *tracer; + + /* The buffer used to store the branch trace and its size. */ + void *buffer; + unsigned int size; + + /* The mm that paid for the above buffer. */ + struct mm_struct *mm; + + /* The task this context belongs to. */ + struct task_struct *task; + + /* The signal to send on a bts buffer overflow. */ + unsigned int bts_ovfl_signal; + + /* The work struct to destroy a context. */ + struct work_struct work; +}; + +static int alloc_bts_buffer(struct bts_context *context, unsigned int size) +{ + void *buffer = NULL; + int err = -ENOMEM; + + err = account_locked_memory(current->mm, current->signal->rlim, size); + if (err < 0) + return err; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out_refund; + + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); return 0; + + out_refund: + refund_locked_memory(current->mm, size); + return err; +} + +static inline void free_bts_buffer(struct bts_context *context) +{ + if (!context->buffer) + return; + + kfree(context->buffer); + context->buffer = NULL; + + refund_locked_memory(context->mm, context->size); + context->size = 0; + + mmput(context->mm); + context->mm = NULL; +} + +static void free_bts_context_work(struct work_struct *w) +{ + struct bts_context *context; + + context = container_of(w, struct bts_context, work); + + ds_release_bts(context->tracer); + put_task_struct(context->task); + free_bts_buffer(context); + kfree(context); +} + +static inline void free_bts_context(struct bts_context *context) +{ + INIT_WORK(&context->work, free_bts_context_work); + schedule_work(&context->work); +} + +static inline struct bts_context *alloc_bts_context(struct task_struct *task) +{ + struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); + if (context) { + context->task = task; + task->bts = context; + + get_task_struct(task); + } + + return context; +} + +static int ptrace_bts_read_record(struct task_struct *child, size_t index, + struct bts_struct __user *out) +{ + struct bts_context *context; + const struct bts_trace *trace; + struct bts_struct bts; + const unsigned char *at; + int error; + + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); + if (!trace) + return -ESRCH; + + at = trace->ds.top - ((index + 1) * trace->ds.size); + if ((void *)at < trace->ds.begin) + at += (trace->ds.n * trace->ds.size); + + if (!trace->read) + return -EOPNOTSUPP; + + error = trace->read(context->tracer, at, &bts); + if (error < 0) + return error; + + if (copy_to_user(out, &bts, sizeof(bts))) + return -EFAULT; + + return sizeof(bts); +} + +static int ptrace_bts_drain(struct task_struct *child, + long size, + struct bts_struct __user *out) +{ + struct bts_context *context; + const struct bts_trace *trace; + const unsigned char *at; + int error, drained = 0; + + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); + if (!trace) + return -ESRCH; + + if (!trace->read) + return -EOPNOTSUPP; + + if (size < (trace->ds.top - trace->ds.begin)) + return -EIO; + + for (at = trace->ds.begin; (void *)at < trace->ds.top; + out++, drained++, at += trace->ds.size) { + struct bts_struct bts; + + error = trace->read(context->tracer, at, &bts); + if (error < 0) + return error; + + if (copy_to_user(out, &bts, sizeof(bts))) + return -EFAULT; + } + + memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); + + error = ds_reset_bts(context->tracer); + if (error < 0) + return error; + + return drained; +} + +static int ptrace_bts_config(struct task_struct *child, + long cfg_size, + const struct ptrace_bts_config __user *ucfg) +{ + struct bts_context *context; + struct ptrace_bts_config cfg; + unsigned int flags = 0; + + if (cfg_size < sizeof(cfg)) + return -EIO; + + if (copy_from_user(&cfg, ucfg, sizeof(cfg))) + return -EFAULT; + + context = child->bts; + if (!context) + context = alloc_bts_context(child); + if (!context) + return -ENOMEM; + + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + return -EINVAL; + + return -EOPNOTSUPP; + context->bts_ovfl_signal = cfg.signal; + } + + ds_release_bts(context->tracer); + context->tracer = NULL; + + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + int err; + + free_bts_buffer(context); + if (!cfg.size) + return 0; + + err = alloc_bts_buffer(context, cfg.size); + if (err < 0) + return err; + } + + if (cfg.flags & PTRACE_BTS_O_TRACE) + flags |= BTS_USER; + + if (cfg.flags & PTRACE_BTS_O_SCHED) + flags |= BTS_TIMESTAMPS; + + context->tracer = + ds_request_bts_task(child, context->buffer, context->size, + NULL, (size_t)-1, flags); + if (unlikely(IS_ERR(context->tracer))) { + int error = PTR_ERR(context->tracer); + + free_bts_buffer(context); + context->tracer = NULL; + return error; + } + + return sizeof(cfg); +} + +static int ptrace_bts_status(struct task_struct *child, + long cfg_size, + struct ptrace_bts_config __user *ucfg) +{ + struct bts_context *context; + const struct bts_trace *trace; + struct ptrace_bts_config cfg; + + context = child->bts; + if (!context) + return -ESRCH; + + if (cfg_size < sizeof(cfg)) + return -EIO; + + trace = ds_read_bts(context->tracer); + if (!trace) + return -ESRCH; + + memset(&cfg, 0, sizeof(cfg)); + cfg.size = trace->ds.end - trace->ds.begin; + cfg.signal = context->bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); + + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; + + if (trace->ds.flags & BTS_USER) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (trace->ds.flags & BTS_TIMESTAMPS) + cfg.flags |= PTRACE_BTS_O_SCHED; + + if (copy_to_user(ucfg, &cfg, sizeof(cfg))) + return -EFAULT; + + return sizeof(cfg); +} + +static int ptrace_bts_clear(struct task_struct *child) +{ + struct bts_context *context; + const struct bts_trace *trace; + + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); + if (!trace) + return -ESRCH; + + memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); + + return ds_reset_bts(context->tracer); +} + +static int ptrace_bts_size(struct task_struct *child) +{ + struct bts_context *context; + const struct bts_trace *trace; + + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); + if (!trace) + return -ESRCH; + + return (trace->ds.top - trace->ds.begin) / trace->ds.size; } /* + * Called from __ptrace_unlink() after the child has been moved back + * to its original parent. + */ +void ptrace_bts_untrace(struct task_struct *child) +{ + if (unlikely(child->bts)) { + free_bts_context(child->bts); + child->bts = NULL; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ + +/* * Called by kernel/ptrace.c when detaching.. * * Make sure the single step bit is not set. @@ -468,18 +1137,16 @@ void ptrace_disable(struct task_struct *child) #endif } +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION +static const struct user_regset_view user_x86_32_view; /* Initialized below. */ +#endif + long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - int i, ret; + int ret; unsigned long __user *datap = (unsigned long __user *)data; switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp; @@ -501,12 +1168,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & (sizeof(data) - 1)) || addr < 0 || @@ -523,82 +1184,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { - __put_user(getreg(child, i), datap); - datap++; - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { - __get_user(tmp, datap); - putreg(child, i, tmp); - datap++; - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = 0; - if (!tsk_used_math(child)) - init_fpu(child); - get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - set_fpregs(child, (struct user_i387_struct __user *)data); - ret = 0; - break; - } + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); #ifdef CONFIG_X86_32 - case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - if (!tsk_used_math(child)) - init_fpu(child); - ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); - break; - } - - case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); - break; - } + case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XFP, + 0, sizeof(struct user_fxsr_struct), + datap) ? -EIO : 0; + + case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XFP, + 0, sizeof(struct user_fxsr_struct), + datap) ? -EIO : 0; #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -626,6 +1251,39 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif + /* + * These bits need more cooking - not enabled yet: + */ +#ifdef CONFIG_X86_PTRACE_BTS + case PTRACE_BTS_CONFIG: + ret = ptrace_bts_config + (child, data, (struct ptrace_bts_config __user *)addr); + break; + + case PTRACE_BTS_STATUS: + ret = ptrace_bts_status + (child, data, (struct ptrace_bts_config __user *)addr); + break; + + case PTRACE_BTS_SIZE: + ret = ptrace_bts_size(child); + break; + + case PTRACE_BTS_GET: + ret = ptrace_bts_read_record + (child, data, (struct bts_struct __user *) addr); + break; + + case PTRACE_BTS_CLEAR: + ret = ptrace_bts_clear(child); + break; + + case PTRACE_BTS_DRAIN: + ret = ptrace_bts_drain + (child, data, (struct bts_struct __user *) addr); + break; +#endif /* CONFIG_X86_PTRACE_BTS */ + default: ret = ptrace_request(child, request, addr, data); break; @@ -639,7 +1297,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #include #include #include -#include #include #define R32(l,q) \ @@ -673,10 +1330,22 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) R32(esi, si); R32(ebp, bp); R32(eax, ax); - R32(orig_eax, orig_ax); R32(eip, ip); R32(esp, sp); + case offsetof(struct user32, regs.orig_eax): + /* + * A 32-bit debugger setting orig_eax means to restore + * the state of the task restarting a 32-bit syscall. + * Make sure we interpret the -ERESTART* codes correctly + * in case the task is not actually still sitting at the + * exit from a 32-bit syscall with TS_COMPAT still set. + */ + regs->orig_ax = value; + if (syscall_get_nr(child, regs) >= 0) + task_thread_info(child)->status |= TS_COMPAT; + break; + case offsetof(struct user32, regs.eflags): return set_flags(child, value); @@ -762,107 +1431,71 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) #undef R32 #undef SEG32 -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +static int genregs32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - int ret; - - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count >= sizeof(*k)) { + getreg32(target, pos, k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count >= sizeof(*u)) { + compat_ulong_t word; + getreg32(target, pos, &word); + if (__put_user(word, u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); + + return 0; +} + +static int genregs32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count >= sizeof(*k) && !ret) { + ret = putreg32(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count >= sizeof(*u) && !ret) { + compat_ulong_t word; + ret = __get_user(word, u++); + if (ret) + break; + ret = putreg32(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } } return ret; } -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - struct task_struct *child; - struct pt_regs *childregs; + unsigned long addr = caddr; + unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; __u32 val; switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_SINGLEBLOCK: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - break; - - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - - switch (request) { - case PTRACE_PEEKDATA: - case PTRACE_PEEKTEXT: - ret = 0; - if (access_process_vm(child, addr, &val, sizeof(u32), 0) != - sizeof(u32)) - ret = -EIO; - else - ret = put_user(val, (unsigned int __user *)datap); - break; - - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(u32), 1) != - sizeof(u32)) - ret = -EIO; - break; - case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); if (ret == 0) @@ -873,265 +1506,292 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) ret = putreg32(child, addr, data); break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - int i; - - if (!access_ok(VERIFY_WRITE, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(__u32)) { - getreg32(child, i, &val); - ret |= __put_user(val, (u32 __user *)datap); - datap += sizeof(u32); - } - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - int i; - - if (!access_ok(VERIFY_READ, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(u32)) { - ret |= __get_user(tmp, (u32 __user *)datap); - putreg32(child, i, tmp); - datap += sizeof(u32); - } - break; - } - - case PTRACE_GETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_READ, compat_ptr(data), - sizeof(struct user_i387_struct))) - break; - save_i387_ia32(child, datap, childregs, 1); - ret = 0; - break; - - case PTRACE_SETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) - break; - ret = 0; - /* don't check EFAULT to be bug-to-bug compatible to i386 */ - restore_i387_ia32(child, datap, 1); - break; - - case PTRACE_GETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - - init_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) - break; - ret = -EFAULT; - if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) - break; - ret = __put_user(childregs->cs, &u->fcs); - ret |= __put_user(child->thread.ds, &u->fos); - break; - } - case PTRACE_SETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - - unlazy_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_READ, u, sizeof(*u))) - break; - /* - * no checking to be bug-to-bug compatible with i386. - * but silence warning - */ - if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) - ; - set_stopped_child_used_math(child); - child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - ret = 0; - break; - } + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct32), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_GENERAL, 0, + sizeof(struct user_regs_struct32), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_FP, 0, + sizeof(struct user_i387_ia32_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user( + child, &user_x86_32_view, REGSET_FP, + 0, sizeof(struct user_i387_ia32_struct), datap); + + case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, - (unsigned int __user *)compat_ptr(data)); - break; + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: +#ifdef CONFIG_X86_PTRACE_BTS + case PTRACE_BTS_CONFIG: + case PTRACE_BTS_STATUS: + case PTRACE_BTS_SIZE: + case PTRACE_BTS_GET: + case PTRACE_BTS_CLEAR: + case PTRACE_BTS_DRAIN: +#endif /* CONFIG_X86_PTRACE_BTS */ + return arch_ptrace(child, request, addr, data); default: - BUG(); + return compat_ptrace_request(child, request, addr, data); } - out: - put_task_struct(child); return ret; } #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_32 - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) -{ - struct siginfo info; +#ifdef CONFIG_X86_64 - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; +static struct user_regset x86_64_regsets[] __read_mostly = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct) / sizeof(long), + .size = sizeof(long), .align = sizeof(long), + .get = genregs_get, .set = genregs_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_struct) / sizeof(long), + .size = sizeof(long), .align = sizeof(long), + .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(u64), .align = sizeof(u64), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, + [REGSET_IOPERM64] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_LONGS, + .size = sizeof(long), .align = sizeof(long), + .active = ioperm_active, .get = ioperm_get + }, +}; + +static const struct user_regset_view user_x86_64_view = { + .name = "x86_64", .e_machine = EM_X86_64, + .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets) +}; + +#else /* CONFIG_X86_32 */ + +#define user_regs_struct32 user_regs_struct +#define genregs32_get genregs_get +#define genregs32_set genregs_set + +#define user_i387_ia32_struct user_i387_struct +#define user32_fxsr_struct user_fxsr_struct + +#endif /* CONFIG_X86_64 */ - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION +static struct user_regset x86_32_regsets[] __read_mostly = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct32) / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .get = genregs32_get, .set = genregs32_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = fpregs_active, .get = fpregs_get, .set = fpregs_set + }, + [REGSET_XFP] = { + .core_note_type = NT_PRXFPREG, + .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(u64), .align = sizeof(u64), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, + [REGSET_TLS] = { + .core_note_type = NT_386_TLS, + .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, + .size = sizeof(struct user_desc), + .align = sizeof(struct user_desc), + .active = regset_tls_active, + .get = regset_tls_get, .set = regset_tls_set + }, + [REGSET_IOPERM32] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_BYTES / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = ioperm_active, .get = ioperm_get + }, +}; + +static const struct user_regset_view user_x86_32_view = { + .name = "i386", .e_machine = EM_386, + .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets) +}; +#endif - /* User-mode ip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; +/* + * This represents bytes 464..511 in the memory layout exported through + * the REGSET_XSTATE interface. + */ +u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); +void update_regset_xstate_info(unsigned int size, u64 xstate_mask) +{ +#ifdef CONFIG_X86_64 + x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); +#endif +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64); +#endif + xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; } -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) +const struct user_regset_view *task_user_regset_view(struct task_struct *task) { - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_ax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), - regs->ax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) +#endif +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + return &user_x86_32_view; +#endif +#ifdef CONFIG_X86_64 + return &user_x86_64_view; +#endif +} - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); +static void fill_sigtrap_info(struct task_struct *tsk, + struct pt_regs *regs, + int error_code, int si_code, + struct siginfo *info) +{ + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; + info->si_code = si_code; + info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; +} - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); +void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, + struct siginfo *info) +{ + fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); +} - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, - regs->bx, regs->cx, regs->dx, regs->si); - if (ret == 0) - return 0; +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) +{ + struct siginfo info; - regs->orig_ax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - return 1; + fill_sigtrap_info(tsk, regs, error_code, si_code, &info); + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); } -#else /* CONFIG_X86_64 */ - -static void syscall_trace(struct pt_regs *regs) -{ -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); +#ifdef CONFIG_X86_32 +# define IS_IA32 1 +#elif defined CONFIG_IA32_EMULATION +# define IS_IA32 is_compat_task() +#else +# define IS_IA32 0 #endif - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); +/* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. + */ +asmregparm long syscall_trace_enter(struct pt_regs *regs) +{ + long ret = 0; + /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state. If we entered on the slow path, TF was already set. */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} + if (test_thread_flag(TIF_SINGLESTEP)) + regs->flags |= X86_EFLAGS_TF; -asmlinkage void syscall_trace_enter(struct pt_regs *regs) -{ /* do the secure computing check first */ secure_computing(regs->orig_ax); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + ret = -1L; + + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { + if (IS_IA32) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, regs->dx, regs->si); - } else { +#ifdef CONFIG_X86_64 + else audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_ax, regs->di, regs->si, regs->dx, regs->r10); - } +#endif } + + return ret ?: regs->orig_ax; } -asmlinkage void syscall_trace_leave(struct pt_regs *regs) +asmregparm void syscall_trace_leave(struct pt_regs *regs) { + bool step; + if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); -} + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ax); -#endif /* CONFIG_X86_32 */ + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(). + */ + step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && + !test_thread_flag(TIF_SYSCALL_EMU); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); +}