#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/workqueue.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include "tls.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
- regno >>= 2;
- if (regno > FS)
- --regno;
- return ®s->bx + regno;
+ return ®s->bx + (regno >> 2);
}
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
if (offset != offsetof(struct user_regs_struct, gs))
retval = *pt_regs_access(task_pt_regs(task), offset);
else {
- retval = task->thread.gs;
if (task == current)
- savesegment(gs, retval);
+ retval = get_user_gs(task_pt_regs(task));
+ else
+ retval = task_user_gs(task);
}
return retval;
}
break;
case offsetof(struct user_regs_struct, gs):
- task->thread.gs = value;
if (task == current)
- /*
- * The user-mode %gs is not affected by
- * kernel entry, so we must update the CPU.
- */
- loadsegment(gs, value);
+ set_user_gs(task_pt_regs(task), value);
+ else
+ task_user_gs(task) = value;
}
return 0;
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
- return TASK_SIZE64 - 7;
+ return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
return set_flags(child, value);
#ifdef CONFIG_X86_64
- /*
- * Orig_ax is really just a flag with small positive and
- * negative values, so make sure to always sign-extend it
- * from 32 bits so that it works correctly regardless of
- * whether we come from a 32-bit environment or not.
- */
- case offsetof(struct user_regs_struct, orig_ax):
- value = (long) (s32) value;
- break;
-
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
}
#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+ /* The branch trace handle. */
+ struct bts_tracer *tracer;
+
+ /* The buffer used to store the branch trace and its size. */
+ void *buffer;
+ unsigned int size;
+
+ /* The mm that paid for the above buffer. */
+ struct mm_struct *mm;
+
+ /* The task this context belongs to. */
+ struct task_struct *task;
+
+ /* The signal to send on a bts buffer overflow. */
+ unsigned int bts_ovfl_signal;
+
+ /* The work struct to destroy a context. */
+ struct work_struct work;
+};
+
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
+{
+ void *buffer = NULL;
+ int err = -ENOMEM;
+
+ err = account_locked_memory(current->mm, current->signal->rlim, size);
+ if (err < 0)
+ return err;
+
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
+ goto out_refund;
+
+ context->buffer = buffer;
+ context->size = size;
+ context->mm = get_task_mm(current);
+
+ return 0;
+
+ out_refund:
+ refund_locked_memory(current->mm, size);
+ return err;
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+ if (!context->buffer)
+ return;
+
+ kfree(context->buffer);
+ context->buffer = NULL;
+
+ refund_locked_memory(context->mm, context->size);
+ context->size = 0;
+
+ mmput(context->mm);
+ context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+ struct bts_context *context;
+
+ context = container_of(w, struct bts_context, work);
+
+ ds_release_bts(context->tracer);
+ put_task_struct(context->task);
+ free_bts_buffer(context);
+ kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+ INIT_WORK(&context->work, free_bts_context_work);
+ schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+ struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (context) {
+ context->task = task;
+ task->bts = context;
+
+ get_task_struct(task);
+ }
+
+ return context;
+}
+
static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out)
{
+ struct bts_context *context;
const struct bts_trace *trace;
struct bts_struct bts;
const unsigned char *at;
int error;
- trace = ds_read_bts(child->bts);
+ context = child->bts;
+ if (!context)
+ return -ESRCH;
+
+ trace = ds_read_bts(context->tracer);
if (!trace)
- return -EPERM;
+ return -ESRCH;
at = trace->ds.top - ((index + 1) * trace->ds.size);
if ((void *)at < trace->ds.begin)
if (!trace->read)
return -EOPNOTSUPP;
- error = trace->read(child->bts, at, &bts);
+ error = trace->read(context->tracer, at, &bts);
if (error < 0)
return error;
long size,
struct bts_struct __user *out)
{
+ struct bts_context *context;
const struct bts_trace *trace;
const unsigned char *at;
int error, drained = 0;
- trace = ds_read_bts(child->bts);
+ context = child->bts;
+ if (!context)
+ return -ESRCH;
+
+ trace = ds_read_bts(context->tracer);
if (!trace)
- return -EPERM;
+ return -ESRCH;
if (!trace->read)
return -EOPNOTSUPP;
for (at = trace->ds.begin; (void *)at < trace->ds.top;
out++, drained++, at += trace->ds.size) {
struct bts_struct bts;
- int error;
- error = trace->read(child->bts, at, &bts);
+ error = trace->read(context->tracer, at, &bts);
if (error < 0)
return error;
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
- error = ds_reset_bts(child->bts);
+ error = ds_reset_bts(context->tracer);
if (error < 0)
return error;
return drained;
}
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
- child->bts_buffer = alloc_locked_buffer(size);
- if (!child->bts_buffer)
- return -ENOMEM;
-
- child->bts_size = size;
-
- return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
- free_locked_buffer(child->bts_buffer, child->bts_size);
- child->bts_buffer = NULL;
- child->bts_size = 0;
-}
-
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
{
+ struct bts_context *context;
struct ptrace_bts_config cfg;
unsigned int flags = 0;
if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
return -EFAULT;
- if (child->bts) {
- ds_release_bts(child->bts);
- child->bts = NULL;
- }
+ context = child->bts;
+ if (!context)
+ context = alloc_bts_context(child);
+ if (!context)
+ return -ENOMEM;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
return -EINVAL;
return -EOPNOTSUPP;
-
- child->thread.bts_ovfl_signal = cfg.signal;
+ context->bts_ovfl_signal = cfg.signal;
}
- if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
- (cfg.size != child->bts_size)) {
- int error;
+ ds_release_bts(context->tracer);
+ context->tracer = NULL;
- ptrace_bts_free_buffer(child);
+ if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+ int err;
- error = ptrace_bts_allocate_buffer(child, cfg.size);
- if (error < 0)
- return error;
+ free_bts_buffer(context);
+ if (!cfg.size)
+ return 0;
+
+ err = alloc_bts_buffer(context, cfg.size);
+ if (err < 0)
+ return err;
}
if (cfg.flags & PTRACE_BTS_O_TRACE)
if (cfg.flags & PTRACE_BTS_O_SCHED)
flags |= BTS_TIMESTAMPS;
- child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
- /* ovfl = */ NULL, /* th = */ (size_t)-1,
- flags);
- if (IS_ERR(child->bts)) {
- int error = PTR_ERR(child->bts);
-
- ptrace_bts_free_buffer(child);
- child->bts = NULL;
+ context->tracer =
+ ds_request_bts_task(child, context->buffer, context->size,
+ NULL, (size_t)-1, flags);
+ if (unlikely(IS_ERR(context->tracer))) {
+ int error = PTR_ERR(context->tracer);
+ free_bts_buffer(context);
+ context->tracer = NULL;
return error;
}
long cfg_size,
struct ptrace_bts_config __user *ucfg)
{
+ struct bts_context *context;
const struct bts_trace *trace;
struct ptrace_bts_config cfg;
+ context = child->bts;
+ if (!context)
+ return -ESRCH;
+
if (cfg_size < sizeof(cfg))
return -EIO;
- trace = ds_read_bts(child->bts);
+ trace = ds_read_bts(context->tracer);
if (!trace)
- return -EPERM;
+ return -ESRCH;
memset(&cfg, 0, sizeof(cfg));
- cfg.size = trace->ds.end - trace->ds.begin;
- cfg.signal = child->thread.bts_ovfl_signal;
- cfg.bts_size = sizeof(struct bts_struct);
+ cfg.size = trace->ds.end - trace->ds.begin;
+ cfg.signal = context->bts_ovfl_signal;
+ cfg.bts_size = sizeof(struct bts_struct);
if (cfg.signal)
cfg.flags |= PTRACE_BTS_O_SIGNAL;
static int ptrace_bts_clear(struct task_struct *child)
{
+ struct bts_context *context;
const struct bts_trace *trace;
- trace = ds_read_bts(child->bts);
+ context = child->bts;
+ if (!context)
+ return -ESRCH;
+
+ trace = ds_read_bts(context->tracer);
if (!trace)
- return -EPERM;
+ return -ESRCH;
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
- return ds_reset_bts(child->bts);
+ return ds_reset_bts(context->tracer);
}
static int ptrace_bts_size(struct task_struct *child)
{
+ struct bts_context *context;
const struct bts_trace *trace;
- trace = ds_read_bts(child->bts);
+ context = child->bts;
+ if (!context)
+ return -ESRCH;
+
+ trace = ds_read_bts(context->tracer);
if (!trace)
- return -EPERM;
+ return -ESRCH;
return (trace->ds.top - trace->ds.begin) / trace->ds.size;
}
-static void ptrace_bts_fork(struct task_struct *tsk)
-{
- tsk->bts = NULL;
- tsk->bts_buffer = NULL;
- tsk->bts_size = 0;
- tsk->thread.bts_ovfl_signal = 0;
-}
-
-static void ptrace_bts_untrace(struct task_struct *child)
-{
- if (unlikely(child->bts)) {
- ds_release_bts(child->bts);
- child->bts = NULL;
-
- /* We cannot update total_vm and locked_vm since
- child's mm is already gone. But we can reclaim the
- memory. */
- kfree(child->bts_buffer);
- child->bts_buffer = NULL;
- child->bts_size = 0;
- }
-}
-
-static void ptrace_bts_detach(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
{
if (unlikely(child->bts)) {
- ds_release_bts(child->bts);
+ free_bts_context(child->bts);
child->bts = NULL;
-
- ptrace_bts_free_buffer(child);
}
}
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
#endif /* CONFIG_X86_PTRACE_BTS */
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
- ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
- ptrace_bts_untrace(child);
-}
-
/*
* Called by kernel/ptrace.c when detaching..
*
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
- ptrace_bts_detach(child);
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case offsetof(struct user32, regs.orig_eax):
/*
- * Sign-extend the value so that orig_eax = -1
- * causes (long)orig_ax < 0 tests to fire correctly.
+ * A 32-bit debugger setting orig_eax means to restore
+ * the state of the task restarting a 32-bit syscall.
+ * Make sure we interpret the -ERESTART* codes correctly
+ * in case the task is not actually still sitting at the
+ * exit from a 32-bit syscall with TS_COMPAT still set.
*/
- regs->orig_ax = (long) (s32) value;
+ regs->orig_ax = value;
+ if (syscall_get_nr(child, regs) >= 0)
+ task_thread_info(child)->status |= TS_COMPAT;
break;
case offsetof(struct user32, regs.eflags):
#ifdef CONFIG_X86_32
# define IS_IA32 1
#elif defined CONFIG_IA32_EMULATION
-# define IS_IA32 test_thread_flag(TIF_IA32)
+# define IS_IA32 is_compat_task()
#else
# define IS_IA32 0
#endif
tracehook_report_syscall_entry(regs))
ret = -1L;
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->orig_ax);
+
if (unlikely(current->audit_context)) {
if (IS_IA32)
audit_syscall_entry(AUDIT_ARCH_I386,
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->ax);
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
* system call instruction.
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
- tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
+ tracehook_consider_fatal_signal(current, SIGTRAP))
send_sigtrap(current, regs, 0, TRAP_BRKPT);
}