async: Handle kthread_run() return codes.
[safe/jmp/linux-2.6] / kernel / ptrace.c
index 4a1745f..c9cf48b 100644 (file)
 #include <linux/security.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
+#include <linux/pid_namespace.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
+
+/*
+ * Initialize a new task whose parent was being ptraced.
+ *
+ * Called from copy_process().  All of the work is delegated to the
+ * architecture hook arch_ptrace_fork(), which receives the new task
+ * and the clone flags unchanged.
+ */
+void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
+{
+       arch_ptrace_fork(child, clone_flags);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  */
 void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-       BUG_ON(!list_empty(&child->ptrace_list));
-       if (child->parent == new_parent)
-               return;
-       list_add(&child->ptrace_list, &child->parent->ptrace_children);
-       remove_parent(child);
+       /* The child's ptrace_entry must not already be linked anywhere. */
+       BUG_ON(!list_empty(&child->ptrace_entry));
+       /* Queue the child on the tracer's ptraced list, then reparent it. */
+       list_add(&child->ptrace_entry, &new_parent->ptraced);
        child->parent = new_parent;
-       add_parent(child);
 }
  
 /*
@@ -47,12 +56,12 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
 {
        spin_lock(&child->sighand->siglock);
-       if (child->state == TASK_TRACED) {
+       if (task_is_traced(child)) {
                if (child->signal->flags & SIGNAL_STOP_STOPPED) {
-                       child->state = TASK_STOPPED;
+                       __set_task_state(child, TASK_STOPPED);
                } else {
                        signal_wake_up(child, 1);
                }
@@ -71,14 +80,11 @@ void __ptrace_unlink(struct task_struct *child)
        BUG_ON(!child->ptrace);
 
        child->ptrace = 0;
-       if (!list_empty(&child->ptrace_list)) {
-               list_del_init(&child->ptrace_list);
-               remove_parent(child);
-               child->parent = child->real_parent;
-               add_parent(child);
-       }
+       child->parent = child->real_parent;
+       list_del_init(&child->ptrace_entry);
 
-       if (child->state == TASK_TRACED)
+       arch_ptrace_untrace(child);
+       if (task_is_traced(child))
                ptrace_untrace(child);
 }
 
@@ -97,30 +103,32 @@ int ptrace_check_attach(struct task_struct *child, int kill)
         * be changed by us so it's not changing right after this.
         */
        read_lock(&tasklist_lock);
-       if ((child->ptrace & PT_PTRACED) && child->parent == current &&
-           (!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
-           && child->signal != NULL) {
+       if ((child->ptrace & PT_PTRACED) && child->parent == current) {
                ret = 0;
+               /*
+                * child->sighand can't be NULL, release_task()
+                * does ptrace_unlink() before __exit_signal().
+                */
                spin_lock_irq(&child->sighand->siglock);
-               if (child->state == TASK_STOPPED) {
+               if (task_is_stopped(child))
                        child->state = TASK_TRACED;
-               } else if (child->state != TASK_TRACED && !kill) {
+               else if (!task_is_traced(child) && !kill)
                        ret = -ESRCH;
-               }
                spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
 
-       if (!ret && !kill) {
-               wait_task_inactive(child);
-       }
+       if (!ret && !kill)
+               ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
 
        /* All systems go.. */
        return ret;
 }
 
-static int may_attach(struct task_struct *task)
+int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
+       const struct cred *cred = current_cred(), *tcred;
+
        /* May we inspect the given task?
         * This check is used both for attaching with ptrace
         * and for allowing access to sensitive information in /proc.
@@ -133,29 +141,35 @@ static int may_attach(struct task_struct *task)
        /* Don't let security modules deny introspection */
        if (task == current)
                return 0;
-       if (((current->uid != task->euid) ||
-            (current->uid != task->suid) ||
-            (current->uid != task->uid) ||
-            (current->gid != task->egid) ||
-            (current->gid != task->sgid) ||
-            (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+       rcu_read_lock();
+       tcred = __task_cred(task);
+       if ((cred->uid != tcred->euid ||
+            cred->uid != tcred->suid ||
+            cred->uid != tcred->uid  ||
+            cred->gid != tcred->egid ||
+            cred->gid != tcred->sgid ||
+            cred->gid != tcred->gid) &&
+           !capable(CAP_SYS_PTRACE)) {
+               rcu_read_unlock();
                return -EPERM;
+       }
+       rcu_read_unlock();
        smp_rmb();
        if (task->mm)
-               dumpable = task->mm->dumpable;
+               dumpable = get_dumpable(task->mm);
        if (!dumpable && !capable(CAP_SYS_PTRACE))
                return -EPERM;
 
-       return security_ptrace(current, task);
+       return security_ptrace_may_access(task, mode);
 }
 
-int ptrace_may_attach(struct task_struct *task)
+/*
+ * Locked wrapper around __ptrace_may_access(): holds task_lock(task)
+ * for the duration of the check and returns true when the helper
+ * returned 0, false for any non-zero result.
+ */
+bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
        int err;
        task_lock(task);
-       err = may_attach(task);
+       err = __ptrace_may_access(task, mode);
        task_unlock(task);
-       return !err;
+       return (!err ? true : false);
 }
 
 int ptrace_attach(struct task_struct *task)
@@ -166,11 +180,17 @@ int ptrace_attach(struct task_struct *task)
        audit_ptrace(task);
 
        retval = -EPERM;
-       if (task->pid <= 1)
+       if (same_thread_group(task, current))
                goto out;
-       if (task->tgid == current->tgid)
+
+       /* Protect exec's credential calculations against our interference;
+        * SUID, SGID and LSM creds get determined differently under ptrace.
+        */
+       retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+       if (retval  < 0)
                goto out;
 
+       retval = -EPERM;
 repeat:
        /*
         * Nasty, nasty.
@@ -195,23 +215,22 @@ repeat:
        /* the same process cannot be attached many times */
        if (task->ptrace & PT_PTRACED)
                goto bad;
-       retval = may_attach(task);
+       retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
        if (retval)
                goto bad;
 
        /* Go */
-       task->ptrace |= PT_PTRACED | ((task->real_parent != current)
-                                     ? PT_ATTACHED : 0);
+       task->ptrace |= PT_PTRACED;
        if (capable(CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
 
        __ptrace_link(task, current);
 
-       force_sig_specific(SIGSTOP, task);
-
+       send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
 bad:
        write_unlock_irqrestore(&tasklist_lock, flags);
        task_unlock(task);
+       mutex_unlock(&current->cred_exec_mutex);
 out:
        return retval;
 }
@@ -233,6 +252,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
 
        /* Architecture-specific hardware disable .. */
        ptrace_disable(child);
+       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
        write_lock_irq(&tasklist_lock);
        /* protect against de_thread()->release_task() */
@@ -321,9 +341,8 @@ static int ptrace_setoptions(struct task_struct *child, long data)
        return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
 }
 
-static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
+static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
 {
-       siginfo_t lastinfo;
        int error = -ESRCH;
 
        read_lock(&tasklist_lock);
@@ -331,31 +350,25 @@ static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
                error = -EINVAL;
                spin_lock_irq(&child->sighand->siglock);
                if (likely(child->last_siginfo != NULL)) {
-                       lastinfo = *child->last_siginfo;
+                       *info = *child->last_siginfo;
                        error = 0;
                }
                spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
-       if (!error)
-               return copy_siginfo_to_user(data, &lastinfo);
        return error;
 }
 
-static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
+static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 {
-       siginfo_t newinfo;
        int error = -ESRCH;
 
-       if (copy_from_user(&newinfo, data, sizeof (siginfo_t)))
-               return -EFAULT;
-
        read_lock(&tasklist_lock);
        if (likely(child->sighand != NULL)) {
                error = -EINVAL;
                spin_lock_irq(&child->sighand->siglock);
                if (likely(child->last_siginfo != NULL)) {
-                       *child->last_siginfo = newinfo;
+                       *child->last_siginfo = *info;
                        error = 0;
                }
                spin_unlock_irq(&child->sighand->siglock);
@@ -364,12 +377,74 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
        return error;
 }
 
+
+/*
+ * Request classifiers used by ptrace_resume().
+ *
+ * Each macro compares the request against one PTRACE_* constant, or
+ * expands to the constant 0 when the guarding constant is not defined,
+ * so callers can test the request without #ifdefs of their own.
+ *
+ * NOTE(review): is_sysemu_singlestep() is guarded by PTRACE_SYSEMU but
+ * compares against PTRACE_SYSEMU_SINGLESTEP; presumably the two are
+ * always defined together -- confirm against the arch headers.
+ */
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request)         ((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request)         0
+#endif
+
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request)                ((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request)                0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request)  ((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request)  0
+#endif
+
+/*
+ * Resume the tracee for one of the "continue"-style requests.
+ *
+ * @child:   task to resume
+ * @request: PTRACE_* request that selects the tracing/stepping mode
+ * @data:    checked with valid_signal() and stored in child->exit_code
+ *
+ * Returns 0 after waking the child, or -EIO when @data fails the
+ * valid_signal() check or the requested stepping mode is reported as
+ * unavailable by arch_has_block_step() / arch_has_single_step().
+ * Note that the thread flags have already been updated by the time a
+ * stepping-mode failure is detected.
+ */
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+       if (!valid_signal(data))
+               return -EIO;
+
+       /* TIF_SYSCALL_TRACE stays set only for PTRACE_SYSCALL. */
+       if (request != PTRACE_SYSCALL)
+               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+       else
+               set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+       /* TIF_SYSCALL_EMU stays set only for the two SYSEMU requests. */
+       if (request != PTRACE_SYSEMU && request != PTRACE_SYSEMU_SINGLESTEP)
+               clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+       else
+               set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+       /* Select the stepping mode; an unavailable mode fails the request. */
+       if (is_singleblock(request)) {
+               if (unlikely(!arch_has_block_step()))
+                       return -EIO;
+               user_enable_block_step(child);
+       } else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+               if (unlikely(!arch_has_single_step()))
+                       return -EIO;
+               user_enable_single_step(child);
+       } else {
+               user_disable_single_step(child);
+       }
+
+       /* Record @data for the child, then wake it. */
+       child->exit_code = data;
+       wake_up_process(child);
+
+       return 0;
+}
+
 int ptrace_request(struct task_struct *child, long request,
                   long addr, long data)
 {
        int ret = -EIO;
+       siginfo_t siginfo;
 
        switch (request) {
+       case PTRACE_PEEKTEXT:
+       case PTRACE_PEEKDATA:
+               return generic_ptrace_peekdata(child, addr, data);
+       case PTRACE_POKETEXT:
+       case PTRACE_POKEDATA:
+               return generic_ptrace_pokedata(child, addr, data);
+
 #ifdef PTRACE_OLDSETOPTIONS
        case PTRACE_OLDSETOPTIONS:
 #endif
@@ -379,12 +454,45 @@ int ptrace_request(struct task_struct *child, long request,
        case PTRACE_GETEVENTMSG:
                ret = put_user(child->ptrace_message, (unsigned long __user *) data);
                break;
+
        case PTRACE_GETSIGINFO:
-               ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
+               ret = ptrace_getsiginfo(child, &siginfo);
+               if (!ret)
+                       ret = copy_siginfo_to_user((siginfo_t __user *) data,
+                                                  &siginfo);
                break;
+
        case PTRACE_SETSIGINFO:
-               ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
+               if (copy_from_user(&siginfo, (siginfo_t __user *) data,
+                                  sizeof siginfo))
+                       ret = -EFAULT;
+               else
+                       ret = ptrace_setsiginfo(child, &siginfo);
                break;
+
+       case PTRACE_DETACH:      /* detach a process that was attached. */
+               ret = ptrace_detach(child, data);
+               break;
+
+#ifdef PTRACE_SINGLESTEP
+       case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SINGLEBLOCK
+       case PTRACE_SINGLEBLOCK:
+#endif
+#ifdef PTRACE_SYSEMU
+       case PTRACE_SYSEMU:
+       case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+       case PTRACE_SYSCALL:
+       case PTRACE_CONT:
+               return ptrace_resume(child, request, data);
+
+       case PTRACE_KILL:
+               if (child->exit_state)  /* already dead */
+                       return 0;
+               return ptrace_resume(child, request, SIGKILL);
+
        default:
                break;
        }
@@ -405,14 +513,33 @@ int ptrace_traceme(void)
        /*
         * Are we already being traced?
         */
+repeat:
        task_lock(current);
        if (!(current->ptrace & PT_PTRACED)) {
-               ret = security_ptrace(current->parent, current);
+               /*
+                * See ptrace_attach() comments about the locking here.
+                */
+               unsigned long flags;
+               if (!write_trylock_irqsave(&tasklist_lock, flags)) {
+                       task_unlock(current);
+                       do {
+                               cpu_relax();
+                       } while (!write_can_lock(&tasklist_lock));
+                       goto repeat;
+               }
+
+               ret = security_ptrace_traceme(current->parent);
+
                /*
                 * Set the ptrace bit in the process ptrace flags.
+                * Then link us on our parent's ptraced list.
                 */
-               if (!ret)
+               if (!ret) {
                        current->ptrace |= PT_PTRACED;
+                       __ptrace_link(current, current->real_parent);
+               }
+
+               write_unlock_irqrestore(&tasklist_lock, flags);
        }
        task_unlock(current);
        return ret;
@@ -432,14 +559,8 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
 {
        struct task_struct *child;
 
-       /*
-        * Tracing init is not allowed.
-        */
-       if (pid == 1)
-               return ERR_PTR(-EPERM);
-
        read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
+       child = find_task_by_vpid(pid);
        if (child)
                get_task_struct(child);
 
@@ -449,8 +570,11 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
        return child;
 }
 
-#ifndef __ARCH_SYS_PTRACE
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+#ifndef arch_ptrace_attach
+#define arch_ptrace_attach(child)      do { } while (0)
+#endif
+
+SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
 {
        struct task_struct *child;
        long ret;
@@ -461,6 +585,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
        lock_kernel();
        if (request == PTRACE_TRACEME) {
                ret = ptrace_traceme();
+               if (!ret)
+                       arch_ptrace_attach(current);
                goto out;
        }
 
@@ -472,6 +598,12 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
+               /*
+                * Some architectures need to do book-keeping after
+                * a ptrace attach.
+                */
+               if (!ret)
+                       arch_ptrace_attach(child);
                goto out_put_task_struct;
        }
 
@@ -489,7 +621,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
        unlock_kernel();
        return ret;
 }
-#endif /* __ARCH_SYS_PTRACE */
 
 int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data)
 {
@@ -509,3 +640,102 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
        copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
        return (copied == sizeof(data)) ? 0 : -EIO;
 }
+
+#if defined CONFIG_COMPAT
+#include <linux/compat.h>
+
+/*
+ * Compat flavour of ptrace_request().
+ *
+ * Handles the requests whose user-visible data is compat-sized
+ * (peek/poke of a compat_ulong_t word, the event message, and siginfo
+ * in its compat layout) and passes every other request on to
+ * ptrace_request().
+ *
+ * @child:   task being operated on
+ * @request: PTRACE_* request code
+ * @addr:    address in the child for peek/poke
+ * @data:    value to poke, or a compat user pointer for results
+ *
+ * Returns 0 or a negative errno from the selected operation.
+ */
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+                         compat_ulong_t addr, compat_ulong_t data)
+{
+       compat_ulong_t __user *uptr = compat_ptr(data);
+       compat_ulong_t val;
+       siginfo_t info;
+       int rc;
+
+       switch (request) {
+       case PTRACE_PEEKTEXT:
+       case PTRACE_PEEKDATA:
+               /* Read one word from the child, then copy it to the caller. */
+               rc = access_process_vm(child, addr, &val, sizeof(val), 0);
+               if (rc != sizeof(val))
+                       return -EIO;
+               return put_user(val, uptr);
+
+       case PTRACE_POKETEXT:
+       case PTRACE_POKEDATA:
+               /* @data itself is the word written into the child. */
+               rc = access_process_vm(child, addr, &data, sizeof(data), 1);
+               return (rc == sizeof(data)) ? 0 : -EIO;
+
+       case PTRACE_GETEVENTMSG:
+               return put_user((compat_ulong_t) child->ptrace_message, uptr);
+
+       case PTRACE_GETSIGINFO:
+               rc = ptrace_getsiginfo(child, &info);
+               if (rc)
+                       return rc;
+               return copy_siginfo_to_user32(
+                       (struct compat_siginfo __user *) uptr, &info);
+
+       case PTRACE_SETSIGINFO:
+               /* Start from a zeroed siginfo before the compat copy-in. */
+               memset(&info, 0, sizeof info);
+               if (copy_siginfo_from_user32(
+                           &info, (struct compat_siginfo __user *) uptr))
+                       return -EFAULT;
+               return ptrace_setsiginfo(child, &info);
+
+       default:
+               return ptrace_request(child, request, addr, data);
+       }
+}
+
+/*
+ * ptrace() entry point for compat callers.
+ *
+ * PTRACE_TRACEME is handled directly on the calling task.  For every
+ * other request the target is looked up with ptrace_get_task_struct();
+ * PTRACE_ATTACH goes to ptrace_attach(), anything else must first pass
+ * ptrace_check_attach() and is then dispatched to compat_arch_ptrace().
+ * The task reference taken by the lookup is dropped before returning.
+ *
+ * After a successful PTRACE_TRACEME or PTRACE_ATTACH the architecture
+ * hook arch_ptrace_attach() is run, matching the native ptrace syscall
+ * in this file.
+ *
+ * Returns the result of the selected operation, or the error encoded
+ * in the pointer returned by the task lookup.
+ */
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+                                 compat_long_t addr, compat_long_t data)
+{
+       struct task_struct *child;
+       long ret;
+
+       /*
+        * This lock_kernel fixes a subtle race with suid exec
+        */
+       lock_kernel();
+       if (request == PTRACE_TRACEME) {
+               ret = ptrace_traceme();
+               /*
+                * Run the same post-attach book-keeping hook that the
+                * native ptrace syscall runs for PTRACE_TRACEME.
+                */
+               if (!ret)
+                       arch_ptrace_attach(current);
+               goto out;
+       }
+
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               goto out;
+       }
+
+       if (request == PTRACE_ATTACH) {
+               ret = ptrace_attach(child);
+               /*
+                * Some architectures need to do book-keeping after
+                * a ptrace attach.
+                */
+               if (!ret)
+                       arch_ptrace_attach(child);
+               goto out_put_task_struct;
+       }
+
+       /* PTRACE_KILL is allowed even if the child is not stopped. */
+       ret = ptrace_check_attach(child, request == PTRACE_KILL);
+       if (!ret)
+               ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+       put_task_struct(child);
+ out:
+       unlock_kernel();
+       return ret;
+}
+#endif /* CONFIG_COMPAT */