proc: move /proc/vmstat boilerplate to mm/vmstat.c
[safe/jmp/linux-2.6] / fs / proc / base.c
index b48ddb1..b5918ae 100644 (file)
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/init.h>
 #include <linux/capability.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
@@ -68,6 +70,7 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
@@ -126,8 +129,24 @@ struct pid_entry {
                NULL, &proc_single_file_operations,     \
                { .proc_show = &proc_##OTYPE } )
 
-int maps_protect;
-EXPORT_SYMBOL(maps_protect);
+/*
+ * Count the directory entries among the first @n elements of @entries.
+ * Callers add 2 (for the . and .. links) to get the inode's hardlink count.
+ */
+static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
+       unsigned int n)
+{
+       unsigned int i;
+       unsigned int count;
+
+       count = 0;
+       for (i = 0; i < n; ++i) {
+               if (S_ISDIR(entries[i].mode))
+                       ++count;
+       }
+
+       return count;
+}
 
 static struct fs_struct *get_fs_struct(struct task_struct *task)
 {
@@ -142,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
 
 static int get_nr_threads(struct task_struct *tsk)
 {
-       /* Must be called with the rcu_read_lock held */
        unsigned long flags;
        int count = 0;
 
@@ -195,12 +213,36 @@ static int proc_root_link(struct inode *inode, struct path *path)
        return result;
 }
 
-#define MAY_PTRACE(task) \
-       (task == current || \
-       (task->parent == current && \
-       (task->ptrace & PT_PTRACED) && \
-        (task_is_stopped_or_traced(task)) && \
-        security_ptrace(current,task) == 0))
+/*
+ * Return zero if current may access user memory in @task, -EPERM if not.
+ */
+static int check_mem_permission(struct task_struct *task)
+{
+       /*
+        * A task can always look at itself, in case it chooses
+        * to use system calls instead of load instructions.
+        */
+       if (task == current)
+               return 0;
+
+       /*
+        * If current is actively ptrace'ing, and would also be
+        * permitted to freshly attach with ptrace now, permit it.
+        */
+       if (task_is_stopped_or_traced(task)) {
+               int match;
+               rcu_read_lock();
+               match = (tracehook_tracer_task(task) == current);
+               rcu_read_unlock();
+               if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
+                       return 0;
+       }
+
+       /*
+        * No one else is allowed.
+        */
+       return -EPERM;
+}
 
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
@@ -211,7 +253,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
        task_lock(task);
        if (task->mm != mm)
                goto out;
-       if (task->mm != current->mm && __ptrace_may_attach(task) < 0)
+       if (task->mm != current->mm &&
+           __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
                goto out;
        task_unlock(task);
        return mm;
@@ -424,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
 
        struct rlimit rlim[RLIM_NLIMITS];
 
-       rcu_read_lock();
-       if (!lock_task_sighand(task,&flags)) {
-               rcu_read_unlock();
+       if (!lock_task_sighand(task, &flags))
                return 0;
-       }
        memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
        unlock_task_sighand(task, &flags);
-       rcu_read_unlock();
 
        /*
         * print the file header
@@ -463,6 +502,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
        return count;
 }
 
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+static int proc_pid_syscall(struct task_struct *task, char *buffer)
+{
+       long nr;
+       unsigned long args[6], sp, pc;
+
+       if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+               return sprintf(buffer, "running\n");
+
+       if (nr < 0)
+               return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+
+       return sprintf(buffer,
+                      "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+                      nr,
+                      args[0], args[1], args[2], args[3], args[4], args[5],
+                      sp, pc);
+}
+#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
+
 /************************************************************************/
 /*                       Here the fs part begins                        */
 /************************************************************************/
@@ -478,7 +537,7 @@ static int proc_fd_access_allowed(struct inode *inode)
         */
        task = get_proc_task(inode);
        if (task) {
-               allowed = ptrace_may_attach(task);
+               allowed = ptrace_may_access(task, PTRACE_MODE_READ);
                put_task_struct(task);
        }
        return allowed;
@@ -722,7 +781,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
        if (!task)
                goto out_no_task;
 
-       if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
+       if (check_mem_permission(task))
                goto out;
 
        ret = -ENOMEM;
@@ -748,7 +807,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
 
                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
                retval = access_process_vm(task, src, page, this_len, 0);
-               if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
+               if (!retval || check_mem_permission(task)) {
                        if (!ret)
                                ret = -EIO;
                        break;
@@ -792,7 +851,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
        if (!task)
                goto out_no_task;
 
-       if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
+       if (check_mem_permission(task))
                goto out;
 
        copied = -ENOMEM;
@@ -864,7 +923,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
        if (!task)
                goto out_no_task;
 
-       if (!ptrace_may_attach(task))
+       if (!ptrace_may_access(task, PTRACE_MODE_READ))
                goto out;
 
        ret = -ENOMEM;
@@ -1793,8 +1852,7 @@ static const struct file_operations proc_fd_operations = {
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
  */
-static int proc_fd_permission(struct inode *inode, int mask,
-                               struct nameidata *nd)
+static int proc_fd_permission(struct inode *inode, int mask)
 {
        int rv;
 
@@ -2335,29 +2393,54 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
 }
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 {
+       struct task_io_accounting acct = task->ioac;
+       unsigned long flags;
+
+       if (whole && lock_task_sighand(task, &flags)) {
+               struct task_struct *t = task;
+
+               task_io_accounting_add(&acct, &task->signal->ioac);
+               while_each_thread(task, t)
+                       task_io_accounting_add(&acct, &t->ioac);
+
+               unlock_task_sighand(task, &flags);
+       }
        return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
                        "rchar: %llu\n"
                        "wchar: %llu\n"
                        "syscr: %llu\n"
                        "syscw: %llu\n"
-#endif
                        "read_bytes: %llu\n"
                        "write_bytes: %llu\n"
                        "cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
-                       (unsigned long long)task->rchar,
-                       (unsigned long long)task->wchar,
-                       (unsigned long long)task->syscr,
-                       (unsigned long long)task->syscw,
-#endif
-                       (unsigned long long)task->ioac.read_bytes,
-                       (unsigned long long)task->ioac.write_bytes,
-                       (unsigned long long)task->ioac.cancelled_write_bytes);
+                       (unsigned long long)acct.rchar,
+                       (unsigned long long)acct.wchar,
+                       (unsigned long long)acct.syscr,
+                       (unsigned long long)acct.syscw,
+                       (unsigned long long)acct.read_bytes,
+                       (unsigned long long)acct.write_bytes,
+                       (unsigned long long)acct.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+       return do_io_accounting(task, buffer, 0);
+}
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+       return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
+
+static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
+                               struct pid *pid, struct task_struct *task)
+{
+       seq_printf(m, "%08x\n", task->personality);
+       return 0;
 }
-#endif
 
 /*
  * Thread groups
@@ -2375,10 +2458,14 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("environ",    S_IRUSR, environ),
        INF("auxv",       S_IRUSR, pid_auxv),
        ONE("status",     S_IRUGO, pid_status),
+       ONE("personality", S_IRUSR, pid_personality),
        INF("limits",     S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+       INF("syscall",    S_IRUSR, pid_syscall),
+#endif
        INF("cmdline",    S_IRUGO, pid_cmdline),
        ONE("stat",       S_IRUGO, tgid_stat),
        ONE("statm",      S_IRUGO, pid_statm),
@@ -2420,7 +2507,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
-       REG("sessionid",  S_IRUSR, sessionid),
+       REG("sessionid",  S_IRUGO, sessionid),
 #endif
 #ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
@@ -2429,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-       INF("io",       S_IRUGO, pid_io_accounting),
+       INF("io",       S_IRUGO, tgid_io_accounting),
 #endif
 };
 
@@ -2564,10 +2651,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
        inode->i_op = &proc_tgid_base_inode_operations;
        inode->i_fop = &proc_tgid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-       inode->i_nlink = 5;
-#ifdef CONFIG_SECURITY
-       inode->i_nlink += 1;
-#endif
+
+       inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
+               ARRAY_SIZE(tgid_base_stuff));
 
        dentry->d_op = &pid_dentry_operations;
 
@@ -2708,10 +2794,14 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("environ",   S_IRUSR, environ),
        INF("auxv",      S_IRUSR, pid_auxv),
        ONE("status",    S_IRUGO, pid_status),
+       ONE("personality", S_IRUSR, pid_personality),
        INF("limits",    S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+       INF("syscall",   S_IRUSR, pid_syscall),
+#endif
        INF("cmdline",   S_IRUGO, pid_cmdline),
        ONE("stat",      S_IRUGO, tid_stat),
        ONE("statm",     S_IRUGO, pid_statm),
@@ -2757,6 +2847,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+       INF("io",       S_IRUGO, tid_io_accounting),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
@@ -2795,10 +2888,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
        inode->i_op = &proc_tid_base_inode_operations;
        inode->i_fop = &proc_tid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-       inode->i_nlink = 4;
-#ifdef CONFIG_SECURITY
-       inode->i_nlink += 1;
-#endif
+
+       inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
+               ARRAY_SIZE(tid_base_stuff));
 
        dentry->d_op = &pid_dentry_operations;
 
@@ -2997,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
        generic_fillattr(inode, stat);
 
        if (p) {
-               rcu_read_lock();
                stat->nlink += get_nr_threads(p);
-               rcu_read_unlock();
                put_task_struct(p);
        }