fs: use rlimit helpers
[safe/jmp/linux-2.6] / fs / proc / array.c
index 98e78e2..aa8637b 100644 (file)
@@ -40,7 +40,7 @@
  *
  *
  * Alan Cox         :  security fixes.
- *                     <Alan.Cox@linux.org>
+ *                     <alan@lxorguk.ukuu.org.uk>
  *
  * Al Viro           :  safe handling of mm_struct
  *
@@ -62,6 +62,8 @@
 #include <linux/mman.h>
 #include <linux/proc_fs.h>
 #include <linux/ioport.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/signal.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/times.h>
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
 #include <linux/delayacct.h>
+#include <linux/seq_file.h>
+#include <linux/pid_namespace.h>
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/swapops.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
-#include <asm/io.h>
 #include <asm/processor.h>
 #include "internal.h"
 
-/* Gcc optimizes away "strlen(x)" for constant x */
-#define ADDBUF(buffer, string) \
-do { memcpy(buffer, string, strlen(string)); \
-     buffer += strlen(string); } while (0)
-
-static inline char * task_name(struct task_struct *p, char * buf)
+static inline void task_name(struct seq_file *m, struct task_struct *p)
 {
        int i;
-       char * name;
+       char *buf, *end;
+       char *name;
        char tcomm[sizeof(p->comm)];
 
        get_task_comm(tcomm, p);
 
-       ADDBUF(buf, "Name:\t");
+       seq_printf(m, "Name:\t");
+       end = m->buf + m->size;
+       buf = m->buf + m->count;
        name = tcomm;
        i = sizeof(tcomm);
-       do {
+       while (i && (buf < end)) {
                unsigned char c = *name;
                name++;
                i--;
@@ -106,20 +110,21 @@ static inline char * task_name(struct task_struct *p, char * buf)
                if (!c)
                        break;
                if (c == '\\') {
-                       buf[1] = c;
-                       buf += 2;
+                       buf++;
+                       if (buf < end)
+                               *buf++ = c;
                        continue;
                }
                if (c == '\n') {
-                       buf[0] = '\\';
-                       buf[1] = 'n';
-                       buf += 2;
+                       *buf++ = '\\';
+                       if (buf < end)
+                               *buf++ = 'n';
                        continue;
                }
                buf++;
-       } while (i);
-       *buf = '\n';
-       return buf+1;
+       }
+       m->count = buf - m->buf;
+       seq_printf(m, "\n");
 }
 
 /*
@@ -129,26 +134,25 @@ static inline char * task_name(struct task_struct *p, char * buf)
  * simple bit tests.
  */
 static const char *task_state_array[] = {
-       "R (running)",          /*  0 */
-       "S (sleeping)",         /*  1 */
-       "D (disk sleep)",       /*  2 */
-       "T (stopped)",          /*  4 */
-       "T (tracing stop)",     /*  8 */
-       "Z (zombie)",           /* 16 */
-       "X (dead)"              /* 32 */
+       "R (running)",          /*   0 */
+       "S (sleeping)",         /*   1 */
+       "D (disk sleep)",       /*   2 */
+       "T (stopped)",          /*   4 */
+       "t (tracing stop)",     /*   8 */
+       "Z (zombie)",           /*  16 */
+       "X (dead)",             /*  32 */
+       "x (dead)",             /*  64 */
+       "K (wakekill)",         /* 128 */
+       "W (waking)",           /* 256 */
 };
 
-static inline const char * get_task_state(struct task_struct *tsk)
+static inline const char *get_task_state(struct task_struct *tsk)
 {
-       unsigned int state = (tsk->state & (TASK_RUNNING |
-                                           TASK_INTERRUPTIBLE |
-                                           TASK_UNINTERRUPTIBLE |
-                                           TASK_STOPPED |
-                                           TASK_TRACED)) |
-                       (tsk->exit_state & (EXIT_ZOMBIE |
-                                           EXIT_DEAD));
+       unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
        const char **p = &task_state_array[0];
 
+       BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
        while (state) {
                p++;
                state >>= 1;
@@ -156,14 +160,26 @@ static inline const char * get_task_state(struct task_struct *tsk)
        return *p;
 }
 
-static inline char * task_state(struct task_struct *p, char *buffer)
+static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
+                               struct pid *pid, struct task_struct *p)
 {
        struct group_info *group_info;
        int g;
        struct fdtable *fdt = NULL;
+       const struct cred *cred;
+       pid_t ppid, tpid;
 
        rcu_read_lock();
-       buffer += sprintf(buffer,
+       ppid = pid_alive(p) ?
+               task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
+       tpid = 0;
+       if (pid_alive(p)) {
+               struct task_struct *tracer = tracehook_tracer_task(p);
+               if (tracer)
+                       tpid = task_pid_nr_ns(tracer, ns);
+       }
+       cred = get_cred((struct cred *) __task_cred(p));
+       seq_printf(m,
                "State:\t%s\n"
                "Tgid:\t%d\n"
                "Pid:\t%d\n"
@@ -172,40 +188,37 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Uid:\t%d\t%d\t%d\t%d\n"
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
-               p->tgid, p->pid,
-               pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
-               pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
-               p->uid, p->euid, p->suid, p->fsuid,
-               p->gid, p->egid, p->sgid, p->fsgid);
+               task_tgid_nr_ns(p, ns),
+               pid_nr_ns(pid, ns),
+               ppid, tpid,
+               cred->uid, cred->euid, cred->suid, cred->fsuid,
+               cred->gid, cred->egid, cred->sgid, cred->fsgid);
 
        task_lock(p);
        if (p->files)
                fdt = files_fdtable(p->files);
-       buffer += sprintf(buffer,
+       seq_printf(m,
                "FDSize:\t%d\n"
                "Groups:\t",
                fdt ? fdt->max_fds : 0);
        rcu_read_unlock();
 
-       group_info = p->group_info;
-       get_group_info(group_info);
+       group_info = cred->group_info;
        task_unlock(p);
 
-       for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++)
-               buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g));
-       put_group_info(group_info);
+       for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
+               seq_printf(m, "%d ", GROUP_AT(group_info, g));
+       put_cred(cred);
 
-       buffer += sprintf(buffer, "\n");
-       return buffer;
+       seq_printf(m, "\n");
 }
 
-static char * render_sigset_t(const char *header, sigset_t *set, char *buffer)
+static void render_sigset_t(struct seq_file *m, const char *header,
+                               sigset_t *set)
 {
-       int i, len;
+       int i;
 
-       len = strlen(header);
-       memcpy(buffer, header, len);
-       buffer += len;
+       seq_printf(m, "%s", header);
 
        i = _NSIG;
        do {
@@ -216,12 +229,10 @@ static char * render_sigset_t(const char *header, sigset_t *set, char *buffer)
                if (sigismember(set, i+2)) x |= 2;
                if (sigismember(set, i+3)) x |= 4;
                if (sigismember(set, i+4)) x |= 8;
-               *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
+               seq_printf(m, "%x", x);
        } while (i >= 4);
 
-       *buffer++ = '\n';
-       *buffer = 0;
-       return buffer;
+       seq_printf(m, "\n");
 }
 
 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -239,7 +250,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
        }
 }
 
-static inline char * task_sig(struct task_struct *p, char *buffer)
+static inline void task_sig(struct seq_file *m, struct task_struct *p)
 {
        unsigned long flags;
        sigset_t pending, shpending, blocked, ignored, caught;
@@ -253,140 +264,151 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
        sigemptyset(&ignored);
        sigemptyset(&caught);
 
-       rcu_read_lock();
        if (lock_task_sighand(p, &flags)) {
                pending = p->pending.signal;
                shpending = p->signal->shared_pending.signal;
                blocked = p->blocked;
                collect_sigign_sigcatch(p, &ignored, &caught);
                num_threads = atomic_read(&p->signal->count);
-               qsize = atomic_read(&p->user->sigpending);
-               qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
+               rcu_read_lock();  /* FIXME: is this correct? */
+               qsize = atomic_read(&__task_cred(p)->user->sigpending);
+               rcu_read_unlock();
+               qlim = task_rlimit(p, RLIMIT_SIGPENDING);
                unlock_task_sighand(p, &flags);
        }
-       rcu_read_unlock();
 
-       buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
-       buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
+       seq_printf(m, "Threads:\t%d\n", num_threads);
+       seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
 
        /* render them all */
-       buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
-       buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer);
-       buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
-       buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
-       buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
-
-       return buffer;
+       render_sigset_t(m, "SigPnd:\t", &pending);
+       render_sigset_t(m, "ShdPnd:\t", &shpending);
+       render_sigset_t(m, "SigBlk:\t", &blocked);
+       render_sigset_t(m, "SigIgn:\t", &ignored);
+       render_sigset_t(m, "SigCgt:\t", &caught);
 }
 
-static inline char *task_cap(struct task_struct *p, char *buffer)
+static void render_cap_t(struct seq_file *m, const char *header,
+                       kernel_cap_t *a)
 {
-    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-                           "CapPrm:\t%016x\n"
-                           "CapEff:\t%016x\n",
-                           cap_t(p->cap_inheritable),
-                           cap_t(p->cap_permitted),
-                           cap_t(p->cap_effective));
+       unsigned __capi;
+
+       seq_printf(m, "%s", header);
+       CAP_FOR_EACH_U32(__capi) {
+               seq_printf(m, "%08x",
+                          a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+       }
+       seq_printf(m, "\n");
 }
 
-int proc_pid_status(struct task_struct *task, char * buffer)
+static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
-       char * orig = buffer;
-       struct mm_struct *mm = get_task_mm(task);
+       const struct cred *cred;
+       kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
 
-       buffer = task_name(task, buffer);
-       buffer = task_state(task, buffer);
-       if (mm) {
-               buffer = task_mem(mm, buffer);
-               mmput(mm);
-       }
-       buffer = task_sig(task, buffer);
-       buffer = task_cap(task, buffer);
-       buffer = cpuset_task_status_allowed(task, buffer);
-#if defined(CONFIG_S390)
-       buffer = task_show_regs(task, buffer);
-#endif
-       return buffer - orig;
+       rcu_read_lock();
+       cred = __task_cred(p);
+       cap_inheritable = cred->cap_inheritable;
+       cap_permitted   = cred->cap_permitted;
+       cap_effective   = cred->cap_effective;
+       cap_bset        = cred->cap_bset;
+       rcu_read_unlock();
+
+       render_cap_t(m, "CapInh:\t", &cap_inheritable);
+       render_cap_t(m, "CapPrm:\t", &cap_permitted);
+       render_cap_t(m, "CapEff:\t", &cap_effective);
+       render_cap_t(m, "CapBnd:\t", &cap_bset);
 }
 
-static clock_t task_utime(struct task_struct *p)
+static inline void task_context_switch_counts(struct seq_file *m,
+                                               struct task_struct *p)
 {
-       clock_t utime = cputime_to_clock_t(p->utime),
-               total = utime + cputime_to_clock_t(p->stime);
-       u64 temp;
-
-       /*
-        * Use CFS's precise accounting:
-        */
-       temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-
-       if (total) {
-               temp *= utime;
-               do_div(temp, total);
-       }
-       utime = (clock_t)temp;
+       seq_printf(m,   "voluntary_ctxt_switches:\t%lu\n"
+                       "nonvoluntary_ctxt_switches:\t%lu\n",
+                       p->nvcsw,
+                       p->nivcsw);
+}
 
-       return utime;
+static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
+{
+       seq_printf(m, "Cpus_allowed:\t");
+       seq_cpumask(m, &task->cpus_allowed);
+       seq_printf(m, "\n");
+       seq_printf(m, "Cpus_allowed_list:\t");
+       seq_cpumask_list(m, &task->cpus_allowed);
+       seq_printf(m, "\n");
 }
 
-static clock_t task_stime(struct task_struct *p)
+int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
 {
-       clock_t stime = cputime_to_clock_t(p->stime);
+       struct mm_struct *mm = get_task_mm(task);
 
-       /*
-        * Use CFS's precise accounting. (we subtract utime from
-        * the total, to make sure the total observed by userspace
-        * grows monotonically - apps rely on that):
-        */
-       stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+       task_name(m, task);
+       task_state(m, ns, pid, task);
 
-       return stime;
+       if (mm) {
+               task_mem(m, mm);
+               mmput(mm);
+       }
+       task_sig(m, task);
+       task_cap(m, task);
+       task_cpus_allowed(m, task);
+       cpuset_task_status_allowed(m, task);
+#if defined(CONFIG_S390)
+       task_show_regs(m, task);
+#endif
+       task_context_switch_counts(m, task);
+       return 0;
 }
 
-
-static int do_task_stat(struct task_struct *task, char * buffer, int whole)
+static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task, int whole)
 {
        unsigned long vsize, eip, esp, wchan = ~0UL;
        long priority, nice;
        int tty_pgrp = -1, tty_nr = 0;
        sigset_t sigign, sigcatch;
        char state;
-       int res;
-       pid_t ppid = 0, pgid = -1, sid = -1;
+       pid_t ppid = 0, pgid = -1, sid = -1;
        int num_threads = 0;
+       int permitted;
        struct mm_struct *mm;
        unsigned long long start_time;
        unsigned long cmin_flt = 0, cmaj_flt = 0;
        unsigned long  min_flt = 0,  maj_flt = 0;
-       cputime_t cutime, cstime;
-       clock_t utime, stime;
+       cputime_t cutime, cstime, utime, stime;
+       cputime_t cgtime, gtime;
        unsigned long rsslim = 0;
        char tcomm[sizeof(task->comm)];
        unsigned long flags;
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
+       permitted = ptrace_may_access(task, PTRACE_MODE_READ);
        mm = get_task_mm(task);
        if (mm) {
                vsize = task_vsize(mm);
-               eip = KSTK_EIP(task);
-               esp = KSTK_ESP(task);
+               if (permitted) {
+                       eip = KSTK_EIP(task);
+                       esp = KSTK_ESP(task);
+               }
        }
 
        get_task_comm(tcomm, task);
 
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-       cutime = cstime = cputime_zero;
-       utime = stime = 0;
+       cutime = cstime = utime = stime = cputime_zero;
+       cgtime = gtime = cputime_zero;
 
-       rcu_read_lock();
        if (lock_task_sighand(task, &flags)) {
                struct signal_struct *sig = task->signal;
 
                if (sig->tty) {
-                       tty_pgrp = pid_nr(sig->tty->pgrp);
+                       struct pid *pgrp = tty_get_pgrp(sig->tty);
+                       tty_pgrp = pid_nr_ns(pgrp, ns);
+                       put_pid(pgrp);
                        tty_nr = new_encode_dev(tty_devnum(sig->tty));
                }
 
@@ -397,7 +419,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                cmaj_flt = sig->cmaj_flt;
                cutime = sig->cutime;
                cstime = sig->cstime;
-               rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+               cgtime = sig->cgtime;
+               rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
 
                /* add up live thread stats at the group level */
                if (whole) {
@@ -405,32 +428,30 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
-                               utime += task_utime(t);
-                               stime += task_stime(t);
+                               gtime = cputime_add(gtime, t->gtime);
                                t = next_thread(t);
                        } while (t != task);
 
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
-                       utime += cputime_to_clock_t(sig->utime);
-                       stime += cputime_to_clock_t(sig->stime);
+                       thread_group_times(task, &utime, &stime);
+                       gtime = cputime_add(gtime, sig->gtime);
                }
 
-               sid = signal_session(sig);
-               pgid = process_group(task);
-               ppid = rcu_dereference(task->real_parent)->tgid;
+               sid = task_session_nr_ns(task, ns);
+               ppid = task_tgid_nr_ns(task->real_parent, ns);
+               pgid = task_pgrp_nr_ns(task, ns);
 
                unlock_task_sighand(task, &flags);
        }
-       rcu_read_unlock();
 
-       if (!whole || num_threads<2)
+       if (permitted && (!whole || num_threads < 2))
                wchan = get_wchan(task);
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
-               utime = task_utime(task);
-               stime = task_stime(task);
+               task_times(task, &utime, &stime);
+               gtime = task->gtime;
        }
 
        /* scale priority and nice values from timeslices to -20..20 */
@@ -440,15 +461,16 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 
        /* Temporary variable needed for gcc-2.96 */
        /* convert timespec -> nsec*/
-       start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
-                               + task->start_time.tv_nsec;
+       start_time =
+               (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
+                               + task->real_start_time.tv_nsec;
        /* convert nsec -> ticks */
        start_time = nsec_to_clock_t(start_time);
 
-       res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \
+       seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
-               task->pid,
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+               pid_nr_ns(pid, ns),
                tcomm,
                state,
                ppid,
@@ -461,8 +483,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                cmin_flt,
                maj_flt,
                cmaj_flt,
-               utime,
-               stime,
+               cputime_to_clock_t(utime),
+               cputime_to_clock_t(stime),
                cputime_to_clock_t(cutime),
                cputime_to_clock_t(cstime),
                priority,
@@ -471,10 +493,10 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                start_time,
                vsize,
                mm ? get_mm_rss(mm) : 0,
-               rsslim,
+               rsslim,
                mm ? mm->start_code : 0,
                mm ? mm->end_code : 0,
-               mm ? mm->start_stack : 0,
+               (permitted && mm) ? task->stack_start : 0,
                esp,
                eip,
                /* The signal information here is obsolete.
@@ -492,32 +514,38 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                task_cpu(task),
                task->rt_priority,
                task->policy,
-               (unsigned long long)delayacct_blkio_ticks(task));
-       if(mm)
+               (unsigned long long)delayacct_blkio_ticks(task),
+               cputime_to_clock_t(gtime),
+               cputime_to_clock_t(cgtime));
+       if (mm)
                mmput(mm);
-       return res;
+       return 0;
 }
 
-int proc_tid_stat(struct task_struct *task, char * buffer)
+int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
 {
-       return do_task_stat(task, buffer, 0);
+       return do_task_stat(m, ns, pid, task, 0);
 }
 
-int proc_tgid_stat(struct task_struct *task, char * buffer)
+int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
 {
-       return do_task_stat(task, buffer, 1);
+       return do_task_stat(m, ns, pid, task, 1);
 }
 
-int proc_pid_statm(struct task_struct *task, char *buffer)
+int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
 {
        int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
        struct mm_struct *mm = get_task_mm(task);
-       
+
        if (mm) {
                size = task_statm(mm, &shared, &text, &data, &resident);
                mmput(mm);
        }
+       seq_printf(m, "%d %d %d %d %d %d %d\n",
+                       size, resident, shared, text, lib, data, 0);
 
-       return sprintf(buffer,"%d %d %d %d %d %d %d\n",
-                      size, resident, shared, text, lib, data, 0);
+       return 0;
 }