X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fproc%2Fbase.c;h=a891fe4cb43bf34e0becd16e642adfc137c0d4e3;hb=297c5d92634c809cef23d73e7b2556f2528ff7e2;hp=a721acfd4fdcd3a008befe2a8a185a863b5e0903;hpb=8948e11f450e6189a79e47d6051c3d5a0b98e3f3;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/proc/base.c b/fs/proc/base.c index a721acf..a891fe4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -56,23 +56,27 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include +#include +#include #include #include #include -#include +#include #include #include #include #include #include +#include +#include #include "internal.h" /* NOTE: @@ -85,13 +89,9 @@ * in /proc for a task before it execs a suid executable. */ - -/* Worst case buffer size needed for holding an integer. */ -#define PROC_NUMBUF 13 - struct pid_entry { - int len; char *name; + int len; mode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -99,8 +99,8 @@ struct pid_entry { }; #define NOD(NAME, MODE, IOP, FOP, OP) { \ - .len = sizeof(NAME) - 1, \ .name = (NAME), \ + .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ @@ -122,6 +122,32 @@ struct pid_entry { NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ { .proc_read = &proc_##OTYPE } ) +#define ONE(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_single_file_operations, \ + { .proc_show = &proc_##OTYPE } ) + +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) +{ + unsigned int i; + unsigned int count; + + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; + } + + return count; +} + +int maps_protect; +EXPORT_SYMBOL(maps_protect); static struct fs_struct *get_fs_struct(struct task_struct *task) { @@ -147,7 +173,7 @@ static int get_nr_threads(struct task_struct *tsk) return count; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_cwd_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -159,8 +185,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); + *path = fs->pwd; + path_get(&fs->pwd); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -168,7 +194,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs return result; } -static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_root_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -180,8 +206,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); + *path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -189,27 +215,52 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ - security_ptrace(current,task) == 0)) +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + /* + * A task can always look at itself, in case it chooses + * to use system calls instead of load instructions. + */ + if (task == current) + return 0; -static int proc_pid_environ(struct task_struct *task, char * buffer) + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. + */ + if (task->parent == current && (task->ptrace & PT_PTRACED) && + task_is_stopped_or_traced(task) && + ptrace_may_access(task, PTRACE_MODE_ATTACH)) + return 0; + + /* + * Noone else is allowed. + */ + return -EPERM; +} + +struct mm_struct *mm_for_maps(struct task_struct *task) { - int res = 0; struct mm_struct *mm = get_task_mm(task); - if (mm) { - unsigned int len = mm->env_end - mm->env_start; - if (len > PAGE_SIZE) - len = PAGE_SIZE; - res = access_process_vm(task, mm->env_start, buffer, len, 0); - if (!ptrace_may_attach(task)) - res = -ESRCH; - mmput(mm); - } - return res; + if (!mm) + return NULL; + down_read(&mm->mmap_sem); + task_lock(task); + if (task->mm != mm) + goto out; + if (task->mm != current->mm && + __ptrace_may_access(task, PTRACE_MODE_READ) < 0) + goto out; + task_unlock(task); + return mm; +out: + task_unlock(task); + up_read(&mm->mmap_sem); + mmput(mm); + return NULL; } static int proc_pid_cmdline(struct task_struct *task, char * buffer) @@ -275,17 +326,15 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) */ static int proc_pid_wchan(struct task_struct *task, char *buffer) { - char *modname; - const char *sym_name; - unsigned long wchan, size, offset; - char namebuf[KSYM_NAME_LEN+1]; + unsigned long wchan; + char symname[KSYM_NAME_LEN]; wchan = get_wchan(task); - sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); - if (sym_name) - return sprintf(buffer, "%s", sym_name); - return sprintf(buffer, "%lu", wchan); + if (lookup_symbol_name(wchan, symname) < 0) + return sprintf(buffer, "%lu", wchan); + else + return sprintf(buffer, "%s", symname); } #endif /* CONFIG_KALLSYMS */ @@ -295,13 +344,79 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) */ static int proc_pid_schedstat(struct task_struct *task, char *buffer) { - return sprintf(buffer, "%lu %lu %lu\n", + return sprintf(buffer, "%llu %llu %lu\n", task->sched_info.cpu_time, task->sched_info.run_delay, - task->sched_info.pcnt); + task->sched_info.pcount); } #endif +#ifdef CONFIG_LATENCYTOP +static int lstats_show_proc(struct seq_file *m, void *v) +{ + int i; + struct inode *inode = m->private; + struct task_struct *task = get_proc_task(inode); + + if (!task) + return -ESRCH; + seq_puts(m, "Latency Top version : v0.1\n"); + for (i = 0; i < 32; i++) { + if (task->latency_record[i].backtrace[0]) { + int q; + seq_printf(m, "%i %li %li ", + task->latency_record[i].count, + task->latency_record[i].time, + task->latency_record[i].max); + for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + char sym[KSYM_NAME_LEN]; + char *c; + if (!task->latency_record[i].backtrace[q]) + break; + if (task->latency_record[i].backtrace[q] == ULONG_MAX) + break; + sprint_symbol(sym, task->latency_record[i].backtrace[q]); + c = strchr(sym, '+'); + if (c) + *c = 0; + seq_printf(m, "%s ", sym); + } + seq_printf(m, "\n"); + } + + } + put_task_struct(task); + return 0; +} + +static int lstats_open(struct inode *inode, struct file *file) +{ + return single_open(file, lstats_show_proc, inode); +} + +static ssize_t lstats_write(struct file *file, const char __user *buf, + size_t count, loff_t *offs) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + + if (!task) + return -ESRCH; + clear_all_latency_tracing(task); + put_task_struct(task); + + return count; +} + +static const struct file_operations proc_lstats_operations = { + .open = lstats_open, + .read = seq_read, + .write = lstats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif + /* The badness from the OOM killer */ unsigned long badness(struct task_struct *p, unsigned long uptime); static int proc_oom_score(struct task_struct *task, char *buffer) @@ -310,10 +425,85 @@ static int proc_oom_score(struct task_struct *task, char *buffer) struct timespec uptime; do_posix_clock_monotonic_gettime(&uptime); + read_lock(&tasklist_lock); points = badness(task, uptime.tv_sec); + read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } +struct limit_names { + char *name; + char *unit; +}; + +static const struct limit_names lnames[RLIM_NLIMITS] = { + [RLIMIT_CPU] = {"Max cpu time", "ms"}, + [RLIMIT_FSIZE] = {"Max file size", "bytes"}, + [RLIMIT_DATA] = {"Max data size", "bytes"}, + [RLIMIT_STACK] = {"Max stack size", "bytes"}, + [RLIMIT_CORE] = {"Max core file size", "bytes"}, + [RLIMIT_RSS] = {"Max resident set", "bytes"}, + [RLIMIT_NPROC] = {"Max processes", "processes"}, + [RLIMIT_NOFILE] = {"Max open files", "files"}, + [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, + [RLIMIT_AS] = {"Max address space", "bytes"}, + [RLIMIT_LOCKS] = {"Max file locks", "locks"}, + [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, + [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, + [RLIMIT_NICE] = {"Max nice priority", NULL}, + [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, + [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, +}; + +/* Display limits for a process */ +static int proc_pid_limits(struct task_struct *task, char *buffer) +{ + unsigned int i; + int count = 0; + unsigned long flags; + char *bufptr = buffer; + + struct rlimit rlim[RLIM_NLIMITS]; + + rcu_read_lock(); + if (!lock_task_sighand(task,&flags)) { + rcu_read_unlock(); + return 0; + } + memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); + unlock_task_sighand(task, &flags); + rcu_read_unlock(); + + /* + * print the file header + */ + count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", + "Limit", "Soft Limit", "Hard Limit", "Units"); + + for (i = 0; i < RLIM_NLIMITS; i++) { + if (rlim[i].rlim_cur == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-25s %-20s ", + lnames[i].name, "unlimited"); + else + count += sprintf(&bufptr[count], "%-25s %-20lu ", + lnames[i].name, rlim[i].rlim_cur); + + if (rlim[i].rlim_max == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-20s ", "unlimited"); + else + count += sprintf(&bufptr[count], "%-20lu ", + rlim[i].rlim_max); + + if (lnames[i].unit) + count += sprintf(&bufptr[count], "%-10s\n", + lnames[i].unit); + else + count += sprintf(&bufptr[count], "\n"); + } + + return count; +} + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -329,7 +519,7 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_attach(task); + allowed = ptrace_may_access(task, PTRACE_MODE_READ); put_task_struct(task); } return allowed; @@ -344,11 +534,8 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) { - error = security_inode_setattr(dentry, attr); - if (!error) - error = inode_setattr(inode, attr); - } + if (!error) + error = inode_setattr(inode, attr); return error; } @@ -356,60 +543,81 @@ static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; -extern struct seq_operations mounts_op; -struct proc_mounts { - struct seq_file m; - int event; -}; - -static int mounts_open(struct inode *inode, struct file *file) +static int mounts_open_common(struct inode *inode, struct file *file, + const struct seq_operations *op) { struct task_struct *task = get_proc_task(inode); + struct nsproxy *nsp; struct mnt_namespace *ns = NULL; + struct fs_struct *fs = NULL; + struct path root; struct proc_mounts *p; int ret = -EINVAL; if (task) { - task_lock(task); - if (task->nsproxy) { - ns = task->nsproxy->mnt_ns; + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + ns = nsp->mnt_ns; if (ns) get_mnt_ns(ns); } - task_unlock(task); + rcu_read_unlock(); + if (ns) + fs = get_fs_struct(task); put_task_struct(task); } - if (ns) { - ret = -ENOMEM; - p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); - if (p) { - file->private_data = &p->m; - ret = seq_open(file, &mounts_op); - if (!ret) { - p->m.private = ns; - p->event = ns->event; - return 0; - } - kfree(p); - } - put_mnt_ns(ns); - } + if (!ns) + goto err; + if (!fs) + goto err_put_ns; + + read_lock(&fs->lock); + root = fs->root; + path_get(&root); + read_unlock(&fs->lock); + put_fs_struct(fs); + + ret = -ENOMEM; + p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); + if (!p) + goto err_put_path; + + file->private_data = &p->m; + ret = seq_open(file, op); + if (ret) + goto err_free; + + p->m.private = p; + p->ns = ns; + p->root = root; + p->event = ns->event; + + return 0; + + err_free: + kfree(p); + err_put_path: + path_put(&root); + err_put_ns: + put_mnt_ns(ns); + err: return ret; } static int mounts_release(struct inode *inode, struct file *file) { - struct seq_file *m = file->private_data; - struct mnt_namespace *ns = m->private; - put_mnt_ns(ns); + struct proc_mounts *p = file->private_data; + path_put(&p->root); + put_mnt_ns(p->ns); return seq_release(inode, file); } static unsigned mounts_poll(struct file *file, poll_table *wait) { struct proc_mounts *p = file->private_data; - struct mnt_namespace *ns = p->m.private; + struct mnt_namespace *ns = p->ns; unsigned res = 0; poll_wait(file, &ns->poll, wait); @@ -424,6 +632,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) return res; } +static int mounts_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mounts_op); +} + static const struct file_operations proc_mounts_operations = { .open = mounts_open, .read = seq_read, @@ -432,34 +645,22 @@ static const struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; -extern struct seq_operations mountstats_op; -static int mountstats_open(struct inode *inode, struct file *file) +static int mountinfo_open(struct inode *inode, struct file *file) { - int ret = seq_open(file, &mountstats_op); + return mounts_open_common(inode, file, &mountinfo_op); +} - if (!ret) { - struct seq_file *m = file->private_data; - struct mnt_namespace *mnt_ns = NULL; - struct task_struct *task = get_proc_task(inode); - - if (task) { - task_lock(task); - if (task->nsproxy) - mnt_ns = task->nsproxy->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - task_unlock(task); - put_task_struct(task); - } +static const struct file_operations proc_mountinfo_operations = { + .open = mountinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, + .poll = mounts_poll, +}; - if (mnt_ns) - m->private = mnt_ns; - else { - seq_release(inode, file); - ret = -EINVAL; - } - } - return ret; +static int mountstats_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mountstats_op); } static const struct file_operations proc_mountstats_operations = { @@ -487,7 +688,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, count = PROC_BLOCK_SIZE; length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) + if (!(page = __get_free_page(GFP_TEMPORARY))) goto out; length = PROC_I(inode)->op.proc_read(task, (char*)page); @@ -505,6 +706,45 @@ static const struct file_operations proc_info_file_operations = { .read = proc_info_read, }; +static int proc_single_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct pid_namespace *ns; + struct pid *pid; + struct task_struct *task; + int ret; + + ns = inode->i_sb->s_fs_info; + pid = proc_pid(inode); + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); + + put_task_struct(task); + return ret; +} + +static int proc_single_open(struct inode *inode, struct file *filp) +{ + int ret; + ret = single_open(filp, proc_single_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_single_file_operations = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int mem_open(struct inode* inode, struct file* file) { file->private_data = (void*)((long)current->self_exec_id); @@ -523,11 +763,11 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; ret = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -549,7 +789,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { + if (!retval || check_mem_permission(task)) { if (!ret) ret = -EIO; break; @@ -593,11 +833,11 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; copied = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -630,7 +870,7 @@ out_no_task: } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: @@ -653,6 +893,76 @@ static const struct file_operations proc_mem_operations = { .open = mem_open, }; +static ssize_t environ_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + char *page; + unsigned long src = *ppos; + int ret = -ESRCH; + struct mm_struct *mm; + + if (!task) + goto out_no_task; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + ret = -ENOMEM; + page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) + goto out; + + ret = 0; + + mm = get_task_mm(task); + if (!mm) + goto out_free; + + while (count > 0) { + int this_len, retval, max_len; + + this_len = mm->env_end - (mm->env_start + src); + + if (this_len <= 0) + break; + + max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; + this_len = (this_len > max_len) ? max_len : this_len; + + retval = access_process_vm(task, (mm->env_start + src), + page, this_len, 0); + + if (retval <= 0) { + ret = retval; + break; + } + + if (copy_to_user(buf, page, retval)) { + ret = -EFAULT; + break; + } + + ret += retval; + src += retval; + buf += retval; + count -= retval; + } + *ppos = src; + + mmput(mm); +out_free: + free_page((unsigned long) page); +out: + put_task_struct(task); +out_no_task: + return ret; +} + +static const struct file_operations proc_environ_operations = { + .read = environ_read, +}; + static ssize_t oom_adjust_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -660,7 +970,6 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, char buffer[PROC_NUMBUF]; size_t len; int oom_adjust; - loff_t __ppos = *ppos; if (!task) return -ESRCH; @@ -668,14 +977,8 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - if (__ppos >= len) - return 0; - if (count > len-__ppos) - count = len-__ppos; - if (copy_to_user(buf, buffer + __ppos, count)) - return -EFAULT; - *ppos = __ppos + count; - return count; + + return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t oom_adjust_write(struct file *file, const char __user *buf, @@ -715,40 +1018,6 @@ static const struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -762,7 +1031,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task->audit_context)); + audit_get_loginuid(task)); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -788,7 +1057,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, /* No partial writes. */ return -EINVAL; } - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) return -ENOMEM; length = -EFAULT; @@ -815,79 +1084,27 @@ static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, }; -#endif -#ifdef CONFIG_SECCOMP -static ssize_t seccomp_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sessionid_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) { - struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); - char __buf[20]; - loff_t __ppos = *ppos; - size_t len; + struct inode * inode = file->f_path.dentry->d_inode; + struct task_struct *task = get_proc_task(inode); + ssize_t length; + char tmpbuf[TMPBUFLEN]; - if (!tsk) + if (!task) return -ESRCH; - /* no need to print the trailing zero, so use only len */ - len = sprintf(__buf, "%u\n", tsk->seccomp.mode); - put_task_struct(tsk); - if (__ppos >= len) - return 0; - if (count > len - __ppos) - count = len - __ppos; - if (copy_to_user(buf, __buf + __ppos, count)) - return -EFAULT; - *ppos = __ppos + count; - return count; -} - -static ssize_t seccomp_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); - char __buf[20], *end; - unsigned int seccomp_mode; - ssize_t result; - - result = -ESRCH; - if (!tsk) - goto out_no_task; - - /* can set it only once to be even more secure */ - result = -EPERM; - if (unlikely(tsk->seccomp.mode)) - goto out; - - result = -EFAULT; - memset(__buf, 0, sizeof(__buf)); - count = min(count, sizeof(__buf) - 1); - if (copy_from_user(__buf, buf, count)) - goto out; - - seccomp_mode = simple_strtoul(__buf, &end, 0); - if (*end == '\n') - end++; - result = -EINVAL; - if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { - tsk->seccomp.mode = seccomp_mode; - set_tsk_thread_flag(tsk, TIF_SECCOMP); - } else - goto out; - result = -EIO; - if (unlikely(!(end - __buf))) - goto out; - result = end - __buf; -out: - put_task_struct(tsk); -out_no_task: - return result; + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + audit_get_sessionid(task)); + put_task_struct(task); + return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } -static const struct file_operations proc_seccomp_operations = { - .read = seccomp_read, - .write = seccomp_write, +static const struct file_operations proc_sessionid_operations = { + .read = proc_sessionid_read, }; -#endif /* CONFIG_SECCOMP */ +#endif #ifdef CONFIG_FAULT_INJECTION static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, @@ -897,7 +1114,6 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; - loff_t __ppos = *ppos; if (!task) return -ESRCH; @@ -905,87 +1121,218 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); - if (__ppos >= len) - return 0; - if (count > len-__ppos) - count = len-__ppos; - if (copy_to_user(buf, buffer + __ppos, count)) + + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t proc_fault_inject_write(struct file * file, + const char __user * buf, size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + int make_it_fail; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) return -EFAULT; - *ppos = __ppos + count; + make_it_fail = simple_strtol(buffer, &end, 0); + if (*end == '\n') + end++; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + return -ESRCH; + task->make_it_fail = make_it_fail; + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; +} + +static const struct file_operations proc_fault_inject_operations = { + .read = proc_fault_inject_read, + .write = proc_fault_inject_write, +}; +#endif + + +#ifdef CONFIG_SCHED_DEBUG +/* + * Print out various scheduling related per-task fields: + */ +static int sched_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + WARN_ON(!inode); + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct task_struct *p; + + WARN_ON(!inode); + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_set_task(p); + + put_task_struct(p); + return count; } -static ssize_t proc_fault_inject_write(struct file * file, - const char __user * buf, size_t count, loff_t *ppos) +static int sched_open(struct inode *inode, struct file *filp) +{ + int ret; + + ret = single_open(filp, sched_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_pid_sched_operations = { + .open = sched_open, + .read = seq_read, + .write = sched_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif + +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) { struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - int make_it_fail; + struct mm_struct *mm; + struct file *exe_file; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - make_it_fail = simple_strtol(buffer, &end, 0); - if (*end == '\n') - end++; - task = get_proc_task(file->f_dentry->d_inode); + task = get_proc_task(inode); if (!task) - return -ESRCH; - task->make_it_fail = make_it_fail; + return -ENOENT; + mm = get_task_mm(task); put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; } -static const struct file_operations proc_fault_inject_operations = { - .read = proc_fault_inject_read, - .write = proc_fault_inject_write, -}; -#endif - static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int error = -EACCES; /* We don't need a base pointer in the /proc filesystem */ - path_release(nd); + path_put(&nd->path); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); nd->last_type = LAST_BIND; out: return ERR_PTR(error); } -static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, - char __user *buffer, int buflen) +static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - struct inode * inode; - char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; + char *tmp = (char*)__get_free_page(GFP_TEMPORARY); + char *pathname; int len; if (!tmp) return -ENOMEM; - - inode = dentry->d_inode; - path = d_path(dentry, mnt, tmp, PAGE_SIZE); - len = PTR_ERR(path); - if (IS_ERR(path)) + + pathname = d_path(path, tmp, PAGE_SIZE); + len = PTR_ERR(pathname); + if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - path; + len = tmp + PAGE_SIZE - 1 - pathname; if (len > buflen) len = buflen; - if (copy_to_user(buffer, path, len)) + if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: free_page((unsigned long)tmp); @@ -996,20 +1343,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b { int error = -EACCES; struct inode *inode = dentry->d_inode; - struct dentry *de; - struct vfsmount *mnt = NULL; + struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &path); if (error) goto out; - error = do_proc_readlink(de, mnt, buffer, buflen); - dput(de); - mntput(mnt); + error = do_proc_readlink(&path, buffer, buflen); + path_put(&path); out: return error; } @@ -1031,7 +1376,7 @@ static int task_dumpable(struct task_struct *task) task_lock(task); mm = task->mm; if (mm) - dumpable = mm->dumpable; + dumpable = get_dumpable(mm); task_unlock(task); if(dumpable == 1) return 1; @@ -1155,7 +1500,8 @@ static struct dentry_operations pid_dentry_operations = /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *); +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, + struct task_struct *, const void *); /* * Fill a directory entry. @@ -1171,7 +1517,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct tas */ static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, char *name, int len, - instantiate_t instantiate, struct task_struct *task, void *ptr) + instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; @@ -1233,7 +1579,9 @@ out: return ~0U; } -static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +#define PROC_FDINFO_MAX 64 + +static int proc_fd_info(struct inode *inode, struct path *path, char *info) { struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; @@ -1252,8 +1600,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - *mnt = mntget(file->f_path.mnt); - *dentry = dget(file->f_path.dentry); + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } + if (info) + snprintf(info, PROC_FDINFO_MAX, + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, + file->f_flags); spin_unlock(&files->file_lock); put_files_struct(files); return 0; @@ -1264,6 +1620,11 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm return -ENOENT; } +static int proc_fd_link(struct inode *inode, struct path *path) +{ + return proc_fd_info(inode, path, NULL); +} + static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -1306,9 +1667,9 @@ static struct dentry_operations tid_fd_dentry_operations = }; static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(unsigned *)ptr; + unsigned fd = *(const unsigned *)ptr; struct file *file; struct files_struct *files; struct inode *inode; @@ -1359,7 +1720,9 @@ out_iput: goto out; } -static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_lookupfd_common(struct inode *dir, + struct dentry *dentry, + instantiate_t instantiate) { struct task_struct *task = get_proc_task(dir); unsigned fd = name_to_int(dentry); @@ -1370,37 +1733,27 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (fd == ~0U) goto out; - result = proc_fd_instantiate(dir, dentry, task, &fd); + result = instantiate(dir, dentry, task, &fd); out: put_task_struct(task); out_no_task: return result; } -static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int fd) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", fd); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_fd_instantiate, task, &fd); -} - -static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +static int proc_readfd_common(struct file * filp, void * dirent, + filldir_t filldir, instantiate_t instantiate) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *p = get_proc_task(inode); - unsigned int fd, tid, ino; + unsigned int fd, ino; int retval; struct files_struct * files; - struct fdtable *fdt; retval = -ENOENT; if (!p) goto out_no_task; retval = 0; - tid = p->pid; fd = filp->f_pos; switch (fd) { @@ -1418,16 +1771,20 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) if (!files) goto out; rcu_read_lock(); - fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < fdt->max_fds; + fd < files_fdtable(files)->max_fds; fd++, filp->f_pos++) { + char name[PROC_NUMBUF]; + int len; if (!fcheck_files(files, fd)) continue; rcu_read_unlock(); - if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) { + len = snprintf(name, sizeof(name), "%d", fd); + if (proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, + p, &fd) < 0) { rcu_read_lock(); break; } @@ -1442,6 +1799,32 @@ out_no_task: return retval; } +static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); +} + +static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); +} + +static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[PROC_FDINFO_MAX]; + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); + if (!err) + err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); + return err; +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = nonseekable_open, + .read = proc_fdinfo_read, +}; + static const struct file_operations proc_fd_operations = { .read = generic_read_dir, .readdir = proc_readfd, @@ -1473,10 +1856,62 @@ static const struct inode_operations proc_fd_inode_operations = { .setattr = proc_setattr, }; +static struct dentry *proc_fdinfo_instantiate(struct inode *dir, + struct dentry *dentry, struct task_struct *task, const void *ptr) +{ + unsigned fd = *(unsigned *)ptr; + struct inode *inode; + struct proc_inode *ei; + struct dentry *error = ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + ei = PROC_I(inode); + ei->fd = fd; + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; + dentry->d_op = &tid_fd_dentry_operations; + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, NULL)) + error = NULL; + + out: + return error; +} + +static struct dentry *proc_lookupfdinfo(struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); +} + +static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, + proc_fdinfo_instantiate); +} + +static const struct file_operations proc_fdinfo_operations = { + .read = generic_read_dir, + .readdir = proc_readfdinfo, +}; + +/* + * proc directories can do almost nothing.. + */ +static const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, + .setattr = proc_setattr, +}; + + static struct dentry *proc_pident_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct pid_entry *p = ptr; + const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-EINVAL); @@ -1505,13 +1940,13 @@ out: static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, - struct pid_entry *ents, + const struct pid_entry *ents, unsigned int nents) { struct inode *inode; struct dentry *error; struct task_struct *task = get_proc_task(dir); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); inode = NULL; @@ -1540,8 +1975,8 @@ out_no_task: return error; } -static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, struct pid_entry *p) +static int proc_pident_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, const struct pid_entry *p) { return proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_pident_instantiate, task, p); @@ -1549,14 +1984,13 @@ static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t fil static int proc_pident_readdir(struct file *filp, void *dirent, filldir_t filldir, - struct pid_entry *ents, unsigned int nents) + const struct pid_entry *ents, unsigned int nents) { int i; - int pid; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; ino_t ino; int ret; @@ -1565,7 +1999,6 @@ static int proc_pident_readdir(struct file *filp, goto out_no_task; ret = 0; - pid = task->pid; i = filp->f_pos; switch (i) { case 0: @@ -1647,7 +2080,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out; length = -ENOMEM; - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -1671,7 +2104,7 @@ static const struct file_operations proc_pid_attr_operations = { .write = proc_pid_attr_write, }; -static struct pid_entry attr_dir_stuff[] = { +static const struct pid_entry attr_dir_stuff[] = { REG("current", S_IRUGO|S_IWUGO, pid_attr), REG("prev", S_IRUGO, pid_attr), REG("exec", S_IRUGO|S_IWUGO, pid_attr), @@ -1707,21 +2140,114 @@ static const struct inode_operations proc_attr_dir_inode_operations = { #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + size_t len; + int ret; + + if (!task) + return -ESRCH; + + ret = 0; + mm = get_task_mm(task); + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%08lx\n", + ((mm->flags & MMF_DUMP_FILTER_MASK) >> + MMF_DUMP_FILTER_SHIFT)); + mmput(mm); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + } + + put_task_struct(task); + + return ret; +} + +static ssize_t proc_coredump_filter_write(struct file *file, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF], *end; + unsigned int val; + int ret; + int i; + unsigned long mask; + + ret = -EFAULT; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + goto out_no_task; + + ret = -EINVAL; + val = (unsigned int)simple_strtoul(buffer, &end, 0); + if (*end == '\n') + end++; + if (end - buffer == 0) + goto out_no_task; + + ret = -ESRCH; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + goto out_no_task; + + ret = end - buffer; + mm = get_task_mm(task); + if (!mm) + goto out_no_mm; + + for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { + if (val & mask) + set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + else + clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + } + + mmput(mm); + out_no_mm: + put_task_struct(task); + out_no_task: + return ret; +} + +static const struct file_operations proc_coredump_filter_operations = { + .read = proc_coredump_filter_read, + .write = proc_coredump_filter_write, +}; +#endif + /* * /proc/self: */ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + if (!tgid) + return ERR_PTR(-ENOENT); + sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -1737,7 +2263,7 @@ static const struct inode_operations proc_self_inode_operations = { * that properly belong to the /proc filesystem, as they describe * describe something that is process related. */ -static struct pid_entry proc_base_stuff[] = { +static const struct pid_entry proc_base_stuff[] = { NOD("self", S_IFLNK|S_IRWXUGO, &proc_self_inode_operations, NULL, {}), }; @@ -1766,9 +2292,9 @@ static struct dentry_operations proc_base_dentry_operations = }; static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct pid_entry *p = ptr; + const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-EINVAL); @@ -1816,7 +2342,7 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) { struct dentry *error; struct task_struct *task = get_proc_task(dir); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); @@ -1842,37 +2368,90 @@ out_no_task: return error; } -static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, struct pid_entry *p) +static int proc_base_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, const struct pid_entry *p) { return proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_base_instantiate, task, p); } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) -{ +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) +{ + u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; + + if (!whole) { + rchar = task->rchar; + wchar = task->wchar; + syscr = task->syscr; + syscw = task->syscw; + memcpy(&ioac, &task->ioac, sizeof(ioac)); + } else { + unsigned long flags; + struct task_struct *t = task; + rchar = wchar = syscr = syscw = 0; + memset(&ioac, 0, sizeof(ioac)); + + rcu_read_lock(); + do { + rchar += t->rchar; + wchar += t->wchar; + syscr += t->syscr; + syscw += t->syscw; + + ioac.read_bytes += t->ioac.read_bytes; + ioac.write_bytes += t->ioac.write_bytes; + ioac.cancelled_write_bytes += + t->ioac.cancelled_write_bytes; + t = next_thread(t); + } while (t != task); + rcu_read_unlock(); + + if (lock_task_sighand(task, &flags)) { + struct signal_struct *sig = task->signal; + + rchar += sig->rchar; + wchar += sig->wchar; + syscr += sig->syscr; + syscw += sig->syscw; + + ioac.read_bytes += sig->ioac.read_bytes; + ioac.write_bytes += sig->ioac.write_bytes; + ioac.cancelled_write_bytes += + sig->ioac.cancelled_write_bytes; + + unlock_task_sighand(task, &flags); + } + } + return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)rchar, + (unsigned long long)wchar, + (unsigned long long)syscr, + (unsigned long long)syscw, + (unsigned long long)ioac.read_bytes, + (unsigned long long)ioac.write_bytes, + (unsigned long long)ioac.cancelled_write_bytes); } -#endif + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); +} + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ /* * Thread groups @@ -1880,31 +2459,38 @@ static int proc_pid_io_accounting(struct task_struct *task, char *buffer) static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; -static struct pid_entry tgid_base_stuff[] = { +static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), - INF("environ", S_IRUSR, pid_environ), + DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), +#ifdef CONFIG_NET + DIR("net", S_IRUGO|S_IXUGO, net), +#endif + REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), +#ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), +#endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tgid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tgid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), #endif REG("mem", S_IRUSR|S_IWUSR, mem), -#ifdef CONFIG_SECCOMP - REG("seccomp", S_IRUSR|S_IWUSR, seccomp), -#endif LNK("cwd", cwd), LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), + REG("mountinfo", S_IRUGO, mountinfo), REG("mountstats", S_IRUSR, mountstats), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), @@ -1915,19 +2501,29 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, lstats), +#endif +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), + REG("sessionid", S_IRUGO, sessionid), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), +#endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, tgid_io_accounting), #endif }; @@ -1954,48 +2550,28 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .setattr = proc_setattr, }; -/** - * proc_flush_task - Remove dcache entries for @task from the /proc dcache. - * - * @task: task that should be flushed. - * - * Looks in the dcache for - * /proc/@pid - * /proc/@tgid/task/@pid - * if either directory is present flushes it and all of it'ts children - * from the dcache. - * - * It is safe and reasonable to cache /proc entries for a task until - * that task exits. After that they just clog up the dcache with - * useless entries, possibly causing useful dcache entries to be - * flushed instead. This routine is proved to flush those useless - * dcache entries at process exit time. - * - * NOTE: This routine is just an optimization so it does not guarantee - * that no dcache entries will exist at process exit time it - * just makes it very unlikely that any will persist. - */ -void proc_flush_task(struct task_struct *task) +static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; char buf[PROC_NUMBUF]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); - dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", pid); + dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); + if (!(current->flags & PF_EXITING)) + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (thread_group_leader(task)) + if (tgid == 0) goto out; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); - leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", tgid); + leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -2006,7 +2582,7 @@ void proc_flush_task(struct task_struct *task) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { shrink_dcache_parent(dentry); @@ -2021,9 +2597,55 @@ out: return; } +/** + * proc_flush_task - Remove dcache entries for @task from the /proc dcache. + * @task: task that should be flushed. + * + * When flushing dentries from proc, one needs to flush them from global + * proc (proc_mnt) and from all the namespaces' procs this task was seen + * in. This call is supposed to do all of this job. + * + * Looks in the dcache for + * /proc/@pid + * /proc/@tgid/task/@pid + * if either directory is present flushes it and all of it'ts children + * from the dcache. + * + * It is safe and reasonable to cache /proc entries for a task until + * that task exits. After that they just clog up the dcache with + * useless entries, possibly causing useful dcache entries to be + * flushed instead. This routine is proved to flush those useless + * dcache entries at process exit time. + * + * NOTE: This routine is just an optimization so it does not guarantee + * that no dcache entries will exist at process exit time it + * just makes it very unlikely that any will persist. + */ + +void proc_flush_task(struct task_struct *task) +{ + int i; + struct pid *pid, *tgid = NULL; + struct upid *upid; + + pid = task_pid(task); + if (thread_group_leader(task)) + tgid = task_tgid(task); + + for (i = 0; i <= pid->level; i++) { + upid = &pid->numbers[i]; + proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, + tgid ? tgid->numbers[i].nr : 0); + } + + upid = &pid->numbers[pid->level]; + if (upid->nr == 1) + pid_ns_release_proc(upid->ns); +} + static struct dentry *proc_pid_instantiate(struct inode *dir, struct dentry * dentry, - struct task_struct *task, void *ptr) + struct task_struct *task, const void *ptr) { struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; @@ -2036,10 +2658,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2056,6 +2677,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; unsigned tgid; + struct pid_namespace *ns; result = proc_base_lookup(dir, dentry); if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) @@ -2065,8 +2687,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (tgid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tgid); + task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2083,18 +2706,23 @@ out: * Find the first task with tgid >= tgid * */ -static struct task_struct *next_tgid(unsigned int tgid) -{ +struct tgid_iter { + unsigned int tgid; struct task_struct *task; +}; +static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) +{ struct pid *pid; + if (iter.task) + put_task_struct(iter.task); rcu_read_lock(); retry: - task = NULL; - pid = find_ge_pid(tgid); + iter.task = NULL; + pid = find_ge_pid(iter.tgid, ns); if (pid) { - tgid = pid->nr + 1; - task = pid_task(pid, PIDTYPE_PID); + iter.tgid = pid_nr_ns(pid, ns); + iter.task = pid_task(pid, PIDTYPE_PID); /* What we to know is if the pid we have find is the * pid of a thread_group_leader. Testing for task * being a thread_group_leader is the obvious thing @@ -2107,23 +2735,25 @@ retry: * found doesn't happen to be a thread group leader. * As we don't care in the case of readdir. */ - if (!task || !has_group_leader_pid(task)) + if (!iter.task || !has_group_leader_pid(iter.task)) { + iter.tgid += 1; goto retry; - get_task_struct(task); + } + get_task_struct(iter.task); } rcu_read_unlock(); - return task; + return iter; } #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int tgid) + struct tgid_iter iter) { char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", tgid); + int len = snprintf(name, sizeof(name), "%d", iter.tgid); return proc_fill_cache(filp, dirent, filldir, name, len, - proc_pid_instantiate, task, NULL); + proc_pid_instantiate, iter.task, NULL); } /* for the /proc/ directory itself, after non-process stuff has been done */ @@ -2131,26 +2761,27 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); - struct task_struct *task; - int tgid; + struct tgid_iter iter; + struct pid_namespace *ns; if (!reaper) goto out_no_task; for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - struct pid_entry *p = &proc_base_stuff[nr]; + const struct pid_entry *p = &proc_base_stuff[nr]; if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) goto out; } - tgid = filp->f_pos - TGID_OFFSET; - for (task = next_tgid(tgid); - task; - put_task_struct(task), task = next_tgid(tgid + 1)) { - tgid = task->pid; - filp->f_pos = tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { - put_task_struct(task); + ns = filp->f_dentry->d_sb->s_fs_info; + iter.task = NULL; + iter.tgid = filp->f_pos - TGID_OFFSET; + for (iter = next_tgid(ns, iter); + iter.task; + iter.tgid += 1, iter = next_tgid(ns, iter)) { + filp->f_pos = iter.tgid + TGID_OFFSET; + if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + put_task_struct(iter.task); goto out; } } @@ -2164,29 +2795,33 @@ out_no_task: /* * Tasks */ -static struct pid_entry tid_base_stuff[] = { +static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, fd), - INF("environ", S_IRUSR, pid_environ), + DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), + REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), +#ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), +#endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), #endif REG("mem", S_IRUSR|S_IWUSR, mem), -#ifdef CONFIG_SECCOMP - REG("seccomp", S_IRUSR|S_IWUSR, seccomp), -#endif LNK("cwd", cwd), LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), -#ifdef CONFIG_MMU + REG("mountinfo", S_IRUGO, mountinfo), +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), @@ -2197,17 +2832,27 @@ static struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, lstats), +#endif +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), + REG("sessionid", S_IRUSR, sessionid), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tid_io_accounting), +#endif }; static int proc_tid_base_readdir(struct file * filp, @@ -2234,7 +2879,7 @@ static const struct inode_operations proc_tid_base_inode_operations = { }; static struct dentry *proc_task_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; @@ -2246,10 +2891,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 3; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2267,6 +2911,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; + struct pid_namespace *ns; if (!leader) goto out_no_task; @@ -2275,14 +2920,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (tid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tid); + task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; - if (leader->tgid != task->tgid) + if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dir, dentry, task, NULL); @@ -2307,14 +2953,14 @@ out_no_task: * threads past it. */ static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr) + int tid, int nr, struct pid_namespace *ns) { struct task_struct *pos; rcu_read_lock(); /* Attempt to start with the pid of a thread */ if (tid && (nr > 0)) { - pos = find_task_by_pid(tid); + pos = find_task_by_pid_ns(tid, ns); if (pos && (pos->group_leader == leader)) goto found; } @@ -2383,6 +3029,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi ino_t ino; int tid; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ + struct pid_namespace *ns; task = get_proc_task(inode); if (!task) @@ -2416,16 +3063,17 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - tid = filp->f_version; + ns = filp->f_dentry->d_sb->s_fs_info; + tid = (int)filp->f_version; filp->f_version = 0; - for (task = first_tid(leader, tid, pos - 2); + for (task = first_tid(leader, tid, pos - 2, ns); task; task = next_tid(task), pos++) { - tid = task->pid; + tid = task_pid_nr_ns(task, ns); if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - filp->f_version = tid; + filp->f_version = (u64)tid; put_task_struct(task); break; }