X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fproc%2Fbase.c;h=aef6d55b7de6761913d78a6ab594d5ddce7119ec;hb=d72f71eb0edd629c95715aa7305b0259d3581e34;hp=a17c26859074e80f47088e4e5a44d900190ed66f;hpb=9fcc2d15b14894aa53e5e8b7fd5d6e3ca558e5df;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/proc/base.c b/fs/proc/base.c index a17c268..aef6d55 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -53,9 +53,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -63,11 +65,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -88,10 +92,6 @@ * in /proc for a task before it execs a suid executable. */ - -/* Worst case buffer size needed for holding an integer. */ -#define PROC_NUMBUF 13 - struct pid_entry { char *name; int len; @@ -110,24 +110,41 @@ struct pid_entry { .op = OP, \ } -#define DIR(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFDIR|(MODE)), \ - &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \ - {} ) -#define LNK(NAME, OTYPE) \ +#define DIR(NAME, MODE, iops, fops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) +#define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ - { .proc_get_link = &proc_##OTYPE##_link } ) -#define REG(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), NULL, \ - &proc_##OTYPE##_operations, {}) -#define INF(NAME, MODE, OTYPE) \ + { .proc_get_link = get_link } ) +#define REG(NAME, MODE, fops) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) +#define INF(NAME, MODE, read) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ - { .proc_read = &proc_##OTYPE } ) + { .proc_read = read } ) +#define ONE(NAME, MODE, show) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_single_file_operations, \ + { .proc_show = show } ) + +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) +{ + unsigned int i; + unsigned int count; + + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; + } -int maps_protect; -EXPORT_SYMBOL(maps_protect); + return count; +} static struct fs_struct *get_fs_struct(struct task_struct *task) { @@ -142,7 +159,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) static int get_nr_threads(struct task_struct *tsk) { - /* Must be called with the rcu_read_lock held */ unsigned long flags; int count = 0; @@ -153,7 +169,7 @@ static int get_nr_threads(struct task_struct *tsk) return count; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_cwd_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -165,8 +181,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); + *path = fs->pwd; + path_get(&fs->pwd); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -174,7 +190,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs return result; } -static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_root_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -186,8 +202,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); + *path = fs->root; + path_get(&fs->root); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -195,12 +211,57 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ - security_ptrace(current,task) == 0)) +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + /* + * A task can always look at itself, in case it chooses + * to use system calls instead of load instructions. + */ + if (task == current) + return 0; + + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. + */ + if (task_is_stopped_or_traced(task)) { + int match; + rcu_read_lock(); + match = (tracehook_tracer_task(task) == current); + rcu_read_unlock(); + if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) + return 0; + } + + /* + * Noone else is allowed. + */ + return -EPERM; +} + +struct mm_struct *mm_for_maps(struct task_struct *task) +{ + struct mm_struct *mm = get_task_mm(task); + if (!mm) + return NULL; + down_read(&mm->mmap_sem); + task_lock(task); + if (task->mm != mm) + goto out; + if (task->mm != current->mm && + __ptrace_may_access(task, PTRACE_MODE_READ) < 0) + goto out; + task_unlock(task); + return mm; +out: + task_unlock(task); + up_read(&mm->mmap_sem); + mmput(mm); + return NULL; +} static int proc_pid_cmdline(struct task_struct *task, char * buffer) { @@ -245,9 +306,9 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) struct mm_struct *mm = get_task_mm(task); if (mm) { unsigned int nwords = 0; - do + do { nwords += 2; - while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ res = nwords * sizeof(mm->saved_auxv[0]); if (res > PAGE_SIZE) res = PAGE_SIZE; @@ -277,6 +338,37 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +#ifdef CONFIG_STACKTRACE + +#define MAX_STACK_TRACE_DEPTH 64 + +static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct stack_trace trace; + unsigned long *entries; + int i; + + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + trace.nr_entries = 0; + trace.max_entries = MAX_STACK_TRACE_DEPTH; + trace.entries = entries; + trace.skip = 0; + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%p>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + kfree(entries); + + return 0; +} +#endif + #ifdef CONFIG_SCHEDSTATS /* * Provides /proc/PID/schedstat @@ -284,12 +376,78 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) static int proc_pid_schedstat(struct task_struct *task, char *buffer) { return sprintf(buffer, "%llu %llu %lu\n", - task->sched_info.cpu_time, - task->sched_info.run_delay, + (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); } #endif +#ifdef CONFIG_LATENCYTOP +static int lstats_show_proc(struct seq_file *m, void *v) +{ + int i; + struct inode *inode = m->private; + struct task_struct *task = get_proc_task(inode); + + if (!task) + return -ESRCH; + seq_puts(m, "Latency Top version : v0.1\n"); + for (i = 0; i < 32; i++) { + if (task->latency_record[i].backtrace[0]) { + int q; + seq_printf(m, "%i %li %li ", + task->latency_record[i].count, + task->latency_record[i].time, + task->latency_record[i].max); + for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + char sym[KSYM_SYMBOL_LEN]; + char *c; + if (!task->latency_record[i].backtrace[q]) + break; + if (task->latency_record[i].backtrace[q] == ULONG_MAX) + break; + sprint_symbol(sym, task->latency_record[i].backtrace[q]); + c = strchr(sym, '+'); + if (c) + *c = 0; + seq_printf(m, "%s ", sym); + } + seq_printf(m, "\n"); + } + + } + put_task_struct(task); + return 0; +} + +static int lstats_open(struct inode *inode, struct file *file) +{ + return single_open(file, lstats_show_proc, inode); +} + +static ssize_t lstats_write(struct file *file, const char __user *buf, + size_t count, loff_t *offs) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + + if (!task) + return -ESRCH; + clear_all_latency_tracing(task); + put_task_struct(task); + + return count; +} + +static const struct file_operations proc_lstats_operations = { + .open = lstats_open, + .read = seq_read, + .write = lstats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif + /* The badness from the OOM killer */ unsigned long badness(struct task_struct *p, unsigned long uptime); static int proc_oom_score(struct task_struct *task, char *buffer) @@ -325,6 +483,7 @@ static const struct limit_names lnames[RLIM_NLIMITS] = { [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, [RLIMIT_NICE] = {"Max nice priority", NULL}, [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, + [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, }; /* Display limits for a process */ @@ -337,14 +496,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) struct rlimit rlim[RLIM_NLIMITS]; - rcu_read_lock(); - if (!lock_task_sighand(task,&flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(task, &flags)) return 0; - } memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); - rcu_read_unlock(); /* * print the file header @@ -376,6 +531,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) return count; } +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK +static int proc_pid_syscall(struct task_struct *task, char *buffer) +{ + long nr; + unsigned long args[6], sp, pc; + + if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + return sprintf(buffer, "running\n"); + + if (nr < 0) + return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + + return sprintf(buffer, + "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + nr, + args[0], args[1], args[2], args[3], args[4], args[5], + sp, pc); +} +#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -391,7 +566,7 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_attach(task); + allowed = ptrace_may_access(task, PTRACE_MODE_READ); put_task_struct(task); } return allowed; @@ -415,17 +590,14 @@ static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; -extern struct seq_operations mounts_op; -struct proc_mounts { - struct seq_file m; - int event; -}; - -static int mounts_open(struct inode *inode, struct file *file) +static int mounts_open_common(struct inode *inode, struct file *file, + const struct seq_operations *op) { struct task_struct *task = get_proc_task(inode); struct nsproxy *nsp; struct mnt_namespace *ns = NULL; + struct fs_struct *fs = NULL; + struct path root; struct proc_mounts *p; int ret = -EINVAL; @@ -438,40 +610,61 @@ static int mounts_open(struct inode *inode, struct file *file) get_mnt_ns(ns); } rcu_read_unlock(); - + if (ns) + fs = get_fs_struct(task); put_task_struct(task); } - if (ns) { - ret = -ENOMEM; - p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); - if (p) { - file->private_data = &p->m; - ret = seq_open(file, &mounts_op); - if (!ret) { - p->m.private = ns; - p->event = ns->event; - return 0; - } - kfree(p); - } - put_mnt_ns(ns); - } + if (!ns) + goto err; + if (!fs) + goto err_put_ns; + + read_lock(&fs->lock); + root = fs->root; + path_get(&root); + read_unlock(&fs->lock); + put_fs_struct(fs); + + ret = -ENOMEM; + p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); + if (!p) + goto err_put_path; + + file->private_data = &p->m; + ret = seq_open(file, op); + if (ret) + goto err_free; + + p->m.private = p; + p->ns = ns; + p->root = root; + p->event = ns->event; + + return 0; + + err_free: + kfree(p); + err_put_path: + path_put(&root); + err_put_ns: + put_mnt_ns(ns); + err: return ret; } static int mounts_release(struct inode *inode, struct file *file) { - struct seq_file *m = file->private_data; - struct mnt_namespace *ns = m->private; - put_mnt_ns(ns); + struct proc_mounts *p = file->private_data; + path_put(&p->root); + put_mnt_ns(p->ns); return seq_release(inode, file); } static unsigned mounts_poll(struct file *file, poll_table *wait) { struct proc_mounts *p = file->private_data; - struct mnt_namespace *ns = p->m.private; + struct mnt_namespace *ns = p->ns; unsigned res = 0; poll_wait(file, &ns->poll, wait); @@ -486,6 +679,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) return res; } +static int mounts_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mounts_op); +} + static const struct file_operations proc_mounts_operations = { .open = mounts_open, .read = seq_read, @@ -494,38 +692,22 @@ static const struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; -extern struct seq_operations mountstats_op; -static int mountstats_open(struct inode *inode, struct file *file) +static int mountinfo_open(struct inode *inode, struct file *file) { - int ret = seq_open(file, &mountstats_op); - - if (!ret) { - struct seq_file *m = file->private_data; - struct nsproxy *nsp; - struct mnt_namespace *mnt_ns = NULL; - struct task_struct *task = get_proc_task(inode); - - if (task) { - rcu_read_lock(); - nsp = task_nsproxy(task); - if (nsp) { - mnt_ns = nsp->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - } - rcu_read_unlock(); + return mounts_open_common(inode, file, &mountinfo_op); +} - put_task_struct(task); - } +static const struct file_operations proc_mountinfo_operations = { + .open = mountinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, + .poll = mounts_poll, +}; - if (mnt_ns) - m->private = mnt_ns; - else { - seq_release(inode, file); - ret = -EINVAL; - } - } - return ret; +static int mountstats_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mountstats_op); } static const struct file_operations proc_mountstats_operations = { @@ -571,6 +753,45 @@ static const struct file_operations proc_info_file_operations = { .read = proc_info_read, }; +static int proc_single_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct pid_namespace *ns; + struct pid *pid; + struct task_struct *task; + int ret; + + ns = inode->i_sb->s_fs_info; + pid = proc_pid(inode); + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); + + put_task_struct(task); + return ret; +} + +static int proc_single_open(struct inode *inode, struct file *filp) +{ + int ret; + ret = single_open(filp, proc_single_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_single_file_operations = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int mem_open(struct inode* inode, struct file* file) { file->private_data = (void*)((long)current->self_exec_id); @@ -589,7 +810,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; ret = -ENOMEM; @@ -615,7 +836,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { + if (!retval || check_mem_permission(task)) { if (!ret) ret = -EIO; break; @@ -659,7 +880,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; copied = -ENOMEM; @@ -696,7 +917,7 @@ out_no_task: } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: @@ -731,7 +952,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!task) goto out_no_task; - if (!ptrace_may_attach(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out; ret = -ENOMEM; @@ -844,42 +1065,6 @@ static const struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; -#ifdef CONFIG_MMU -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; -#endif - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -893,7 +1078,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task->audit_context)); + audit_get_loginuid(task)); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -946,6 +1131,26 @@ static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, }; + +static ssize_t proc_sessionid_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_path.dentry->d_inode; + struct task_struct *task = get_proc_task(inode); + ssize_t length; + char tmpbuf[TMPBUFLEN]; + + if (!task) + return -ESRCH; + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + audit_get_sessionid(task)); + put_task_struct(task); + return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); +} + +static const struct file_operations proc_sessionid_operations = { + .read = proc_sessionid_read, +}; #endif #ifdef CONFIG_FAULT_INJECTION @@ -1000,6 +1205,7 @@ static const struct file_operations proc_fault_inject_operations = { }; #endif + #ifdef CONFIG_SCHED_DEBUG /* * Print out various scheduling related per-task fields: @@ -1009,8 +1215,6 @@ static int sched_show(struct seq_file *m, void *v) struct inode *inode = m->private; struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1028,8 +1232,6 @@ sched_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1063,45 +1265,117 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int error = -EACCES; /* We don't need a base pointer in the /proc filesystem */ - path_release(nd); + path_put(&nd->path); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); nd->last_type = LAST_BIND; out: return ERR_PTR(error); } -static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, - char __user *buffer, int buflen) +static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - struct inode * inode; char *tmp = (char*)__get_free_page(GFP_TEMPORARY); - char *path; + char *pathname; int len; if (!tmp) return -ENOMEM; - inode = dentry->d_inode; - path = d_path(dentry, mnt, tmp, PAGE_SIZE); - len = PTR_ERR(path); - if (IS_ERR(path)) + pathname = d_path(path, tmp, PAGE_SIZE); + len = PTR_ERR(pathname); + if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - path; + len = tmp + PAGE_SIZE - 1 - pathname; if (len > buflen) len = buflen; - if (copy_to_user(buffer, path, len)) + if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: free_page((unsigned long)tmp); @@ -1112,20 +1386,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b { int error = -EACCES; struct inode *inode = dentry->d_inode; - struct dentry *de; - struct vfsmount *mnt = NULL; + struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &path); if (error) goto out; - error = do_proc_readlink(de, mnt, buffer, buflen); - dput(de); - mntput(mnt); + error = do_proc_readlink(&path, buffer, buflen); + path_put(&path); out: return error; } @@ -1159,6 +1431,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st { struct inode * inode; struct proc_inode *ei; + const struct cred *cred; /* We need a new inode */ @@ -1178,11 +1451,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st if (!ei->pid) goto out_unlock; - inode->i_uid = 0; - inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } security_task_to_inode(task, inode); @@ -1198,6 +1472,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat { struct inode *inode = dentry->d_inode; struct task_struct *task; + const struct cred *cred; + generic_fillattr(inode, stat); rcu_read_lock(); @@ -1207,8 +1483,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + cred = __task_cred(task); + stat->uid = cred->euid; + stat->gid = cred->egid; } } rcu_read_unlock(); @@ -1236,11 +1513,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + const struct cred *cred; + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1263,7 +1545,7 @@ static int pid_delete_dentry(struct dentry * dentry) return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; } -static struct dentry_operations pid_dentry_operations = +static const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, @@ -1352,8 +1634,7 @@ out: #define PROC_FDINFO_MAX 64 -static int proc_fd_info(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt, char *info) +static int proc_fd_info(struct inode *inode, struct path *path, char *info) { struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; @@ -1372,10 +1653,10 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - if (mnt) - *mnt = mntget(file->f_path.mnt); - if (dentry) - *dentry = dget(file->f_path.dentry); + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } if (info) snprintf(info, PROC_FDINFO_MAX, "pos:\t%lli\n" @@ -1392,10 +1673,9 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, return -ENOENT; } -static int proc_fd_link(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt) +static int proc_fd_link(struct inode *inode, struct path *path) { - return proc_fd_info(inode, dentry, mnt, NULL); + return proc_fd_info(inode, path, NULL); } static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1404,6 +1684,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct task_struct *task = get_proc_task(inode); int fd = proc_fd(inode); struct files_struct *files; + const struct cred *cred; if (task) { files = get_files_struct(task); @@ -1413,8 +1694,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1433,7 +1717,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static struct dentry_operations tid_fd_dentry_operations = +static const struct dentry_operations tid_fd_dentry_operations = { .d_revalidate = tid_fd_revalidate, .d_delete = pid_delete_dentry, @@ -1467,9 +1751,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, file = fcheck_files(files, fd); if (!file) goto out_unlock; - if (file->f_mode & 1) + if (file->f_mode & FMODE_READ) inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) inode->i_mode |= S_IWUSR | S_IXUSR; spin_unlock(&files->file_lock); put_files_struct(files); @@ -1522,7 +1806,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, unsigned int fd, ino; int retval; struct files_struct * files; - struct fdtable *fdt; retval = -ENOENT; if (!p) @@ -1545,9 +1828,8 @@ static int proc_readfd_common(struct file * filp, void * dirent, if (!files) goto out; rcu_read_lock(); - fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < fdt->max_fds; + fd < files_fdtable(files)->max_fds; fd++, filp->f_pos++) { char name[PROC_NUMBUF]; int len; @@ -1589,7 +1871,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp); + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); if (!err) err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); return err; @@ -1609,8 +1891,7 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int proc_fd_permission(struct inode *inode, int mask) { int rv; @@ -1718,13 +1999,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, const struct pid_entry *ents, unsigned int nents) { - struct inode *inode; struct dentry *error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); - inode = NULL; if (!task) goto out_no_task; @@ -1880,12 +2159,12 @@ static const struct file_operations proc_pid_attr_operations = { }; static const struct pid_entry attr_dir_stuff[] = { - REG("current", S_IRUGO|S_IWUGO, pid_attr), - REG("prev", S_IRUGO, pid_attr), - REG("exec", S_IRUGO|S_IWUGO, pid_attr), - REG("fscreate", S_IRUGO|S_IWUGO, pid_attr), - REG("keycreate", S_IRUGO|S_IWUGO, pid_attr), - REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr), + REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("prev", S_IRUGO, proc_pid_attr_operations), + REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), }; static int proc_attr_dir_readdir(struct file * filp, @@ -2006,15 +2285,23 @@ static const struct file_operations proc_coredump_filter_operations = { static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return ERR_PTR(-ENOENT); + sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2052,7 +2339,7 @@ static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static struct dentry_operations proc_base_dentry_operations = +static const struct dentry_operations proc_base_dentry_operations = { .d_revalidate = proc_base_revalidate, .d_delete = pid_delete_dentry, @@ -2083,8 +2370,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (!ei->pid) goto out_iput; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) inode->i_nlink = 2; @@ -2143,29 +2428,54 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { + struct task_io_accounting acct = task->ioac; + unsigned long flags; + + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; + + task_io_accounting_add(&acct, &task->signal->ioac); + while_each_thread(task, t) + task_io_accounting_add(&acct, &t->ioac); + + unlock_task_sighand(task, &flags); + } return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); +} + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); +} + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ + +static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%08x\n", task->personality); + return 0; } -#endif /* * Thread groups @@ -2174,61 +2484,77 @@ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { - DIR("task", S_IRUGO|S_IXUGO, task), - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), - INF("limits", S_IRUSR, pid_limits), + DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), +#ifdef CONFIG_NET + DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), +#endif + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUSR, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), +#endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tgid_stat), - INF("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tgid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), - REG("mountstats", S_IRUSR, mountstats), -#ifdef CONFIG_MMU - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", S_IRUGO, proc_mountinfo_operations), + REG("mountstats", S_IRUSR, proc_mountstats_operations), +#ifdef CONFIG_PROC_PAGE_MONITOR + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), +#endif +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) - REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), + REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, proc_tgid_io_accounting), #endif }; @@ -2265,7 +2591,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); + if (!(current->flags & PF_EXITING)) + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } @@ -2362,10 +2689,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 5; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2411,19 +2737,23 @@ out: * Find the first task with tgid >= tgid * */ -static struct task_struct *next_tgid(unsigned int tgid, - struct pid_namespace *ns) -{ +struct tgid_iter { + unsigned int tgid; struct task_struct *task; +}; +static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) +{ struct pid *pid; + if (iter.task) + put_task_struct(iter.task); rcu_read_lock(); retry: - task = NULL; - pid = find_ge_pid(tgid, ns); + iter.task = NULL; + pid = find_ge_pid(iter.tgid, ns); if (pid) { - tgid = pid_nr_ns(pid, ns) + 1; - task = pid_task(pid, PIDTYPE_PID); + iter.tgid = pid_nr_ns(pid, ns); + iter.task = pid_task(pid, PIDTYPE_PID); /* What we to know is if the pid we have find is the * pid of a thread_group_leader. Testing for task * being a thread_group_leader is the obvious thing @@ -2436,23 +2766,25 @@ retry: * found doesn't happen to be a thread group leader. * As we don't care in the case of readdir. */ - if (!task || !has_group_leader_pid(task)) + if (!iter.task || !has_group_leader_pid(iter.task)) { + iter.tgid += 1; goto retry; - get_task_struct(task); + } + get_task_struct(iter.task); } rcu_read_unlock(); - return task; + return iter; } #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int tgid) + struct tgid_iter iter) { char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", tgid); + int len = snprintf(name, sizeof(name), "%d", iter.tgid); return proc_fill_cache(filp, dirent, filldir, name, len, - proc_pid_instantiate, task, NULL); + proc_pid_instantiate, iter.task, NULL); } /* for the /proc/ directory itself, after non-process stuff has been done */ @@ -2460,8 +2792,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); - struct task_struct *task; - int tgid; + struct tgid_iter iter; struct pid_namespace *ns; if (!reaper) @@ -2474,14 +2805,14 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) } ns = filp->f_dentry->d_sb->s_fs_info; - tgid = filp->f_pos - TGID_OFFSET; - for (task = next_tgid(tgid, ns); - task; - put_task_struct(task), task = next_tgid(tgid + 1, ns)) { - tgid = task_pid_nr_ns(task, ns); - filp->f_pos = tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { - put_task_struct(task); + iter.task = NULL; + iter.tgid = filp->f_pos - TGID_OFFSET; + for (iter = next_tgid(ns, iter); + iter.task; + iter.tgid += 1, iter = next_tgid(ns, iter)) { + filp->f_pos = iter.tgid + TGID_OFFSET; + if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + put_task_struct(iter.task); goto out; } } @@ -2496,53 +2827,69 @@ out_no_task: * Tasks */ static const struct pid_entry tid_base_stuff[] = { - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), - INF("limits", S_IRUSR, pid_limits), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUSR, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), +#endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tid_stat), - INF("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), -#ifdef CONFIG_MMU - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", S_IRUGO, proc_mountinfo_operations), +#ifdef CONFIG_PROC_PAGE_MONITOR + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), +#endif +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUSR, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), +#endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, proc_tid_io_accounting), #endif }; @@ -2582,10 +2929,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2720,7 +3066,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi int retval = -ENOENT; ino_t ino; int tid; - unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ struct pid_namespace *ns; task = get_proc_task(inode); @@ -2737,18 +3082,18 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi goto out_no_task; retval = 0; - switch (pos) { + switch ((unsigned long)filp->f_pos) { case 0: ino = inode->i_ino; - if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) goto out; - pos++; + filp->f_pos++; /* fall through */ case 1: ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) goto out; - pos++; + filp->f_pos++; /* fall through */ } @@ -2758,9 +3103,9 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi ns = filp->f_dentry->d_sb->s_fs_info; tid = (int)filp->f_version; filp->f_version = 0; - for (task = first_tid(leader, tid, pos - 2, ns); + for (task = first_tid(leader, tid, filp->f_pos - 2, ns); task; - task = next_tid(task), pos++) { + task = next_tid(task), filp->f_pos++) { tid = task_pid_nr_ns(task, ns); if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first @@ -2771,7 +3116,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi } } out: - filp->f_pos = pos; put_task_struct(leader); out_no_task: return retval; @@ -2784,9 +3128,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct generic_fillattr(inode, stat); if (p) { - rcu_read_lock(); stat->nlink += get_nr_threads(p); - rcu_read_unlock(); put_task_struct(p); }