nommu: report correct errno in message
[safe/jmp/linux-2.6] / fs / proc / base.c
index 433a01d..a5fa1fd 100644 (file)
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
+#include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
@@ -72,6 +72,7 @@
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
+#include <linux/oom.h>
 #include "internal.h"
 
 /* NOTE:
 
 
 /* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 10
+#define PROC_NUMBUF 13
 
 struct pid_entry {
-       int len;
        char *name;
+       int len;
        mode_t mode;
-       struct inode_operations *iop;
-       struct file_operations *fop;
+       const struct inode_operations *iop;
+       const struct file_operations *fop;
        union proc_op op;
 };
 
 #define NOD(NAME, MODE, IOP, FOP, OP) {                        \
-       .len  = sizeof(NAME) - 1,                       \
        .name = (NAME),                                 \
+       .len  = sizeof(NAME) - 1,                       \
        .mode = MODE,                                   \
        .iop  = IOP,                                    \
        .fop  = FOP,                                    \
@@ -122,6 +123,9 @@ struct pid_entry {
                NULL, &proc_info_file_operations,       \
                { .proc_read = &proc_##OTYPE } )
 
+/*
+ * Global flag, made available to modules through EXPORT_SYMBOL below.
+ * NOTE(review): nothing in this part of the file reads it -- presumably it
+ * gates access to the per-task maps files; confirm against its users.
+ */
+int maps_protect;
+EXPORT_SYMBOL(maps_protect);
+
 static struct fs_struct *get_fs_struct(struct task_struct *task)
 {
        struct fs_struct *fs;
@@ -274,17 +278,15 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
  */
 static int proc_pid_wchan(struct task_struct *task, char *buffer)
 {
-       char *modname;
-       const char *sym_name;
-       unsigned long wchan, size, offset;
-       char namebuf[KSYM_NAME_LEN+1];
+       unsigned long wchan;
+       char symname[KSYM_NAME_LEN+1];
 
        wchan = get_wchan(task);
 
-       sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
-       if (sym_name)
-               return sprintf(buffer, "%s", sym_name);
-       return sprintf(buffer, "%lu", wchan);
+       if (lookup_symbol_name(wchan, symname) < 0)
+               return sprintf(buffer, "%lu", wchan);
+       else
+               return sprintf(buffer, "%s", symname);
 }
 #endif /* CONFIG_KALLSYMS */
 
@@ -309,7 +311,9 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
        struct timespec uptime;
 
        do_posix_clock_monotonic_gettime(&uptime);
+       read_lock(&tasklist_lock);
        points = badness(task, uptime.tv_sec);
+       read_unlock(&tasklist_lock);
        return sprintf(buffer, "%lu\n", points);
 }
 
@@ -343,15 +347,12 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
                return -EPERM;
 
        error = inode_change_ok(inode, attr);
-       if (!error) {
-               error = security_inode_setattr(dentry, attr);
-               if (!error)
-                       error = inode_setattr(inode, attr);
-       }
+       if (!error)
+               error = inode_setattr(inode, attr);
        return error;
 }
 
-static struct inode_operations proc_def_inode_operations = {
+static const struct inode_operations proc_def_inode_operations = {
        .setattr        = proc_setattr,
 };
 
@@ -364,33 +365,35 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
        struct task_struct *task = get_proc_task(inode);
-       struct namespace *namespace = NULL;
+       struct mnt_namespace *ns = NULL;
        struct proc_mounts *p;
        int ret = -EINVAL;
 
        if (task) {
                task_lock(task);
-               namespace = task->nsproxy->namespace;
-               if (namespace)
-                       get_namespace(namespace);
+               if (task->nsproxy) {
+                       ns = task->nsproxy->mnt_ns;
+                       if (ns)
+                               get_mnt_ns(ns);
+               }
                task_unlock(task);
                put_task_struct(task);
        }
 
-       if (namespace) {
+       if (ns) {
                ret = -ENOMEM;
                p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
                if (p) {
                        file->private_data = &p->m;
                        ret = seq_open(file, &mounts_op);
                        if (!ret) {
-                               p->m.private = namespace;
-                               p->event = namespace->event;
+                               p->m.private = ns;
+                               p->event = ns->event;
                                return 0;
                        }
                        kfree(p);
                }
-               put_namespace(namespace);
+               put_mnt_ns(ns);
        }
        return ret;
 }
@@ -398,15 +401,15 @@ static int mounts_open(struct inode *inode, struct file *file)
 static int mounts_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = file->private_data;
-       struct namespace *namespace = m->private;
-       put_namespace(namespace);
+       struct mnt_namespace *ns = m->private;
+       put_mnt_ns(ns);
        return seq_release(inode, file);
 }
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
        struct proc_mounts *p = file->private_data;
-       struct namespace *ns = p->m.private;
+       struct mnt_namespace *ns = p->m.private;
        unsigned res = 0;
 
        poll_wait(file, &ns->poll, wait);
@@ -421,7 +424,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
        return res;
 }
 
-static struct file_operations proc_mounts_operations = {
+static const struct file_operations proc_mounts_operations = {
        .open           = mounts_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
@@ -436,20 +439,21 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
        if (!ret) {
                struct seq_file *m = file->private_data;
-               struct namespace *namespace = NULL;
+               struct mnt_namespace *mnt_ns = NULL;
                struct task_struct *task = get_proc_task(inode);
 
                if (task) {
                        task_lock(task);
-                       namespace = task->nsproxy->namespace;
-                       if (namespace)
-                               get_namespace(namespace);
+                       if (task->nsproxy)
+                               mnt_ns = task->nsproxy->mnt_ns;
+                       if (mnt_ns)
+                               get_mnt_ns(mnt_ns);
                        task_unlock(task);
                        put_task_struct(task);
                }
 
-               if (namespace)
-                       m->private = namespace;
+               if (mnt_ns)
+                       m->private = mnt_ns;
                else {
                        seq_release(inode, file);
                        ret = -EINVAL;
@@ -458,7 +462,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
        return ret;
 }
 
-static struct file_operations proc_mountstats_operations = {
+static const struct file_operations proc_mountstats_operations = {
        .open           = mountstats_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
@@ -470,7 +474,7 @@ static struct file_operations proc_mountstats_operations = {
 static ssize_t proc_info_read(struct file * file, char __user * buf,
                          size_t count, loff_t *ppos)
 {
-       struct inode * inode = file->f_dentry->d_inode;
+       struct inode * inode = file->f_path.dentry->d_inode;
        unsigned long page;
        ssize_t length;
        struct task_struct *task = get_proc_task(inode);
@@ -497,7 +501,7 @@ out_no_task:
        return length;
 }
 
-static struct file_operations proc_info_file_operations = {
+static const struct file_operations proc_info_file_operations = {
        .read           = proc_info_read,
 };
 
@@ -510,7 +514,7 @@ static int mem_open(struct inode* inode, struct file* file)
 static ssize_t mem_read(struct file * file, char __user * buf,
                        size_t count, loff_t *ppos)
 {
-       struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+       struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
        char *page;
        unsigned long src = *ppos;
        int ret = -ESRCH;
@@ -577,12 +581,12 @@ out_no_task:
 
 #ifndef mem_write
 /* This is a security hazard */
-static ssize_t mem_write(struct file * file, const char buf,
+static ssize_t mem_write(struct file * file, const char __user *buf,
                         size_t count, loff_t *ppos)
 {
        int copied;
        char *page;
-       struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+       struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
        unsigned long dst = *ppos;
 
        copied = -ESRCH;
@@ -642,7 +646,7 @@ static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
        return file->f_pos;
 }
 
-static struct file_operations proc_mem_operations = {
+static const struct file_operations proc_mem_operations = {
        .llseek         = mem_lseek,
        .read           = mem_read,
        .write          = mem_write,
@@ -652,11 +656,10 @@ static struct file_operations proc_mem_operations = {
 static ssize_t oom_adjust_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos)
 {
-       struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+       struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
        char buffer[PROC_NUMBUF];
        size_t len;
        int oom_adjust;
-       loff_t __ppos = *ppos;
 
        if (!task)
                return -ESRCH;
@@ -664,14 +667,8 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
        put_task_struct(task);
 
        len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
-       if (__ppos >= len)
-               return 0;
-       if (count > len-__ppos)
-               count = len-__ppos;
-       if (copy_to_user(buf, buffer + __ppos, count))
-               return -EFAULT;
-       *ppos = __ppos + count;
-       return count;
+
+       return simple_read_from_buffer(buf, count, ppos, buffer, len);
 }
 
 static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
@@ -681,21 +678,24 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
        char buffer[PROC_NUMBUF], *end;
        int oom_adjust;
 
-       if (!capable(CAP_SYS_RESOURCE))
-               return -EPERM;
        memset(buffer, 0, sizeof(buffer));
        if (count > sizeof(buffer) - 1)
                count = sizeof(buffer) - 1;
        if (copy_from_user(buffer, buf, count))
                return -EFAULT;
        oom_adjust = simple_strtol(buffer, &end, 0);
-       if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
+       if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
+            oom_adjust != OOM_DISABLE)
                return -EINVAL;
        if (*end == '\n')
                end++;
-       task = get_proc_task(file->f_dentry->d_inode);
+       task = get_proc_task(file->f_path.dentry->d_inode);
        if (!task)
                return -ESRCH;
+       if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+               put_task_struct(task);
+               return -EACCES;
+       }
        task->oomkilladj = oom_adjust;
        put_task_struct(task);
        if (end - buffer == 0)
@@ -703,17 +703,53 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
        return end - buffer;
 }
 
-static struct file_operations proc_oom_adjust_operations = {
+static const struct file_operations proc_oom_adjust_operations = {
        .read           = oom_adjust_read,
        .write          = oom_adjust_write,
 };
 
+#ifdef CONFIG_MMU
+/*
+ * Write handler for the clear_refs file.
+ *
+ * At most PROC_NUMBUF - 1 bytes of user input are parsed as an integer.
+ * A result of zero (which includes input with no digits) is rejected with
+ * -EINVAL.  Otherwise clear_refs_smap() is run on the target task's mm,
+ * provided get_task_mm() still returns one.
+ *
+ * Returns the number of bytes consumed (the number plus one trailing
+ * newline, if present), -EFAULT if the user buffer cannot be copied, or
+ * -ESRCH if get_proc_task() finds no task.
+ */
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+                               size_t count, loff_t *ppos)
+{
+       struct task_struct *task;
+       char buffer[PROC_NUMBUF], *end;
+       struct mm_struct *mm;
+
+       /* Zero-fill so the copied input is always NUL-terminated. */
+       memset(buffer, 0, sizeof(buffer));
+       if (count > sizeof(buffer) - 1)
+               count = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, buf, count))
+               return -EFAULT;
+       if (!simple_strtol(buffer, &end, 0))
+               return -EINVAL;
+       if (*end == '\n')
+               end++;
+       task = get_proc_task(file->f_path.dentry->d_inode);
+       if (!task)
+               return -ESRCH;
+       mm = get_task_mm(task);
+       if (mm) {
+               clear_refs_smap(mm);
+               mmput(mm);
+       }
+       put_task_struct(task);
+       /*
+        * A non-zero parse result means at least one digit was consumed, so
+        * end is always past the start of buffer here; no empty-input check
+        * is needed.
+        */
+       return end - buffer;
+}
+
+/* File operations for the clear_refs entry; only a write handler is set. */
+static const struct file_operations proc_clear_refs_operations = {
+       .write          = clear_refs_write,
+};
+#endif
+
 #ifdef CONFIG_AUDITSYSCALL
 #define TMPBUFLEN 21
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
                                  size_t count, loff_t *ppos)
 {
-       struct inode * inode = file->f_dentry->d_inode;
+       struct inode * inode = file->f_path.dentry->d_inode;
        struct task_struct *task = get_proc_task(inode);
        ssize_t length;
        char tmpbuf[TMPBUFLEN];
@@ -729,7 +765,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
                                   size_t count, loff_t *ppos)
 {
-       struct inode * inode = file->f_dentry->d_inode;
+       struct inode * inode = file->f_path.dentry->d_inode;
        char *page, *tmp;
        ssize_t length;
        uid_t loginuid;
@@ -770,7 +806,7 @@ out_free_page:
        return length;
 }
 
-static struct file_operations proc_loginuid_operations = {
+static const struct file_operations proc_loginuid_operations = {
        .read           = proc_loginuid_read,
        .write          = proc_loginuid_write,
 };
@@ -782,7 +818,6 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
 {
        struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
        char __buf[20];
-       loff_t __ppos = *ppos;
        size_t len;
 
        if (!tsk)
@@ -790,14 +825,8 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
        /* no need to print the trailing zero, so use only len */
        len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
        put_task_struct(tsk);
-       if (__ppos >= len)
-               return 0;
-       if (count > len - __ppos)
-               count = len - __ppos;
-       if (copy_to_user(buf, __buf + __ppos, count))
-               return -EFAULT;
-       *ppos = __ppos + count;
-       return count;
+
+       return simple_read_from_buffer(buf, count, ppos, __buf, len);
 }
 
 static ssize_t seccomp_write(struct file *file, const char __user *buf,
@@ -842,12 +871,64 @@ out_no_task:
        return result;
 }
 
-static struct file_operations proc_seccomp_operations = {
+static const struct file_operations proc_seccomp_operations = {
        .read           = seccomp_read,
        .write          = seccomp_write,
 };
 #endif /* CONFIG_SECCOMP */
 
+#ifdef CONFIG_FAULT_INJECTION
+/*
+ * Read handler for the fault-injection file: formats the target task's
+ * make_it_fail value as a decimal integer followed by a newline.
+ *
+ * Returns the result of simple_read_from_buffer(), or -ESRCH if
+ * get_proc_task() finds no task.
+ */
+static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
+                                     size_t count, loff_t *ppos)
+{
+       struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+       char buffer[PROC_NUMBUF];
+       size_t len;
+       int make_it_fail;
+
+       if (!task)
+               return -ESRCH;
+       /* Snapshot the value so the task reference can be dropped early. */
+       make_it_fail = task->make_it_fail;
+       put_task_struct(task);
+
+       len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
+
+       return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+/*
+ * Write handler for the fault-injection file: parses an integer from at
+ * most PROC_NUMBUF - 1 bytes of user input and stores it in the target
+ * task's make_it_fail field.
+ *
+ * The caller needs CAP_SYS_RESOURCE, otherwise -EPERM is returned.
+ * Returns the number of bytes consumed (the number plus one trailing
+ * newline, if present), -EFAULT if the user buffer cannot be copied,
+ * -ESRCH if get_proc_task() finds no task, or -EIO if nothing was
+ * consumed.  Note that the -EIO check runs after the store.
+ */
+static ssize_t proc_fault_inject_write(struct file * file,
+                       const char __user * buf, size_t count, loff_t *ppos)
+{
+       struct task_struct *task;
+       char buffer[PROC_NUMBUF], *end;
+       int make_it_fail;
+
+       if (!capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+       /* Zero-fill so the copied input is always NUL-terminated. */
+       memset(buffer, 0, sizeof(buffer));
+       if (count > sizeof(buffer) - 1)
+               count = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, buf, count))
+               return -EFAULT;
+       make_it_fail = simple_strtol(buffer, &end, 0);
+       if (*end == '\n')
+               end++;
+       task = get_proc_task(file->f_path.dentry->d_inode);
+       if (!task)
+               return -ESRCH;
+       task->make_it_fail = make_it_fail;
+       put_task_struct(task);
+       if (end - buffer == 0)
+               return -EIO;
+       return end - buffer;
+}
+
+/* File operations for reading and writing a task's make_it_fail value. */
+static const struct file_operations proc_fault_inject_operations = {
+       .read           = proc_fault_inject_read,
+       .write          = proc_fault_inject_write,
+};
+#endif
+
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
@@ -875,7 +956,7 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
 
        if (!tmp)
                return -ENOMEM;
-               
+
        inode = dentry->d_inode;
        path = d_path(dentry, mnt, tmp, PAGE_SIZE);
        len = PTR_ERR(path);
@@ -914,7 +995,7 @@ out:
        return error;
 }
 
-static struct inode_operations proc_pid_link_inode_operations = {
+static const struct inode_operations proc_pid_link_inode_operations = {
        .readlink       = proc_pid_readlink,
        .follow_link    = proc_pid_follow_link,
        .setattr        = proc_setattr,
@@ -958,7 +1039,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
        /*
         * grab the reference to task.
         */
-       ei->pid = get_pid(task_pid(task));
+       ei->pid = get_task_pid(task, PIDTYPE_PID);
        if (!ei->pid)
                goto out_unlock;
 
@@ -1055,13 +1136,26 @@ static struct dentry_operations pid_dentry_operations =
 
 /* Lookups */
 
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *);
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+                               struct task_struct *, const void *);
 
+/*
+ * Fill a directory entry.
+ *
+ * If possible, create the dcache entry and derive our inode number and
+ * file type from that dcache entry.
+ *
+ * Since all of the proc inode numbers are dynamically generated, the inode
+ * numbers do not exist until the inode is cached.  This means creating
+ * the dcache entry in readdir is necessary to keep the inode numbers
+ * reported by readdir in sync with the inode numbers reported
+ * by stat.
+ */
 static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
        char *name, int len,
-       instantiate_t instantiate, struct task_struct *task, void *ptr)
+       instantiate_t instantiate, struct task_struct *task, const void *ptr)
 {
-       struct dentry *child, *dir = filp->f_dentry;
+       struct dentry *child, *dir = filp->f_path.dentry;
        struct inode *inode;
        struct qstr qname;
        ino_t ino = 0;
@@ -1121,7 +1215,10 @@ out:
        return ~0U;
 }
 
-static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+#define PROC_FDINFO_MAX 64
+
+static int proc_fd_info(struct inode *inode, struct dentry **dentry,
+                       struct vfsmount **mnt, char *info)
 {
        struct task_struct *task = get_proc_task(inode);
        struct files_struct *files = NULL;
@@ -1140,8 +1237,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
                spin_lock(&files->file_lock);
                file = fcheck_files(files, fd);
                if (file) {
-                       *mnt = mntget(file->f_vfsmnt);
-                       *dentry = dget(file->f_dentry);
+                       if (mnt)
+                               *mnt = mntget(file->f_path.mnt);
+                       if (dentry)
+                               *dentry = dget(file->f_path.dentry);
+                       if (info)
+                               snprintf(info, PROC_FDINFO_MAX,
+                                        "pos:\t%lli\n"
+                                        "flags:\t0%o\n",
+                                        (long long) file->f_pos,
+                                        file->f_flags);
                        spin_unlock(&files->file_lock);
                        put_files_struct(files);
                        return 0;
@@ -1152,6 +1257,12 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
        return -ENOENT;
 }
 
+/*
+ * Thin wrapper around proc_fd_info() that asks only for the dentry and
+ * vfsmount of the file descriptor, passing NULL for the info buffer.
+ * Returns whatever proc_fd_info() returns.
+ */
+static int proc_fd_link(struct inode *inode, struct dentry **dentry,
+                       struct vfsmount **mnt)
+{
+       return proc_fd_info(inode, dentry, mnt, NULL);
+}
+
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
@@ -1194,9 +1305,9 @@ static struct dentry_operations tid_fd_dentry_operations =
 };
 
 static struct dentry *proc_fd_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, void *ptr)
+       struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
-       unsigned fd = *(unsigned *)ptr;
+       unsigned fd = *(const unsigned *)ptr;
        struct file *file;
        struct files_struct *files;
        struct inode *inode;
@@ -1247,7 +1358,9 @@ out_iput:
        goto out;
 }
 
-static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_lookupfd_common(struct inode *dir,
+                                          struct dentry *dentry,
+                                          instantiate_t instantiate)
 {
        struct task_struct *task = get_proc_task(dir);
        unsigned fd = name_to_int(dentry);
@@ -1258,25 +1371,17 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
        if (fd == ~0U)
                goto out;
 
-       result = proc_fd_instantiate(dir, dentry, task, &fd);
+       result = instantiate(dir, dentry, task, &fd);
 out:
        put_task_struct(task);
 out_no_task:
        return result;
 }
 
-static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-       struct task_struct *task, int fd)
+static int proc_readfd_common(struct file * filp, void * dirent,
+                             filldir_t filldir, instantiate_t instantiate)
 {
-       char name[PROC_NUMBUF];
-       int len = snprintf(name, sizeof(name), "%d", fd);
-       return proc_fill_cache(filp, dirent, filldir, name, len,
-                               proc_fd_instantiate, task, &fd);
-}
-
-static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
-{
-       struct dentry *dentry = filp->f_dentry;
+       struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *p = get_proc_task(inode);
        unsigned int fd, tid, ino;
@@ -1310,12 +1415,17 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
                        for (fd = filp->f_pos-2;
                             fd < fdt->max_fds;
                             fd++, filp->f_pos++) {
+                               char name[PROC_NUMBUF];
+                               int len;
 
                                if (!fcheck_files(files, fd))
                                        continue;
                                rcu_read_unlock();
 
-                               if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) {
+                               len = snprintf(name, sizeof(name), "%d", fd);
+                               if (proc_fill_cache(filp, dirent, filldir,
+                                                   name, len, instantiate,
+                                                   p, &fd) < 0) {
                                        rcu_read_lock();
                                        break;
                                }
@@ -1330,23 +1440,119 @@ out_no_task:
        return retval;
 }
 
-static struct file_operations proc_fd_operations = {
+/*
+ * ->lookup for proc_fd_inode_operations: resolves the name through
+ * proc_lookupfd_common() using proc_fd_instantiate.  nd is not used.
+ */
+static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
+                                   struct nameidata *nd)
+{
+       return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
+}
+
+/*
+ * ->readdir for proc_fd_operations: lists entries through
+ * proc_readfd_common() using proc_fd_instantiate.
+ */
+static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+{
+       return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+}
+
+/*
+ * Read handler for an fdinfo file: asks proc_fd_info() for the textual
+ * description only (no dentry, no vfsmount) and copies it out to the
+ * caller.  Returns proc_fd_info()'s error code if it fails, otherwise the
+ * result of simple_read_from_buffer().
+ */
+static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
+                                     size_t len, loff_t *ppos)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       char info[PROC_FDINFO_MAX];
+       int rc;
+
+       rc = proc_fd_info(inode, NULL, NULL, info);
+       if (rc)
+               return rc;
+
+       rc = simple_read_from_buffer(buf, len, ppos, info, strlen(info));
+       return rc;
+}
+
+/*
+ * File operations installed on each fdinfo inode by
+ * proc_fdinfo_instantiate(): opened via nonseekable_open, read via
+ * proc_fdinfo_read().
+ */
+static const struct file_operations proc_fdinfo_file_operations = {
+       .open           = nonseekable_open,
+       .read           = proc_fdinfo_read,
+};
+
+static const struct file_operations proc_fd_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_readfd,
 };
 
 /*
+ * /proc/pid/fd needs a special permission handler so that a process can still
+ * access /proc/self/fd after it has executed a setuid().
+ */
+static int proc_fd_permission(struct inode *inode, int mask,
+                               struct nameidata *nd)
+{
+       int err = generic_permission(inode, mask, NULL);
+
+       /*
+        * A failed generic check is overridden when the inode belongs to
+        * the calling task itself.
+        */
+       if (err && task_pid(current) == proc_pid(inode))
+               err = 0;
+       return err;
+}
+
+/*
  * proc directories can do almost nothing..
  */
-static struct inode_operations proc_fd_inode_operations = {
+static const struct inode_operations proc_fd_inode_operations = {
        .lookup         = proc_lookupfd,
+       .permission     = proc_fd_permission,
        .setattr        = proc_setattr,
 };
 
+/*
+ * instantiate_t callback that builds the inode for one fdinfo entry.
+ *
+ * ptr points to the unsigned file descriptor number.  The new inode is a
+ * regular file readable by its owner only (S_IFREG | S_IRUSR), uses
+ * proc_fdinfo_file_operations, and its dentry gets
+ * tid_fd_dentry_operations.
+ *
+ * Returns NULL once the dentry has been added and tid_fd_revalidate()
+ * accepts it, or ERR_PTR(-ENOENT) if the inode cannot be created or the
+ * revalidation fails.
+ */
+static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
+       struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+       unsigned fd = *(const unsigned *)ptr;
+       struct inode *inode;
+       struct proc_inode *ei;
+       struct dentry *error = ERR_PTR(-ENOENT);
+
+       inode = proc_pid_make_inode(dir->i_sb, task);
+       if (!inode)
+               goto out;
+       ei = PROC_I(inode);
+       ei->fd = fd;
+       inode->i_mode = S_IFREG | S_IRUSR;
+       inode->i_fop = &proc_fdinfo_file_operations;
+       dentry->d_op = &tid_fd_dentry_operations;
+       d_add(dentry, inode);
+       /* Close the race of the process dying before we return the dentry */
+       if (tid_fd_revalidate(dentry, NULL))
+               error = NULL;
+
+ out:
+       return error;
+}
+
+/*
+ * ->lookup for proc_fdinfo_inode_operations: resolves the name through
+ * proc_lookupfd_common() using proc_fdinfo_instantiate.  nd is not used.
+ */
+static struct dentry *proc_lookupfdinfo(struct inode *dir,
+                                       struct dentry *dentry,
+                                       struct nameidata *nd)
+{
+       return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
+}
+
+/*
+ * ->readdir for proc_fdinfo_operations: lists entries through
+ * proc_readfd_common() using proc_fdinfo_instantiate.
+ */
+static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+{
+       return proc_readfd_common(filp, dirent, filldir,
+                                 proc_fdinfo_instantiate);
+}
+
+/* Directory file operations for fdinfo: readdir goes to proc_readfdinfo(). */
+static const struct file_operations proc_fdinfo_operations = {
+       .read           = generic_read_dir,
+       .readdir        = proc_readfdinfo,
+};
+
+/*
+ * proc directories can do almost nothing..
+ *
+ * Inode operations for fdinfo: only name lookup (proc_lookupfdinfo) and
+ * setattr (proc_setattr) are provided.
+ */
+static const struct inode_operations proc_fdinfo_inode_operations = {
+       .lookup         = proc_lookupfdinfo,
+       .setattr        = proc_setattr,
+};
+
+
 static struct dentry *proc_pident_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, void *ptr)
+       struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
-       struct pid_entry *p = ptr;
+       const struct pid_entry *p = ptr;
        struct inode *inode;
        struct proc_inode *ei;
        struct dentry *error = ERR_PTR(-EINVAL);
@@ -1375,13 +1581,13 @@ out:
 
 static struct dentry *proc_pident_lookup(struct inode *dir, 
                                         struct dentry *dentry,
-                                        struct pid_entry *ents,
+                                        const struct pid_entry *ents,
                                         unsigned int nents)
 {
        struct inode *inode;
        struct dentry *error;
        struct task_struct *task = get_proc_task(dir);
-       struct pid_entry *p, *last;
+       const struct pid_entry *p, *last;
 
        error = ERR_PTR(-ENOENT);
        inode = NULL;
@@ -1410,8 +1616,8 @@ out_no_task:
        return error;
 }
 
-static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-       struct task_struct *task, struct pid_entry *p)
+static int proc_pident_fill_cache(struct file *filp, void *dirent,
+       filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
 {
        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
                                proc_pident_instantiate, task, p);
@@ -1419,14 +1625,14 @@ static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t fil
 
 static int proc_pident_readdir(struct file *filp,
                void *dirent, filldir_t filldir,
-               struct pid_entry *ents, unsigned int nents)
+               const struct pid_entry *ents, unsigned int nents)
 {
        int i;
        int pid;
-       struct dentry *dentry = filp->f_dentry;
+       struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *task = get_proc_task(inode);
-       struct pid_entry *p, *last;
+       const struct pid_entry *p, *last;
        ino_t ino;
        int ret;
 
@@ -1479,37 +1685,28 @@ out_no_task:
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
                                  size_t count, loff_t *ppos)
 {
-       struct inode * inode = file->f_dentry->d_inode;
-       unsigned long page;
+       struct inode * inode = file->f_path.dentry->d_inode;
+       char *p = NULL;
        ssize_t length;
        struct task_struct *task = get_proc_task(inode);
 
-       length = -ESRCH;
        if (!task)
-               goto out_no_task;
-
-       if (count > PAGE_SIZE)
-               count = PAGE_SIZE;
-       length = -ENOMEM;
-       if (!(page = __get_free_page(GFP_KERNEL)))
-               goto out;
+               return -ESRCH;
 
        length = security_getprocattr(task,
-                                     (char*)file->f_dentry->d_name.name,
-                                     (void*)page, count);
-       if (length >= 0)
-               length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
-       free_page(page);
-out:
+                                     (char*)file->f_path.dentry->d_name.name,
+                                     &p);
        put_task_struct(task);
-out_no_task:
+       if (length > 0)
+               length = simple_read_from_buffer(buf, count, ppos, p, length);
+       kfree(p);
        return length;
 }
 
 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
                                   size_t count, loff_t *ppos)
 {
-       struct inode * inode = file->f_dentry->d_inode;
+       struct inode * inode = file->f_path.dentry->d_inode;
        char *page;
        ssize_t length;
        struct task_struct *task = get_proc_task(inode);
@@ -1535,7 +1732,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
                goto out_free;
 
        length = security_setprocattr(task,
-                                     (char*)file->f_dentry->d_name.name,
+                                     (char*)file->f_path.dentry->d_name.name,
                                      (void*)page, count);
 out_free:
        free_page((unsigned long) page);
@@ -1545,12 +1742,12 @@ out_no_task:
        return length;
 }
 
-static struct file_operations proc_pid_attr_operations = {
+static const struct file_operations proc_pid_attr_operations = {
        .read           = proc_pid_attr_read,
        .write          = proc_pid_attr_write,
 };
 
-static struct pid_entry attr_dir_stuff[] = {
+static const struct pid_entry attr_dir_stuff[] = {
        REG("current",    S_IRUGO|S_IWUGO, pid_attr),
        REG("prev",       S_IRUGO,         pid_attr),
        REG("exec",       S_IRUGO|S_IWUGO, pid_attr),
@@ -1566,7 +1763,7 @@ static int proc_attr_dir_readdir(struct file * filp,
                                   attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
 }
 
-static struct file_operations proc_attr_dir_operations = {
+static const struct file_operations proc_attr_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_attr_dir_readdir,
 };
@@ -1578,7 +1775,7 @@ static struct dentry *proc_attr_dir_lookup(struct inode *dir,
                                  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
 }
 
-static struct inode_operations proc_attr_dir_inode_operations = {
+static const struct inode_operations proc_attr_dir_inode_operations = {
        .lookup         = proc_attr_dir_lookup,
        .getattr        = pid_getattr,
        .setattr        = proc_setattr,
@@ -1604,7 +1801,7 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
        return ERR_PTR(vfs_follow_link(nd,tmp));
 }
 
-static struct inode_operations proc_self_inode_operations = {
+static const struct inode_operations proc_self_inode_operations = {
        .readlink       = proc_self_readlink,
        .follow_link    = proc_self_follow_link,
 };
@@ -1616,7 +1813,7 @@ static struct inode_operations proc_self_inode_operations = {
  * that properly belong to the /proc filesystem, as they describe
  * something that is process related.
  */
-static struct pid_entry proc_base_stuff[] = {
+static const struct pid_entry proc_base_stuff[] = {
        NOD("self", S_IFLNK|S_IRWXUGO,
                &proc_self_inode_operations, NULL, {}),
 };
@@ -1645,9 +1842,9 @@ static struct dentry_operations proc_base_dentry_operations =
 };
 
 static struct dentry *proc_base_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, void *ptr)
+       struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
-       struct pid_entry *p = ptr;
+       const struct pid_entry *p = ptr;
        struct inode *inode;
        struct proc_inode *ei;
        struct dentry *error = ERR_PTR(-EINVAL);
@@ -1665,7 +1862,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
        /*
         * grab the reference to the task.
         */
-       ei->pid = get_pid(task_pid(task));
+       ei->pid = get_task_pid(task, PIDTYPE_PID);
        if (!ei->pid)
                goto out_iput;
 
@@ -1695,7 +1892,7 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
 {
        struct dentry *error;
        struct task_struct *task = get_proc_task(dir);
-       struct pid_entry *p, *last;
+       const struct pid_entry *p, *last;
 
        error = ERR_PTR(-ENOENT);
 
@@ -1721,22 +1918,48 @@ out_no_task:
        return error;
 }
 
-static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-       struct task_struct *task, struct pid_entry *p)
+static int proc_base_fill_cache(struct file *filp, void *dirent,
+       filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
 {
        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
                                proc_base_instantiate, task, p);
 }
 
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+{
+       return sprintf(buffer,
+#ifdef CONFIG_TASK_XACCT
+                       "rchar: %llu\n"
+                       "wchar: %llu\n"
+                       "syscr: %llu\n"
+                       "syscw: %llu\n"
+#endif
+                       "read_bytes: %llu\n"
+                       "write_bytes: %llu\n"
+                       "cancelled_write_bytes: %llu\n",
+#ifdef CONFIG_TASK_XACCT
+                       (unsigned long long)task->rchar,
+                       (unsigned long long)task->wchar,
+                       (unsigned long long)task->syscr,
+                       (unsigned long long)task->syscw,
+#endif
+                       (unsigned long long)task->ioac.read_bytes,
+                       (unsigned long long)task->ioac.write_bytes,
+                       (unsigned long long)task->ioac.cancelled_write_bytes);
+}
+#endif
+
 /*
  * Thread groups
  */
-static struct file_operations proc_task_operations;
-static struct inode_operations proc_task_inode_operations;
+static const struct file_operations proc_task_operations;
+static const struct inode_operations proc_task_inode_operations;
 
-static struct pid_entry tgid_base_stuff[] = {
+static const struct pid_entry tgid_base_stuff[] = {
        DIR("task",       S_IRUGO|S_IXUGO, task),
        DIR("fd",         S_IRUSR|S_IXUSR, fd),
+       DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
        INF("environ",    S_IRUSR, pid_environ),
        INF("auxv",       S_IRUSR, pid_auxv),
        INF("status",     S_IRUGO, pid_status),
@@ -1757,6 +1980,7 @@ static struct pid_entry tgid_base_stuff[] = {
        REG("mounts",     S_IRUGO, mounts),
        REG("mountstats", S_IRUSR, mountstats),
 #ifdef CONFIG_MMU
+       REG("clear_refs", S_IWUSR, clear_refs),
        REG("smaps",      S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
@@ -1776,6 +2000,12 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
 #endif
+#ifdef CONFIG_FAULT_INJECTION
+       REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+       INF("io",       S_IRUGO, pid_io_accounting),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
@@ -1785,7 +2015,7 @@ static int proc_tgid_base_readdir(struct file * filp,
                                   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
 }
 
-static struct file_operations proc_tgid_base_operations = {
+static const struct file_operations proc_tgid_base_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_tgid_base_readdir,
 };
@@ -1795,7 +2025,7 @@ static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *de
                                  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
 }
 
-static struct inode_operations proc_tgid_base_inode_operations = {
+static const struct inode_operations proc_tgid_base_inode_operations = {
        .lookup         = proc_tgid_base_lookup,
        .getattr        = pid_getattr,
        .setattr        = proc_setattr,
@@ -1868,8 +2098,9 @@ out:
        return;
 }
 
-struct dentry *proc_pid_instantiate(struct inode *dir,
-       struct dentry * dentry, struct task_struct *task, void *ptr)
+static struct dentry *proc_pid_instantiate(struct inode *dir,
+                                          struct dentry * dentry,
+                                          struct task_struct *task, const void *ptr)
 {
        struct dentry *error = ERR_PTR(-ENOENT);
        struct inode *inode;
@@ -1882,7 +2113,7 @@ struct dentry *proc_pid_instantiate(struct inode *dir,
        inode->i_op = &proc_tgid_base_inode_operations;
        inode->i_fop = &proc_tgid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-       inode->i_nlink = 4;
+       inode->i_nlink = 5;
 #ifdef CONFIG_SECURITY
        inode->i_nlink += 1;
 #endif
@@ -1976,7 +2207,7 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-       struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode);
+       struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
        struct task_struct *task;
        int tgid;
 
@@ -1984,7 +2215,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
                goto out_no_task;
 
        for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
-               struct pid_entry *p = &proc_base_stuff[nr];
+               const struct pid_entry *p = &proc_base_stuff[nr];
                if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
                        goto out;
        }
@@ -2010,8 +2241,9 @@ out_no_task:
 /*
  * Tasks
  */
-static struct pid_entry tid_base_stuff[] = {
+static const struct pid_entry tid_base_stuff[] = {
        DIR("fd",        S_IRUSR|S_IXUSR, fd),
+       DIR("fdinfo",    S_IRUSR|S_IXUSR, fdinfo),
        INF("environ",   S_IRUSR, pid_environ),
        INF("auxv",      S_IRUSR, pid_auxv),
        INF("status",    S_IRUGO, pid_status),
@@ -2031,6 +2263,7 @@ static struct pid_entry tid_base_stuff[] = {
        LNK("exe",       exe),
        REG("mounts",    S_IRUGO, mounts),
 #ifdef CONFIG_MMU
+       REG("clear_refs", S_IWUSR, clear_refs),
        REG("smaps",     S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
@@ -2050,6 +2283,9 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, loginuid),
 #endif
+#ifdef CONFIG_FAULT_INJECTION
+       REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
@@ -2064,19 +2300,19 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
                                  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
 }
 
-static struct file_operations proc_tid_base_operations = {
+static const struct file_operations proc_tid_base_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_tid_base_readdir,
 };
 
-static struct inode_operations proc_tid_base_inode_operations = {
+static const struct inode_operations proc_tid_base_inode_operations = {
        .lookup         = proc_tid_base_lookup,
        .getattr        = pid_getattr,
        .setattr        = proc_setattr,
 };
 
 static struct dentry *proc_task_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, void *ptr)
+       struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
        struct dentry *error = ERR_PTR(-ENOENT);
        struct inode *inode;
@@ -2088,7 +2324,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
        inode->i_op = &proc_tid_base_inode_operations;
        inode->i_fop = &proc_tid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-       inode->i_nlink = 3;
+       inode->i_nlink = 4;
 #ifdef CONFIG_SECURITY
        inode->i_nlink += 1;
 #endif
@@ -2217,15 +2453,25 @@ static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filld
 /* for the /proc/TGID/task/ directories */
 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-       struct dentry *dentry = filp->f_dentry;
+       struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
-       struct task_struct *leader = get_proc_task(inode);
+       struct task_struct *leader = NULL;
        struct task_struct *task;
        int retval = -ENOENT;
        ino_t ino;
        int tid;
        unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
 
+       task = get_proc_task(inode);
+       if (!task)
+               goto out_no_task;
+       rcu_read_lock();
+       if (pid_alive(task)) {
+               leader = task->group_leader;
+               get_task_struct(leader);
+       }
+       rcu_read_unlock();
+       put_task_struct(task);
        if (!leader)
                goto out_no_task;
        retval = 0;
@@ -2285,13 +2531,13 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
        return 0;
 }
 
-static struct inode_operations proc_task_inode_operations = {
+static const struct inode_operations proc_task_inode_operations = {
        .lookup         = proc_task_lookup,
        .getattr        = proc_task_getattr,
        .setattr        = proc_setattr,
 };
 
-static struct file_operations proc_task_operations = {
+static const struct file_operations proc_task_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_task_readdir,
 };