netns xfrm: fix "ip xfrm state|policy count" misreport
[safe/jmp/linux-2.6] / fs / proc / root.c
index aef148f..b080b79 100644 (file)
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
-#include <linux/config.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
-#include <linux/smp_lock.h>
+#include <linux/mount.h>
+#include <linux/pid_namespace.h>
 
-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+#include "internal.h"
 
-#ifdef CONFIG_SYSCTL
-struct proc_dir_entry *proc_sys_root;
-#endif
+static int proc_test_super(struct super_block *sb, void *data)
+{
+       return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+       struct pid_namespace *ns;
+
+       ns = (struct pid_namespace *)data;
+       sb->s_fs_info = get_pid_ns(ns);
+       return set_anon_super(sb, NULL);
+}
+
+static int proc_get_sb(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+       int err;
+       struct super_block *sb;
+       struct pid_namespace *ns;
+       struct proc_inode *ei;
+
+       if (proc_mnt) {
+               /* Seed the root directory with a pid so it doesn't need
+                * to be special in base.c.  I would do this earlier but
+                * the only task alive when /proc is mounted the first time
+                * is the init_task and it doesn't have any pids.
+                */
+               ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
+               if (!ei->pid)
+                       ei->pid = find_get_pid(1);
+       }
 
-static struct super_block *proc_get_sb(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data)
+       if (flags & MS_KERNMOUNT)
+               ns = (struct pid_namespace *)data;
+       else
+               ns = current->nsproxy->pid_ns;
+
+       sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
+
+       if (!sb->s_root) {
+               sb->s_flags = flags;
+               err = proc_fill_super(sb);
+               if (err) {
+                       deactivate_locked_super(sb);
+                       return err;
+               }
+
+               ei = PROC_I(sb->s_root->d_inode);
+               if (!ei->pid) {
+                       rcu_read_lock();
+                       ei->pid = get_pid(find_pid_ns(1, ns));
+                       rcu_read_unlock();
+               }
+
+               sb->s_flags |= MS_ACTIVE;
+               ns->proc_mnt = mnt;
+       }
+
+       simple_set_mnt(mnt, sb);
+       return 0;
+}
+
+static void proc_kill_sb(struct super_block *sb)
 {
-       return get_sb_single(fs_type, flags, data, proc_fill_super);
+       struct pid_namespace *ns;
+
+       ns = (struct pid_namespace *)sb->s_fs_info;
+       kill_anon_super(sb);
+       put_pid_ns(ns);
 }
 
 static struct file_system_type proc_fs_type = {
        .name           = "proc",
        .get_sb         = proc_get_sb,
-       .kill_sb        = kill_anon_super,
+       .kill_sb        = proc_kill_sb,
 };
 
-extern int __init proc_init_inodecache(void);
 void __init proc_root_init(void)
 {
-       int err = proc_init_inodecache();
-       if (err)
-               return;
+       int err;
+
+       proc_init_inodecache();
        err = register_filesystem(&proc_fs_type);
        if (err)
                return;
-       proc_mnt = kern_mount(&proc_fs_type);
+       proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
        err = PTR_ERR(proc_mnt);
        if (IS_ERR(proc_mnt)) {
                unregister_filesystem(&proc_fs_type);
                return;
        }
-       proc_misc_init();
-       proc_net = proc_mkdir("net", NULL);
-       proc_net_stat = proc_mkdir("net/stat", NULL);
+
+       proc_symlink("mounts", NULL, "self/mounts");
+
+       proc_net_init();
 
 #ifdef CONFIG_SYSVIPC
        proc_mkdir("sysvipc", NULL);
 #endif
-#ifdef CONFIG_SYSCTL
-       proc_sys_root = proc_mkdir("sys", NULL);
-#endif
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
-       proc_mkdir("sys/fs", NULL);
-       proc_mkdir("sys/fs/binfmt_misc", NULL);
-#endif
-       proc_root_fs = proc_mkdir("fs", NULL);
-       proc_root_driver = proc_mkdir("driver", NULL);
+       proc_mkdir("fs", NULL);
+       proc_mkdir("driver", NULL);
        proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
 #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
        /* just give it a mountpoint */
@@ -76,19 +134,20 @@ void __init proc_root_init(void)
 #ifdef CONFIG_PROC_DEVICETREE
        proc_device_tree_init();
 #endif
-       proc_bus = proc_mkdir("bus", NULL);
+       proc_mkdir("bus", NULL);
+       proc_sys_init();
 }
 
-static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
+)
 {
-       /*
-        * nr_threads is actually protected by the tasklist_lock;
-        * however, it's conventional to do reads, especially for
-        * reporting, without any locking whatsoever.
-        */
-       if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */
-               dir->i_nlink = proc_root.nlink + nr_threads;
+       generic_fillattr(dentry->d_inode, stat);
+       stat->nlink = proc_root.nlink + nr_processes();
+       return 0;
+}
 
+static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+{
        if (!proc_lookup(dir, dentry, nd)) {
                return NULL;
        }
@@ -102,17 +161,12 @@ static int proc_root_readdir(struct file * filp,
        unsigned int nr = filp->f_pos;
        int ret;
 
-       lock_kernel();
-
        if (nr < FIRST_PROCESS_ENTRY) {
                int error = proc_readdir(filp, dirent, filldir);
-               if (error <= 0) {
-                       unlock_kernel();
+               if (error <= 0)
                        return error;
-               }
                filp->f_pos = FIRST_PROCESS_ENTRY;
        }
-       unlock_kernel();
 
        ret = proc_pid_readdir(filp, dirent, filldir);
        return ret;
@@ -123,7 +177,7 @@ static int proc_root_readdir(struct file * filp,
  * <pid> directories. Thus we don't use the generic
  * directory handling functions for that..
  */
-static struct file_operations proc_root_operations = {
+static const struct file_operations proc_root_operations = {
        .read            = generic_read_dir,
        .readdir         = proc_root_readdir,
 };
@@ -131,8 +185,9 @@ static struct file_operations proc_root_operations = {
 /*
  * proc root can do almost nothing..
  */
-static struct inode_operations proc_root_inode_operations = {
+static const struct inode_operations proc_root_inode_operations = {
        .lookup         = proc_root_lookup,
+       .getattr        = proc_root_getattr,
 };
 
 /*
@@ -144,18 +199,30 @@ struct proc_dir_entry proc_root = {
        .name           = "/proc",
        .mode           = S_IFDIR | S_IRUGO | S_IXUGO, 
        .nlink          = 2, 
+       .count          = ATOMIC_INIT(1),
        .proc_iops      = &proc_root_inode_operations, 
        .proc_fops      = &proc_root_operations,
        .parent         = &proc_root,
 };
 
+int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+       struct vfsmount *mnt;
+
+       mnt = kern_mount_data(&proc_fs_type, ns);
+       if (IS_ERR(mnt))
+               return PTR_ERR(mnt);
+
+       return 0;
+}
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+       mntput(ns->proc_mnt);
+}
+
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
+EXPORT_SYMBOL(proc_create_data);
 EXPORT_SYMBOL(remove_proc_entry);
-EXPORT_SYMBOL(proc_root);
-EXPORT_SYMBOL(proc_root_fs);
-EXPORT_SYMBOL(proc_net);
-EXPORT_SYMBOL(proc_net_stat);
-EXPORT_SYMBOL(proc_bus);
-EXPORT_SYMBOL(proc_root_driver);