nfsd4: reshuffle lease-setting code to allow reuse
[safe/jmp/linux-2.6] / fs / namespace.c
index 4740f7b..c768f73 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/mnt_namespace.h>
 #include <linux/namei.h>
+#include <linux/nsproxy.h>
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/ramfs.h>
@@ -42,6 +43,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;   /* first id mnt_alloc_id() asks the IDA for */
+static int mnt_group_start = 1;        /* first id mnt_alloc_group_id() asks for */
 
 static struct list_head *mount_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +72,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
 retry:
        ida_pre_get(&mnt_id_ida, GFP_KERNEL);
        spin_lock(&vfsmount_lock);
-       res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+       res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+       if (!res)
+               mnt_id_start = mnt->mnt_id + 1;
        spin_unlock(&vfsmount_lock);
        if (res == -EAGAIN)
                goto retry;
@@ -79,8 +84,11 @@ retry:
 
 static void mnt_free_id(struct vfsmount *mnt)
 {
+       int id = mnt->mnt_id;
        spin_lock(&vfsmount_lock);
-       ida_remove(&mnt_id_ida, mnt->mnt_id);
+       ida_remove(&mnt_id_ida, id);
+       if (mnt_id_start > id)          /* freed id lies below the search hint */
+               mnt_id_start = id;      /* so mnt_alloc_id() starts from it */
        spin_unlock(&vfsmount_lock);
 }
 
@@ -91,10 +99,18 @@ static void mnt_free_id(struct vfsmount *mnt)
  */
 static int mnt_alloc_group_id(struct vfsmount *mnt)
 {
+       int res;
+
        if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
                return -ENOMEM;
 
-       return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+       res = ida_get_new_above(&mnt_group_ida,
+                               mnt_group_start,
+                               &mnt->mnt_group_id);
+       if (!res)       /* allocated: move the hint past the id just handed out */
+               mnt_group_start = mnt->mnt_group_id + 1;
+
+       return res;
 }
 
 /*
@@ -102,7 +118,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
  */
 void mnt_release_group_id(struct vfsmount *mnt)
 {
-       ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+       int id = mnt->mnt_group_id;
+       ida_remove(&mnt_group_ida, id);
+       if (mnt_group_start > id)       /* freed id lies below the search hint */
+               mnt_group_start = id;   /* next allocation may start from it */
        mnt->mnt_group_id = 0;
 }
 
@@ -297,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
  */
 int mnt_want_write_file(struct file *file)
 {
-       if (!(file->f_mode & FMODE_WRITE))
+       struct inode *inode = file->f_dentry->d_inode;
+       if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
                return mnt_want_write(file->f_path.mnt);
        else
                return mnt_clone_write(file->f_path.mnt);
@@ -945,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
 int may_umount(struct vfsmount *mnt)
 {
        int ret = 1;
+       down_read(&namespace_sem);      /* shared hold across the busy check */
        spin_lock(&vfsmount_lock);
        if (propagate_mount_busy(mnt, 2))
                ret = 0;
        spin_unlock(&vfsmount_lock);
+       up_read(&namespace_sem);        /* dropped after vfsmount_lock */
        return ret;
 }
 
@@ -1060,11 +1082,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
                 * we just try to remount it readonly.
                 */
                down_write(&sb->s_umount);
-               if (!(sb->s_flags & MS_RDONLY)) {
-                       lock_kernel();
+               if (!(sb->s_flags & MS_RDONLY))
                        retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-                       unlock_kernel();
-               }
                up_write(&sb->s_umount);
                return retval;
        }
@@ -1335,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
        if (err)
                goto out_cleanup_ids;
 
+       spin_lock(&vfsmount_lock);
+
        if (IS_MNT_SHARED(dest_mnt)) {
                for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                        set_mnt_shared(p);
        }
-
-       spin_lock(&vfsmount_lock);
        if (parent_path) {
                detach_mnt(source_mnt, parent_path);
                attach_mnt(source_mnt, path);
@@ -1517,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
                err = change_mount_flags(path->mnt, flags);
        else
                err = do_remount_sb(sb, flags, data, 0);
-       if (!err)
+       if (!err) {
+               spin_lock(&vfsmount_lock);
+               mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
                path->mnt->mnt_flags = mnt_flags;
+               spin_unlock(&vfsmount_lock);
+       }
        up_write(&sb->s_umount);
        if (!err) {
                security_sb_post_remount(path->mnt, flags, data);
@@ -1623,14 +1646,16 @@ static int do_new_mount(struct path *path, char *type, int flags,
 {
        struct vfsmount *mnt;
 
-       if (!type || !memchr(type, 0, PAGE_SIZE))
+       if (!type)
                return -EINVAL;
 
        /* we need capabilities... */
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       lock_kernel();
        mnt = do_kern_mount(type, flags, name, data);
+       unlock_kernel();
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);
 
@@ -1646,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 {
        int err;
 
+       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+
        down_write(&namespace_sem);
        /* Something was mounted here while we slept */
        while (d_mountpoint(path->dentry) &&
@@ -1852,6 +1879,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
        return 0;
 }
 
+/*
+ * Copy optional user string @data (bounded by PAGE_SIZE) into *where.
+ */
+int copy_mount_string(const void __user *data, char **where)
+{
+       char *str = NULL;       /* a NULL @data is reported as a NULL string */
+
+       if (data) {
+               str = strndup_user(data, PAGE_SIZE);
+               if (IS_ERR(str))
+                       return PTR_ERR(str);    /* *where is left untouched */
+       }
+
+       *where = str;
+       return 0;
+}
+
 /*
  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1881,12 +1925,20 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 
        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
-       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-               return -EINVAL;
 
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
 
+       /* ... and get the mountpoint */
+       retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
+       if (retval)
+               return retval;
+
+       retval = security_sb_mount(dev_name, &path,
+                                  type_page, flags, data_page);
+       if (retval)
+               goto dput_out;
+
        /* Default to relatime unless overriden */
        if (!(flags & MS_NOATIME))
                mnt_flags |= MNT_RELATIME;
@@ -1911,17 +1963,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
                   MS_STRICTATIME);
 
-       /* ... and get the mountpoint */
-       retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
-       if (retval)
-               return retval;
-
-       retval = security_sb_mount(dev_name, &path,
-                                  type_page, flags, data_page);
-       if (retval)
-               goto dput_out;
-
-       lock_kernel();
        if (flags & MS_REMOUNT)
                retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
                                    data_page);
@@ -1934,12 +1975,26 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
        else
                retval = do_new_mount(&path, type_page, flags, mnt_flags,
                                      dev_name, data_page);
-       unlock_kernel();
 dput_out:
        path_put(&path);
        return retval;
 }
 
+/* Allocate an mnt_namespace holding one reference, with no root set. */
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+       struct mnt_namespace *ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+
+       if (!ns)
+               return ERR_PTR(-ENOMEM);
+       ns->root = NULL;
+       ns->event = 0;
+       atomic_set(&ns->count, 1);
+       INIT_LIST_HEAD(&ns->list);
+       init_waitqueue_head(&ns->poll);
+       return ns;
+}
+
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -1951,14 +2006,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
        struct vfsmount *p, *q;
 
-       new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-       if (!new_ns)
-               return ERR_PTR(-ENOMEM);
-
-       atomic_set(&new_ns->count, 1);
-       INIT_LIST_HEAD(&new_ns->list);
-       init_waitqueue_head(&new_ns->poll);
-       new_ns->event = 0;
+       new_ns = alloc_mnt_ns();
+       if (IS_ERR(new_ns))
+               return new_ns;
 
        down_write(&namespace_sem);
        /* First pass: copy the tree topology */
@@ -2022,43 +2072,63 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
        return new_ns;
 }
 
+/**
+ * create_mnt_ns - allocate a namespace and install @mnt as its root
+ * @mnt: mount that becomes the root of the new namespace
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+       struct mnt_namespace *ns = alloc_mnt_ns();
+
+       if (IS_ERR(ns))
+               return ns;      /* hand the ERR_PTR back to the caller */
+
+       ns->root = mnt;
+       mnt->mnt_ns = ns;
+       list_add(&ns->list, &mnt->mnt_list);
+       return ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                char __user *, type, unsigned long, flags, void __user *, data)
 {
-       int retval;
+       int ret;
+       char *kernel_type;
+       char *kernel_dir;
+       char *kernel_dev;
        unsigned long data_page;
-       unsigned long type_page;
-       unsigned long dev_page;
-       char *dir_page;
 
-       retval = copy_mount_options(type, &type_page);
-       if (retval < 0)
-               return retval;
+       ret = copy_mount_string(type, &kernel_type);
+       if (ret < 0)
+               goto out_type;  /* nothing to release yet */
 
-       dir_page = getname(dir_name);
-       retval = PTR_ERR(dir_page);
-       if (IS_ERR(dir_page))
-               goto out1;
+       kernel_dir = getname(dir_name);
+       if (IS_ERR(kernel_dir)) {
+               ret = PTR_ERR(kernel_dir);
+               goto out_dir;   /* only kernel_type is held */
+       }
 
-       retval = copy_mount_options(dev_name, &dev_page);
-       if (retval < 0)
-               goto out2;
+       ret = copy_mount_string(dev_name, &kernel_dev);
+       if (ret < 0)
+               goto out_dev;   /* kernel_type and kernel_dir are held */
 
-       retval = copy_mount_options(data, &data_page);
-       if (retval < 0)
-               goto out3;
+       ret = copy_mount_options(data, &data_page);
+       if (ret < 0)
+               goto out_data;  /* kernel_dev must be released as well */
 
-       retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
-                         flags, (void *)data_page);
-       free_page(data_page);
+       ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+               (void *) data_page);
 
-out3:
-       free_page(dev_page);
-out2:
-       putname(dir_page);
-out1:
-       free_page(type_page);
-       return retval;
+       free_page(data_page);
+out_data:
+       kfree(kernel_dev);      /* set by copy_mount_string(); may be NULL */
+out_dev:
+       putname(kernel_dir);    /* pairs with getname() above */
+out_dir:
+       kfree(kernel_type);     /* set by copy_mount_string(); may be NULL */
+out_type:
+       return ret;
 }
 
 /*
@@ -2197,16 +2267,9 @@ static void __init init_mount_tree(void)
        mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
        if (IS_ERR(mnt))
                panic("Can't create rootfs");
-       ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-       if (!ns)
+       ns = create_mnt_ns(mnt);
+       if (IS_ERR(ns))
                panic("Can't allocate initial namespace");
-       atomic_set(&ns->count, 1);
-       INIT_LIST_HEAD(&ns->list);
-       init_waitqueue_head(&ns->poll);
-       ns->event = 0;
-       list_add(&mnt->mnt_list, &ns->list);
-       ns->root = mnt;
-       mnt->mnt_ns = ns;
 
        init_task.nsproxy->mnt_ns = ns;
        get_mnt_ns(ns);
@@ -2249,10 +2312,14 @@ void __init mnt_init(void)
        init_mount_tree();
 }
 
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
 {
-       struct vfsmount *root = ns->root;
+       struct vfsmount *root;
        LIST_HEAD(umount_list);
+
+       if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+               return;
+       root = ns->root;
        ns->root = NULL;
        spin_unlock(&vfsmount_lock);
        down_write(&namespace_sem);
@@ -2263,3 +2330,4 @@ void __put_mnt_ns(struct mnt_namespace *ns)
        release_mounts(&umount_list);
        kfree(ns);
 }
+EXPORT_SYMBOL(put_mnt_ns);