nfsd4: reshuffle lease-setting code to allow reuse

[safe/jmp/linux-2.6] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 4740f7b..c768f73 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@
  #include <linux/seq_file.h>
  #include <linux/mnt_namespace.h>
  #include <linux/namei.h>
+#include <linux/nsproxy.h>
  #include <linux/security.h>
  #include <linux/mount.h>
  #include <linux/ramfs.h>
@@ -42,6 +43,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
  static int event;
  static DEFINE_IDA(mnt_id_ida);
  static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;
+static int mnt_group_start = 1;
  
  static struct list_head *mount_hashtable __read_mostly;
  static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +72,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
  retry:
         ida_pre_get(&mnt_id_ida, GFP_KERNEL);
         spin_lock(&vfsmount_lock);
-       res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+       res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+       if (!res)
+               mnt_id_start = mnt->mnt_id + 1;
         spin_unlock(&vfsmount_lock);
         if (res == -EAGAIN)
                 goto retry;
@@ -79,8 +84,11 @@ retry:
  
  static void mnt_free_id(struct vfsmount *mnt)
  {
+       int id = mnt->mnt_id;
         spin_lock(&vfsmount_lock);
-       ida_remove(&mnt_id_ida, mnt->mnt_id);
+       ida_remove(&mnt_id_ida, id);
+       if (mnt_id_start > id)
+               mnt_id_start = id;
         spin_unlock(&vfsmount_lock);
  }
  
@@ -91,10 +99,18 @@ static void mnt_free_id(struct vfsmount *mnt)
   */
  static int mnt_alloc_group_id(struct vfsmount *mnt)
  {
+       int res;
+
         if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
                 return -ENOMEM;
  
-       return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+       res = ida_get_new_above(&mnt_group_ida,
+                               mnt_group_start,
+                               &mnt->mnt_group_id);
+       if (!res)
+               mnt_group_start = mnt->mnt_group_id + 1;
+
+       return res;
  }
  
  /*
@@ -102,7 +118,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
   */
  void mnt_release_group_id(struct vfsmount *mnt)
  {
-       ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+       int id = mnt->mnt_group_id;
+       ida_remove(&mnt_group_ida, id);
+       if (mnt_group_start > id)
+               mnt_group_start = id;
         mnt->mnt_group_id = 0;
  }
  
@@ -297,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
   */
  int mnt_want_write_file(struct file *file)
  {
-       if (!(file->f_mode & FMODE_WRITE))
+       struct inode *inode = file->f_dentry->d_inode;
+       if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
                 return mnt_want_write(file->f_path.mnt);
         else
                 return mnt_clone_write(file->f_path.mnt);
@@ -945,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
  int may_umount(struct vfsmount *mnt)
  {
         int ret = 1;
+       down_read(&namespace_sem);
         spin_lock(&vfsmount_lock);
         if (propagate_mount_busy(mnt, 2))
                 ret = 0;
         spin_unlock(&vfsmount_lock);
+       up_read(&namespace_sem);
         return ret;
  }
  
@@ -1060,11 +1082,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
                  * we just try to remount it readonly.
                  */
                 down_write(&sb->s_umount);
-               if (!(sb->s_flags & MS_RDONLY)) {
-                       lock_kernel();
+               if (!(sb->s_flags & MS_RDONLY))
                         retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-                       unlock_kernel();
-               }
                 up_write(&sb->s_umount);
                 return retval;
         }
@@ -1335,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
         if (err)
                 goto out_cleanup_ids;
  
+       spin_lock(&vfsmount_lock);
+
         if (IS_MNT_SHARED(dest_mnt)) {
                 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                         set_mnt_shared(p);
         }
-
-       spin_lock(&vfsmount_lock);
         if (parent_path) {
                 detach_mnt(source_mnt, parent_path);
                 attach_mnt(source_mnt, path);
@@ -1517,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
                 err = change_mount_flags(path->mnt, flags);
         else
                 err = do_remount_sb(sb, flags, data, 0);
-       if (!err)
+       if (!err) {
+               spin_lock(&vfsmount_lock);
+               mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
                 path->mnt->mnt_flags = mnt_flags;
+               spin_unlock(&vfsmount_lock);
+       }
         up_write(&sb->s_umount);
         if (!err) {
                 security_sb_post_remount(path->mnt, flags, data);
@@ -1623,14 +1646,16 @@ static int do_new_mount(struct path *path, char *type, int flags,
  {
         struct vfsmount *mnt;
  
-       if (!type || !memchr(type, 0, PAGE_SIZE))
+       if (!type)
                 return -EINVAL;
  
         /* we need capabilities... */
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
+       lock_kernel();
         mnt = do_kern_mount(type, flags, name, data);
+       unlock_kernel();
         if (IS_ERR(mnt))
                 return PTR_ERR(mnt);
  
@@ -1646,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
  {
         int err;
  
+       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+
         down_write(&namespace_sem);
         /* Something was mounted here while we slept */
         while (d_mountpoint(path->dentry) &&
@@ -1852,6 +1879,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
         return 0;
  }
  
+int copy_mount_string(const void __user *data, char **where)
+{
+       char *tmp;
+
+       if (!data) {
+               *where = NULL;
+               return 0;
+       }
+
+       tmp = strndup_user(data, PAGE_SIZE);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+
+       *where = tmp;
+       return 0;
+}
+
  /*
   * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
   * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1881,12 +1925,20 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
  
         if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                 return -EINVAL;
-       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-               return -EINVAL;
  
         if (data_page)
                 ((char *)data_page)[PAGE_SIZE - 1] = 0;
  
+       /* ... and get the mountpoint */
+       retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
+       if (retval)
+               return retval;
+
+       retval = security_sb_mount(dev_name, &path,
+                                  type_page, flags, data_page);
+       if (retval)
+               goto dput_out;
+
         /* Default to relatime unless overridden */
         if (!(flags & MS_NOATIME))
                 mnt_flags |= MNT_RELATIME;
@@ -1911,17 +1963,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                    MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
                    MS_STRICTATIME);
  
-       /* ... and get the mountpoint */
-       retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
-       if (retval)
-               return retval;
-
-       retval = security_sb_mount(dev_name, &path,
-                                  type_page, flags, data_page);
-       if (retval)
-               goto dput_out;
-
-       lock_kernel();
         if (flags & MS_REMOUNT)
                 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
                                     data_page);
@@ -1934,12 +1975,26 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
         else
                 retval = do_new_mount(&path, type_page, flags, mnt_flags,
                                       dev_name, data_page);
-       unlock_kernel();
  dput_out:
         path_put(&path);
         return retval;
  }
  
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+       struct mnt_namespace *new_ns;
+
+       new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+       if (!new_ns)
+               return ERR_PTR(-ENOMEM);
+       atomic_set(&new_ns->count, 1);
+       new_ns->root = NULL;
+       INIT_LIST_HEAD(&new_ns->list);
+       init_waitqueue_head(&new_ns->poll);
+       new_ns->event = 0;
+       return new_ns;
+}
+
  /*
   * Allocate a new namespace structure and populate it with contents
   * copied from the namespace of the passed in task structure.
@@ -1951,14 +2006,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
         struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
         struct vfsmount *p, *q;
  
-       new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-       if (!new_ns)
-               return ERR_PTR(-ENOMEM);
-
-       atomic_set(&new_ns->count, 1);
-       INIT_LIST_HEAD(&new_ns->list);
-       init_waitqueue_head(&new_ns->poll);
-       new_ns->event = 0;
+       new_ns = alloc_mnt_ns();
+       if (IS_ERR(new_ns))
+               return new_ns;
  
         down_write(&namespace_sem);
         /* First pass: copy the tree topology */
@@ -2022,43 +2072,63 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
         return new_ns;
  }
  
+/**
+ * create_mnt_ns - creates a private namespace and adds a root filesystem
+ * @mnt: pointer to the new root filesystem mountpoint
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+       struct mnt_namespace *new_ns;
+
+       new_ns = alloc_mnt_ns();
+       if (!IS_ERR(new_ns)) {
+               mnt->mnt_ns = new_ns;
+               new_ns->root = mnt;
+               list_add(&new_ns->list, &new_ns->root->mnt_list);
+       }
+       return new_ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                 char __user *, type, unsigned long, flags, void __user *, data)
  {
-       int retval;
+       int ret;
+       char *kernel_type;
+       char *kernel_dir;
+       char *kernel_dev;
         unsigned long data_page;
-       unsigned long type_page;
-       unsigned long dev_page;
-       char *dir_page;
  
-       retval = copy_mount_options(type, &type_page);
-       if (retval < 0)
-               return retval;
+       ret = copy_mount_string(type, &kernel_type);
+       if (ret < 0)
+               goto out_type;
  
-       dir_page = getname(dir_name);
-       retval = PTR_ERR(dir_page);
-       if (IS_ERR(dir_page))
-               goto out1;
+       kernel_dir = getname(dir_name);
+       if (IS_ERR(kernel_dir)) {
+               ret = PTR_ERR(kernel_dir);
+               goto out_dir;
+       }
  
-       retval = copy_mount_options(dev_name, &dev_page);
-       if (retval < 0)
-               goto out2;
+       ret = copy_mount_string(dev_name, &kernel_dev);
+       if (ret < 0)
+               goto out_dev;
  
-       retval = copy_mount_options(data, &data_page);
-       if (retval < 0)
-               goto out3;
+       ret = copy_mount_options(data, &data_page);
+       if (ret < 0)
+               goto out_data;
  
-       retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
-                         flags, (void *)data_page);
-       free_page(data_page);
+       ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+               (void *) data_page);
  
-out3:
-       free_page(dev_page);
-out2:
-       putname(dir_page);
-out1:
-       free_page(type_page);
-       return retval;
+       free_page(data_page);
+out_data:
+       kfree(kernel_dev);
+out_dev:
+       putname(kernel_dir);
+out_dir:
+       kfree(kernel_type);
+out_type:
+       return ret;
  }
  
  /*
@@ -2197,16 +2267,9 @@ static void __init init_mount_tree(void)
         mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
         if (IS_ERR(mnt))
                 panic("Can't create rootfs");
-       ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-       if (!ns)
+       ns = create_mnt_ns(mnt);
+       if (IS_ERR(ns))
                 panic("Can't allocate initial namespace");
-       atomic_set(&ns->count, 1);
-       INIT_LIST_HEAD(&ns->list);
-       init_waitqueue_head(&ns->poll);
-       ns->event = 0;
-       list_add(&mnt->mnt_list, &ns->list);
-       ns->root = mnt;
-       mnt->mnt_ns = ns;
  
         init_task.nsproxy->mnt_ns = ns;
         get_mnt_ns(ns);
@@ -2249,10 +2312,14 @@ void __init mnt_init(void)
         init_mount_tree();
  }
  
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
  {
-       struct vfsmount *root = ns->root;
+       struct vfsmount *root;
         LIST_HEAD(umount_list);
+
+       if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+               return;
+       root = ns->root;
         ns->root = NULL;
         spin_unlock(&vfsmount_lock);
         down_write(&namespace_sem);
@@ -2263,3 +2330,4 @@ void __put_mnt_ns(struct mnt_namespace *ns)
         release_mounts(&umount_list);
         kfree(ns);
  }
+EXPORT_SYMBOL(put_mnt_ns);