[PATCH] nfsd: remove inline from a couple of large NFS functions
[safe/jmp/linux-2.6] / fs / namespace.c
index 46f99bc..ce97bec 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/quotaops.h>
 #include <linux/acct.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/namespace.h>
@@ -47,6 +48,10 @@ static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache;
 static struct rw_semaphore namespace_sem;
 
+/* /sys/fs */
+decl_subsys(fs, NULL, NULL);
+EXPORT_SYMBOL_GPL(fs_subsys);
+
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
        unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -213,6 +218,16 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
        return list_entry(next, struct vfsmount, mnt_child);
 }
 
+static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
+{ /* Descend from p along last children; return the final mount reached (p itself if it has no children). */
+       struct list_head *prev = p->mnt_mounts.prev; /* tail entry of p's child list */
+       while (prev != &p->mnt_mounts) { /* stop once prev points back at the list head, i.e. no children */
+               p = list_entry(prev, struct vfsmount, mnt_child); /* step down to the last child */
+               prev = p->mnt_mounts.prev;
+       }
+       return p; /* NOTE(review): copy_tree() feeds this to next_mnt(), presumably to resume after the skipped subtree */
+}
+
 static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
                                        int flag)
 {
@@ -227,8 +242,17 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
                mnt->mnt_mountpoint = mnt->mnt_root;
                mnt->mnt_parent = mnt;
 
-               if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
-                       list_add(&mnt->mnt_share, &old->mnt_share);
+               if (flag & CL_SLAVE) {
+                       list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+                       mnt->mnt_master = old;
+                       CLEAR_MNT_SHARED(mnt);
+               } else {
+                       if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
+                               list_add(&mnt->mnt_share, &old->mnt_share);
+                       if (IS_MNT_SLAVE(old))
+                               list_add(&mnt->mnt_slave, &old->mnt_slave);
+                       mnt->mnt_master = old->mnt_master;
+               }
                if (flag & CL_MAKE_SHARED)
                        set_mnt_shared(mnt);
 
@@ -336,14 +360,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                { MS_SYNCHRONOUS, ",sync" },
                { MS_DIRSYNC, ",dirsync" },
                { MS_MANDLOCK, ",mand" },
-               { MS_NOATIME, ",noatime" },
-               { MS_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        static struct proc_fs_info mnt_info[] = {
                { MNT_NOSUID, ",nosuid" },
                { MNT_NODEV, ",nodev" },
                { MNT_NOEXEC, ",noexec" },
+               { MNT_NOATIME, ",noatime" },
+               { MNT_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        struct proc_fs_info *fs_infop;
@@ -432,7 +456,7 @@ EXPORT_SYMBOL(may_umount);
 void release_mounts(struct list_head *head)
 {
        struct vfsmount *mnt;
-       while(!list_empty(head)) {
+       while (!list_empty(head)) {
                mnt = list_entry(head->next, struct vfsmount, mnt_hash);
                list_del_init(&mnt->mnt_hash);
                if (mnt->mnt_parent != mnt) {
@@ -618,7 +642,7 @@ static int mount_is_safe(struct nameidata *nd)
                if (current->uid != nd->dentry->d_inode->i_uid)
                        return -EPERM;
        }
-       if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
+       if (vfs_permission(nd, MAY_WRITE))
                return -EPERM;
        return 0;
 #endif
@@ -641,6 +665,9 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
        struct vfsmount *res, *p, *q, *r, *s;
        struct nameidata nd;
 
+       if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
+               return NULL;
+
        res = q = clone_mnt(mnt, dentry, flag);
        if (!q)
                goto Enomem;
@@ -652,6 +679,10 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
                        continue;
 
                for (s = r; s; s = next_mnt(s, r)) {
+                       if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
+                               s = skip_mnt_tree(s);
+                               continue;
+                       }
                        while (p != s->mnt_parent) {
                                p = p->mnt_parent;
                                q = q->mnt_parent;
@@ -689,18 +720,18 @@ Enomem:
  *
  *  NOTE: in the table below explains the semantics when a source mount
  *  of a given type is attached to a destination mount of a given type.
- *     ---------------------------------------------
- *     |         BIND MOUNT OPERATION              |
- *     |********************************************
- *     | source-->| shared        |       private  |
- *     | dest     |               |                |
- *     |   |      |               |                |
- *     |   v      |               |                |
- *     |********************************************
- *     |  shared  | shared (++)   |     shared (+) |
- *     |          |               |                |
- *     |non-shared| shared (+)    |      private   |
- *     *********************************************
+ * ---------------------------------------------------------------------------
+ * |         BIND MOUNT OPERATION                                            |
+ * |**************************************************************************
+ * | source-->| shared        |       private  |       slave    | unbindable |
+ * | dest     |               |                |                |            |
+ * |   |      |               |                |                |            |
+ * |   v      |               |                |                |            |
+ * |**************************************************************************
+ * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
+ * |          |               |                |                |            |
+ * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
+ * ***************************************************************************
  * A bind operation clones the source mount and mounts the clone on the
  * destination mount.
  *
@@ -710,21 +741,33 @@ Enomem:
  * (+)   the cloned mount is created under the destination mount and is marked
  *       as shared. The cloned mount is added to the peer group of the source
  *       mount.
- *     ---------------------------------------------
- *     |               MOVE MOUNT OPERATION        |
- *     |********************************************
- *     | source-->| shared        |       private  |
- *     | dest     |               |                |
- *     |   |      |               |                |
- *     |   v      |               |                |
- *     |********************************************
- *     |  shared  | shared (+)    |     shared (+) |
- *     |          |               |                |
- *     |non-shared| shared (+*)   |      private   |
- *     *********************************************
- * (+)  the mount is moved to the destination. And is then propagated to all
- *     the mounts in the propagation tree of the destination mount.
+ * (+++) the mount is propagated to all the mounts in the propagation tree
+ *       of the destination mount and the cloned mount is made a slave
+ *       of the same master as that of the source mount. The cloned mount
+ *       is marked as 'shared and slave'.
+ * (*)   the cloned mount is made a slave of the same master as that of the
+ *      source mount.
+ *
+ * ---------------------------------------------------------------------------
+ * |                   MOVE MOUNT OPERATION                                 |
+ * |**************************************************************************
+ * | source-->| shared        |       private  |       slave    | unbindable |
+ * | dest     |               |                |                |            |
+ * |   |      |               |                |                |            |
+ * |   v      |               |                |                |            |
+ * |**************************************************************************
+ * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
+ * |          |               |                |                |            |
+ * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
+ * ***************************************************************************
+ *
+ * (+)  the mount is moved to the destination and is then propagated to
+ *     all the mounts in the propagation tree of the destination mount.
  * (+*)  the mount is moved to the destination.
+ * (+++)  the mount is moved to the destination and is then propagated to
+ *     all the mounts belonging to the destination mount's propagation tree.
+ *     The mount is marked as 'shared and slave'.
+ * (*) the mount continues to be a slave at the new location.
  *
  * if the source mount is a tree, the operations explained above is
  * applied to each mount in the tree.
@@ -776,7 +819,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
                return -ENOTDIR;
 
        err = -ENOENT;
-       down(&nd->dentry->d_inode->i_sem);
+       mutex_lock(&nd->dentry->d_inode->i_mutex);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out_unlock;
 
@@ -788,7 +831,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
        if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
                err = attach_recursive_mnt(mnt, nd, NULL);
 out_unlock:
-       up(&nd->dentry->d_inode->i_sem);
+       mutex_unlock(&nd->dentry->d_inode->i_mutex);
        if (!err)
                security_sb_post_addmount(mnt, nd);
        return err;
@@ -833,6 +876,9 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 
        down_write(&namespace_sem);
        err = -EINVAL;
+       if (IS_MNT_UNBINDABLE(old_nd.mnt))
+               goto out;
+
        if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
                goto out;
 
@@ -890,6 +936,16 @@ static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
        return err;
 }
 
+static inline int tree_contains_unbindable(struct vfsmount *mnt)
+{ /* Returns 1 if mnt, or any mount reached from it via next_mnt(p, mnt), is unbindable; otherwise 0. */
+       struct vfsmount *p;
+       for (p = mnt; p; p = next_mnt(p, mnt)) { /* walk ends when next_mnt() yields NULL */
+               if (IS_MNT_UNBINDABLE(p))
+                       return 1; /* first unbindable mount found; no need to scan further */
+       }
+       return 0;
+}
+
 static int do_move_mount(struct nameidata *nd, char *old_name)
 {
        struct nameidata old_nd, parent_nd;
@@ -911,7 +967,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
                goto out;
 
        err = -ENOENT;
-       down(&nd->dentry->d_inode->i_sem);
+       mutex_lock(&nd->dentry->d_inode->i_mutex);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out1;
 
@@ -933,6 +989,12 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
         */
        if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
                goto out1;
+       /*
+        * Don't move a mount tree containing unbindable mounts to a destination
+        * mount which is shared.
+        */
+       if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
+               goto out1;
        err = -ELOOP;
        for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
                if (p == old_nd.mnt)
@@ -947,7 +1009,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
        list_del_init(&old_nd.mnt->mnt_expire);
        spin_unlock(&vfsmount_lock);
 out1:
-       up(&nd->dentry->d_inode->i_sem);
+       mutex_unlock(&nd->dentry->d_inode->i_mutex);
 out:
        up_write(&namespace_sem);
        if (!err)
@@ -1229,7 +1291,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                mnt_flags |= MNT_NODEV;
        if (flags & MS_NOEXEC)
                mnt_flags |= MNT_NOEXEC;
-       flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);
+       if (flags & MS_NOATIME)
+               mnt_flags |= MNT_NOATIME;
+       if (flags & MS_NODIRATIME)
+               mnt_flags |= MNT_NODIRATIME;
+
+       flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
+                  MS_NOATIME | MS_NODIRATIME);
 
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1245,7 +1313,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                                    data_page);
        else if (flags & MS_BIND)
                retval = do_loopback(&nd, dev_name, flags & MS_REC);
-       else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE))
+       else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
                retval = do_change_type(&nd, flags);
        else if (flags & MS_MOVE)
                retval = do_move_mount(&nd, dev_name);
@@ -1290,7 +1358,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
        down_write(&namespace_sem);
        /* First pass: copy the tree topology */
        new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
-                                       CL_EXPIRE);
+                                       CL_COPY_ALL | CL_EXPIRE);
        if (!new_ns->root) {
                up_write(&namespace_sem);
                kfree(new_ns);
@@ -1469,6 +1537,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
  * pointed to by put_old must yield the same directory as new_root. No other
  * file system may be mounted on put_old. After all, new_root is a mountpoint.
  *
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * in this situation.
+ *
  * Notes:
  *  - we don't move root/cwd if they are not at the root (reason: if something
  *    cared enough to change them, it's probably wrong to force them elsewhere)
@@ -1512,7 +1584,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        user_nd.dentry = dget(current->fs->root);
        read_unlock(&current->fs->lock);
        down_write(&namespace_sem);
-       down(&old_nd.dentry->d_inode->i_sem);
+       mutex_lock(&old_nd.dentry->d_inode->i_mutex);
        error = -EINVAL;
        if (IS_MNT_SHARED(old_nd.mnt) ||
                IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
@@ -1565,7 +1637,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        path_release(&root_parent);
        path_release(&parent_nd);
 out2:
-       up(&old_nd.dentry->d_inode->i_sem);
+       mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
        up_write(&namespace_sem);
        path_release(&user_nd);
        path_release(&old_nd);
@@ -1657,6 +1729,7 @@ void __init mnt_init(unsigned long mempages)
                i--;
        } while (i);
        sysfs_init();
+       subsystem_register(&fs_subsys);
        init_rootfs();
        init_mount_tree();
 }