postpone __putname() until after do_last()
[safe/jmp/linux-2.6] / fs / namei.c
index 1b26b16..30ba3f3 100644 (file)
@@ -689,33 +689,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
        set_root(nd);
 
        while(1) {
-               struct vfsmount *parent;
                struct dentry *old = nd->path.dentry;
 
                if (nd->path.dentry == nd->root.dentry &&
                    nd->path.mnt == nd->root.mnt) {
                        break;
                }
-               spin_lock(&dcache_lock);
                if (nd->path.dentry != nd->path.mnt->mnt_root) {
-                       nd->path.dentry = dget(nd->path.dentry->d_parent);
-                       spin_unlock(&dcache_lock);
+                       /* rare case of legitimate dget_parent()... */
+                       nd->path.dentry = dget_parent(nd->path.dentry);
                        dput(old);
                        break;
                }
-               spin_unlock(&dcache_lock);
-               spin_lock(&vfsmount_lock);
-               parent = nd->path.mnt->mnt_parent;
-               if (parent == nd->path.mnt) {
-                       spin_unlock(&vfsmount_lock);
+               if (!follow_up(&nd->path))
                        break;
-               }
-               mntget(parent);
-               nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
-               spin_unlock(&vfsmount_lock);
-               dput(old);
-               mntput(nd->path.mnt);
-               nd->path.mnt = parent;
        }
        follow_mount(&nd->path);
 }
@@ -823,6 +810,17 @@ fail:
 }
 
 /*
+ * This is a temporary kludge to deal with "automount" symlinks; the proper
+ * solution is to trigger them in follow_mount(), so that do_lookup()
+ * would do the right thing.  To be killed before 2.6.34-final.
+ */
+static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
+{
+       return inode && unlikely(inode->i_op->follow_link) &&
+               ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
+}
+
+/*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
  * the final dentry. We expect 'base' to be positive and a directory.
@@ -942,8 +940,7 @@ last_component:
                if (err)
                        break;
                inode = next.dentry->d_inode;
-               if ((lookup_flags & LOOKUP_FOLLOW)
-                   && inode && inode->i_op->follow_link) {
+               if (follow_on_final(inode, lookup_flags)) {
                        err = do_follow_link(&next, nd);
                        if (err)
                                goto return_err;
@@ -1337,7 +1334,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
                return -ENOENT;
 
        BUG_ON(victim->d_parent->d_inode != dir);
-       audit_inode_child(victim->d_name.name, victim, dir);
+       audit_inode_child(victim, dir);
 
        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
        if (error)
@@ -1493,7 +1490,7 @@ int may_open(struct path *path, int acc_mode, int flag)
         * An append-only file must be opened in append mode for writing.
         */
        if (IS_APPEND(inode)) {
-               if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
+               if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
                        return -EPERM;
                if (flag & O_TRUNC)
                        return -EPERM;
@@ -1537,7 +1534,7 @@ static int handle_truncate(struct path *path)
  * what get passed to sys_open().
  */
 static int __open_namei_create(struct nameidata *nd, struct path *path,
-                               int flag, int mode)
+                               int open_flag, int mode)
 {
        int error;
        struct dentry *dir = nd->path.dentry;
@@ -1555,7 +1552,7 @@ out_unlock:
        if (error)
                return error;
        /* Don't check for write permission, don't truncate */
-       return may_open(&nd->path, 0, flag & ~O_TRUNC);
+       return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
 /*
@@ -1593,6 +1590,153 @@ static int open_will_truncate(int flag, struct inode *inode)
        return (flag & O_TRUNC);
 }
 
+static struct file *finish_open(struct nameidata *nd,
+                               int open_flag, int flag, int acc_mode)
+{
+       struct file *filp;      /* result: opened file or ERR_PTR() */
+       int will_truncate;
+       int error;
+
+       will_truncate = open_will_truncate(flag, nd->path.dentry->d_inode);
+       if (will_truncate) {    /* block rw->ro remount until filp holds a write */
+               error = mnt_want_write(nd->path.mnt);
+               if (error)
+                       goto exit;
+       }
+       error = may_open(&nd->path, acc_mode, open_flag);
+       if (error) {
+               if (will_truncate)
+                       mnt_drop_write(nd->path.mnt);
+               goto exit;
+       }
+       filp = nameidata_to_filp(nd);
+       if (!IS_ERR(filp)) {
+               error = ima_file_check(filp, acc_mode);
+               if (error) {
+                       fput(filp);
+                       filp = ERR_PTR(error);  /* filp was put above */
+               }
+       }
+       if (!IS_ERR(filp)) {
+               if (acc_mode & MAY_WRITE)
+                       vfs_dq_init(nd->path.dentry->d_inode);
+
+               if (will_truncate) {
+                       error = handle_truncate(&nd->path);
+                       if (error) {
+                               fput(filp);
+                               filp = ERR_PTR(error);
+                       }
+               }
+       }
+       /*
+        * It is now safe to drop the temporary mnt write
+        * taken for truncation: the filp has had a write
+        * taken on its behalf.
+        */
+       if (will_truncate)
+               mnt_drop_write(nd->path.mnt);
+       return filp;
+
+exit:  /* failed before nameidata_to_filp(): clean up nd ourselves */
+       if (!IS_ERR(nd->intent.open.file))
+               release_open_intent(nd);
+       path_put(&nd->path);
+       return ERR_PTR(error);
+}
+
+static struct file *do_last(struct nameidata *nd, struct path *path,
+                           int open_flag, int flag, int acc_mode,
+                           int mode, const char *pathname,
+                           struct dentry *dir, int *is_link)
+{
+       struct file *filp;
+       int error;
+
+       *is_link = 0;   /* becomes 1 only when we return NULL for a symlink */
+
+       error = PTR_ERR(path->dentry);
+       if (IS_ERR(path->dentry)) {     /* the caller's lookup_hash() failed */
+               mutex_unlock(&dir->d_inode->i_mutex);   /* taken by the caller */
+               goto exit;
+       }
+
+       if (IS_ERR(nd->intent.open.file)) {
+               error = PTR_ERR(nd->intent.open.file);
+               goto exit_mutex_unlock;
+       }
+
+       /* Negative dentry, just create the file */
+       if (!path->dentry->d_inode) {
+               /*
+                * This write is needed to ensure that a
+                * ro->rw transition does not occur between
+                * the time when the file is created and when
+                * a permanent write count is taken through
+                * the 'struct file' in nameidata_to_filp().
+                */
+               error = mnt_want_write(nd->path.mnt);
+               if (error)
+                       goto exit_mutex_unlock;
+               error = __open_namei_create(nd, path, open_flag, mode);
+               if (error) {
+                       mnt_drop_write(nd->path.mnt);
+                       goto exit;      /* NOTE(review): callee presumably unlocked dir - confirm */
+               }
+               filp = nameidata_to_filp(nd);
+               mnt_drop_write(nd->path.mnt);
+               if (!IS_ERR(filp)) {
+                       error = ima_file_check(filp, acc_mode);
+                       if (error) {
+                               fput(filp);
+                               filp = ERR_PTR(error);
+                       }
+               }
+               return filp;
+       }
+
+       /*
+        * It already exists: drop the parent's i_mutex first.
+        */
+       mutex_unlock(&dir->d_inode->i_mutex);
+       audit_inode(pathname, path->dentry);
+
+       error = -EEXIST;
+       if (flag & O_EXCL)
+               goto exit_dput;
+
+       if (__follow_mount(path)) {
+               error = -ELOOP;
+               if (flag & O_NOFOLLOW)
+                       goto exit_dput;
+       }
+
+       error = -ENOENT;
+       if (!path->dentry->d_inode)
+               goto exit_dput;
+       if (path->dentry->d_inode->i_op->follow_link) {
+               *is_link = 1;
+               return NULL;    /* caller jumps to its do_link handling */
+       }
+
+       path_to_nameidata(path, nd);
+       error = -EISDIR;        /* directories are refused on this path */
+       if (S_ISDIR(path->dentry->d_inode->i_mode))
+               goto exit;
+       filp = finish_open(nd, open_flag, flag, acc_mode);
+       return filp;
+
+exit_mutex_unlock:     /* reached with dir's i_mutex still held */
+       mutex_unlock(&dir->d_inode->i_mutex);
+exit_dput:
+       path_put_conditional(path, nd);
+exit:  /* common tail: drop the open intent (if valid) and nd->path */
+       if (!IS_ERR(nd->intent.open.file))
+               release_open_intent(nd);
+       path_put(&nd->path);
+       return ERR_PTR(error);
+}
+
 /*
  * Note that the low bits of the passed in "open_flag"
  * are not the same as in the local variable "flag". See
@@ -1604,11 +1748,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
        struct file *filp;
        struct nameidata nd;
        int error;
-       struct path path, save;
+       struct path path;
        struct dentry *dir;
        int count = 0;
-       int will_truncate;
        int flag = open_to_namei_flags(open_flag);
+       int force_reval = 0;
+       int is_link;
 
        /*
         * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
@@ -1620,7 +1765,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
                open_flag |= O_DSYNC;
 
        if (!acc_mode)
-               acc_mode = MAY_OPEN | ACC_MODE(flag);
+               acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
        /* O_TRUNC implies we need access checks for write permissions */
        if (flag & O_TRUNC)
@@ -1660,9 +1805,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
        /*
         * Create - we need to know the parent.
         */
+reval:
        error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
        if (error)
                return ERR_PTR(error);
+       if (force_reval)
+               nd.flags |= LOOKUP_REVAL;
        error = path_walk(pathname, &nd);
        if (error) {
                if (nd.root.mnt)
@@ -1697,134 +1845,20 @@ struct file *do_filp_open(int dfd, const char *pathname,
        mutex_lock(&dir->d_inode->i_mutex);
        path.dentry = lookup_hash(&nd);
        path.mnt = nd.path.mnt;
-
-do_last:
-       error = PTR_ERR(path.dentry);
-       if (IS_ERR(path.dentry)) {
-               mutex_unlock(&dir->d_inode->i_mutex);
-               goto exit;
-       }
-
-       if (IS_ERR(nd.intent.open.file)) {
-               error = PTR_ERR(nd.intent.open.file);
-               goto exit_mutex_unlock;
-       }
-
-       /* Negative dentry, just create the file */
-       if (!path.dentry->d_inode) {
-               /*
-                * This write is needed to ensure that a
-                * ro->rw transition does not occur between
-                * the time when the file is created and when
-                * a permanent write count is taken through
-                * the 'struct file' in nameidata_to_filp().
-                */
-               error = mnt_want_write(nd.path.mnt);
-               if (error)
-                       goto exit_mutex_unlock;
-               error = __open_namei_create(&nd, &path, flag, mode);
-               if (error) {
-                       mnt_drop_write(nd.path.mnt);
-                       goto exit;
-               }
-               filp = nameidata_to_filp(&nd);
-               mnt_drop_write(nd.path.mnt);
-               if (nd.root.mnt)
-                       path_put(&nd.root);
-               if (!IS_ERR(filp)) {
-                       error = ima_path_check(&filp->f_path, filp->f_mode &
-                                      (MAY_READ | MAY_WRITE | MAY_EXEC));
-                       if (error) {
-                               fput(filp);
-                               filp = ERR_PTR(error);
-                       }
-               }
-               return filp;
-       }
-
-       /*
-        * It already exists.
-        */
-       mutex_unlock(&dir->d_inode->i_mutex);
-       audit_inode(pathname, path.dentry);
-
-       error = -EEXIST;
-       if (flag & O_EXCL)
-               goto exit_dput;
-
-       if (__follow_mount(&path)) {
-               error = -ELOOP;
-               if (flag & O_NOFOLLOW)
-                       goto exit_dput;
-       }
-
-       error = -ENOENT;
-       if (!path.dentry->d_inode)
-               goto exit_dput;
-       if (path.dentry->d_inode->i_op->follow_link)
+       filp = do_last(&nd, &path, open_flag, flag, acc_mode, mode,
+                      pathname, dir, &is_link);
+       if (is_link)
                goto do_link;
+       if (nd.root.mnt)
+               path_put(&nd.root);
+       return filp;
 
-       path_to_nameidata(&path, &nd);
-       error = -EISDIR;
-       if (S_ISDIR(path.dentry->d_inode->i_mode))
-               goto exit;
 ok:
-       /*
-        * Consider:
-        * 1. may_open() truncates a file
-        * 2. a rw->ro mount transition occurs
-        * 3. nameidata_to_filp() fails due to
-        *    the ro mount.
-        * That would be inconsistent, and should
-        * be avoided. Taking this mnt write here
-        * ensures that (2) can not occur.
-        */
-       will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
-       if (will_truncate) {
-               error = mnt_want_write(nd.path.mnt);
-               if (error)
-                       goto exit;
-       }
-       error = may_open(&nd.path, acc_mode, flag);
-       if (error) {
-               if (will_truncate)
-                       mnt_drop_write(nd.path.mnt);
-               goto exit;
-       }
-       filp = nameidata_to_filp(&nd);
-       if (!IS_ERR(filp)) {
-               error = ima_path_check(&filp->f_path, filp->f_mode &
-                              (MAY_READ | MAY_WRITE | MAY_EXEC));
-               if (error) {
-                       fput(filp);
-                       filp = ERR_PTR(error);
-               }
-       }
-       if (!IS_ERR(filp)) {
-               if (acc_mode & MAY_WRITE)
-                       vfs_dq_init(nd.path.dentry->d_inode);
-
-               if (will_truncate) {
-                       error = handle_truncate(&nd.path);
-                       if (error) {
-                               fput(filp);
-                               filp = ERR_PTR(error);
-                       }
-               }
-       }
-       /*
-        * It is now safe to drop the mnt write
-        * because the filp has had a write taken
-        * on its behalf.
-        */
-       if (will_truncate)
-               mnt_drop_write(nd.path.mnt);
+       filp = finish_open(&nd, open_flag, flag, acc_mode);
        if (nd.root.mnt)
                path_put(&nd.root);
        return filp;
 
-exit_mutex_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
        path_put_conditional(&path, &nd);
 exit:
@@ -1854,17 +1888,7 @@ do_link:
        error = security_inode_follow_link(path.dentry, &nd);
        if (error)
                goto exit_dput;
-       save = nd.path;
-       path_get(&save);
        error = __do_follow_link(&path, &nd);
-       if (error == -ESTALE) {
-               /* nd.path had been dropped */
-               nd.path = save;
-               path_get(&nd.path);
-               nd.flags |= LOOKUP_REVAL;
-               error = __do_follow_link(&path, &nd);
-       }
-       path_put(&save);
        path_put(&path);
        if (error) {
                /* Does someone understand code flow here? Or it is only
@@ -1874,6 +1898,10 @@ do_link:
                release_open_intent(&nd);
                if (nd.root.mnt)
                        path_put(&nd.root);
+               if (error == -ESTALE && !force_reval) {
+                       force_reval = 1;
+                       goto reval;
+               }
                return ERR_PTR(error);
        }
        nd.flags &= ~LOOKUP_PARENT;
@@ -1895,8 +1923,14 @@ do_link:
        mutex_lock(&dir->d_inode->i_mutex);
        path.dentry = lookup_hash(&nd);
        path.mnt = nd.path.mnt;
+       filp = do_last(&nd, &path, open_flag, flag, acc_mode, mode,
+                      pathname, dir, &is_link);
        __putname(nd.last.name);
-       goto do_last;
+       if (is_link)
+               goto do_link;
+       if (nd.root.mnt)
+               path_put(&nd.root);
+       return filp;
 }
 
 /**
@@ -2269,8 +2303,11 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
                error = -EBUSY;
        else {
                error = security_inode_unlink(dir, dentry);
-               if (!error)
+               if (!error) {
                        error = dir->i_op->unlink(dir, dentry);
+                       if (!error)
+                               dentry->d_inode->i_flags |= S_DEAD;
+               }
        }
        mutex_unlock(&dentry->d_inode->i_mutex);
 
@@ -2623,6 +2660,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
        else
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (!error) {
+               if (target)
+                       target->i_flags |= S_DEAD;
                if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                        d_move(old_dentry, new_dentry);
        }
@@ -2665,11 +2704,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
        else
                error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-       if (!error) {
-               const char *new_name = old_dentry->d_name.name;
-               fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+       if (!error)
+               fsnotify_move(old_dir, new_dir, old_name, is_dir,
                              new_dentry->d_inode, old_dentry);
-       }
        fsnotify_oldname_free(old_name);
 
        return error;