ocfs2: Remove ->unblock lockres operation

[safe/jmp/linux-2.6] / fs / namei.c
diff --git a/fs/namei.c b/fs/namei.c

index faf61c3..6b591c0 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -104,7 +104,7 @@
   */
  /*
   * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
- * implemented.  Let's see if raised priority of ->s_vfs_rename_sem gives
+ * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
   * any extra contention...
   */
  
@@ -159,7 +159,7 @@ char * getname(const char __user * filename)
  #ifdef CONFIG_AUDITSYSCALL
  void putname(const char *name)
  {
-       if (unlikely(current->audit_context))
+       if (unlikely(!audit_dummy_context()))
                 audit_putname(name);
         else
                 __putname(name);
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
  
  int permission(struct inode *inode, int mask, struct nameidata *nd)
  {
+       umode_t mode = inode->i_mode;
         int retval, submask;
  
         if (mask & MAY_WRITE) {
-               umode_t mode = inode->i_mode;
  
                 /*
                  * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
         }
  
  
+       /*
+        * MAY_EXEC on regular files requires special handling: We override
+        * filesystem execute permissions if the mode bits aren't set.
+        */
+       if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+               return -EACCES;
+
         /* Ordinary permission routines do not understand MAY_APPEND. */
         submask = mask & ~MAY_APPEND;
         if (inode->i_op && inode->i_op->permission)
@@ -546,6 +553,22 @@ struct path {
         struct dentry *dentry;
  };
  
+static inline void dput_path(struct path *path, struct nameidata *nd)
+{
+       dput(path->dentry);
+       if (path->mnt != nd->mnt)
+               mntput(path->mnt);
+}
+
+static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
+{
+       dput(nd->dentry);
+       if (nd->mnt != path->mnt)
+               mntput(nd->mnt);
+       nd->mnt = path->mnt;
+       nd->dentry = path->dentry;
+}
+
  static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
  {
         int error;
@@ -555,8 +578,11 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
         touch_atime(path->mnt, dentry);
         nd_set_link(nd, NULL);
  
-       if (path->mnt == nd->mnt)
-               mntget(path->mnt);
+       if (path->mnt != nd->mnt) {
+               path_to_nameidata(path, nd);
+               dget(dentry);
+       }
+       mntget(path->mnt);
         cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
         error = PTR_ERR(cookie);
         if (!IS_ERR(cookie)) {
@@ -573,22 +599,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
         return error;
  }
  
-static inline void dput_path(struct path *path, struct nameidata *nd)
-{
-       dput(path->dentry);
-       if (path->mnt != nd->mnt)
-               mntput(path->mnt);
-}
-
-static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
-{
-       dput(nd->dentry);
-       if (nd->mnt != path->mnt)
-               mntput(nd->mnt);
-       nd->mnt = path->mnt;
-       nd->dentry = path->dentry;
-}
-
  /*
   * This limits recursive symlink follows to 8, while
   * limiting consecutive symlinks to 40.
@@ -1077,8 +1087,8 @@ static int fastcall do_path_lookup(int dfd, const char *name,
         nd->flags = flags;
         nd->depth = 0;
  
-       read_lock(&current->fs->lock);
         if (*name=='/') {
+               read_lock(&current->fs->lock);
                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
                         nd->mnt = mntget(current->fs->altrootmnt);
                         nd->dentry = dget(current->fs->altroot);
@@ -1089,46 +1099,49 @@ static int fastcall do_path_lookup(int dfd, const char *name,
                 }
                 nd->mnt = mntget(current->fs->rootmnt);
                 nd->dentry = dget(current->fs->root);
+               read_unlock(&current->fs->lock);
         } else if (dfd == AT_FDCWD) {
+               read_lock(&current->fs->lock);
                 nd->mnt = mntget(current->fs->pwdmnt);
                 nd->dentry = dget(current->fs->pwd);
+               read_unlock(&current->fs->lock);
         } else {
                 struct dentry *dentry;
  
                 file = fget_light(dfd, &fput_needed);
                 retval = -EBADF;
                 if (!file)
-                       goto unlock_fail;
+                       goto out_fail;
  
                 dentry = file->f_dentry;
  
                 retval = -ENOTDIR;
                 if (!S_ISDIR(dentry->d_inode->i_mode))
-                       goto fput_unlock_fail;
+                       goto fput_fail;
  
                 retval = file_permission(file, MAY_EXEC);
                 if (retval)
-                       goto fput_unlock_fail;
+                       goto fput_fail;
  
                 nd->mnt = mntget(file->f_vfsmnt);
                 nd->dentry = dget(dentry);
  
                 fput_light(file, fput_needed);
         }
-       read_unlock(&current->fs->lock);
         current->total_link_count = 0;
         retval = link_path_walk(name, nd);
  out:
-       if (unlikely(current->audit_context
-                    && nd && nd->dentry && nd->dentry->d_inode))
-               audit_inode(name, nd->dentry->d_inode, flags);
+       if (likely(retval == 0)) {
+               if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
+                               nd->dentry->d_inode))
+               audit_inode(name, nd->dentry->d_inode);
+       }
+out_fail:
         return retval;
  
-fput_unlock_fail:
+fput_fail:
         fput_light(file, fput_needed);
-unlock_fail:
-       read_unlock(&current->fs->lock);
-       return retval;
+       goto out_fail;
  }
  
  int fastcall path_lookup(const char *name, unsigned int flags,
@@ -1249,7 +1262,7 @@ out:
         return dentry;
  }
  
-struct dentry * lookup_hash(struct nameidata *nd)
+static struct dentry *lookup_hash(struct nameidata *nd)
  {
         return __lookup_hash(&nd->last, nd->dentry, nd);
  }
@@ -1351,6 +1364,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
                 return -ENOENT;
  
         BUG_ON(victim->d_parent->d_inode != dir);
+       audit_inode_child(victim->d_name.name, victim->d_inode, dir);
  
         error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
         if (error)
@@ -1416,30 +1430,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
         struct dentry *p;
  
         if (p1 == p2) {
-               mutex_lock(&p1->d_inode->i_mutex);
+               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
                 return NULL;
         }
  
-       down(&p1->d_inode->i_sb->s_vfs_rename_sem);
+       mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
  
         for (p = p1; p->d_parent != p; p = p->d_parent) {
                 if (p->d_parent == p2) {
-                       mutex_lock(&p2->d_inode->i_mutex);
-                       mutex_lock(&p1->d_inode->i_mutex);
+                       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
                         return p;
                 }
         }
  
         for (p = p2; p->d_parent != p; p = p->d_parent) {
                 if (p->d_parent == p1) {
-                       mutex_lock(&p1->d_inode->i_mutex);
-                       mutex_lock(&p2->d_inode->i_mutex);
+                       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
                         return p;
                 }
         }
  
-       mutex_lock(&p1->d_inode->i_mutex);
-       mutex_lock(&p2->d_inode->i_mutex);
+       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
         return NULL;
  }
  
@@ -1448,7 +1462,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
         mutex_unlock(&p1->d_inode->i_mutex);
         if (p1 != p2) {
                 mutex_unlock(&p2->d_inode->i_mutex);
-               up(&p1->d_inode->i_sb->s_vfs_rename_sem);
+               mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
         }
  }
  
@@ -1470,7 +1484,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
         DQUOT_INIT(dir);
         error = dir->i_op->create(dir, dentry, mode, nd);
         if (!error)
-               fsnotify_create(dir, dentry->d_name.name);
+               fsnotify_create(dir, dentry);
         return error;
  }
  
@@ -1626,6 +1640,12 @@ do_last:
                 goto exit;
         }
  
+       if (IS_ERR(nd->intent.open.file)) {
+               mutex_unlock(&dir->d_inode->i_mutex);
+               error = PTR_ERR(nd->intent.open.file);
+               goto exit_dput;
+       }
+
         /* Negative dentry, just create the file */
         if (!path.dentry->d_inode) {
                 if (!IS_POSIXACL(dir->d_inode))
@@ -1646,6 +1666,7 @@ do_last:
          * It already exists.
          */
         mutex_unlock(&dir->d_inode->i_mutex);
+       audit_inode_update(path.dentry->d_inode);
  
         error = -EEXIST;
         if (flag & O_EXCL)
@@ -1656,6 +1677,7 @@ do_last:
                 if (flag & O_NOFOLLOW)
                         goto exit_dput;
         }
+
         error = -ENOENT;
         if (!path.dentry->d_inode)
                 goto exit_dput;
@@ -1699,8 +1721,14 @@ do_link:
         if (error)
                 goto exit_dput;
         error = __do_follow_link(&path, nd);
-       if (error)
+       if (error) {
+               /* Does anyone understand the code flow here? Or is it only
+                * me who is so stupid? Anathema to whoever designed this
+                * nonsense with "intent.open".
+                */
+               release_open_intent(nd);
                 return error;
+       }
         nd->flags &= ~LOOKUP_PARENT;
         if (nd->last_type == LAST_BIND)
                 goto ok;
@@ -1738,7 +1766,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
  {
         struct dentry *dentry = ERR_PTR(-EEXIST);
  
-       mutex_lock(&nd->dentry->d_inode->i_mutex);
+       mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         /*
          * Yucky last component or no last component at all?
          * (foo/., foo/.., /////)
@@ -1746,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
         if (nd->last_type != LAST_NORM)
                 goto fail;
         nd->flags &= ~LOOKUP_PARENT;
+       nd->flags |= LOOKUP_CREATE;
+       nd->intent.open.flags = O_EXCL;
  
         /*
          * Do the final lookup.
@@ -1791,7 +1821,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
         DQUOT_INIT(dir);
         error = dir->i_op->mknod(dir, dentry, mode, dev);
         if (!error)
-               fsnotify_create(dir, dentry->d_name.name);
+               fsnotify_create(dir, dentry);
         return error;
  }
  
@@ -1868,7 +1898,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         DQUOT_INIT(dir);
         error = dir->i_op->mkdir(dir, dentry, mode);
         if (!error)
-               fsnotify_mkdir(dir, dentry->d_name.name);
+               fsnotify_mkdir(dir, dentry);
         return error;
  }
  
@@ -1995,7 +2025,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
                         error = -EBUSY;
                         goto exit1;
         }
-       mutex_lock(&nd.dentry->d_inode->i_mutex);
+       mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_hash(&nd);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
@@ -2069,7 +2099,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
         error = -EISDIR;
         if (nd.last_type != LAST_NORM)
                 goto exit1;
-       mutex_lock(&nd.dentry->d_inode->i_mutex);
+       mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_hash(&nd);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
@@ -2131,7 +2161,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
         DQUOT_INIT(dir);
         error = dir->i_op->symlink(dir, dentry, oldname);
         if (!error)
-               fsnotify_create(dir, dentry->d_name.name);
+               fsnotify_create(dir, dentry);
         return error;
  }
  
@@ -2208,7 +2238,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
         error = dir->i_op->link(old_dentry, dir, new_dentry);
         mutex_unlock(&old_dentry->d_inode->i_mutex);
         if (!error)
-               fsnotify_create(dir, new_dentry->d_name.name);
+               fsnotify_create(dir, new_dentry);
         return error;
  }
  
@@ -2222,18 +2252,24 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
   * and other special files.  --ADM
   */
  asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
-                          int newdfd, const char __user *newname)
+                          int newdfd, const char __user *newname,
+                          int flags)
  {
         struct dentry *new_dentry;
         struct nameidata nd, old_nd;
         int error;
         char * to;
  
+       if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
+               return -EINVAL;
+
         to = getname(newname);
         if (IS_ERR(to))
                 return PTR_ERR(to);
  
-       error = __user_walk_fd(olddfd, oldname, 0, &old_nd);
+       error = __user_walk_fd(olddfd, oldname,
+                              flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
+                              &old_nd);
         if (error)
                 goto exit;
         error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
@@ -2261,7 +2297,7 @@ exit:
  
  asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
  {
-       return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname);
+       return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
  }
  
  /*
@@ -2271,17 +2307,17 @@ asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
   *     a) we can get into loop creation. Check is done in is_subdir().
   *     b) race potential - two innocent renames can create a loop together.
   *        That's where 4.4 screws up. Current fix: serialization on
- *        sb->s_vfs_rename_sem. We might be more accurate, but that's another
+ *        sb->s_vfs_rename_mutex. We might be more accurate, but that's another
   *        story.
   *     c) we have to lock _three_ objects - parents and victim (if it exists).
   *        And that - after we got ->i_mutex on parents (until then we don't know
   *        whether the target exists).  Solution: try to be smart with locking
   *        order for inodes.  We rely on the fact that tree topology may change
- *        only under ->s_vfs_rename_sem _and_ that parent of the object we
+ *        only under ->s_vfs_rename_mutex _and_ that parent of the object we
   *        move will be locked.  Thus we can rank directories by the tree
   *        (ancestors first) and rank all non-directories after them.
   *        That works since everybody except rename does "lock parent, lookup,
- *        lock child" and rename is under ->s_vfs_rename_sem.
+ *        lock child" and rename is under ->s_vfs_rename_mutex.
   *        HOWEVER, it relies on the assumption that any object with ->lookup()
   *        has no more than 1 dentry.  If "hybrid" objects will ever appear,
   *        we'd better make sure that there's no link(2) for them.
@@ -2334,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
                 dput(new_dentry);
         }
         if (!error)
-               d_move(old_dentry,new_dentry);
+               if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+                       d_move(old_dentry,new_dentry);
         return error;
  }
  
@@ -2357,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
         else
                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
         if (!error) {
-               /* The following d_move() should become unconditional */
-               if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
+               if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                         d_move(old_dentry, new_dentry);
         }
         if (target)
@@ -2560,8 +2596,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
  {
         struct page * page;
         struct address_space *mapping = dentry->d_inode->i_mapping;
-       page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
-                               NULL);
+       page = read_mapping_page(mapping, 0, NULL);
         if (IS_ERR(page))
                 goto sync_fail;
         wait_on_page_locked(page);
@@ -2607,22 +2642,35 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
         }
  }
  
-int page_symlink(struct inode *inode, const char *symname, int len)
+int __page_symlink(struct inode *inode, const char *symname, int len,
+               gfp_t gfp_mask)
  {
         struct address_space *mapping = inode->i_mapping;
-       struct page *page = grab_cache_page(mapping, 0);
+       struct page *page;
         int err = -ENOMEM;
         char *kaddr;
  
+retry:
+       page = find_or_create_page(mapping, 0, gfp_mask);
         if (!page)
                 goto fail;
         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
+       if (err == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry;
+       }
         if (err)
                 goto fail_map;
         kaddr = kmap_atomic(page, KM_USER0);
         memcpy(kaddr, symname, len-1);
         kunmap_atomic(kaddr, KM_USER0);
-       mapping->a_ops->commit_write(NULL, page, 0, len-1);
+       err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
+       if (err == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry;
+       }
+       if (err)
+               goto fail_map;
         /*
          * Notice that we are _not_ going to block here - end of page is
          * unmapped, so this will only try to map the rest of page, see
@@ -2632,7 +2680,8 @@ int page_symlink(struct inode *inode, const char *symname, int len)
          */
         if (!PageUptodate(page)) {
                 err = mapping->a_ops->readpage(NULL, page);
-               wait_on_page_locked(page);
+               if (err != AOP_TRUNCATED_PAGE)
+                       wait_on_page_locked(page);
         } else {
                 unlock_page(page);
         }
@@ -2648,6 +2697,12 @@ fail:
         return err;
  }
  
+int page_symlink(struct inode *inode, const char *symname, int len)
+{
+       return __page_symlink(inode, symname, len,
+                       mapping_gfp_mask(inode->i_mapping));
+}
+
  struct inode_operations page_symlink_inode_operations = {
         .readlink       = generic_readlink,
         .follow_link    = page_follow_link_light,
@@ -2661,11 +2716,11 @@ EXPORT_SYMBOL(follow_up);
  EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
  EXPORT_SYMBOL(getname);
  EXPORT_SYMBOL(lock_rename);
-EXPORT_SYMBOL(lookup_hash);
  EXPORT_SYMBOL(lookup_one_len);
  EXPORT_SYMBOL(page_follow_link_light);
  EXPORT_SYMBOL(page_put_link);
  EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(__page_symlink);
  EXPORT_SYMBOL(page_symlink);
  EXPORT_SYMBOL(page_symlink_inode_operations);
  EXPORT_SYMBOL(path_lookup);