Merge branch 'for-2.6.34-incoming' into for-2.6.35-incoming
[safe/jmp/linux-2.6] / fs / nfsd / vfs.c
index 4145083..4eb9baa 100644 (file)
@@ -1,7 +1,5 @@
 #define MSNFS  /* HACK HACK */
 /*
- * linux/fs/nfsd/vfs.c
- *
  * File operations used by nfsd. Some of these have been ripped from
  * other parts of the kernel because they weren't exported, others
  * are partial duplicates with added or changed functionality.
  * Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
  */
 
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/major.h>
 #include <linux/splice.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
 #include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/in.h>
-#include <linux/module.h>
 #include <linux/namei.h>
-#include <linux/vfs.h>
 #include <linux/delay.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#ifdef CONFIG_NFSD_V3
-#include <linux/nfs3.h>
-#include <linux/nfsd/xdr3.h>
-#endif /* CONFIG_NFSD_V3 */
-#include <linux/nfsd/nfsfh.h>
-#include <linux/quotaops.h>
 #include <linux/fsnotify.h>
-#include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
+#include <linux/jhash.h>
+#include <linux/ima.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
+
+#ifdef CONFIG_NFSD_V3
+#include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
+
 #ifdef CONFIG_NFSD_V4
-#include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
-#include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
-#include <linux/jhash.h>
-#include <linux/ima.h>
 
-#include <asm/uaccess.h>
+#include "nfsd.h"
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY               NFSDDBG_FILEOP
 
@@ -110,12 +93,21 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 
        exp2 = rqst_exp_get_by_name(rqstp, &path);
        if (IS_ERR(exp2)) {
-               if (PTR_ERR(exp2) != -ENOENT)
-                       err = PTR_ERR(exp2);
+               err = PTR_ERR(exp2);
+               /*
+                * We normally allow NFS clients to continue
+                * "underneath" a mountpoint that is not exported.
+                * The exception is V4ROOT, where no traversal is ever
+                * allowed without an explicit export of the new
+                * directory.
+                */
+               if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
+                       err = 0;
                path_put(&path);
                goto out;
        }
-       if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
+       if (nfsd_v4client(rqstp) ||
+               (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
                /* successfully crossed mount point */
                /*
                 * This is subtle: path.dentry is *not* on path.mnt
@@ -134,6 +126,53 @@ out:
        return err;
 }
 
+static void follow_to_parent(struct path *path)
+{      /* Repoint path->dentry at its parent, first stepping up through follow_up() while path sits on the root of path->mnt. */
+       struct dentry *dp;
+
+       while (path->dentry == path->mnt->mnt_root && follow_up(path))
+               ;       /* empty body: the follow_up() call in the condition does the stepping; stops when it returns 0 */
+       dp = dget_parent(path->dentry); /* parent of wherever the loop above left us */
+       dput(path->dentry);     /* release the old dentry only after dp has been obtained from it */
+       path->dentry = dp;      /* path->mnt is left as set by the loop; only the dentry is replaced here */
+}
+
+static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
+{      /* Step up from dparent: stores the resulting dentry in *dentryp, may replace *exp; returns 0 or PTR_ERR() of the failed export lookup. */
+       struct svc_export *exp2;
+       struct path path = {.mnt = mntget((*exp)->ex_path.mnt), /* local working path; every return below calls path_put() on it */
+                           .dentry = dget(dparent)};
+
+       follow_to_parent(&path);        /* path now names the parent of dparent */
+
+       exp2 = rqst_exp_parent(rqstp, &path);
+       if (PTR_ERR(exp2) == -ENOENT) { /* -ENOENT is not treated as a failure: stay at dparent, *exp untouched */
+               *dentryp = dget(dparent);
+       } else if (IS_ERR(exp2)) {      /* any other error is propagated; *dentryp and *exp are not modified */
+               path_put(&path);
+               return PTR_ERR(exp2);
+       } else {        /* lookup succeeded: hand back the parent dentry and switch the caller to exp2 */
+               *dentryp = dget(path.dentry);
+               exp_put(*exp);  /* drop the old export before overwriting the caller's pointer */
+               *exp = exp2;
+       }
+       path_put(&path);
+       return 0;
+}
+
+/*
+ * For nfsd purposes, we treat V4ROOT exports as though there were an export at
+ * *every* directory.  Returns nonzero if nfsd should treat dentry as a mountpoint, 0 otherwise.
+ */
+int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
+{
+       if (d_mountpoint(dentry))       /* d_mountpoint() says yes: counts regardless of export flags */
+               return 1;
+       if (!(exp->ex_flags & NFSEXP_V4ROOT))   /* without NFSEXP_V4ROOT nothing else counts */
+               return 0;
+       return dentry->d_inode != NULL; /* V4ROOT: any dentry that has an inode counts */
+}
+
 __be32
 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                   const char *name, unsigned int len,
@@ -162,35 +201,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        dentry = dget(dparent);
                else if (dparent != exp->ex_path.dentry)
                        dentry = dget_parent(dparent);
-               else if (!EX_NOHIDE(exp))
+               else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
                        dentry = dget(dparent); /* .. == . just like at / */
                else {
                        /* checking mountpoint crossing is very different when stepping up */
-                       struct svc_export *exp2 = NULL;
-                       struct dentry *dp;
-                       struct path path = {.mnt = mntget(exp->ex_path.mnt),
-                                           .dentry = dget(dparent)};
-
-                       while (path.dentry == path.mnt->mnt_root &&
-                              follow_up(&path))
-                               ;
-                       dp = dget_parent(path.dentry);
-                       dput(path.dentry);
-                       path.dentry = dp;
-
-                       exp2 = rqst_exp_parent(rqstp, &path);
-                       if (PTR_ERR(exp2) == -ENOENT) {
-                               dentry = dget(dparent);
-                       } else if (IS_ERR(exp2)) {
-                               host_err = PTR_ERR(exp2);
-                               path_put(&path);
+                       host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
+                       if (host_err)
                                goto out_nfserr;
-                       } else {
-                               dentry = dget(path.dentry);
-                               exp_put(exp);
-                               exp = exp2;
-                       }
-                       path_put(&path);
                }
        } else {
                fh_lock(fhp);
@@ -201,7 +218,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                /*
                 * check if we have crossed a mount point ...
                 */
-               if (d_mountpoint(dentry)) {
+               if (nfsd_mountpoint(dentry, exp)) {
                        if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
                                dput(dentry);
                                goto out_nfserr;
@@ -256,6 +273,32 @@ out:
        return err;
 }
 
+/*
+ * Commit metadata changes for the inode behind fhp to stable storage; returns 0 without syncing unless EX_ISSYNC(fhp->fh_export), else the sync result.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+       struct inode *inode = fhp->fh_dentry->d_inode;
+       const struct export_operations *export_ops = inode->i_sb->s_export_op;  /* NOTE(review): dereferenced below without a NULL check - confirm s_export_op is always set here */
+       int error = 0;
+
+       if (!EX_ISSYNC(fhp->fh_export)) /* nothing to do unless EX_ISSYNC() holds for this export */
+               return 0;
+
+       if (export_ops->commit_metadata) {      /* the filesystem supplies its own commit hook: use it */
+               error = export_ops->commit_metadata(inode);
+       } else {        /* no hook: fall back to sync_inode() with the writeback_control below */
+               struct writeback_control wbc = {
+                       .sync_mode = WB_SYNC_ALL,
+                       .nr_to_write = 0, /* metadata only */
+               };
+
+               error = sync_inode(inode, &wbc);
+       }
+
+       return error;
+}
 
 /*
  * Set various file attributes.
@@ -346,7 +389,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                 * If we are changing the size of the file, then
                 * we need to break all leases.
                 */
-               host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+               host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
                if (host_err == -EWOULDBLOCK)
                        host_err = -ETIMEDOUT;
                if (host_err) /* ENOMEM or EWOULDBLOCK */
@@ -362,7 +405,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                        put_write_access(inode);
                        goto out_nfserr;
                }
-               vfs_dq_init(inode);
        }
 
        /* sanitize the mode change */
@@ -401,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        if (size_change)
                put_write_access(inode);
        if (!err)
-               if (EX_ISSYNC(fhp->fh_export))
-                       write_inode_now(inode, 1);
+               commit_metadata(fhp);
 out:
        return err;
 
@@ -678,12 +719,13 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
                        int access, struct file **filp)
 {
-       const struct cred *cred = current_cred();
        struct dentry   *dentry;
        struct inode    *inode;
        int             flags = O_RDONLY|O_LARGEFILE;
        __be32          err;
-       int             host_err;
+       int             host_err = 0;
+
+       validate_process_creds();
 
        /*
         * If we get here, then the client has already done an "open",
@@ -718,7 +760,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         * Check to see if there are any leases on this file.
         * This may block while leases are broken.
         */
-       host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
+       if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
+               host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
        if (host_err == -EWOULDBLOCK)
                host_err = -ETIMEDOUT;
        if (host_err) /* NOMEM or WOULDBLOCK */
@@ -729,18 +772,17 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
                        flags = O_RDWR|O_LARGEFILE;
                else
                        flags = O_WRONLY|O_LARGEFILE;
-
-               vfs_dq_init(inode);
        }
        *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-                           flags, cred);
+                           flags, current_cred());
        if (IS_ERR(*filp))
                host_err = PTR_ERR(*filp);
        else
-               ima_counts_get(*filp);
+               host_err = ima_file_check(*filp, access);
 out_nfserr:
        err = nfserrno(host_err);
 out:
+       validate_process_creds();
        return err;
 }
 
@@ -754,46 +796,6 @@ nfsd_close(struct file *filp)
 }
 
 /*
- * Sync a file
- * As this calls fsync (not fdatasync) there is no need for a write_inode
- * after it.
- */
-static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-                             const struct file_operations *fop)
-{
-       struct inode *inode = dp->d_inode;
-       int (*fsync) (struct file *, struct dentry *, int);
-       int err;
-
-       err = filemap_fdatawrite(inode->i_mapping);
-       if (err == 0 && fop && (fsync = fop->fsync))
-               err = fsync(filp, dp, 0);
-       if (err == 0)
-               err = filemap_fdatawait(inode->i_mapping);
-
-       return err;
-}
-
-static int
-nfsd_sync(struct file *filp)
-{
-        int err;
-       struct inode *inode = filp->f_path.dentry->d_inode;
-       dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
-       mutex_lock(&inode->i_mutex);
-       err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
-       mutex_unlock(&inode->i_mutex);
-
-       return err;
-}
-
-int
-nfsd_sync_dir(struct dentry *dp)
-{
-       return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
-}
-
-/*
  * Obtain the readahead parameters for the file
  * specified by (dev, ino).
  */
@@ -996,7 +998,7 @@ static int wait_for_concurrent_writes(struct file *file)
 
        if (inode->i_state & I_DIRTY) {
                dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-               err = nfsd_sync(file);
+               err = vfs_fsync(file, file->f_path.dentry, 0);
        }
        last_ino = inode->i_ino;
        last_dev = inode->i_sb->s_dev;
@@ -1144,8 +1146,9 @@ out:
 #ifdef CONFIG_NFSD_V3
 /*
  * Commit all pending writes to stable storage.
- * Strictly speaking, we could sync just the indicated file region here,
- * but there's currently no way we can ask the VFS to do so.
+ *
+ * Note: we only guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced.
  *
  * Unfortunately we cannot lock the file to make sure we return full WCC
  * data to the client, as locking happens lower down in the filesystem.
@@ -1155,23 +1158,33 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
        struct file     *file;
-       __be32          err;
+       loff_t          end = LLONG_MAX;
+       __be32          err = nfserr_inval;
 
-       if ((u64)count > ~(u64)offset)
-               return nfserr_inval;
+       if (offset < 0)
+               goto out;
+       if (count != 0) {
+               end = offset + (loff_t)count - 1;
+               if (end < offset)
+                       goto out;
+       }
 
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_open(rqstp, fhp, S_IFREG,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
        if (err)
-               return err;
+               goto out;
        if (EX_ISSYNC(fhp->fh_export)) {
-               if (file->f_op && file->f_op->fsync) {
-                       err = nfserrno(nfsd_sync(file));
-               } else {
+               int err2 = vfs_fsync_range(file, file->f_path.dentry,
+                               offset, end, 0);
+
+               if (err2 != -EINVAL)
+                       err = nfserrno(err2);
+               else
                        err = nfserr_notsupp;
-               }
        }
 
        nfsd_close(file);
+out:
        return err;
 }
 #endif /* CONFIG_NFSD_V3 */
@@ -1324,12 +1337,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out_nfserr;
        }
 
-       if (EX_ISSYNC(fhp->fh_export)) {
-               err = nfserrno(nfsd_sync_dir(dentry));
-               write_inode_now(dchild->d_inode, 1);
-       }
+       err = nfsd_create_setattr(rqstp, resfhp, iap);
 
-       err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+       /*
+        * nfsd_setattr already committed the child.  Transactional filesystems
+        * had a chance to commit changes for both parent and child
+        * simultaneously making the following commit_metadata a noop.
+        */
+       err2 = nfserrno(commit_metadata(fhp));
        if (err2)
                err = err2;
        mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1361,7 +1376,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
        struct dentry   *dentry, *dchild = NULL;
        struct inode    *dirp;
        __be32          err;
-       __be32          err2;
        int             host_err;
        __u32           v_mtime=0, v_atime=0;
 
@@ -1456,11 +1470,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (created)
                *created = 1;
 
-       if (EX_ISSYNC(fhp->fh_export)) {
-               err = nfserrno(nfsd_sync_dir(dentry));
-               /* setattr will sync the child (or not) */
-       }
-
        nfsd_check_ignore_resizing(iap);
 
        if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1475,9 +1484,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
        }
 
  set_attr:
-       err2 = nfsd_create_setattr(rqstp, resfhp, iap);
-       if (err2)
-               err = err2;
+       err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+       /*
+        * nfsd_setattr already committed the child (and possibly also the parent).
+        */
+       if (!err)
+               err = nfserrno(commit_metadata(fhp));
 
        mnt_drop_write(fhp->fh_export->ex_path.mnt);
        /*
@@ -1592,12 +1605,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
                }
        } else
                host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
-       if (!host_err) {
-               if (EX_ISSYNC(fhp->fh_export))
-                       host_err = nfsd_sync_dir(dentry);
-       }
        err = nfserrno(host_err);
+       if (!err)
+               err = nfserrno(commit_metadata(fhp));
        fh_unlock(fhp);
 
        mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1659,11 +1669,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        }
        host_err = vfs_link(dold, dirp, dnew);
        if (!host_err) {
-               if (EX_ISSYNC(ffhp->fh_export)) {
-                       err = nfserrno(nfsd_sync_dir(ddir));
-                       write_inode_now(dest, 1);
-               }
-               err = 0;
+               err = nfserrno(commit_metadata(ffhp));
+               if (!err)
+                       err = nfserrno(commit_metadata(tfhp));
        } else {
                if (host_err == -EXDEV && rqstp->rq_vers == 2)
                        err = nfserr_acces;
@@ -1759,10 +1767,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                goto out_dput_new;
 
        host_err = vfs_rename(fdir, odentry, tdir, ndentry);
-       if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
-               host_err = nfsd_sync_dir(tdentry);
+       if (!host_err) {
+               host_err = commit_metadata(tfhp);
                if (!host_err)
-                       host_err = nfsd_sync_dir(fdentry);
+                       host_err = commit_metadata(ffhp);
        }
 
        mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1843,12 +1851,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
        dput(rdentry);
 
-       if (host_err)
-               goto out_drop;
-       if (EX_ISSYNC(fhp->fh_export))
-               host_err = nfsd_sync_dir(dentry);
+       if (!host_err)
+               host_err = commit_metadata(fhp);
 
-out_drop:
        mnt_drop_write(fhp->fh_export->ex_path.mnt);
 out_nfserr:
        err = nfserrno(host_err);
@@ -2115,8 +2120,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
         */
        path.mnt = exp->ex_path.mnt;
        path.dentry = dentry;
-       err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
-                            IMA_COUNT_LEAVE);
 nfsd_out:
        return err? nfserrno(err) : 0;
 }