}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
- exp_put(exp);
- *expp = exp2;
+ /*
+ * This is subtle: dentry is *not* under mnt at this point.
+ * The only reason we are safe is that original mnt is pinned
+ * down by exp, so we should dput before putting exp.
+ */
dput(dentry);
*dpp = mounts;
+ exp_put(exp);
+ *expp = exp2;
} else {
exp_put(exp2);
dput(mounts);
put_write_access(inode);
goto out_nfserr;
}
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
}
/* sanitize the mode change */
}
/* Revoke setuid/setgid on chown */
- if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
- ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) {
+ if (!S_ISDIR(inode->i_mode) &&
+ (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
+ ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
iap->ia_valid |= ATTR_KILL_PRIV;
if (iap->ia_valid & ATTR_MODE) {
/* we're setting mode too, just clear the s*id bits */
static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
{
ssize_t buflen;
+ ssize_t ret;
buflen = vfs_getxattr(dentry, key, NULL, 0);
if (buflen <= 0)
if (!*buf)
return -ENOMEM;
- return vfs_getxattr(dentry, key, *buf, buflen);
+ ret = vfs_getxattr(dentry, key, *buf, buflen);
+ if (ret < 0)
+ kfree(*buf);
+ return ret;
}
#endif
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
int access, struct file **filp)
{
+ const struct cred *cred = current_cred();
struct dentry *dentry;
struct inode *inode;
int flags = O_RDONLY|O_LARGEFILE;
else
flags = O_WRONLY|O_LARGEFILE;
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- flags);
+ flags, cred);
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
out_nfserr:
return err;
}
-
static int
nfsd_sync(struct file *filp)
mutex_unlock(&dentry->d_inode->i_mutex);
}
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates C. Juszak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ static ino_t last_ino;
+ static dev_t last_dev;
+ int err = 0;
+
+ if (atomic_read(&inode->i_writecount) > 1
+ || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+ dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+ msleep(10);
+ dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+ }
+
+ if (inode->i_state & I_DIRTY) {
+ dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+ err = nfsd_sync(file);
+ }
+ last_ino = inode->i_ino;
+ last_dev = inode->i_sb->s_dev;
+ return err;
+}
+
static __be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long cnt, int *stablep)
+ unsigned long *cnt, int *stablep)
{
struct svc_export *exp;
struct dentry *dentry;
__be32 err = 0;
int host_err;
int stable = *stablep;
+ int use_wgather;
#ifdef MSNFS
err = nfserr_perm;
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt)))
+ (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
goto out;
#endif
* - the sync export option has been set, or
* - the client requested O_SYNC behavior (NFSv3 feature).
* - The file system doesn't support fsync().
- * When gathered writes have been configured for this volume,
+ * When NFSv2 gathered writes have been configured for this volume,
* flushing the data to disk is handled separately below.
*/
+ use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!file->f_op->fsync) {/* COMMIT3 cannot work */
stable = 2;
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp))
+ if (stable && !use_wgather) {
+ spin_lock(&file->f_lock);
file->f_flags |= O_SYNC;
+ spin_unlock(&file->f_lock);
+ }
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs);
if (host_err >= 0) {
- nfsdstats.io_write += cnt;
+ *cnt = host_err;
+ nfsdstats.io_write += host_err;
fsnotify_modify(file->f_path.dentry);
}
if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
kill_suid(dentry);
- if (host_err >= 0 && stable) {
- static ino_t last_ino;
- static dev_t last_dev;
-
- /*
- * Gathered writes: If another process is currently
- * writing to the file, there's a high chance
- * this is another nfsd (triggered by a bulk write
- * from a client's biod). Rather than syncing the
- * file with each write request, we sleep for 10 msec.
- *
- * I don't know if this roughly approximates
- * C. Juszak's idea of gathered writes, but it's a
- * nice and simple solution (IMHO), and it seems to
- * work:-)
- */
- if (EX_WGATHER(exp)) {
- if (atomic_read(&inode->i_writecount) > 1
- || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
- dprintk("nfsd: write defer %d\n", task_pid_nr(current));
- msleep(10);
- dprintk("nfsd: write resume %d\n", task_pid_nr(current));
- }
-
- if (inode->i_state & I_DIRTY) {
- dprintk("nfsd: write sync %d\n", task_pid_nr(current));
- host_err=nfsd_sync(file);
- }
-#if 0
- wake_up(&inode->i_wait);
-#endif
- }
- last_ino = inode->i_ino;
- last_dev = inode->i_sb->s_dev;
- }
+ if (host_err >= 0 && stable && use_wgather)
+ host_err = wait_for_concurrent_writes(file);
dprintk("nfsd: write complete host_err=%d\n", host_err);
if (host_err >= 0)
err = 0;
- else
+ else
err = nfserrno(host_err);
out:
return err;
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
int *stablep)
{
__be32 err = 0;
* send along the gid on create when it tries to implement
* setgid directories via NFS:
*/
- if (current->fsuid != 0)
+ if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
return 0;
}
+/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
+ * setting size to 0 may fail for some specific file systems by the permission
+ * checking which requires WRITE permission but the mode is 000.
+ * we ignore the resizing(to 0) on the just new created file, since the size is
+ * 0 after file created.
+ *
+ * call this only after vfs_create() is called.
+ * */
+static void
+nfsd_check_ignore_resizing(struct iattr *iap)
+{
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+}
+
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
dirp = dentry->d_inode;
err = nfserr_notdir;
- if(!dirp->i_op || !dirp->i_op->lookup)
+ if (!dirp->i_op->lookup)
goto out;
/*
* Check whether the response file handle has been verified yet.
switch (type) {
case S_IFREG:
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+ if (!host_err)
+ nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
/* Get all the sanity checks out of the way before
* we lock the parent. */
err = nfserr_notdir;
- if(!dirp->i_op || !dirp->i_op->lookup)
+ if (!dirp->i_op->lookup)
goto out;
fh_lock_nested(fhp, I_MUTEX_PARENT);
/* setattr will sync the child (or not) */
}
+ nfsd_check_ignore_resizing(iap);
+
if (createmode == NFS3_CREATE_EXCLUSIVE) {
/* Cram the verifier into atime/mtime */
iap->ia_valid = ATTR_MTIME|ATTR_ATIME
inode = dentry->d_inode;
err = nfserr_inval;
- if (!inode->i_op || !inode->i_op->readlink)
+ if (!inode->i_op->readlink)
goto out;
touch_atime(fhp->fh_export->ex_path.mnt, dentry);
return 0;
}
-static int nfsd_buffered_readdir(struct file *file, filldir_t func,
- struct readdir_cd *cdp, loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
+ struct readdir_cd *cdp, loff_t *offsetp)
{
struct readdir_data buf;
struct buffered_dirent *de;
buf.dirent = (void *)__get_free_page(GFP_KERNEL);
if (!buf.dirent)
- return -ENOMEM;
+ return nfserrno(-ENOMEM);
offset = *offsetp;
- cdp->err = nfserr_eof; /* will be cleared on successful read */
while (1) {
+ struct inode *dir_inode = file->f_path.dentry->d_inode;
unsigned int reclen;
+ cdp->err = nfserr_eof; /* will be cleared on successful read */
buf.used = 0;
buf.full = 0;
if (!size)
break;
+ /*
+ * Various filldir functions may end up calling back into
+ * lookup_one_len() and the file system's ->lookup() method.
+ * These expect i_mutex to be held, as it would within readdir.
+ */
+ host_err = mutex_lock_killable(&dir_inode->i_mutex);
+ if (host_err)
+ break;
+
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
if (func(cdp, de->name, de->namlen, de->offset,
de->ino, de->d_type))
- goto done;
+ break;
if (cdp->err != nfs_ok)
- goto done;
+ break;
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
- offset = vfs_llseek(file, 0, SEEK_CUR);
- if (!buf.full)
+ mutex_unlock(&dir_inode->i_mutex);
+ if (size > 0) /* We bailed out early */
break;
+
+ offset = vfs_llseek(file, 0, SEEK_CUR);
}
- done:
free_page((unsigned long)(buf.dirent));
if (host_err)
IS_APPEND(inode)? " append" : "",
__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
dprintk(" owner %d/%d user %d/%d\n",
- inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+ inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
#endif
/* Normally we reject any write/sattr etc access on a read-only file
* with NFSv3.
*/
if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
- inode->i_uid == current->fsuid)
+ inode->i_uid == current_fsuid())
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
size_t size;
int error;
- if (!IS_POSIXACL(inode) || !inode->i_op ||
+ if (!IS_POSIXACL(inode) ||
!inode->i_op->setxattr || !inode->i_op->removexattr)
return -EOPNOTSUPP;
switch(type) {