netfilter: nf_conntrack: death_by_timeout() fix
[safe/jmp/linux-2.6] / net / sunrpc / rpc_pipe.c
index b382809..9ced062 100644 (file)
@@ -8,14 +8,13 @@
  * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
  *
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/dnotify.h>
+#include <linux/fsnotify.h>
 #include <linux/kernel.h>
 
 #include <asm/ioctls.h>
@@ -34,97 +33,128 @@ static int rpc_mount_count;
 static struct file_system_type rpc_pipe_fs_type;
 
 
-static kmem_cache_t *rpc_inode_cachep __read_mostly;
+static struct kmem_cache *rpc_inode_cachep __read_mostly;
 
 #define RPC_UPCALL_TIMEOUT (30*HZ)
 
-static void
-__rpc_purge_upcall(struct inode *inode, int err)
+static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
+               void (*destroy_msg)(struct rpc_pipe_msg *), int err)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
        struct rpc_pipe_msg *msg;
 
-       while (!list_empty(&rpci->pipe)) {
-               msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list);
-               list_del_init(&msg->list);
-               msg->errno = err;
-               rpci->ops->destroy_msg(msg);
-       }
-       while (!list_empty(&rpci->in_upcall)) {
-               msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list);
-               list_del_init(&msg->list);
+       if (list_empty(head))
+               return;
+       do {
+               msg = list_entry(head->next, struct rpc_pipe_msg, list);
+               list_del(&msg->list);
                msg->errno = err;
-               rpci->ops->destroy_msg(msg);
-       }
-       rpci->pipelen = 0;
+               destroy_msg(msg);
+       } while (!list_empty(head));
        wake_up(&rpci->waitq);
 }
 
 static void
-rpc_timeout_upcall_queue(void *data)
+rpc_timeout_upcall_queue(struct work_struct *work)
 {
-       struct rpc_inode *rpci = (struct rpc_inode *)data;
+       LIST_HEAD(free_list);
+       struct rpc_inode *rpci =
+               container_of(work, struct rpc_inode, queue_timeout.work);
        struct inode *inode = &rpci->vfs_inode;
+       void (*destroy_msg)(struct rpc_pipe_msg *);
 
-       down(&inode->i_sem);
-       if (rpci->nreaders == 0 && !list_empty(&rpci->pipe))
-               __rpc_purge_upcall(inode, -ETIMEDOUT);
-       up(&inode->i_sem);
+       spin_lock(&inode->i_lock);
+       if (rpci->ops == NULL) {
+               spin_unlock(&inode->i_lock);
+               return;
+       }
+       destroy_msg = rpci->ops->destroy_msg;
+       if (rpci->nreaders == 0) {
+               list_splice_init(&rpci->pipe, &free_list);
+               rpci->pipelen = 0;
+       }
+       spin_unlock(&inode->i_lock);
+       rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT);
 }
 
+/**
+ * rpc_queue_upcall - queue an upcall message on an rpc_pipefs pipe
+ * @inode: inode of upcall pipe on which to queue given message
+ * @msg: message to queue
+ *
+ * Call with an @inode created by rpc_mkpipe() to queue an upcall.
+ * A userspace process may then later read the upcall by performing a
+ * read on an open file for this inode.  It is up to the caller to
+ * initialize the fields of @msg (other than @msg->list) appropriately.
+ */
 int
 rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
 {
        struct rpc_inode *rpci = RPC_I(inode);
-       int res = 0;
+       int res = -EPIPE;
 
-       down(&inode->i_sem);
+       spin_lock(&inode->i_lock);
+       if (rpci->ops == NULL)
+               goto out;
        if (rpci->nreaders) {
                list_add_tail(&msg->list, &rpci->pipe);
                rpci->pipelen += msg->len;
+               res = 0;
        } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
                if (list_empty(&rpci->pipe))
-                       schedule_delayed_work(&rpci->queue_timeout,
+                       queue_delayed_work(rpciod_workqueue,
+                                       &rpci->queue_timeout,
                                        RPC_UPCALL_TIMEOUT);
                list_add_tail(&msg->list, &rpci->pipe);
                rpci->pipelen += msg->len;
-       } else
-               res = -EPIPE;
-       up(&inode->i_sem);
+               res = 0;
+       }
+out:
+       spin_unlock(&inode->i_lock);
        wake_up(&rpci->waitq);
        return res;
 }
+EXPORT_SYMBOL_GPL(rpc_queue_upcall);
+
+static inline void
+rpc_inode_setowner(struct inode *inode, void *private)
+{
+       RPC_I(inode)->private = private;
+}
 
 static void
 rpc_close_pipes(struct inode *inode)
 {
        struct rpc_inode *rpci = RPC_I(inode);
-
-       cancel_delayed_work(&rpci->queue_timeout);
-       flush_scheduled_work();
-       down(&inode->i_sem);
-       if (rpci->ops != NULL) {
+       struct rpc_pipe_ops *ops;
+       int need_release;
+
+       mutex_lock(&inode->i_mutex);
+       ops = rpci->ops;
+       if (ops != NULL) {
+               LIST_HEAD(free_list);
+               spin_lock(&inode->i_lock);
+               need_release = rpci->nreaders != 0 || rpci->nwriters != 0;
                rpci->nreaders = 0;
-               __rpc_purge_upcall(inode, -EPIPE);
-               rpci->nwriters = 0;
-               if (rpci->ops->release_pipe)
-                       rpci->ops->release_pipe(inode);
+               list_splice_init(&rpci->in_upcall, &free_list);
+               list_splice_init(&rpci->pipe, &free_list);
+               rpci->pipelen = 0;
                rpci->ops = NULL;
+               spin_unlock(&inode->i_lock);
+               rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE);
+               rpci->nwriters = 0;
+               if (need_release && ops->release_pipe)
+                       ops->release_pipe(inode);
+               cancel_delayed_work_sync(&rpci->queue_timeout);
        }
-       up(&inode->i_sem);
-}
-
-static inline void
-rpc_inode_setowner(struct inode *inode, void *private)
-{
-       RPC_I(inode)->private = private;
+       rpc_inode_setowner(inode, NULL);
+       mutex_unlock(&inode->i_mutex);
 }
 
 static struct inode *
 rpc_alloc_inode(struct super_block *sb)
 {
        struct rpc_inode *rpci;
-       rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL);
+       rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
        if (!rpci)
                return NULL;
        return &rpci->vfs_inode;
@@ -140,63 +170,84 @@ static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
        struct rpc_inode *rpci = RPC_I(inode);
+       int first_open;
        int res = -ENXIO;
 
-       down(&inode->i_sem);
-       if (rpci->ops != NULL) {
-               if (filp->f_mode & FMODE_READ)
-                       rpci->nreaders ++;
-               if (filp->f_mode & FMODE_WRITE)
-                       rpci->nwriters ++;
-               res = 0;
+       mutex_lock(&inode->i_mutex);
+       if (rpci->ops == NULL)
+               goto out;
+       first_open = rpci->nreaders == 0 && rpci->nwriters == 0;
+       if (first_open && rpci->ops->open_pipe) {
+               res = rpci->ops->open_pipe(inode);
+               if (res)
+                       goto out;
        }
-       up(&inode->i_sem);
+       if (filp->f_mode & FMODE_READ)
+               rpci->nreaders++;
+       if (filp->f_mode & FMODE_WRITE)
+               rpci->nwriters++;
+       res = 0;
+out:
+       mutex_unlock(&inode->i_mutex);
        return res;
 }
 
 static int
 rpc_pipe_release(struct inode *inode, struct file *filp)
 {
-       struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+       struct rpc_inode *rpci = RPC_I(inode);
        struct rpc_pipe_msg *msg;
+       int last_close;
 
-       down(&inode->i_sem);
+       mutex_lock(&inode->i_mutex);
        if (rpci->ops == NULL)
                goto out;
        msg = (struct rpc_pipe_msg *)filp->private_data;
        if (msg != NULL) {
-               msg->errno = -EPIPE;
-               list_del_init(&msg->list);
+               spin_lock(&inode->i_lock);
+               msg->errno = -EAGAIN;
+               list_del(&msg->list);
+               spin_unlock(&inode->i_lock);
                rpci->ops->destroy_msg(msg);
        }
        if (filp->f_mode & FMODE_WRITE)
                rpci->nwriters --;
-       if (filp->f_mode & FMODE_READ)
+       if (filp->f_mode & FMODE_READ) {
                rpci->nreaders --;
-       if (!rpci->nreaders)
-               __rpc_purge_upcall(inode, -EPIPE);
-       if (rpci->ops->release_pipe)
+               if (rpci->nreaders == 0) {
+                       LIST_HEAD(free_list);
+                       spin_lock(&inode->i_lock);
+                       list_splice_init(&rpci->pipe, &free_list);
+                       rpci->pipelen = 0;
+                       spin_unlock(&inode->i_lock);
+                       rpc_purge_list(rpci, &free_list,
+                                       rpci->ops->destroy_msg, -EAGAIN);
+               }
+       }
+       last_close = rpci->nwriters == 0 && rpci->nreaders == 0;
+       if (last_close && rpci->ops->release_pipe)
                rpci->ops->release_pipe(inode);
 out:
-       up(&inode->i_sem);
+       mutex_unlock(&inode->i_mutex);
        return 0;
 }
 
 static ssize_t
 rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
 {
-       struct inode *inode = filp->f_dentry->d_inode;
+       struct inode *inode = filp->f_path.dentry->d_inode;
        struct rpc_inode *rpci = RPC_I(inode);
        struct rpc_pipe_msg *msg;
        int res = 0;
 
-       down(&inode->i_sem);
+       mutex_lock(&inode->i_mutex);
        if (rpci->ops == NULL) {
                res = -EPIPE;
                goto out_unlock;
        }
        msg = filp->private_data;
        if (msg == NULL) {
+               spin_lock(&inode->i_lock);
                if (!list_empty(&rpci->pipe)) {
                        msg = list_entry(rpci->pipe.next,
                                        struct rpc_pipe_msg,
@@ -206,6 +257,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
                        filp->private_data = msg;
                        msg->copied = 0;
                }
+               spin_unlock(&inode->i_lock);
                if (msg == NULL)
                        goto out_unlock;
        }
@@ -213,26 +265,28 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
        res = rpci->ops->upcall(filp, msg, buf, len);
        if (res < 0 || msg->len == msg->copied) {
                filp->private_data = NULL;
-               list_del_init(&msg->list);
+               spin_lock(&inode->i_lock);
+               list_del(&msg->list);
+               spin_unlock(&inode->i_lock);
                rpci->ops->destroy_msg(msg);
        }
 out_unlock:
-       up(&inode->i_sem);
+       mutex_unlock(&inode->i_mutex);
        return res;
 }
 
 static ssize_t
 rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
 {
-       struct inode *inode = filp->f_dentry->d_inode;
+       struct inode *inode = filp->f_path.dentry->d_inode;
        struct rpc_inode *rpci = RPC_I(inode);
        int res;
 
-       down(&inode->i_sem);
+       mutex_lock(&inode->i_mutex);
        res = -EPIPE;
        if (rpci->ops != NULL)
                res = rpci->ops->downcall(filp, buf, len);
-       up(&inode->i_sem);
+       mutex_unlock(&inode->i_mutex);
        return res;
 }
 
@@ -242,13 +296,13 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
        struct rpc_inode *rpci;
        unsigned int mask = 0;
 
-       rpci = RPC_I(filp->f_dentry->d_inode);
+       rpci = RPC_I(filp->f_path.dentry->d_inode);
        poll_wait(filp, &rpci->waitq, wait);
 
        mask = POLLOUT | POLLWRNORM;
        if (rpci->ops == NULL)
                mask |= POLLERR | POLLHUP;
-       if (!list_empty(&rpci->pipe))
+       if (filp->private_data || !list_empty(&rpci->pipe))
                mask |= POLLIN | POLLRDNORM;
        return mask;
 }
@@ -257,7 +311,7 @@ static int
 rpc_pipe_ioctl(struct inode *ino, struct file *filp,
                unsigned int cmd, unsigned long arg)
 {
-       struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+       struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
        int len;
 
        switch (cmd) {
@@ -276,7 +330,7 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp,
        }
 }
 
-static struct file_operations rpc_pipe_fops = {
+static const struct file_operations rpc_pipe_fops = {
        .owner          = THIS_MODULE,
        .llseek         = no_llseek,
        .read           = rpc_pipe_read,
@@ -295,10 +349,9 @@ rpc_show_info(struct seq_file *m, void *v)
        seq_printf(m, "RPC server: %s\n", clnt->cl_server);
        seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
                        clnt->cl_prog, clnt->cl_vers);
-       seq_printf(m, "address: %u.%u.%u.%u\n",
-                       NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
-       seq_printf(m, "protocol: %s\n",
-                       clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+       seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+       seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
+       seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
        return 0;
 }
 
@@ -310,16 +363,16 @@ rpc_info_open(struct inode *inode, struct file *file)
 
        if (!ret) {
                struct seq_file *m = file->private_data;
-               down(&inode->i_sem);
+               mutex_lock(&inode->i_mutex);
                clnt = RPC_I(inode)->private;
                if (clnt) {
-                       atomic_inc(&clnt->cl_users);
+                       kref_get(&clnt->cl_kref);
                        m->private = clnt;
                } else {
                        single_release(inode, file);
                        ret = -EINVAL;
                }
-               up(&inode->i_sem);
+               mutex_unlock(&inode->i_mutex);
        }
        return ret;
 }
@@ -335,7 +388,7 @@ rpc_info_release(struct inode *inode, struct file *file)
        return single_release(inode, file);
 }
 
-static struct file_operations rpc_info_operations = {
+static const struct file_operations rpc_info_operations = {
        .owner          = THIS_MODULE,
        .open           = rpc_info_open,
        .read           = seq_read,
@@ -354,6 +407,7 @@ enum {
        RPCAUTH_nfs,
        RPCAUTH_portmap,
        RPCAUTH_statd,
+       RPCAUTH_nfsd4_cb,
        RPCAUTH_RootEOF
 };
 
@@ -362,7 +416,7 @@ enum {
  */
 struct rpc_filelist {
        char *name;
-       struct file_operations *i_fop;
+       const struct file_operations *i_fop;
        int mode;
 };
 
@@ -387,6 +441,10 @@ static struct rpc_filelist files[] = {
                .name = "statd",
                .mode = S_IFDIR | S_IRUGO | S_IXUGO,
        },
+       [RPCAUTH_nfsd4_cb] = {
+               .name = "nfsd4_cb",
+               .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+       },
 };
 
 enum {
@@ -402,16 +460,59 @@ static struct rpc_filelist authfiles[] = {
        },
 };
 
+struct vfsmount *rpc_get_mount(void)
+{
+       int err;
+
+       err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count);
+       if (err != 0)
+               return ERR_PTR(err);
+       return rpc_mount;
+}
+
+void rpc_put_mount(void)
+{
+       simple_release_fs(&rpc_mount, &rpc_mount_count);
+}
+
+static int rpc_delete_dentry(struct dentry *dentry)
+{
+       return 1;
+}
+
+static const struct dentry_operations rpc_dentry_operations = {
+       .d_delete = rpc_delete_dentry,
+};
+
 static int
-rpc_get_mount(void)
+rpc_lookup_parent(char *path, struct nameidata *nd)
 {
-       return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+       struct vfsmount *mnt;
+
+       if (path[0] == '\0')
+               return -ENOENT;
+
+       mnt = rpc_get_mount();
+       if (IS_ERR(mnt)) {
+               printk(KERN_WARNING "%s: %s failed to mount "
+                              "pseudofilesystem \n", __FILE__, __func__);
+               return PTR_ERR(mnt);
+       }
+
+       if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) {
+               printk(KERN_WARNING "%s: %s failed to find path %s\n",
+                               __FILE__, __func__, path);
+               rpc_put_mount();
+               return -ENOENT;
+       }
+       return 0;
 }
 
 static void
-rpc_put_mount(void)
+rpc_release_path(struct nameidata *nd)
 {
-       simple_release_fs(&rpc_mount, &rpc_mount_count);
+       path_put(&nd->path);
+       rpc_put_mount();
 }
 
 static struct inode *
@@ -421,15 +522,12 @@ rpc_get_inode(struct super_block *sb, int mode)
        if (!inode)
                return NULL;
        inode->i_mode = mode;
-       inode->i_uid = inode->i_gid = 0;
-       inode->i_blksize = PAGE_CACHE_SIZE;
-       inode->i_blocks = 0;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        switch(mode & S_IFMT) {
                case S_IFDIR:
                        inode->i_fop = &simple_dir_operations;
                        inode->i_op = &simple_dir_inode_operations;
-                       inode->i_nlink++;
+                       inc_nlink(inode);
                default:
                        break;
        }
@@ -439,19 +537,23 @@ rpc_get_inode(struct super_block *sb, int mode)
 /*
  * FIXME: This probably has races.
  */
-static void
-rpc_depopulate(struct dentry *parent)
+static void rpc_depopulate(struct dentry *parent,
+                          unsigned long start, unsigned long eof)
 {
        struct inode *dir = parent->d_inode;
        struct list_head *pos, *next;
        struct dentry *dentry, *dvec[10];
        int n = 0;
 
-       down(&dir->i_sem);
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
 repeat:
        spin_lock(&dcache_lock);
        list_for_each_safe(pos, next, &parent->d_subdirs) {
-               dentry = list_entry(pos, struct dentry, d_child);
+               dentry = list_entry(pos, struct dentry, d_u.d_child);
+               if (!dentry->d_inode ||
+                               dentry->d_inode->i_ino < start ||
+                               dentry->d_inode->i_ino >= eof)
+                       continue;
                spin_lock(&dentry->d_lock);
                if (!d_unhashed(dentry)) {
                        dget_locked(dentry);
@@ -467,16 +569,16 @@ repeat:
        if (n) {
                do {
                        dentry = dvec[--n];
-                       if (dentry->d_inode) {
-                               rpc_close_pipes(dentry->d_inode);
-                               rpc_inode_setowner(dentry->d_inode, NULL);
+                       if (S_ISREG(dentry->d_inode->i_mode))
                                simple_unlink(dir, dentry);
-                       }
+                       else if (S_ISDIR(dentry->d_inode->i_mode))
+                               simple_rmdir(dir, dentry);
+                       d_delete(dentry);
                        dput(dentry);
                } while (n);
                goto repeat;
        }
-       up(&dir->i_sem);
+       mutex_unlock(&dir->i_mutex);
 }
 
 static int
@@ -489,11 +591,12 @@ rpc_populate(struct dentry *parent,
        struct dentry *dentry;
        int mode, i;
 
-       down(&dir->i_sem);
+       mutex_lock(&dir->i_mutex);
        for (i = start; i < eof; i++) {
                dentry = d_alloc_name(parent, files[i].name);
                if (!dentry)
                        goto out_bad;
+               dentry->d_op = &rpc_dentry_operations;
                mode = files[i].mode;
                inode = rpc_get_inode(dir->i_sb, mode);
                if (!inode) {
@@ -506,162 +609,251 @@ rpc_populate(struct dentry *parent,
                if (private)
                        rpc_inode_setowner(inode, private);
                if (S_ISDIR(mode))
-                       dir->i_nlink++;
+                       inc_nlink(dir);
                d_add(dentry, inode);
+               fsnotify_create(dir, dentry);
        }
-       up(&dir->i_sem);
+       mutex_unlock(&dir->i_mutex);
        return 0;
 out_bad:
-       up(&dir->i_sem);
+       mutex_unlock(&dir->i_mutex);
        printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
-                       __FILE__, __FUNCTION__, parent->d_name.name);
+                       __FILE__, __func__, parent->d_name.name);
        return -ENOMEM;
 }
 
-struct dentry *
-rpc_mkdir(struct dentry *parent, char *name, struct rpc_clnt *rpc_client)
+static int
+__rpc_mkdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *dir;
-       struct dentry *dentry;
        struct inode *inode;
-       int error;
 
-       if (!parent)
-               parent = rpc_mount->mnt_root;
+       inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO);
+       if (!inode)
+               goto out_err;
+       inode->i_ino = iunique(dir->i_sb, 100);
+       d_instantiate(dentry, inode);
+       inc_nlink(dir);
+       fsnotify_mkdir(dir, dentry);
+       return 0;
+out_err:
+       printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+                       __FILE__, __func__, dentry->d_name.name);
+       return -ENOMEM;
+}
 
-       dir = parent->d_inode;
-       
-       error = rpc_get_mount();
-       if (error)
-               return ERR_PTR(error);
+static int
+__rpc_rmdir(struct inode *dir, struct dentry *dentry)
+{
+       int error;
+       error = simple_rmdir(dir, dentry);
+       if (!error)
+               d_delete(dentry);
+       return error;
+}
 
-       down(&dir->i_sem);
-       dentry = lookup_one_len(name, parent, strlen(name));
+static struct dentry *
+rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive)
+{
+       struct inode *dir = parent->d_inode;
+       struct dentry *dentry;
+
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       dentry = lookup_one_len(name, parent, len);
        if (IS_ERR(dentry))
-               goto out_unlock;
-       if (dentry->d_inode) {
+               goto out_err;
+       if (!dentry->d_inode)
+               dentry->d_op = &rpc_dentry_operations;
+       else if (exclusive) {
+               dput(dentry);
                dentry = ERR_PTR(-EEXIST);
-               goto out_dput;
+               goto out_err;
        }
+       return dentry;
+out_err:
+       mutex_unlock(&dir->i_mutex);
+       return dentry;
+}
 
-       inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
-       if (!inode)
-               goto out_dput;
-       inode->i_ino = iunique(dir->i_sb, 100);
-       dir->i_nlink++;
-       RPC_I(dentry->d_inode)->private = rpc_client;
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+       struct dentry *dentry;
+       int error;
 
-       d_instantiate(dentry, inode);
-       dget(dentry);
-       up(&dir->i_sem);
+       if ((error = rpc_lookup_parent(path, nd)) != 0)
+               return ERR_PTR(error);
+       dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len,
+                                  1);
+       if (IS_ERR(dentry))
+               rpc_release_path(nd);
+       return dentry;
+}
 
-       inode_dir_notify(dir, DN_CREATE);
+/**
+ * rpc_mkdir - Create a new directory in rpc_pipefs
+ * @path: path from the rpc_pipefs root to the new directory
+ * @rpc_client: rpc client to associate with this directory
+ *
+ * This creates a directory at the given @path associated with
+ * @rpc_client, which will contain a file named "info" with some basic
+ * information about the client, together with any "pipes" that may
+ * later be created using rpc_mkpipe().
+ */
+struct dentry *
+rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
+{
+       struct nameidata nd;
+       struct dentry *dentry;
+       struct inode *dir;
+       int error;
 
+       dentry = rpc_lookup_negative(path, &nd);
+       if (IS_ERR(dentry))
+               return dentry;
+       dir = nd.path.dentry->d_inode;
+       if ((error = __rpc_mkdir(dir, dentry)) != 0)
+               goto err_dput;
+       RPC_I(dentry->d_inode)->private = rpc_client;
        error = rpc_populate(dentry, authfiles,
                        RPCAUTH_info, RPCAUTH_EOF);
        if (error)
-               goto out_depopulate;
-
+               goto err_depopulate;
+       dget(dentry);
+out:
+       mutex_unlock(&dir->i_mutex);
+       rpc_release_path(&nd);
        return dentry;
-
- out_depopulate:
-       rpc_rmdir(dentry);
- out_dput:
+err_depopulate:
+       rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+       __rpc_rmdir(dir, dentry);
+err_dput:
        dput(dentry);
- out_unlock:
-       up(&dir->i_sem);
-       rpc_put_mount();
-       return dentry;
+       printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n",
+                       __FILE__, __func__, path, error);
+       dentry = ERR_PTR(error);
+       goto out;
 }
 
-void
+/**
+ * rpc_rmdir - Remove a directory created with rpc_mkdir()
+ * @dentry: directory to remove
+ */
+int
 rpc_rmdir(struct dentry *dentry)
 {
-       struct dentry *parent = dentry->d_parent;
-
-       rpc_depopulate(dentry);
-
-       down(&parent->d_inode->i_sem);
-       if (dentry->d_inode) {
-               rpc_close_pipes(dentry->d_inode);
-               rpc_inode_setowner(dentry->d_inode, NULL);
-               simple_rmdir(parent->d_inode, dentry);
-       }
-       up(&parent->d_inode->i_sem);
+       struct dentry *parent;
+       struct inode *dir;
+       int error;
 
-       inode_dir_notify(parent->d_inode, DN_DELETE);
-       rpc_put_mount();
+       parent = dget_parent(dentry);
+       dir = parent->d_inode;
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+       error = __rpc_rmdir(dir, dentry);
+       dput(dentry);
+       mutex_unlock(&dir->i_mutex);
+       dput(parent);
+       return error;
 }
 
+/**
+ * rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication
+ * @parent: dentry of directory to create new "pipe" in
+ * @name: name of pipe
+ * @private: private data to associate with the pipe, for the caller's use
+ * @ops: operations defining the behavior of the pipe: upcall, downcall,
+ *     release_pipe, open_pipe, and destroy_msg.
+ * @flags: rpc_inode flags
+ *
+ * Data is made available for userspace to read by calls to
+ * rpc_queue_upcall().  The actual reads will result in calls to
+ * @ops->upcall, which will be called with the file pointer,
+ * message, and userspace buffer to copy to.
+ *
+ * Writes can come at any time, and do not necessarily have to be
+ * responses to upcalls.  They will result in calls to @ops->downcall.
+ *
+ * The @private argument passed here will be available to all these methods
+ * from the file pointer, via RPC_I(file->f_path.dentry->d_inode)->private.
+ */
 struct dentry *
-rpc_mkpipe(struct dentry *parent, char *name, void *private,
-          struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
 {
-       struct inode *dir = parent->d_inode;
        struct dentry *dentry;
-       struct inode *inode;
+       struct inode *dir, *inode;
        struct rpc_inode *rpci;
-       int error;
 
-       error = rpc_get_mount();
-       if (error)
-               return ERR_PTR(error);
-
-       down(&parent->d_inode->i_sem);
-       dentry = lookup_one_len(name, parent, strlen(name));
+       dentry = rpc_lookup_create(parent, name, strlen(name), 0);
        if (IS_ERR(dentry))
-               goto out_unlock;
+               return dentry;
+       dir = parent->d_inode;
        if (dentry->d_inode) {
-               dentry = ERR_PTR(-EEXIST);
-               goto out_dput;
-       }
-
-       inode = rpc_get_inode(parent->d_inode->i_sb,
-                       S_IFSOCK | S_IRUSR | S_IWUSR);
-       if (!inode) {
-               dentry = ERR_PTR(-ENOMEM);
-               goto out_dput;
+               rpci = RPC_I(dentry->d_inode);
+               if (rpci->private != private ||
+                               rpci->ops != ops ||
+                               rpci->flags != flags) {
+                       dput (dentry);
+                       dentry = ERR_PTR(-EBUSY);
+               }
+               rpci->nkern_readwriters++;
+               goto out;
        }
-
+       inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
+       if (!inode)
+               goto err_dput;
        inode->i_ino = iunique(dir->i_sb, 100);
        inode->i_fop = &rpc_pipe_fops;
-
+       d_instantiate(dentry, inode);
        rpci = RPC_I(inode);
        rpci->private = private;
        rpci->flags = flags;
        rpci->ops = ops;
-
-       d_instantiate(dentry, inode);
+       rpci->nkern_readwriters = 1;
+       fsnotify_create(dir, dentry);
        dget(dentry);
-       up(&parent->d_inode->i_sem);
-
-       inode_dir_notify(dir, DN_CREATE);
+out:
+       mutex_unlock(&dir->i_mutex);
        return dentry;
-
- out_dput:
+err_dput:
        dput(dentry);
- out_unlock:
-       up(&parent->d_inode->i_sem);
-       rpc_put_mount();
-       return dentry;
+       dentry = ERR_PTR(-ENOMEM);
+       printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
+                       __FILE__, __func__, parent->d_name.name, name,
+                       -ENOMEM);
+       goto out;
 }
+EXPORT_SYMBOL_GPL(rpc_mkpipe);
 
-void
+/**
+ * rpc_unlink - remove a pipe
+ * @dentry: dentry for the pipe, as returned from rpc_mkpipe
+ *
+ * Once the last kernel user of the pipe has called this, lookups will no
+ * longer find the pipe, and any attempts to read or write using
+ * preexisting opens of the pipe will return -EPIPE.
+ */
+int
 rpc_unlink(struct dentry *dentry)
 {
-       struct dentry *parent = dentry->d_parent;
+       struct dentry *parent;
+       struct inode *dir;
+       int error = 0;
 
-       down(&parent->d_inode->i_sem);
-       if (dentry->d_inode) {
+       parent = dget_parent(dentry);
+       dir = parent->d_inode;
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) {
                rpc_close_pipes(dentry->d_inode);
-               rpc_inode_setowner(dentry->d_inode, NULL);
-               simple_unlink(parent->d_inode, dentry);
+               error = simple_unlink(dir, dentry);
+               if (!error)
+                       d_delete(dentry);
        }
-       up(&parent->d_inode->i_sem);
-
-       inode_dir_notify(parent->d_inode, DN_DELETE);
-       rpc_put_mount();
+       dput(dentry);
+       mutex_unlock(&dir->i_mutex);
+       dput(parent);
+       return error;
 }
+EXPORT_SYMBOL_GPL(rpc_unlink);
 
 /*
  * populate the filesystem
@@ -704,11 +896,11 @@ out:
        return -ENOMEM;
 }
 
-static struct super_block *
+static int
 rpc_get_sb(struct file_system_type *fs_type,
-               int flags, const char *dev_name, void *data)
+               int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-       return get_sb_single(fs_type, flags, data, rpc_fill_super);
+       return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt);
 }
 
 static struct file_system_type rpc_pipe_fs_type = {
@@ -719,40 +911,46 @@ static struct file_system_type rpc_pipe_fs_type = {
 };
 
 static void
-init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+init_once(void *foo)
 {
        struct rpc_inode *rpci = (struct rpc_inode *) foo;
 
-       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-           SLAB_CTOR_CONSTRUCTOR) {
-               inode_init_once(&rpci->vfs_inode);
-               rpci->private = NULL;
-               rpci->nreaders = 0;
-               rpci->nwriters = 0;
-               INIT_LIST_HEAD(&rpci->in_upcall);
-               INIT_LIST_HEAD(&rpci->pipe);
-               rpci->pipelen = 0;
-               init_waitqueue_head(&rpci->waitq);
-               INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci);
-               rpci->ops = NULL;
-       }
+       inode_init_once(&rpci->vfs_inode);
+       rpci->private = NULL;
+       rpci->nreaders = 0;
+       rpci->nwriters = 0;
+       INIT_LIST_HEAD(&rpci->in_upcall);
+       INIT_LIST_HEAD(&rpci->in_downcall);
+       INIT_LIST_HEAD(&rpci->pipe);
+       rpci->pipelen = 0;
+       init_waitqueue_head(&rpci->waitq);
+       INIT_DELAYED_WORK(&rpci->queue_timeout,
+                           rpc_timeout_upcall_queue);
+       rpci->ops = NULL;
 }
 
 int register_rpc_pipefs(void)
 {
+       int err;
+
        rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
-                                             sizeof(struct rpc_inode),
-                                             0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
-                                             init_once, NULL);
+                               sizeof(struct rpc_inode),
+                               0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+                                               SLAB_MEM_SPREAD),
+                               init_once);
        if (!rpc_inode_cachep)
                return -ENOMEM;
-       register_filesystem(&rpc_pipe_fs_type);
+       err = register_filesystem(&rpc_pipe_fs_type);
+       if (err) {
+               kmem_cache_destroy(rpc_inode_cachep);
+               return err;
+       }
+
        return 0;
 }
 
 void unregister_rpc_pipefs(void)
 {
-       if (kmem_cache_destroy(rpc_inode_cachep))
-               printk(KERN_WARNING "RPC: unable to free inode cache\n");
+       kmem_cache_destroy(rpc_inode_cachep);
        unregister_filesystem(&rpc_pipe_fs_type);
 }