nfsd4: don't sleep in lease-break callback
[safe/jmp/linux-2.6] / fs / nfsd / nfs4callback.c
index 353eb4a..e078c74 100644 (file)
@@ -1,6 +1,4 @@
 /*
- *  linux/fs/nfsd/nfs4callback.c
- *
  *  Copyright (c) 2001 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/inet.h>
-#include <linux/errno.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/kthread.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/nfs4.h>
+#include <linux/sunrpc/svc_xprt.h>
+#include "nfsd.h"
+#include "state.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 #define NFSPROC4_CB_NULL 0
 #define NFSPROC4_CB_COMPOUND 1
+#define NFS4_STATEID_SIZE 16
 
 /* Index of predefined Linux callback client operations */
 
 enum {
-        NFSPROC4_CLNT_CB_NULL = 0,
+       NFSPROC4_CLNT_CB_NULL = 0,
        NFSPROC4_CLNT_CB_RECALL,
+       NFSPROC4_CLNT_CB_SEQUENCE,
 };
 
 enum nfs_cb_opnum4 {
        OP_CB_RECALL            = 4,
+       OP_CB_SEQUENCE          = 11,
 };
 
 #define NFS4_MAXTAGLEN         20
@@ -70,15 +61,22 @@ enum nfs_cb_opnum4 {
 #define NFS4_dec_cb_null_sz            0
 #define cb_compound_enc_hdr_sz         4
 #define cb_compound_dec_hdr_sz         (3 + (NFS4_MAXTAGLEN >> 2))
+#define sessionid_sz                   (NFS4_MAX_SESSIONID_LEN >> 2)
+#define cb_sequence_enc_sz             (sessionid_sz + 4 +             \
+                                       1 /* no referring calls list yet */)
+#define cb_sequence_dec_sz             (op_dec_sz + sessionid_sz + 4)
+
 #define op_enc_sz                      1
 #define op_dec_sz                      2
 #define enc_nfs4_fh_sz                 (1 + (NFS4_FHSIZE >> 2))
 #define enc_stateid_sz                 (NFS4_STATEID_SIZE >> 2)
 #define NFS4_enc_cb_recall_sz          (cb_compound_enc_hdr_sz +       \
+                                       cb_sequence_enc_sz +            \
                                        1 + enc_stateid_sz +            \
                                        enc_nfs4_fh_sz)
 
 #define NFS4_dec_cb_recall_sz          (cb_compound_dec_hdr_sz  +      \
+                                       cb_sequence_dec_sz +            \
                                        op_dec_sz)
 
 /*
@@ -137,10 +135,13 @@ xdr_error:                                      \
 } while (0)
 
 struct nfs4_cb_compound_hdr {
-       int             status;
-       u32             ident;
+       /* args */
+       u32             ident;  /* minorversion 0 only */
        u32             nops;
        __be32          *nops_p;
+       u32             minorversion;
+       /* res */
+       int             status;
        u32             taglen;
        char            *tag;
 };
@@ -209,7 +210,7 @@ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 
        RESERVE_SPACE(16);
        WRITE32(0);            /* tag length is always 0 */
-       WRITE32(NFS4_MINOR_VERSION);
+       WRITE32(hdr->minorversion);
        WRITE32(hdr->ident);
        hdr->nops_p = p;
        WRITE32(hdr->nops);
@@ -237,6 +238,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
        hdr->nops++;
 }
 
+static void
+encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
+                  struct nfs4_cb_compound_hdr *hdr)
+{
+       __be32 *p;
+
+       if (hdr->minorversion == 0)
+               return;
+
+       RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
+
+       WRITE32(OP_CB_SEQUENCE);
+       WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
+       WRITE32(args->cbs_clp->cl_cb_seq_nr);
+       WRITE32(0);             /* slotid, always 0 */
+       WRITE32(0);             /* highest slotid always 0 */
+       WRITE32(0);             /* cachethis always 0 */
+       WRITE32(0); /* FIXME: support referring_call_lists */
+       hdr->nops++;
+}
+
 static int
 nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 {
@@ -248,15 +270,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
+               struct nfs4_rpc_args *rpc_args)
 {
        struct xdr_stream xdr;
+       struct nfs4_delegation *args = rpc_args->args_op;
        struct nfs4_cb_compound_hdr hdr = {
                .ident = args->dl_ident,
+               .minorversion = rpc_args->args_seq.cbs_minorversion,
        };
 
        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
        encode_cb_compound_hdr(&xdr, &hdr);
+       encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
        encode_cb_recall(&xdr, args, &hdr);
        encode_cb_nops(&hdr);
        return 0;
@@ -298,6 +324,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
        return 0;
 }
 
+/*
+ * Our current back channel implmentation supports a single backchannel
+ * with a single slot.
+ */
+static int
+decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
+                  struct rpc_rqst *rqstp)
+{
+       struct nfs4_sessionid id;
+       int status;
+       u32 dummy;
+       __be32 *p;
+
+       if (res->cbs_minorversion == 0)
+               return 0;
+
+       status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
+       if (status)
+               return status;
+
+       /*
+        * If the server returns different values for sessionID, slotID or
+        * sequence number, the server is looney tunes.
+        */
+       status = -ESERVERFAULT;
+
+       READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+       memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
+       p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+       if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
+                  NFS4_MAX_SESSIONID_LEN)) {
+               dprintk("%s Invalid session id\n", __func__);
+               goto out;
+       }
+       READ32(dummy);
+       if (dummy != res->cbs_clp->cl_cb_seq_nr) {
+               dprintk("%s Invalid sequence number\n", __func__);
+               goto out;
+       }
+       READ32(dummy);  /* slotid must be 0 */
+       if (dummy != 0) {
+               dprintk("%s Invalid slotid\n", __func__);
+               goto out;
+       }
+       /* FIXME: process highest slotid and target highest slotid */
+       status = 0;
+out:
+       return status;
+}
+
+
 static int
 nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
 {
@@ -305,7 +382,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
+               struct nfsd4_cb_sequence *seq)
 {
        struct xdr_stream xdr;
        struct nfs4_cb_compound_hdr hdr;
@@ -315,6 +393,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
        status = decode_cb_compound_hdr(&xdr, &hdr);
        if (status)
                goto out;
+       if (seq) {
+               status = decode_cb_sequence(&xdr, seq, rqstp);
+               if (status)
+                       goto out;
+       }
        status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
 out:
        return status;
@@ -368,7 +451,7 @@ static struct rpc_program cb_program = {
 
 static int max_cb_time(void)
 {
-       return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
+       return max(nfsd4_lease/10, (time_t)1) * HZ;
 }
 
 /* Reference counting, callback cleanup, etc., all look racy as heck.
@@ -376,16 +459,15 @@ static int max_cb_time(void)
 
 int setup_callback_client(struct nfs4_client *clp)
 {
-       struct sockaddr_in      addr;
        struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
        struct rpc_timeout      timeparms = {
                .to_initval     = max_cb_time(),
                .to_retries     = 0,
        };
        struct rpc_create_args args = {
-               .protocol       = IPPROTO_TCP,
-               .address        = (struct sockaddr *)&addr,
-               .addrsize       = sizeof(addr),
+               .protocol       = XPRT_TRANSPORT_TCP,
+               .address        = (struct sockaddr *) &cb->cb_addr,
+               .addrsize       = cb->cb_addrlen,
                .timeout        = &timeparms,
                .program        = &cb_program,
                .prognumber     = cb->cb_prog,
@@ -398,13 +480,10 @@ int setup_callback_client(struct nfs4_client *clp)
 
        if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
                return -EINVAL;
-
-       /* Initialize address */
-       memset(&addr, 0, sizeof(addr));
-       addr.sin_family = AF_INET;
-       addr.sin_port = htons(cb->cb_port);
-       addr.sin_addr.s_addr = htonl(cb->cb_addr);
-
+       if (cb->cb_minorversion) {
+               args.bc_xprt = clp->cl_cb_xprt;
+               args.protocol = XPRT_TRANSPORT_BC_TCP;
+       }
        /* Create RPC client */
        client = rpc_create(&args);
        if (IS_ERR(client)) {
@@ -431,53 +510,40 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
                warn_no_callback_path(clp, task->tk_status);
        else
                atomic_set(&clp->cl_cb_conn.cb_set, 1);
-       put_nfs4_client(clp);
 }
 
 static const struct rpc_call_ops nfsd4_cb_probe_ops = {
        .rpc_call_done = nfsd4_cb_probe_done,
 };
 
-static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
-{
-       struct auth_cred acred = {
-               .machine_cred = 1
-       };
+static struct rpc_cred *callback_cred;
 
-       /*
-        * Note in the gss case this doesn't actually have to wait for a
-        * gss upcall (or any calls to the client); this just creates a
-        * non-uptodate cred which the rpc state machine will fill in with
-        * a refresh_upcall later.
-        */
-       return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
-                                                       RPCAUTH_LOOKUP_NEW);
+int set_callback_cred(void)
+{
+       if (callback_cred)
+               return 0;
+       callback_cred = rpc_lookup_machine_cred();
+       if (!callback_cred)
+               return -ENOMEM;
+       return 0;
 }
 
+
 void do_probe_callback(struct nfs4_client *clp)
 {
        struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
        struct rpc_message msg = {
                .rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
                .rpc_argp       = clp,
+               .rpc_cred       = callback_cred
        };
-       struct rpc_cred *cred;
        int status;
 
-       cred = lookup_cb_cred(cb);
-       if (IS_ERR(cred)) {
-               status = PTR_ERR(cred);
-               goto out;
-       }
-       cb->cb_cred = cred;
-       msg.rpc_cred = cb->cb_cred;
-       status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
+       status = rpc_call_async(cb->cb_client, &msg,
+                               RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
                                &nfsd4_cb_probe_ops, (void *)clp);
-out:
-       if (status) {
+       if (status)
                warn_no_callback_path(clp, status);
-               put_nfs4_client(clp);
-       }
 }
 
 /*
@@ -495,11 +561,89 @@ nfsd4_probe_callback(struct nfs4_client *clp)
                warn_no_callback_path(clp, status);
                return;
        }
+       do_probe_callback(clp);
+}
+
+/*
+ * There's currently a single callback channel slot.
+ * If the slot is available, then mark it busy.  Otherwise, set the
+ * thread for sleeping on the callback RPC wait queue.
+ */
+static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
+               struct rpc_task *task)
+{
+       struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+       u32 *ptr = (u32 *)clp->cl_sessionid.data;
+       int status = 0;
+
+       dprintk("%s: %u:%u:%u:%u\n", __func__,
+               ptr[0], ptr[1], ptr[2], ptr[3]);
+
+       if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+               rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
+               dprintk("%s slot is busy\n", __func__);
+               status = -EAGAIN;
+               goto out;
+       }
 
-       /* the task holds a reference to the nfs4_client struct */
-       atomic_inc(&clp->cl_count);
+       /*
+        * We'll need the clp during XDR encoding and decoding,
+        * and the sequence during decoding to verify the reply
+        */
+       args->args_seq.cbs_clp = clp;
+       task->tk_msg.rpc_resp = &args->args_seq;
 
-       do_probe_callback(clp);
+out:
+       dprintk("%s status=%d\n", __func__, status);
+       return status;
+}
+
+/*
+ * TODO: cb_sequence should support referring call lists, cachethis, multiple
+ * slots, and mark callback channel down on communication errors.
+ */
+static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
+{
+       struct nfs4_delegation *dp = calldata;
+       struct nfs4_client *clp = dp->dl_client;
+       struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+       u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+       int status = 0;
+
+       args->args_seq.cbs_minorversion = minorversion;
+       if (minorversion) {
+               status = nfsd41_cb_setup_sequence(clp, task);
+               if (status) {
+                       if (status != -EAGAIN) {
+                               /* terminate rpc task */
+                               task->tk_status = status;
+                               task->tk_action = NULL;
+                       }
+                       return;
+               }
+       }
+       rpc_call_start(task);
+}
+
+static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+{
+       struct nfs4_delegation *dp = calldata;
+       struct nfs4_client *clp = dp->dl_client;
+
+       dprintk("%s: minorversion=%d\n", __func__,
+               clp->cl_cb_conn.cb_minorversion);
+
+       if (clp->cl_cb_conn.cb_minorversion) {
+               /* No need for lock, access serialized in nfsd4_cb_prepare */
+               ++clp->cl_cb_seq_nr;
+               clear_bit(0, &clp->cl_cb_slot_busy);
+               rpc_wake_up_next(&clp->cl_cb_waitq);
+               dprintk("%s: freed slot, new seqid=%d\n", __func__,
+                       clp->cl_cb_seq_nr);
+
+               /* We're done looking into the sequence information */
+               task->tk_msg.rpc_resp = NULL;
+       }
 }
 
 static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
@@ -507,6 +651,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
        struct nfs4_delegation *dp = calldata;
        struct nfs4_client *clp = dp->dl_client;
 
+       nfsd4_cb_done(task, calldata);
+
        switch (task->tk_status) {
        case -EIO:
                /* Network partition? */
@@ -525,6 +671,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
                rpc_delay(task, 2*HZ);
                task->tk_status = 0;
                rpc_restart_call(task);
+               return;
        } else {
                atomic_set(&clp->cl_cb_conn.cb_set, 0);
                warn_no_callback_path(clp, task->tk_status);
@@ -541,25 +688,61 @@ static void nfsd4_cb_recall_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+       .rpc_call_prepare = nfsd4_cb_prepare,
        .rpc_call_done = nfsd4_cb_recall_done,
        .rpc_release = nfsd4_cb_recall_release,
 };
 
+static struct workqueue_struct *callback_wq;
+
+int nfsd4_create_callback_queue(void)
+{
+       callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
+       if (!callback_wq)
+               return -ENOMEM;
+       return 0;
+}
+
+void nfsd4_destroy_callback_queue(void)
+{
+       destroy_workqueue(callback_wq);
+}
+
+void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt
+*new)
+{
+       struct rpc_clnt *old = clp->cl_cb_conn.cb_client;
+
+       clp->cl_cb_conn.cb_client = new;
+       /*
+        * After this, any work that saw the old value of cb_client will
+        * be gone:
+        */
+       flush_workqueue(callback_wq);
+       /* So we can safely shut it down: */
+       if (old)
+               rpc_shutdown_client(old);
+}
+
 /*
  * called with dp->dl_count inc'ed.
  */
-void
-nfsd4_cb_recall(struct nfs4_delegation *dp)
+static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
        struct nfs4_client *clp = dp->dl_client;
        struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+       struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
-               .rpc_argp = dp,
-               .rpc_cred = clp->cl_cb_conn.cb_cred
+               .rpc_cred = callback_cred
        };
        int status;
 
+       if (clnt == NULL)
+               return; /* Client is shutting down; give up. */
+
+       args->args_op = dp;
+       msg.rpc_argp = args;
        dp->dl_retries = 1;
        status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
                                &nfsd4_cb_recall_ops, dp);
@@ -568,3 +751,19 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
                nfs4_put_delegation(dp);
        }
 }
+
+void nfsd4_do_callback_rpc(struct work_struct *w)
+{
+       /* XXX: for now, just send off delegation recall. */
+       /* In future, generalize to handle any sort of callback. */
+       struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
+       struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
+
+       _nfsd4_cb_recall(dp);
+}
+
+
+void nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+       queue_work(callback_wq, &dp->dl_recall.cb_work);
+}