Merge branch 'for-2.6.34-incoming' into for-2.6.35-incoming

author J. Bruce Fields <bfields@citi.umich.edu>
Wed, 9 Jun 2010 00:05:18 +0000 (20:05 -0400)
committer J. Bruce Fields <bfields@citi.umich.edu>
Wed, 9 Jun 2010 00:05:18 +0000 (20:05 -0400)
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 6a53a84..0488491 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |              | Section 18.17  |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
     | READLINK             | OPT        |              | Section 18.24  |
-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+   | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
     | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
     | REMOVE               | REQ        |              | Section 18.25  |
     | RENAME               | REQ        |              | Section 18.26  |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 872a5ef..c2a4f71 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_cache = {
         .alloc          = expkey_alloc,
  };
  
-static struct svc_expkey *
-svc_expkey_lookup(struct svc_expkey *item)
+static int
+svc_expkey_hash(struct svc_expkey *item)
  {
-       struct cache_head *ch;
         int hash = item->ek_fsidtype;
         char * cp = (char*)item->ek_fsid;
         int len = key_len(item->ek_fsidtype);
@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *item)
         hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
         hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
         hash &= EXPKEY_HASHMASK;
+       return hash;
+}
+
+static struct svc_expkey *
+svc_expkey_lookup(struct svc_expkey *item)
+{
+       struct cache_head *ch;
+       int hash = svc_expkey_hash(item);
  
         ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
                                  hash);
@@ -283,13 +290,7 @@ static struct svc_expkey *
  svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
  {
         struct cache_head *ch;
-       int hash = new->ek_fsidtype;
-       char * cp = (char*)new->ek_fsid;
-       int len = key_len(new->ek_fsidtype);
-
-       hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
-       hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
-       hash &= EXPKEY_HASHMASK;
+       int hash = svc_expkey_hash(new);
  
         ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
                                  &old->h, hash);
@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
         .alloc          = svc_export_alloc,
  };
  
-static struct svc_export *
-svc_export_lookup(struct svc_export *exp)
+static int
+svc_export_hash(struct svc_export *exp)
  {
-       struct cache_head *ch;
         int hash;
+
         hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
         hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
         hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
+       return hash;
+}
+
+static struct svc_export *
+svc_export_lookup(struct svc_export *exp)
+{
+       struct cache_head *ch;
+       int hash = svc_export_hash(exp);
  
         ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
                                  hash);
@@ -759,10 +768,7 @@ static struct svc_export *
  svc_export_update(struct svc_export *new, struct svc_export *old)
  {
         struct cache_head *ch;
-       int hash;
-       hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
-       hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
-       hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
+       int hash = svc_export_hash(old);
  
         ch = sunrpc_cache_update(&svc_export_cache, &new->h,
                                  &old->h,
@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
                 err = 0;
  finish:
         kfree(new.ex_pathname);
-       if (exp)
+       if (!IS_ERR_OR_NULL(exp))
                 exp_put(exp);
-       if (fsid_key && !IS_ERR(fsid_key))
+       if (!IS_ERR_OR_NULL(fsid_key))
                 cache_put(&fsid_key->h, &svc_expkey_cache);
         path_put(&path);
  out_put_clp:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7e32bd3..eb78e7e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
   */
  
  #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc_xprt.h>
  #include <linux/slab.h>
  #include "nfsd.h"
  #include "state.h"
@@ -79,11 +80,6 @@ enum nfs_cb_opnum4 {
                                         cb_sequence_dec_sz +            \
                                         op_dec_sz)
  
-struct nfs4_rpc_args {
-       void                            *args_op;
-       struct nfsd4_cb_sequence        args_seq;
-};
-
  /*
  * Generic encode routines from fs/nfs/nfs4xdr.c
  */
@@ -428,13 +424,19 @@ static struct rpc_procinfo     nfs4_cb_procedures[] = {
  };
  
  static struct rpc_version       nfs_cb_version4 = {
+/*
+ * Note on the callback rpc program version number: despite language in rfc
+ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
+ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
+ * in practice that appears to be what implementations use.  The section
+ * 18.36.3 language is expected to be fixed in an erratum.
+ */
          .number                 = 1,
          .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
          .procs                  = nfs4_cb_procedures
  };
  
  static struct rpc_version *    nfs_cb_version[] = {
-       NULL,
         &nfs_cb_version4,
  };
  
@@ -456,15 +458,14 @@ static struct rpc_program cb_program = {
  
  static int max_cb_time(void)
  {
-       return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
+       return max(nfsd4_lease/10, (time_t)1) * HZ;
  }
  
  /* Reference counting, callback cleanup, etc., all look racy as heck.
- * And why is cb_set an atomic? */
+ * And why is cl_cb_set an atomic? */
  
-int setup_callback_client(struct nfs4_client *clp)
+int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
  {
-       struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
         struct rpc_timeout      timeparms = {
                 .to_initval     = max_cb_time(),
                 .to_retries     = 0,
@@ -476,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp)
                 .timeout        = &timeparms,
                 .program        = &cb_program,
                 .prognumber     = cb->cb_prog,
-               .version        = nfs_cb_version[1]->number,
+               .version        = 0,
                 .authflavor     = clp->cl_flavor,
                 .flags          = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
                 .client_name    = clp->cl_principal,
@@ -486,7 +487,7 @@ int setup_callback_client(struct nfs4_client *clp)
         if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
                 return -EINVAL;
         if (cb->cb_minorversion) {
-               args.bc_xprt = clp->cl_cb_xprt;
+               args.bc_xprt = cb->cb_xprt;
                 args.protocol = XPRT_TRANSPORT_BC_TCP;
         }
         /* Create RPC client */
@@ -496,7 +497,7 @@ int setup_callback_client(struct nfs4_client *clp)
                         PTR_ERR(client));
                 return PTR_ERR(client);
         }
-       cb->cb_client = client;
+       nfsd4_set_callback_client(clp, client);
         return 0;
  
  }
@@ -514,8 +515,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
         if (task->tk_status)
                 warn_no_callback_path(clp, task->tk_status);
         else
-               atomic_set(&clp->cl_cb_conn.cb_set, 1);
-       put_nfs4_client(clp);
+               atomic_set(&clp->cl_cb_set, 1);
  }
  
  static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -537,7 +537,6 @@ int set_callback_cred(void)
  
  void do_probe_callback(struct nfs4_client *clp)
  {
-       struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
         struct rpc_message msg = {
                 .rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
                 .rpc_argp       = clp,
@@ -545,34 +544,27 @@ void do_probe_callback(struct nfs4_client *clp)
         };
         int status;
  
-       status = rpc_call_async(cb->cb_client, &msg,
+       status = rpc_call_async(clp->cl_cb_client, &msg,
                                 RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
                                 &nfsd4_cb_probe_ops, (void *)clp);
-       if (status) {
+       if (status)
                 warn_no_callback_path(clp, status);
-               put_nfs4_client(clp);
-       }
  }
  
  /*
   * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
   */
-void
-nfsd4_probe_callback(struct nfs4_client *clp)
+void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
  {
         int status;
  
-       BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
+       BUG_ON(atomic_read(&clp->cl_cb_set));
  
-       status = setup_callback_client(clp);
+       status = setup_callback_client(clp, cb);
         if (status) {
                 warn_no_callback_path(clp, status);
                 return;
         }
-
-       /* the task holds a reference to the nfs4_client struct */
-       atomic_inc(&clp->cl_count);
-
         do_probe_callback(clp);
  }
  
@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
         }
  }
  
+
  static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
  {
         struct nfs4_delegation *dp = calldata;
         struct nfs4_client *clp = dp->dl_client;
+       struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
  
         nfsd4_cb_done(task, calldata);
  
+       if (current_rpc_client == NULL) {
+               /* We're shutting down; give up. */
+               /* XXX: err, or is it ok just to fall through
+                * and rpc_restart_call? */
+               return;
+       }
+
         switch (task->tk_status) {
         case -EIO:
                 /* Network partition? */
-               atomic_set(&clp->cl_cb_conn.cb_set, 0);
+               atomic_set(&clp->cl_cb_set, 0);
                 warn_no_callback_path(clp, task->tk_status);
+               if (current_rpc_client != task->tk_client) {
+                       /* queue a callback on the new connection: */
+                       nfsd4_cb_recall(dp);
+                       return;
+               }
         case -EBADHANDLE:
         case -NFS4ERR_BAD_STATEID:
                 /* Race: client probably got cb_recall
@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
                 break;
         default:
                 /* success, or error we can't handle */
-               goto done;
+               return;
         }
         if (dp->dl_retries--) {
                 rpc_delay(task, 2*HZ);
@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
                 rpc_restart_call(task);
                 return;
         } else {
-               atomic_set(&clp->cl_cb_conn.cb_set, 0);
+               atomic_set(&clp->cl_cb_set, 0);
                 warn_no_callback_path(clp, task->tk_status);
         }
-done:
-       kfree(task->tk_msg.rpc_argp);
  }
  
  static void nfsd4_cb_recall_release(void *calldata)
  {
         struct nfs4_delegation *dp = calldata;
-       struct nfs4_client *clp = dp->dl_client;
  
         nfs4_put_delegation(dp);
-       put_nfs4_client(clp);
  }
  
  static const struct rpc_call_ops nfsd4_cb_recall_ops = {
@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
         .rpc_release = nfsd4_cb_recall_release,
  };
  
+static struct workqueue_struct *callback_wq;
+
+int nfsd4_create_callback_queue(void)
+{
+       callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
+       if (!callback_wq)
+               return -ENOMEM;
+       return 0;
+}
+
+void nfsd4_destroy_callback_queue(void)
+{
+       destroy_workqueue(callback_wq);
+}
+
+/* must be called under the state lock */
+void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
+{
+       struct rpc_clnt *old = clp->cl_cb_client;
+
+       clp->cl_cb_client = new;
+       /*
+        * After this, any work that saw the old value of cl_cb_client will
+        * be gone:
+        */
+       flush_workqueue(callback_wq);
+       /* So we can safely shut it down: */
+       if (old)
+               rpc_shutdown_client(old);
+}
+
  /*
   * called with dp->dl_count inc'ed.
   */
-void
-nfsd4_cb_recall(struct nfs4_delegation *dp)
+static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
  {
         struct nfs4_client *clp = dp->dl_client;
-       struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
-       struct nfs4_rpc_args *args;
+       struct rpc_clnt *clnt = clp->cl_cb_client;
+       struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
         struct rpc_message msg = {
                 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
                 .rpc_cred = callback_cred
         };
-       int status = -ENOMEM;
+       int status;
+
+       if (clnt == NULL)
+               return; /* Client is shutting down; give up. */
  
-       args = kzalloc(sizeof(*args), GFP_KERNEL);
-       if (!args)
-               goto out;
         args->args_op = dp;
         msg.rpc_argp = args;
         dp->dl_retries = 1;
         status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
                                 &nfsd4_cb_recall_ops, dp);
-out:
-       if (status) {
-               kfree(args);
-               put_nfs4_client(clp);
+       if (status)
                 nfs4_put_delegation(dp);
-       }
+}
+
+void nfsd4_do_callback_rpc(struct work_struct *w)
+{
+       /* XXX: for now, just send off delegation recall. */
+       /* In future, generalize to handle any sort of callback. */
+       struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
+       struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
+
+       _nfsd4_cb_recall(dp);
+}
+
+
+void nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+       queue_work(callback_wq, &dp->dl_recall.cb_work);
  }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ab9e85..59ec449 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[];
  static const char *nfsd4_op_name(unsigned opnum);
  
  /*
- * Enforce NFSv4.1 COMPOUND ordering rules.
+ * Enforce NFSv4.1 COMPOUND ordering rules:
   *
- * TODO:
- * - enforce NFS4ERR_NOT_ONLY_OP,
- * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
+ * Also note, enforced elsewhere:
+ *     - SEQUENCE other than as first op results in
+ *       NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
+ *     - BIND_CONN_TO_SESSION must be the only op in its compound
+ *       (Will be enforced in nfsd4_bind_conn_to_session().)
+ *     - DESTROY_SESSION must be the final operation in a compound, if
+ *       sessionid's in SEQUENCE and DESTROY_SESSION are the same.
+ *       (Enforced in nfsd4_destroy_session().)
   */
-static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
+static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
  {
-       if (args->minorversion && args->opcnt > 0) {
-               struct nfsd4_op *op = &args->ops[0];
-               return (op->status == nfserr_op_illegal) ||
-                      (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
-       }
-       return true;
+       struct nfsd4_op *op = &args->ops[0];
+
+       /* These ordering requirements don't apply to NFSv4.0: */
+       if (args->minorversion == 0)
+               return nfs_ok;
+       /* This is weird, but OK, not our problem: */
+       if (args->opcnt == 0)
+               return nfs_ok;
+       if (op->status == nfserr_op_illegal)
+               return nfs_ok;
+       if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
+               return nfserr_op_not_in_session;
+       if (op->opnum == OP_SEQUENCE)
+               return nfs_ok;
+       if (args->opcnt != 1)
+               return nfserr_not_only_op;
+       return nfs_ok;
  }
  
  /*
@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
         resp->rqstp = rqstp;
         resp->cstate.minorversion = args->minorversion;
         resp->cstate.replay_owner = NULL;
+       resp->cstate.session = NULL;
         fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
         fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
         /* Use the deferral mechanism only for NFSv4.0 compounds */
@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
         if (args->minorversion > nfsd_supported_minorversion)
                 goto out;
  
-       if (!nfs41_op_ordering_ok(args)) {
+       status = nfs41_check_op_ordering(args);
+       if (status) {
                 op = &args->ops[0];
-               op->status = nfserr_sequence_pos;
+               op->status = status;
                 goto encode_op;
         }
  
-       status = nfs_ok;
         while (!status && resp->opcnt < args->opcnt) {
                 op = &args->ops[resp->opcnt++];
  
@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
                 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
                 .op_name = "OP_SEQUENCE",
         },
+       [OP_RECLAIM_COMPLETE] = {
+               .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+               .op_flags = ALLOWED_WITHOUT_FH,
+               .op_name = "OP_RECLAIM_COMPLETE",
+       },
  };
  
  static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a8feda..4a27347 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
  /* Globals */
-static time_t lease_time = 90;     /* default lease time */
-static time_t user_lease_time = 90;
+time_t nfsd4_lease = 90;     /* default lease time */
+time_t nfsd4_grace = 90;
  static time_t boot_time;
  static u32 current_ownerid = 1;
  static u32 current_fileid = 1;
@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
         dp->dl_vfs_file = stp->st_vfs_file;
         dp->dl_type = type;
         dp->dl_ident = cb->cb_ident;
-       dp->dl_stateid.si_boot = get_seconds();
+       dp->dl_stateid.si_boot = boot_time;
         dp->dl_stateid.si_stateownerid = current_delegid++;
         dp->dl_stateid.si_fileid = 0;
         dp->dl_stateid.si_generation = 0;
@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
         atomic_set(&dp->dl_count, 1);
         list_add(&dp->dl_perfile, &fp->fi_delegations);
         list_add(&dp->dl_perclnt, &clp->cl_delegations);
+       INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
         return dp;
  }
  
@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation *dp)
   * SETCLIENTID state 
   */
  
+/* client_lock protects the client lru list and session hash table */
+static DEFINE_SPINLOCK(client_lock);
+
  /* Hash tables for nfs4_clientid state */
  #define CLIENT_HASH_BITS                 4
  #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
@@ -367,7 +371,6 @@ static void release_openowner(struct nfs4_stateowner *sop)
         nfs4_put_stateowner(sop);
  }
  
-static DEFINE_SPINLOCK(sessionid_lock);
  #define SESSION_HASH_SIZE      512
  static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
  
@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
  
         new->se_flags = cses->flags;
         kref_init(&new->se_ref);
-       spin_lock(&sessionid_lock);
+       spin_lock(&client_lock);
         list_add(&new->se_hash, &sessionid_hashtbl[idx]);
         list_add(&new->se_perclnt, &clp->cl_sessions);
-       spin_unlock(&sessionid_lock);
+       spin_unlock(&client_lock);
  
         status = nfs_ok;
  out:
@@ -579,7 +582,7 @@ out_free:
         goto out;
  }
  
-/* caller must hold sessionid_lock */
+/* caller must hold client_lock */
  static struct nfsd4_session *
  find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
  {
@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
         return NULL;
  }
  
-/* caller must hold sessionid_lock */
+/* caller must hold client_lock */
  static void
  unhash_session(struct nfsd4_session *ses)
  {
@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses)
         list_del(&ses->se_perclnt);
  }
  
-static void
-release_session(struct nfsd4_session *ses)
-{
-       spin_lock(&sessionid_lock);
-       unhash_session(ses);
-       spin_unlock(&sessionid_lock);
-       nfsd4_put_session(ses);
-}
-
  void
  free_session(struct kref *kref)
  {
@@ -634,9 +628,18 @@ free_session(struct kref *kref)
         kfree(ses);
  }
  
+/* must be called under the client_lock */
  static inline void
-renew_client(struct nfs4_client *clp)
+renew_client_locked(struct nfs4_client *clp)
  {
+       if (is_client_expired(clp)) {
+               dprintk("%s: client (clientid %08x/%08x) already expired\n",
+                       __func__,
+                       clp->cl_clientid.cl_boot,
+                       clp->cl_clientid.cl_id);
+               return;
+       }
+
         /*
         * Move client to the end to the LRU list.
         */
@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
         clp->cl_time = get_seconds();
  }
  
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+       spin_lock(&client_lock);
+       renew_client_locked(clp);
+       spin_unlock(&client_lock);
+}
+
  /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
  static int
  STALE_CLIENTID(clientid_t *clid)
@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
         return clp;
  }
  
-static void
-shutdown_callback_client(struct nfs4_client *clp)
-{
-       struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
-
-       if (clnt) {
-               /*
-                * Callback threads take a reference on the client, so there
-                * should be no outstanding callbacks at this point.
-                */
-               clp->cl_cb_conn.cb_client = NULL;
-               rpc_shutdown_client(clnt);
-       }
-}
-
  static inline void
  free_client(struct nfs4_client *clp)
  {
-       shutdown_callback_client(clp);
-       if (clp->cl_cb_xprt)
-               svc_xprt_put(clp->cl_cb_xprt);
         if (clp->cl_cred.cr_group_info)
                 put_group_info(clp->cl_cred.cr_group_info);
         kfree(clp->cl_principal);
@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
  }
  
  void
-put_nfs4_client(struct nfs4_client *clp)
+release_session_client(struct nfsd4_session *session)
  {
-       if (atomic_dec_and_test(&clp->cl_count))
+       struct nfs4_client *clp = session->se_client;
+
+       if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+               return;
+       if (is_client_expired(clp)) {
                 free_client(clp);
+               session->se_client = NULL;
+       } else
+               renew_client_locked(clp);
+       spin_unlock(&client_lock);
+       nfsd4_put_session(session);
+}
+
+/* must be called under the client_lock */
+static inline void
+unhash_client_locked(struct nfs4_client *clp)
+{
+       mark_client_expired(clp);
+       list_del(&clp->cl_lru);
+       while (!list_empty(&clp->cl_sessions)) {
+               struct nfsd4_session  *ses;
+               ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+                                se_perclnt);
+               unhash_session(ses);
+               nfsd4_put_session(ses);
+       }
  }
  
  static void
@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
         struct nfs4_delegation *dp;
         struct list_head reaplist;
  
-       dprintk("NFSD: expire_client cl_count %d\n",
-                           atomic_read(&clp->cl_count));
-
         INIT_LIST_HEAD(&reaplist);
         spin_lock(&recall_lock);
         while (!list_empty(&clp->cl_delegations)) {
@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
                 list_del_init(&dp->dl_recall_lru);
                 unhash_delegation(dp);
         }
-       list_del(&clp->cl_idhash);
-       list_del(&clp->cl_strhash);
-       list_del(&clp->cl_lru);
         while (!list_empty(&clp->cl_openowners)) {
                 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
                 release_openowner(sop);
         }
-       while (!list_empty(&clp->cl_sessions)) {
-               struct nfsd4_session  *ses;
-               ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
-                                se_perclnt);
-               release_session(ses);
-       }
-       put_nfs4_client(clp);
+       nfsd4_set_callback_client(clp, NULL);
+       if (clp->cl_cb_conn.cb_xprt)
+               svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+       list_del(&clp->cl_idhash);
+       list_del(&clp->cl_strhash);
+       spin_lock(&client_lock);
+       unhash_client_locked(clp);
+       if (atomic_read(&clp->cl_refcount) == 0)
+               free_client(clp);
+       spin_unlock(&client_lock);
  }
  
  static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -839,14 +853,15 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
         }
  
         memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
-       atomic_set(&clp->cl_count, 1);
-       atomic_set(&clp->cl_cb_conn.cb_set, 0);
+       atomic_set(&clp->cl_refcount, 0);
+       atomic_set(&clp->cl_cb_set, 0);
         INIT_LIST_HEAD(&clp->cl_idhash);
         INIT_LIST_HEAD(&clp->cl_strhash);
         INIT_LIST_HEAD(&clp->cl_openowners);
         INIT_LIST_HEAD(&clp->cl_delegations);
         INIT_LIST_HEAD(&clp->cl_sessions);
         INIT_LIST_HEAD(&clp->cl_lru);
+       clp->cl_time = get_seconds();
         clear_bit(0, &clp->cl_cb_slot_busy);
         rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
         copy_verf(clp, verf);
@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
         list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
         idhashval = clientid_hashval(clp->cl_clientid.cl_id);
         list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
-       list_add_tail(&clp->cl_lru, &client_lru);
-       clp->cl_time = get_seconds();
+       renew_client(clp);
  }
  
  static void
@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *clp)
         unsigned int strhashval;
  
         dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
-       list_del_init(&clp->cl_strhash);
         list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
         strhashval = clientstr_hashval(clp->cl_recdir);
-       list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+       list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
         renew_client(clp);
  }
  
@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                 cs_slot->sl_seqid++; /* from 0 to 1 */
                 move_to_confirmed(unconf);
  
-               /*
-                * We do not support RDMA or persistent sessions
-                */
-               cr_ses->flags &= ~SESSION4_PERSIST;
-               cr_ses->flags &= ~SESSION4_RDMA;
-
                 if (cr_ses->flags & SESSION4_BACK_CHAN) {
-                       unconf->cl_cb_xprt = rqstp->rq_xprt;
-                       svc_xprt_get(unconf->cl_cb_xprt);
+                       unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+                       svc_xprt_get(rqstp->rq_xprt);
                         rpc_copy_addr(
                                 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
                                 sa);
@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                                 cstate->minorversion;
                         unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
                         unconf->cl_cb_seq_nr = 1;
-                       nfsd4_probe_callback(unconf);
+                       nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
                 }
                 conf = unconf;
         } else {
@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                 goto out;
         }
  
+       /*
+        * We do not support RDMA or persistent sessions
+        */
+       cr_ses->flags &= ~SESSION4_PERSIST;
+       cr_ses->flags &= ~SESSION4_RDMA;
+
         status = alloc_init_session(rqstp, conf, cr_ses);
         if (status)
                 goto out;
@@ -1369,6 +1382,21 @@ out:
         return status;
  }
  
+static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
+{
+       struct nfsd4_compoundres *resp = rqstp->rq_resp;
+       struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+       return argp->opcnt == resp->opcnt;
+}
+
+static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
+{
+       if (!session)
+               return 0;
+       return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
+}
+
  __be32
  nfsd4_destroy_session(struct svc_rqst *r,
                       struct nfsd4_compound_state *cstate,
@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r,
          * - Do we need to clear any callback info from previous session?
          */
  
+       if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
+               if (!nfsd4_last_compound_op(r))
+                       return nfserr_not_only_op;
+       }
         dump_sessionid(__func__, &sessionid->sessionid);
-       spin_lock(&sessionid_lock);
+       spin_lock(&client_lock);
         ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
         if (!ses) {
-               spin_unlock(&sessionid_lock);
+               spin_unlock(&client_lock);
                 goto out;
         }
  
         unhash_session(ses);
-       spin_unlock(&sessionid_lock);
+       spin_unlock(&client_lock);
  
+       nfs4_lock_state();
         /* wait for callbacks */
-       shutdown_callback_client(ses->se_client);
+       nfsd4_set_callback_client(ses->se_client, NULL);
+       nfs4_unlock_state();
         nfsd4_put_session(ses);
         status = nfs_ok;
  out:
@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
         if (resp->opcnt != 1)
                 return nfserr_sequence_pos;
  
-       spin_lock(&sessionid_lock);
+       spin_lock(&client_lock);
         status = nfserr_badsession;
         session = find_in_sessionid_hashtbl(&seq->sessionid);
         if (!session)
@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
         cstate->slot = slot;
         cstate->session = session;
  
-       /* Hold a session reference until done processing the compound:
-        * nfsd4_put_session called only if the cstate slot is set.
-        */
-       nfsd4_get_session(session);
  out:
-       spin_unlock(&sessionid_lock);
-       /* Renew the clientid on success and on replay */
+       /* Hold a session reference until done processing the compound. */
         if (cstate->session) {
-               nfs4_lock_state();
-               renew_client(session->se_client);
-               nfs4_unlock_state();
+               nfsd4_get_session(cstate->session);
+               atomic_inc(&session->se_client->cl_refcount);
         }
+       spin_unlock(&client_lock);
         dprintk("%s: return %d\n", __func__, ntohl(status));
         return status;
  }
  
  __be32
+nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+{
+       if (rc->rca_one_fs) {
+               if (!cstate->current_fh.fh_dentry)
+                       return nfserr_nofilehandle;
+               /*
+                * We don't take advantage of the rca_one_fs case.
+                * That's OK, it's optional, we can safely ignore it.
+                */
+                return nfs_ok;
+       }
+       nfs4_lock_state();
+       if (is_client_expired(cstate->session->se_client)) {
+               nfs4_unlock_state();
+               /*
+                * The following error isn't really legal.
+                * But we only get here if the client has just explicitly
+                * destroyed its own client state.  Surely it no longer
+                * cares what error it gets back on an operation for the
+                * dead client.
+                */
+               return nfserr_stale_clientid;
+       }
+       nfsd4_create_clid_dir(cstate->session->se_client);
+       nfs4_unlock_state();
+       return nfs_ok;
+}
+
+__be32
  nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                   struct nfsd4_setclientid *setclid)
  {
@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                 if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
                         status = nfserr_clid_inuse;
                 else {
-                       /* XXX: We just turn off callbacks until we can handle
-                         * change request correctly. */
-                       atomic_set(&conf->cl_cb_conn.cb_set, 0);
+                       atomic_set(&conf->cl_cb_set, 0);
+                       nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
                         expire_client(unconf);
                         status = nfs_ok;
  
@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                         }
                         move_to_confirmed(unconf);
                         conf = unconf;
-                       nfsd4_probe_callback(conf);
+                       nfsd4_probe_callback(conf, &conf->cl_cb_conn);
                         status = nfs_ok;
                 }
         } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
                 INIT_LIST_HEAD(&fp->fi_hash);
                 INIT_LIST_HEAD(&fp->fi_stateids);
                 INIT_LIST_HEAD(&fp->fi_delegations);
-               spin_lock(&recall_lock);
-               list_add(&fp->fi_hash, &file_hashtbl[hashval]);
-               spin_unlock(&recall_lock);
                 fp->fi_inode = igrab(ino);
                 fp->fi_id = current_fileid++;
                 fp->fi_had_conflict = false;
+               spin_lock(&recall_lock);
+               list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+               spin_unlock(&recall_lock);
                 return fp;
         }
         return NULL;
@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
         stp->st_stateowner = sop;
         get_nfs4_file(fp);
         stp->st_file = fp;
-       stp->st_stateid.si_boot = get_seconds();
+       stp->st_stateid.si_boot = boot_time;
         stp->st_stateid.si_stateownerid = sop->so_id;
         stp->st_stateid.si_fileid = fp->fi_id;
         stp->st_stateid.si_generation = 0;
@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
          * lock) we know the server hasn't removed the lease yet, we know
          * it's safe to take a reference: */
         atomic_inc(&dp->dl_count);
-       atomic_inc(&dp->dl_client->cl_count);
  
         spin_lock(&recall_lock);
         list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
  {
         struct nfs4_delegation *dp;
         struct nfs4_stateowner *sop = stp->st_stateowner;
-       struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
+       int cb_up = atomic_read(&sop->so_client->cl_cb_set);
         struct file_lock fl, *flp = &fl;
         int status, flag = 0;
  
@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
         open->op_recall = 0;
         switch (open->op_claim_type) {
                 case NFS4_OPEN_CLAIM_PREVIOUS:
-                       if (!atomic_read(&cb->cb_set))
+                       if (!cb_up)
                                 open->op_recall = 1;
                         flag = open->op_delegate_type;
                         if (flag == NFS4_OPEN_DELEGATE_NONE)
@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
                          * had the chance to reclaim theirs.... */
                         if (locks_in_grace())
                                 goto out;
-                       if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+                       if (!cb_up || !sop->so_confirmed)
                                 goto out;
                         if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
                                 flag = NFS4_OPEN_DELEGATE_WRITE;
@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
         }
         memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
  
-       if (nfsd4_has_session(&resp->cstate)) {
+       if (nfsd4_has_session(&resp->cstate))
                 open->op_stateowner->so_confirmed = 1;
-               nfsd4_create_clid_dir(open->op_stateowner->so_client);
-       }
  
         /*
         * Attempt to hand out a delegation. No error return, because the
@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         renew_client(clp);
         status = nfserr_cb_path_down;
         if (!list_empty(&clp->cl_delegations)
-                       && !atomic_read(&clp->cl_cb_conn.cb_set))
+                       && !atomic_read(&clp->cl_cb_set))
                 goto out;
         status = nfs_ok;
  out:
@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
         dprintk("NFSD: end of grace period\n");
         nfsd4_recdir_purge_old();
         locks_end_grace(&nfsd4_manager);
+       /*
+        * Now that every NFSv4 client has had the chance to recover and
+        * to see the (possibly new, possibly shorter) lease time, we
+        * can safely set the next grace time to the current lease time:
+        */
+       nfsd4_grace = nfsd4_lease;
  }
  
  static time_t
@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
         struct nfs4_stateowner *sop;
         struct nfs4_delegation *dp;
         struct list_head *pos, *next, reaplist;
-       time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
-       time_t t, clientid_val = NFSD_LEASE_TIME;
-       time_t u, test_val = NFSD_LEASE_TIME;
+       time_t cutoff = get_seconds() - nfsd4_lease;
+       time_t t, clientid_val = nfsd4_lease;
+       time_t u, test_val = nfsd4_lease;
  
         nfs4_lock_state();
  
         dprintk("NFSD: laundromat service - starting\n");
         if (locks_in_grace())
                 nfsd4_end_grace();
+       INIT_LIST_HEAD(&reaplist);
+       spin_lock(&client_lock);
         list_for_each_safe(pos, next, &client_lru) {
                 clp = list_entry(pos, struct nfs4_client, cl_lru);
                 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
                                 clientid_val = t;
                         break;
                 }
+               if (atomic_read(&clp->cl_refcount)) {
+                       dprintk("NFSD: client in use (clientid %08x)\n",
+                               clp->cl_clientid.cl_id);
+                       continue;
+               }
+               unhash_client_locked(clp);
+               list_add(&clp->cl_lru, &reaplist);
+       }
+       spin_unlock(&client_lock);
+       list_for_each_safe(pos, next, &reaplist) {
+               clp = list_entry(pos, struct nfs4_client, cl_lru);
                 dprintk("NFSD: purging unused client (clientid %08x)\n",
                         clp->cl_clientid.cl_id);
                 nfsd4_remove_clid_dir(clp);
                 expire_client(clp);
         }
-       INIT_LIST_HEAD(&reaplist);
         spin_lock(&recall_lock);
         list_for_each_safe(pos, next, &del_recall_lru) {
                 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
                 list_del_init(&dp->dl_recall_lru);
                 unhash_delegation(dp);
         }
-       test_val = NFSD_LEASE_TIME;
+       test_val = nfsd4_lease;
         list_for_each_safe(pos, next, &close_lru) {
                 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
                 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
  static int
  STALE_STATEID(stateid_t *stateid)
  {
-       if (time_after((unsigned long)boot_time,
-                       (unsigned long)stateid->si_boot)) {
-               dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
-                       STATEID_VAL(stateid));
-               return 1;
-       }
-       return 0;
-}
-
-static int
-EXPIRED_STATEID(stateid_t *stateid)
-{
-       if (time_before((unsigned long)boot_time,
-                       ((unsigned long)stateid->si_boot)) &&
-           time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
-               dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
-                       STATEID_VAL(stateid));
-               return 1;
-       }
-       return 0;
-}
-
-static __be32
-stateid_error_map(stateid_t *stateid)
-{
-       if (STALE_STATEID(stateid))
-               return nfserr_stale_stateid;
-       if (EXPIRED_STATEID(stateid))
-               return nfserr_expired;
-
-       dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
+       if (stateid->si_boot == boot_time)
+               return 0;
+       dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
                 STATEID_VAL(stateid));
-       return nfserr_bad_stateid;
+       return 1;
  }
  
  static inline int
@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
         status = nfserr_bad_stateid;
         if (is_delegation_stateid(stateid)) {
                 dp = find_delegation_stateid(ino, stateid);
-               if (!dp) {
-                       status = stateid_error_map(stateid);
+               if (!dp)
                         goto out;
-               }
                 status = check_stateid_generation(stateid, &dp->dl_stateid,
                                                   flags);
                 if (status)
@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
                         *filpp = dp->dl_vfs_file;
         } else { /* open or lock stateid */
                 stp = find_stateid(stateid, flags);
-               if (!stp) {
-                       status = stateid_error_map(stateid);
+               if (!stp)
                         goto out;
-               }
                 if (nfs4_check_fh(current_fh, stp))
                         goto out;
                 if (!stp->st_stateowner->so_confirmed)
@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
                  */
                 sop = search_close_lru(stateid->si_stateownerid, flags);
                 if (sop == NULL)
-                       return stateid_error_map(stateid);
+                       return nfserr_bad_stateid;
                 *sopp = sop;
                 goto check_replay;
         }
@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         if (!is_delegation_stateid(stateid))
                 goto out;
         dp = find_delegation_stateid(inode, stateid);
-       if (!dp) {
-               status = stateid_error_map(stateid);
+       if (!dp)
                 goto out;
-       }
         status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
         if (status)
                 goto out;
@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
         stp->st_stateowner = sop;
         get_nfs4_file(fp);
         stp->st_file = fp;
-       stp->st_stateid.si_boot = get_seconds();
+       stp->st_stateid.si_boot = boot_time;
         stp->st_stateid.si_stateownerid = sop->so_id;
         stp->st_stateid.si_fileid = fp->fi_id;
         stp->st_stateid.si_generation = 0;
@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
                 printk("NFSD: Failure reading reboot recovery data\n");
  }
  
-unsigned long
-get_nfs4_grace_period(void)
-{
-       return max(user_lease_time, lease_time) * HZ;
-}
-
  /*
   * Since the lifetime of a delegation isn't limited to that of an open, a
   * client may quite reasonably hang on to a delegation as long as it has
@@ -4008,20 +4040,27 @@ set_max_delegations(void)
  static int
  __nfs4_state_start(void)
  {
-       unsigned long grace_time;
+       int ret;
  
         boot_time = get_seconds();
-       grace_time = get_nfs4_grace_period();
-       lease_time = user_lease_time;
         locks_start_grace(&nfsd4_manager);
         printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
-              grace_time/HZ);
+              nfsd4_grace);
+       ret = set_callback_cred();
+       if (ret)
+               return -ENOMEM;
         laundry_wq = create_singlethread_workqueue("nfsd4");
         if (laundry_wq == NULL)
                 return -ENOMEM;
-       queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
+       ret = nfsd4_create_callback_queue();
+       if (ret)
+               goto out_free_laundry;
+       queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
         set_max_delegations();
-       return set_callback_cred();
+       return 0;
+out_free_laundry:
+       destroy_workqueue(laundry_wq);
+       return ret;
  }
  
  int
@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
         return 0;
  }
  
-time_t
-nfs4_lease_time(void)
-{
-       return lease_time;
-}
-
  static void
  __nfs4_state_shutdown(void)
  {
@@ -4090,6 +4123,7 @@ nfs4_state_shutdown(void)
         nfs4_release_reclaim();
         __nfs4_state_shutdown();
         nfs4_unlock_state();
+       nfsd4_destroy_callback_queue();
  }
  
  /*
@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
  {
         return user_recovery_dirname;
  }
-
-/*
- * Called when leasetime is changed.
- *
- * The only way the protocol gives us to handle on-the-fly lease changes is to
- * simulate a reboot.  Instead of doing that, we just wait till the next time
- * we start to register any changes in lease time.  If the administrator
- * really wants to change the lease time *now*, they can go ahead and bring
- * nfsd down and then back up again after changing the lease time.
- *
- * user_lease_time is protected by nfsd_mutex since it's only really accessed
- * when nfsd is starting
- */
-void
-nfs4_reset_lease(time_t leasetime)
-{
-       user_lease_time = leasetime;
-}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c

index 34ccf81..ac17a70 100644 (file)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
         DECODE_TAIL;
  }
  
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+{
+       DECODE_HEAD;
+
+       READ_BUF(4);
+       READ32(rc->rca_one_fs);
+
+       DECODE_TAIL;
+}
+
  static __be32
  nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
  {
@@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
         [OP_TEST_STATEID]       = (nfsd4_dec)nfsd4_decode_notsupp,
         [OP_WANT_DELEGATION]    = (nfsd4_dec)nfsd4_decode_notsupp,
         [OP_DESTROY_CLIENTID]   = (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_RECLAIM_COMPLETE]   = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_RECLAIM_COMPLETE]   = (nfsd4_dec)nfsd4_decode_reclaim_complete,
  };
  
  struct nfsd4_minorversion_ops {
@@ -1900,7 +1910,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
         if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
                 if ((buflen -= 4) < 0)
                         goto out_resource;
-               WRITE32(NFSD_LEASE_TIME);
+               WRITE32(nfsd4_lease);
         }
         if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
                 if ((buflen -= 4) < 0)
@@ -3307,11 +3317,14 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
                 iov = &rqstp->rq_res.head[0];
         iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
         BUG_ON(iov->iov_len > PAGE_SIZE);
-       if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
-               nfsd4_store_cache_entry(resp);
-               dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-               resp->cstate.slot->sl_inuse = false;
-               nfsd4_put_session(resp->cstate.session);
+       if (nfsd4_has_session(cs)) {
+               if (cs->status != nfserr_replay_cache) {
+                       nfsd4_store_cache_entry(resp);
+                       dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+                       cs->slot->sl_inuse = false;
+               }
+               /* Renew the clientid on success and on replay */
+               release_session_client(cs->session);
         }
         return 1;
  }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c

index e359107..bc3194e 100644 (file)
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -46,6 +46,7 @@ enum {
          */
  #ifdef CONFIG_NFSD_V4
         NFSD_Leasetime,
+       NFSD_Gracetime,
         NFSD_RecoveryDir,
  #endif
  };
@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size);
  static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
  #ifdef CONFIG_NFSD_V4
  static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
  static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
  #endif
  
@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
         [NFSD_MaxBlkSize] = write_maxblksize,
  #ifdef CONFIG_NFSD_V4
         [NFSD_Leasetime] = write_leasetime,
+       [NFSD_Gracetime] = write_gracetime,
         [NFSD_RecoveryDir] = write_recoverydir,
  #endif
  };
@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
  }
  
  #ifdef CONFIG_NFSD_V4
-extern time_t nfs4_leasetime(void);
-
-static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
+static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
  {
-       /* if size > 10 seconds, call
-        * nfs4_reset_lease() then write out the new lease (seconds) as reply
-        */
         char *mesg = buf;
-       int rv, lease;
+       int rv, i;
  
         if (size > 0) {
                 if (nfsd_serv)
                         return -EBUSY;
-               rv = get_int(&mesg, &lease);
+               rv = get_int(&mesg, &i);
                 if (rv)
                         return rv;
-               if (lease < 10 || lease > 3600)
+               /*
+                * Some sanity checking.  We don't have a reason for
+                * these particular numbers, but problems with the
+                * extremes are:
+                *      - Too short: the briefest network outage may
+                *        cause clients to lose all their locks.  Also,
+                *        the frequent polling may be wasteful.
+                *      - Too long: do you really want reboot recovery
+                *        to take more than an hour?  Or to make other
+                *        clients wait an hour before being able to
+                *        revoke a dead client's locks?
+                */
+               if (i < 10 || i > 3600)
                         return -EINVAL;
-               nfs4_reset_lease(lease);
+               *time = i;
         }
  
-       return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
-                                                       nfs4_lease_time());
+       return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+}
+
+static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+{
+       ssize_t rv;
+
+       mutex_lock(&nfsd_mutex);
+       rv = __nfsd4_write_time(file, buf, size, time);
+       mutex_unlock(&nfsd_mutex);
+       return rv;
  }
  
  /**
@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
   */
  static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
  {
-       ssize_t rv;
+       return nfsd4_write_time(file, buf, size, &nfsd4_lease);
+}
  
-       mutex_lock(&nfsd_mutex);
-       rv = __write_leasetime(file, buf, size);
-       mutex_unlock(&nfsd_mutex);
-       return rv;
+/**
+ * write_gracetime - Set or report current NFSv4 grace period time
+ *
+ * As above, but sets the time of the NFSv4 grace period.
+ *
+ * Note this should never be set to less than the *previous*
+ * lease-period time, but we don't try to enforce this.  (In the common
+ * case (a new boot), we don't know what the previous lease time was
+ * anyway.)
+ */
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
+{
+       return nfsd4_write_time(file, buf, size, &nfsd4_grace);
  }
  
  extern char *nfs4_recoverydir(void);
@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
                 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
  #ifdef CONFIG_NFSD_V4
                 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+               [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
                 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
  #endif
                 /* last one */ {""}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h

index e942a1a..7237776 100644 (file)
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
  void nfs4_state_shutdown(void);
-time_t nfs4_lease_time(void);
  void nfs4_reset_lease(time_t leasetime);
  int nfs4_reset_recoverydir(char *recdir);
  #else
@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) { return 0; }
  static inline void nfsd4_free_slabs(void) { }
  static inline int nfs4_state_start(void) { return 0; }
  static inline void nfs4_state_shutdown(void) { }
-static inline time_t nfs4_lease_time(void) { return 0; }
  static inline void nfs4_reset_lease(time_t leasetime) { }
  static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
  #endif
@@ -229,6 +227,9 @@ extern struct timeval       nfssvc_boot;
  
  #ifdef CONFIG_NFSD_V4
  
+extern time_t nfsd4_lease;
+extern time_t nfsd4_grace;
+
  /* before processing a COMPOUND operation, we have to check that there
   * is enough space in the buffer for XDR encode to succeed.  otherwise,
   * we might process an operation with side effects, and be unable to
@@ -247,7 +248,6 @@ extern struct timeval       nfssvc_boot;
  #define        COMPOUND_SLACK_SPACE            140    /* OP_GETFH */
  #define COMPOUND_ERR_SLACK_SPACE       12     /* OP_SETATTR */
  
-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
  #define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
  
  /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c

index 171699e..06b2a26 100644 (file)
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -120,7 +120,7 @@ u32 nfsd_supported_minorversion;
  int nfsd_vers(int vers, enum vers_op change)
  {
         if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
-               return -1;
+               return 0;
         switch(change) {
         case NFSD_SET:
                 nfsd_versions[vers] = nfsd_version[vers];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h

index fefeae2..006c842 100644 (file)
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
         struct nfs4_client      *cbs_clp;
  };
  
+struct nfs4_rpc_args {
+       void                            *args_op;
+       struct nfsd4_cb_sequence        args_seq;
+};
+
+struct nfsd4_callback {
+       struct nfs4_rpc_args cb_args;
+       struct work_struct cb_work;
+};
+
  struct nfs4_delegation {
         struct list_head        dl_perfile;
         struct list_head        dl_perclnt;
@@ -86,6 +96,7 @@ struct nfs4_delegation {
         stateid_t               dl_stateid;
         struct knfsd_fh         dl_fh;
         int                     dl_retries;
+       struct nfsd4_callback   dl_recall;
  };
  
  /* client delegation callback info */
@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
         u32                     cb_prog;
         u32                     cb_minorversion;
         u32                     cb_ident;       /* minorversion 0 only */
-       /* RPC client info */
-       atomic_t                cb_set;     /* successful CB_NULL call */
-       struct rpc_clnt *       cb_client;
+       struct svc_xprt         *cb_xprt;       /* minorversion 1 only */
  };
  
  /* Maximum number of slots per session. 160 is useful for long haul TCP */
@@ -157,7 +166,7 @@ struct nfsd4_session {
         struct list_head        se_hash;        /* hash by sessionid */
         struct list_head        se_perclnt;
         u32                     se_flags;
-       struct nfs4_client      *se_client;     /* for expire_client */
+       struct nfs4_client      *se_client;
         struct nfs4_sessionid   se_sessionid;
         struct nfsd4_channel_attrs se_fchannel;
         struct nfsd4_channel_attrs se_bchannel;
@@ -212,25 +221,41 @@ struct nfs4_client {
         struct svc_cred         cl_cred;        /* setclientid principal */
         clientid_t              cl_clientid;    /* generated by server */
         nfs4_verifier           cl_confirm;     /* generated by server */
-       struct nfs4_cb_conn     cl_cb_conn;     /* callback info */
-       atomic_t                cl_count;       /* ref count */
         u32                     cl_firststate;  /* recovery dir creation */
  
+       /* for v4.0 and v4.1 callbacks: */
+       struct nfs4_cb_conn     cl_cb_conn;
+       struct rpc_clnt         *cl_cb_client;
+       atomic_t                cl_cb_set;
+
         /* for nfs41 */
         struct list_head        cl_sessions;
         struct nfsd4_clid_slot  cl_cs_slot;     /* create_session slot */
         u32                     cl_exchange_flags;
         struct nfs4_sessionid   cl_sessionid;
+       /* number of RPCs in progress over an associated session: */
+       atomic_t                cl_refcount;
  
         /* for nfs41 callbacks */
         /* We currently support a single back channel with a single slot */
         unsigned long           cl_cb_slot_busy;
         u32                     cl_cb_seq_nr;
-       struct svc_xprt         *cl_cb_xprt;    /* 4.1 callback transport */
         struct rpc_wait_queue   cl_cb_waitq;    /* backchannel callers may */
                                                 /* wait here for slots */
  };
  
+static inline void
+mark_client_expired(struct nfs4_client *clp)
+{
+       clp->cl_time = 0;
+}
+
+static inline bool
+is_client_expired(struct nfs4_client *clp)
+{
+       return clp->cl_time == 0;
+}
+
  /* struct nfs4_client_reset
   * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
   * upon lease reset, or from upcall to state_daemon (to read in state
@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
  extern void nfs4_unlock_state(void);
  extern int nfs4_in_grace(void);
  extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
-extern void put_nfs4_client(struct nfs4_client *clp);
  extern void nfs4_free_stateowner(struct kref *kref);
  extern int set_callback_cred(void);
-extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+extern void nfsd4_do_callback_rpc(struct work_struct *);
  extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern int nfsd4_create_callback_queue(void);
+extern void nfsd4_destroy_callback_queue(void);
+extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
  extern void nfs4_put_delegation(struct nfs4_delegation *dp);
  extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
  extern void nfsd4_init_recdir(char *recdir_name);
@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
  extern void nfsd4_recdir_purge_old(void);
  extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
  extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+extern void release_session_client(struct nfsd4_session *);
  
  static inline void
  nfs4_put_stateowner(struct nfs4_stateowner *so)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 3440dd8..4eb9baa 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -723,7 +723,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         struct inode    *inode;
         int             flags = O_RDONLY|O_LARGEFILE;
         __be32          err;
-       int             host_err;
+       int             host_err = 0;
  
         validate_process_creds();
  
@@ -760,7 +760,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
          * Check to see if there are any leases on this file.
          * This may block while leases are broken.
          */
-       host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
+       if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
+               host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
         if (host_err == -EWOULDBLOCK)
                 host_err = -ETIMEDOUT;
         if (host_err) /* NOMEM or WOULDBLOCK */
@@ -1168,7 +1169,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                         goto out;
         }
  
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_open(rqstp, fhp, S_IFREG,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
         if (err)
                 goto out;
         if (EX_ISSYNC(fhp->fh_export)) {
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h

index 4b1de0a..217a62c 100644 (file)
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,6 +20,7 @@
  #define NFSD_MAY_OWNER_OVERRIDE        64
  #define NFSD_MAY_LOCAL_ACCESS  128 /* IRIX doing local access check on device special file*/
  #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
+#define NFSD_MAY_NOT_BREAK_LEASE 512
  
  #define NFSD_MAY_CREATE                (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
  #define NFSD_MAY_REMOVE                (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h

index efa3377..4d476ff 100644 (file)
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
         struct nfs4_sessionid   sessionid;
  };
  
+struct nfsd4_reclaim_complete {
+       u32 rca_one_fs;
+};
+
  struct nfsd4_op {
         int                                     opnum;
         __be32                                  status;
@@ -421,6 +425,7 @@ struct nfsd4_op {
                 struct nfsd4_create_session     create_session;
                 struct nfsd4_destroy_session    destroy_session;
                 struct nfsd4_sequence           sequence;
+               struct nfsd4_reclaim_complete   reclaim_complete;
         } u;
         struct nfs4_replay *                    replay;
  };
@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
  extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
                 struct nfsd4_sequence *seq);
  extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-               struct nfsd4_compound_state *,
-struct nfsd4_exchange_id *);
-               extern __be32 nfsd4_create_session(struct svc_rqst *,
+               struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
+extern __be32 nfsd4_create_session(struct svc_rqst *,
                 struct nfsd4_compound_state *,
                 struct nfsd4_create_session *);
  extern __be32 nfsd4_sequence(struct svc_rqst *,
@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_rqst *,
  extern __be32 nfsd4_destroy_session(struct svc_rqst *,
                 struct nfsd4_compound_state *,
                 struct nfsd4_destroy_session *);
+__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
  extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
                 struct nfsd4_open *open);
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h

index 65e333a..80d55bb 100644 (file)
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
   *
- * The file handle is seens as a list of 4byte words.
- * The first word contains a version number (1) and four descriptor bytes
+ * The file handle starts with a sequence of four-byte words.
+ * The first word contains a version number (1) and three descriptor bytes
   * that tell how the remaining 3 variable length fields should be handled.
   * These three bytes are auth_type, fsid_type and fileid_type.
   *
- * All 4byte values are in host-byte-order.
+ * All four-byte values are in host-byte-order.
   *
   * The auth_type field specifies how the filehandle can be authenticated
   * This might allow a file to be confirmed to be in a writable part of a
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c

index 39bddba..a3f340c 100644 (file)
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -49,11 +49,17 @@ static void cache_init(struct cache_head *h)
         h->last_refresh = now;
  }
  
+static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+{
+       return  (h->expiry_time < get_seconds()) ||
+               (detail->flush_time > h->last_refresh);
+}
+
  struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
                                        struct cache_head *key, int hash)
  {
         struct cache_head **head,  **hp;
-       struct cache_head *new = NULL;
+       struct cache_head *new = NULL, *freeme = NULL;
  
         head = &detail->hash_table[hash];
  
@@ -62,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
                 struct cache_head *tmp = *hp;
                 if (detail->match(tmp, key)) {
+                       if (cache_is_expired(detail, tmp))
+                               /* This entry is expired, we will discard it. */
+                               break;
                         cache_get(tmp);
                         read_unlock(&detail->hash_lock);
                         return tmp;
@@ -86,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
                 struct cache_head *tmp = *hp;
                 if (detail->match(tmp, key)) {
+                       if (cache_is_expired(detail, tmp)) {
+                               *hp = tmp->next;
+                               tmp->next = NULL;
+                               detail->entries --;
+                               freeme = tmp;
+                               break;
+                       }
                         cache_get(tmp);
                         write_unlock(&detail->hash_lock);
                         cache_put(new, detail);
@@ -98,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
         cache_get(new);
         write_unlock(&detail->hash_lock);
  
+       if (freeme)
+               cache_put(freeme, detail);
         return new;
  }
  EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
@@ -183,10 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
  
  static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
  {
-       if (!test_bit(CACHE_VALID, &h->flags) ||
-           h->expiry_time < get_seconds())
-               return -EAGAIN;
-       else if (detail->flush_time > h->last_refresh)
+       if (!test_bit(CACHE_VALID, &h->flags))
                 return -EAGAIN;
         else {
                 /* entry is valid */
@@ -397,31 +412,27 @@ static int cache_clean(void)
                 /* Ok, now to clean this strand */
  
                 cp = & current_detail->hash_table[current_index];
-               ch = *cp;
-               for (; ch; cp= & ch->next, ch= *cp) {
+               for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) {
                         if (current_detail->nextcheck > ch->expiry_time)
                                 current_detail->nextcheck = ch->expiry_time+1;
-                       if (ch->expiry_time >= get_seconds() &&
-                           ch->last_refresh >= current_detail->flush_time)
+                       if (!cache_is_expired(current_detail, ch))
                                 continue;
-                       if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
-                               cache_dequeue(current_detail, ch);
  
-                       if (atomic_read(&ch->ref.refcount) == 1)
-                               break;
-               }
-               if (ch) {
                         *cp = ch->next;
                         ch->next = NULL;
                         current_detail->entries--;
                         rv = 1;
+                       break;
                 }
+
                 write_unlock(&current_detail->hash_lock);
                 d = current_detail;
                 if (!ch)
                         current_index ++;
                 spin_unlock(&cache_list_lock);
                 if (ch) {
+                       if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
+                               cache_dequeue(current_detail, ch);
                         cache_revisit_request(ch);
                         cache_put(ch, d);
                 }
@@ -1233,8 +1244,10 @@ static int content_open(struct inode *inode, struct file *file,
         if (!cd || !try_module_get(cd->owner))
                 return -EACCES;
         han = __seq_open_private(file, &cache_content_op, sizeof(*han));
-       if (han == NULL)
+       if (han == NULL) {
+               module_put(cd->owner);
                 return -ENOMEM;
+       }
  
         han->cd = cd;
         return 0;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c

index 061b2e0..cbc0849 100644 (file)
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
                 if (rqstp->rq_deferred) {
                         svc_xprt_received(xprt);
                         len = svc_deferred_recv(rqstp);
-               } else
+               } else {
                         len = xprt->xpt_ops->xpo_recvfrom(rqstp);
+                       svc_xprt_received(xprt);
+               }
                 dprintk("svc: got len=%d\n", len);
         }
  
@@ -893,12 +895,12 @@ void svc_delete_xprt(struct svc_xprt *xprt)
          */
         if (test_bit(XPT_TEMP, &xprt->xpt_flags))
                 serv->sv_tmpcnt--;
+       spin_unlock_bh(&serv->sv_lock);
  
         while ((dr = svc_deferred_dequeue(xprt)) != NULL)
                 kfree(dr);
  
         svc_xprt_put(xprt);
-       spin_unlock_bh(&serv->sv_lock);
  }
  
  void svc_close_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c

index a29f259..a338927 100644 (file)
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
                         dprintk("svc: recvfrom returned error %d\n", -err);
                         set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
                 }
-               svc_xprt_received(&svsk->sk_xprt);
                 return -EAGAIN;
         }
         len = svc_addr_len(svc_addr(rqstp));
@@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
         svsk->sk_sk->sk_stamp = skb->tstamp;
         set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
  
-       /*
-        * Maybe more packets - kick another thread ASAP.
-        */
-       svc_xprt_received(&svsk->sk_xprt);
-
         len  = skb->len - sizeof(struct udphdr);
         rqstp->rq_arg.len = len;
  
@@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
                 if (len < want) {
                         dprintk("svc: short recvfrom while reading record "
                                 "length (%d of %d)\n", len, want);
-                       svc_xprt_received(&svsk->sk_xprt);
                         goto err_again; /* record header not complete */
                 }
  
@@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
         if (len < svsk->sk_reclen) {
                 dprintk("svc: incomplete TCP record (%d of %d)\n",
                         len, svsk->sk_reclen);
-               svc_xprt_received(&svsk->sk_xprt);
                 goto err_again; /* record not complete */
         }
         len = svsk->sk_reclen;
@@ -961,14 +953,11 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
  
         return len;
   error:
-       if (len == -EAGAIN) {
+       if (len == -EAGAIN)
                 dprintk("RPC: TCP recv_record got EAGAIN\n");
-               svc_xprt_received(&svsk->sk_xprt);
-       }
         return len;
   err_delete:
         set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-       svc_xprt_received(&svsk->sk_xprt);
   err_again:
         return -EAGAIN;
  }
@@ -1110,7 +1099,6 @@ out:
         svsk->sk_tcplen = 0;
  
         svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
-       svc_xprt_received(&svsk->sk_xprt);
         if (serv->sv_stats)
                 serv->sv_stats->nettcpcnt++;
  
@@ -1119,7 +1107,6 @@ out:
  err_again:
         if (len == -EAGAIN) {
                 dprintk("RPC: TCP recvfrom got EAGAIN\n");
-               svc_xprt_received(&svsk->sk_xprt);
                 return len;
         }
  error:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

index f92e37e..0194de8 100644 (file)
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
                 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
                 rqstp->rq_arg.head[0].iov_len);
  
-       svc_xprt_received(rqstp->rq_xprt);
         return ret;
  }
  
@@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                 rqstp->rq_arg.head[0].iov_len);
         rqstp->rq_prot = IPPROTO_MAX;
         svc_xprt_copy_addrs(rqstp, xprt);
-       svc_xprt_received(xprt);
         return ret;
  
   close_out:
@@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
          */
         set_bit(XPT_CLOSE, &xprt->xpt_flags);
  defer:
-       svc_xprt_received(xprt);
         return 0;
  }
author	J. Bruce Fields <bfields@citi.umich.edu>
	Wed, 9 Jun 2010 00:05:18 +0000 (20:05 -0400)
committer	J. Bruce Fields <bfields@citi.umich.edu>
	Wed, 9 Jun 2010 00:05:18 +0000 (20:05 -0400)
Documentation/filesystems/nfs/nfs41-server.txt		patch | blob | history
fs/nfsd/export.c		patch | blob | history
fs/nfsd/nfs4callback.c		patch | blob | history
fs/nfsd/nfs4proc.c		patch | blob | history
fs/nfsd/nfs4state.c		patch | blob | history
fs/nfsd/nfs4xdr.c		patch | blob | history
fs/nfsd/nfsctl.c		patch | blob | history
fs/nfsd/nfsd.h		patch | blob | history
fs/nfsd/nfssvc.c		patch | blob | history
fs/nfsd/state.h		patch | blob | history
fs/nfsd/vfs.c		patch | blob | history
fs/nfsd/vfs.h		patch | blob | history
fs/nfsd/xdr4.h		patch | blob | history
include/linux/nfsd/nfsfh.h		patch | blob | history
net/sunrpc/cache.c		patch | blob | history
net/sunrpc/svc_xprt.c		patch | blob | history
net/sunrpc/svcsock.c		patch | blob | history
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c		patch | blob | history