dlm: reject normal unlock when lock is waiting for lookup
[safe/jmp/linux-2.6] / fs / dlm / lock.c
index b455919..fa68e9b 100644 (file)
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -88,7 +88,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
 static void del_timeout(struct dlm_lkb *lkb);
-void dlm_timeout_warn(struct dlm_lkb *lkb);
 
 /*
  * Lock compatibilty matrix - thanks Steve
@@ -335,7 +334,7 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
 {
        struct dlm_rsb *r;
 
-       r = allocate_rsb(ls, len);
+       r = dlm_allocate_rsb(ls, len);
        if (!r)
                return NULL;
 
@@ -478,7 +477,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
        error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
        if (!error) {
                write_unlock(&ls->ls_rsbtbl[bucket].lock);
-               free_rsb(r);
+               dlm_free_rsb(r);
                r = tmp;
                goto out;
        }
@@ -519,7 +518,7 @@ static void toss_rsb(struct kref *kref)
        list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
        r->res_toss_time = jiffies;
        if (r->res_lvbptr) {
-               free_lvb(r->res_lvbptr);
+               dlm_free_lvb(r->res_lvbptr);
                r->res_lvbptr = NULL;
        }
 }
@@ -589,7 +588,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
        uint32_t lkid = 0;
        uint16_t bucket;
 
-       lkb = allocate_lkb(ls);
+       lkb = dlm_allocate_lkb(ls);
        if (!lkb)
                return -ENOMEM;
 
@@ -683,8 +682,8 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
 
                /* for local/process lkbs, lvbptr points to caller's lksb */
                if (lkb->lkb_lvbptr && is_master_copy(lkb))
-                       free_lvb(lkb->lkb_lvbptr);
-               free_lkb(lkb);
+                       dlm_free_lvb(lkb->lkb_lvbptr);
+               dlm_free_lkb(lkb);
                return 1;
        } else {
                write_unlock(&ls->ls_lkbtbl[bucket].lock);
@@ -988,7 +987,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
 
                        if (is_master(r))
                                dir_remove(r);
-                       free_rsb(r);
+                       dlm_free_rsb(r);
                        count++;
                } else {
                        write_unlock(&ls->ls_rsbtbl[b].lock);
@@ -1171,7 +1170,7 @@ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
                        return;
 
                if (!r->res_lvbptr)
-                       r->res_lvbptr = allocate_lvb(r->res_ls);
+                       r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
 
                if (!r->res_lvbptr)
                        return;
@@ -1203,7 +1202,7 @@ static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
                return;
 
        if (!r->res_lvbptr)
-               r->res_lvbptr = allocate_lvb(r->res_ls);
+               r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
 
        if (!r->res_lvbptr)
                return;
@@ -1670,9 +1669,10 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
    with a deadlk here, we'd have to generate something like grant_lock with
    the deadlk error.) */
 
-/* returns the highest requested mode of all blocked conversions */
+/* Returns the highest requested mode of all blocked conversions; sets
+   cw if there's a blocked conversion to DLM_LOCK_CW. */
 
-static int grant_pending_convert(struct dlm_rsb *r, int high)
+static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw)
 {
        struct dlm_lkb *lkb, *s;
        int hi, demoted, quit, grant_restart, demote_restart;
@@ -1709,6 +1709,9 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
                }
 
                hi = max_t(int, lkb->lkb_rqmode, hi);
+
+               if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
+                       *cw = 1;
        }
 
        if (grant_restart)
@@ -1721,29 +1724,52 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
        return max_t(int, high, hi);
 }
 
+/* Grants every lock on the wait queue for which can_be_granted() succeeds.
+   Returns the larger of high and the rqmodes of the locks left blocked.
+   Sets *cw to 1 if one of the blocked locks has a rqmode of DLM_LOCK_CW;
+   cw may be NULL when the caller has no use for it, which is also what
+   grant_pending_convert() accepts. */
+
-static int grant_pending_wait(struct dlm_rsb *r, int high)
+static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw)
 {
        struct dlm_lkb *lkb, *s;
 
        list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
                if (can_be_granted(r, lkb, 0, NULL))
                        grant_lock_pending(r, lkb);
-                else
+               else {
                        high = max_t(int, lkb->lkb_rqmode, high);
+                       /* guard cw the same way grant_pending_convert() does */
+                       if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
+                               *cw = 1;
+               }
        }
 
        return high;
 }
 
+/* Decides whether the granted lock gr should be sent a blocking AST.
+   high is the largest rqmode of all locks blocked on the convert or
+   waiting queue; a cw of 1 means one of those blocked locks has a rqmode
+   of DLM_LOCK_CW.  Returns 1 if a bast is required, 0 if not. */
+
+static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw)
+{
+       /* a PR holder facing a blocked CW request is judged against
+          DLM_LOCK_EX rather than against high */
+       if (cw && gr->lkb_grmode == DLM_LOCK_PR)
+               return gr->lkb_highbast < DLM_LOCK_EX;
+
+       /* otherwise a bast is needed only if none has been sent for a mode
+          this high and the compat matrix says the modes conflict */
+       return gr->lkb_highbast < high &&
+              !__dlm_compat_matrix[gr->lkb_grmode+1][high+1];
+}
+
 static void grant_pending_locks(struct dlm_rsb *r)
 {
        struct dlm_lkb *lkb, *s;
        int high = DLM_LOCK_IV;
+       int cw = 0;
 
        DLM_ASSERT(is_master(r), dlm_dump_rsb(r););
 
-       high = grant_pending_convert(r, high);
-       high = grant_pending_wait(r, high);
+       high = grant_pending_convert(r, high, &cw);
+       high = grant_pending_wait(r, high, &cw);
 
        if (high == DLM_LOCK_IV)
                return;
@@ -1751,27 +1777,41 @@ static void grant_pending_locks(struct dlm_rsb *r)
        /*
         * If there are locks left on the wait/convert queue then send blocking
         * ASTs to granted locks based on the largest requested mode (high)
-        * found above. FIXME: highbast < high comparison not valid for PR/CW.
+        * found above.
         */
 
        list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
-               if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
-                   !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
-                       queue_bast(r, lkb, high);
+               if (lkb->lkb_bastaddr && lock_requires_bast(lkb, high, cw)) {
+                       if (cw && high == DLM_LOCK_PR)
+                               queue_bast(r, lkb, DLM_LOCK_CW);
+                       else
+                               queue_bast(r, lkb, high);
                        lkb->lkb_highbast = high;
                }
        }
 }
 
+/* Returns 1 if the granted lock gr should be sent a blocking AST on behalf
+   of the requesting lock rq, 0 otherwise. */
+
+static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq)
+{
+       int pr_vs_cw;
+
+       /* granted PR against requested CW, or the other way around */
+       pr_vs_cw = (gr->lkb_grmode == DLM_LOCK_PR &&
+                   rq->lkb_rqmode == DLM_LOCK_CW) ||
+                  (gr->lkb_grmode == DLM_LOCK_CW &&
+                   rq->lkb_rqmode == DLM_LOCK_PR);
+
+       /* that pairing is judged against DLM_LOCK_EX, not against rqmode */
+       if (pr_vs_cw)
+               return gr->lkb_highbast < DLM_LOCK_EX;
+
+       /* a bast for a mode at least this high was already recorded */
+       if (gr->lkb_highbast >= rq->lkb_rqmode)
+               return 0;
+
+       return !modes_compat(gr, rq);
+}
+
 static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
                            struct dlm_lkb *lkb)
 {
        struct dlm_lkb *gr;
 
        list_for_each_entry(gr, head, lkb_statequeue) {
-               if (gr->lkb_bastaddr &&
-                   gr->lkb_highbast < lkb->lkb_rqmode &&
-                   !modes_compat(gr, lkb)) {
+               if (gr->lkb_bastaddr && modes_require_bast(gr, lkb)) {
                        queue_bast(r, gr, lkb->lkb_rqmode);
                        gr->lkb_highbast = lkb->lkb_rqmode;
                }
@@ -1900,8 +1940,11 @@ static void confirm_master(struct dlm_rsb *r, int error)
                break;
 
        case -EAGAIN:
-               /* the remote master didn't queue our NOQUEUE request;
-                  make a waiting lkb the first_lkid */
+       case -EBADR:
+       case -ENOTBLK:
+               /* the remote request failed and won't be retried (it was
+                  a NOQUEUE, or has been canceled/unlocked); make a waiting
+                  lkb the first_lkid */
 
                r->res_first_lkid = 0;
 
@@ -2067,17 +2110,18 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
        /* an lkb may be waiting for an rsb lookup to complete where the
           lookup was initiated by another lock */
 
-       if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
-               if (!list_empty(&lkb->lkb_rsb_lookup)) {
+       if (!list_empty(&lkb->lkb_rsb_lookup)) {
+               if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
                        log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
                        list_del_init(&lkb->lkb_rsb_lookup);
                        queue_cast(lkb->lkb_resource, lkb,
                                   args->flags & DLM_LKF_CANCEL ?
                                   -DLM_ECANCEL : -DLM_EUNLOCK);
                        unhold_lkb(lkb); /* undoes create_lkb() */
-                       rv = -EBUSY;
-                       goto out;
                }
+               /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
+               rv = -EBUSY;
+               goto out;
        }
 
        /* cancel not allowed with another cancel/unlock in progress */
@@ -2235,7 +2279,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
           before we try again to grant this one. */
 
        if (is_demoted(lkb)) {
-               grant_pending_convert(r, DLM_LOCK_IV);
+               grant_pending_convert(r, DLM_LOCK_IV, NULL);
                if (_can_be_granted(r, lkb, 1)) {
                        grant_lock(r, lkb);
                        queue_cast(r, lkb, 0);
@@ -2945,7 +2989,7 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
 
        if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
                if (!lkb->lkb_lvbptr)
-                       lkb->lkb_lvbptr = allocate_lvb(ls);
+                       lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
                if (!lkb->lkb_lvbptr)
                        return -ENOMEM;
                len = receive_extralen(ms);
@@ -2965,11 +3009,9 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
        lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
        lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);
 
-       DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
-
        if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
                /* lkb was just created so there won't be an lvb yet */
-               lkb->lkb_lvbptr = allocate_lvb(ls);
+               lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
                if (!lkb->lkb_lvbptr)
                        return -ENOMEM;
        }
@@ -2980,16 +3022,6 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
                                struct dlm_message *ms)
 {
-       if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
-               log_error(ls, "convert_args nodeid %d %d lkid %x %x",
-                         lkb->lkb_nodeid, ms->m_header.h_nodeid,
-                         lkb->lkb_id, lkb->lkb_remid);
-               return -EINVAL;
-       }
-
-       if (!is_master_copy(lkb))
-               return -EINVAL;
-
        if (lkb->lkb_status != DLM_LKSTS_GRANTED)
                return -EBUSY;
 
@@ -3005,8 +3037,6 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
                               struct dlm_message *ms)
 {
-       if (!is_master_copy(lkb))
-               return -EINVAL;
        if (receive_lvb(ls, lkb, ms))
                return -ENOMEM;
        return 0;
@@ -3022,6 +3052,50 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
        lkb->lkb_remid = ms->m_lkid;
 }
 
+/* Checks whether a message of type ms->m_type, sent by the node named in
+   the message header, is acceptable for this lkb.  Returns 0 if it is;
+   otherwise logs the details and returns -EINVAL.  The rsb must already be
+   locked by the caller so that the lkb fields can be inspected safely. */
+
+static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
+{
+       int sender = ms->m_header.h_nodeid;
+       int rv = -EINVAL;       /* stays set unless a case below accepts */
+
+       switch (ms->m_type) {
+       case DLM_MSG_CONVERT:
+       case DLM_MSG_UNLOCK:
+       case DLM_MSG_CANCEL:
+               /* must target a master copy and come from the node
+                  recorded in the lkb */
+               if (is_master_copy(lkb) && lkb->lkb_nodeid == sender)
+                       rv = 0;
+               break;
+
+       case DLM_MSG_CONVERT_REPLY:
+       case DLM_MSG_UNLOCK_REPLY:
+       case DLM_MSG_CANCEL_REPLY:
+       case DLM_MSG_GRANT:
+       case DLM_MSG_BAST:
+               /* must target a process copy and come from the node
+                  recorded in the lkb */
+               if (is_process_copy(lkb) && lkb->lkb_nodeid == sender)
+                       rv = 0;
+               break;
+
+       case DLM_MSG_REQUEST_REPLY:
+               /* an lkb_nodeid of -1 is accepted from any sender */
+               if (is_process_copy(lkb) &&
+                   (lkb->lkb_nodeid == -1 || lkb->lkb_nodeid == sender))
+                       rv = 0;
+               break;
+
+       default:
+               /* any other message type is rejected */
+               break;
+       }
+
+       if (!rv)
+               return 0;
+
+       log_error(lkb->lkb_resource->res_ls,
+                 "ignore invalid message %d from %d %x %x %x %d",
+                 ms->m_type, sender, lkb->lkb_id, lkb->lkb_remid,
+                 lkb->lkb_flags, lkb->lkb_nodeid);
+       return rv;
+}
+
 static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
 {
        struct dlm_lkb *lkb;
@@ -3083,17 +3157,21 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        receive_flags(lkb, ms);
        error = receive_convert_args(ls, lkb, ms);
        if (error)
-               goto out;
+               goto out_reply;
        reply = !down_conversion(lkb);
 
        error = do_convert(r, lkb);
- out:
+ out_reply:
        if (reply)
                send_convert_reply(r, lkb, error);
-
+ out:
        unlock_rsb(r);
        put_rsb(r);
        dlm_put_lkb(lkb);
@@ -3119,15 +3197,19 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        receive_flags(lkb, ms);
        error = receive_unlock_args(ls, lkb, ms);
        if (error)
-               goto out;
+               goto out_reply;
 
        error = do_unlock(r, lkb);
- out:
+ out_reply:
        send_unlock_reply(r, lkb, error);
-
+ out:
        unlock_rsb(r);
        put_rsb(r);
        dlm_put_lkb(lkb);
@@ -3155,9 +3237,13 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        error = do_cancel(r, lkb);
        send_cancel_reply(r, lkb, error);
-
+ out:
        unlock_rsb(r);
        put_rsb(r);
        dlm_put_lkb(lkb);
@@ -3176,22 +3262,26 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_grant no lkb");
+               log_debug(ls, "receive_grant from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        r = lkb->lkb_resource;
 
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        receive_flags_reply(lkb, ms);
        if (is_altmode(lkb))
                munge_altmode(lkb, ms);
        grant_lock_pc(r, lkb, ms);
        queue_cast(r, lkb, 0);
-
+ out:
        unlock_rsb(r);
        put_rsb(r);
        dlm_put_lkb(lkb);
@@ -3205,18 +3295,22 @@ static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_bast no lkb");
+               log_debug(ls, "receive_bast from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        r = lkb->lkb_resource;
 
        hold_rsb(r);
        lock_rsb(r);
 
-       queue_bast(r, lkb, ms->m_bastmode);
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
 
+       queue_bast(r, lkb, ms->m_bastmode);
+ out:
        unlock_rsb(r);
        put_rsb(r);
        dlm_put_lkb(lkb);
@@ -3282,15 +3376,19 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_request_reply no lkb");
+               log_debug(ls, "receive_request_reply from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        r = lkb->lkb_resource;
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        mstype = lkb->lkb_wait_type;
        error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
        if (error)
@@ -3342,6 +3440,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
                if (is_overlap(lkb)) {
                        /* we'll ignore error in cancel/unlock reply */
                        queue_cast_overlap(r, lkb);
+                       confirm_master(r, result);
                        unhold_lkb(lkb); /* undoes create_lkb() */
                } else
                        _request_lock(r, lkb);
@@ -3422,6 +3521,10 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        /* stub reply can happen with waiters_mutex held */
        error = remove_from_waiters_ms(lkb, ms);
        if (error)
@@ -3440,10 +3543,10 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_convert_reply no lkb");
+               log_debug(ls, "receive_convert_reply from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        _receive_convert_reply(lkb, ms);
        dlm_put_lkb(lkb);
@@ -3457,6 +3560,10 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        /* stub reply can happen with waiters_mutex held */
        error = remove_from_waiters_ms(lkb, ms);
        if (error)
@@ -3488,10 +3595,10 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_unlock_reply no lkb");
+               log_debug(ls, "receive_unlock_reply from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        _receive_unlock_reply(lkb, ms);
        dlm_put_lkb(lkb);
@@ -3505,6 +3612,10 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        hold_rsb(r);
        lock_rsb(r);
 
+       error = validate_message(lkb, ms);
+       if (error)
+               goto out;
+
        /* stub reply can happen with waiters_mutex held */
        error = remove_from_waiters_ms(lkb, ms);
        if (error)
@@ -3536,10 +3647,10 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
 
        error = find_lkb(ls, ms->m_remid, &lkb);
        if (error) {
-               log_error(ls, "receive_cancel_reply no lkb");
+               log_debug(ls, "receive_cancel_reply from %d no lkb %x",
+                         ms->m_header.h_nodeid, ms->m_remid);
                return;
        }
-       DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
 
        _receive_cancel_reply(lkb, ms);
        dlm_put_lkb(lkb);
@@ -3597,53 +3708,13 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
        dlm_put_lkb(lkb);
 }
 
-int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
+static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
 {
-       struct dlm_message *ms = (struct dlm_message *) hd;
-       struct dlm_ls *ls;
-       int error = 0;
-
-       if (!recovery)
-               dlm_message_in(ms);
-
-       ls = dlm_find_lockspace_global(hd->h_lockspace);
-       if (!ls) {
-               log_print("drop message %d from %d for unknown lockspace %d",
-                         ms->m_type, nodeid, hd->h_lockspace);
-               return -EINVAL;
-       }
-
-       /* recovery may have just ended leaving a bunch of backed-up requests
-          in the requestqueue; wait while dlm_recoverd clears them */
-
-       if (!recovery)
-               dlm_wait_requestqueue(ls);
-
-       /* recovery may have just started while there were a bunch of
-          in-flight requests -- save them in requestqueue to be processed
-          after recovery.  we can't let dlm_recvd block on the recovery
-          lock.  if dlm_recoverd is calling this function to clear the
-          requestqueue, it needs to be interrupted (-EINTR) if another
-          recovery operation is starting. */
-
-       while (1) {
-               if (dlm_locking_stopped(ls)) {
-                       if (recovery) {
-                               error = -EINTR;
-                               goto out;
-                       }
-                       error = dlm_add_requestqueue(ls, nodeid, hd);
-                       if (error == -EAGAIN)
-                               continue;
-                       else {
-                               error = -EINTR;
-                               goto out;
-                       }
-               }
-
-               if (dlm_lock_recovery_try(ls))
-                       break;
-               schedule();
+       if (!dlm_is_member(ls, ms->m_header.h_nodeid)) {
+               log_debug(ls, "ignore non-member message %d from %d %x %x %d",
+                         ms->m_type, ms->m_header.h_nodeid, ms->m_lkid,
+                         ms->m_remid, ms->m_result);
+               return;
        }
 
        switch (ms->m_type) {
@@ -3720,17 +3791,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                log_error(ls, "unknown message type %d", ms->m_type);
        }
 
-       dlm_unlock_recovery(ls);
- out:
-       dlm_put_lockspace(ls);
        dlm_astd_wake();
-       return error;
 }
 
+/* Entry point for a normal (non-recovery) message.  While locking is
+   stopped for recovery the message is not processed but saved on the
+   requestqueue.  Otherwise we first wait for dlm_recoverd to finish draining
+   the messages saved earlier and then process this one; the wait matters
+   right after recovery completes, when the saved messages are handled
+   before the newly arriving ones. */
 
-/*
- * Recovery related
- */
+static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
+                               int nodeid)
+{
+       if (!dlm_locking_stopped(ls)) {
+               dlm_wait_requestqueue(ls);
+               _receive_message(ls, ms);
+               return;
+       }
+
+       /* in recovery: keep the message for later */
+       dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms);
+}
+
+/* This is called by dlm_recoverd to process messages that were saved on
+   the requestqueue.  Unlike dlm_receive_message(), it passes the message
+   straight to _receive_message() without checking dlm_locking_stopped() or
+   calling dlm_wait_requestqueue(). */
+
+void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
+{
+       _receive_message(ls, ms);
+}
+
+/* This is called by the midcomms layer when something is received for
+   the lockspace.  It could be either a MSG (normal message sent as part of
+   standard locking activity) or an RCOM (recovery message sent as part of
+   lockspace recovery).
+
+   hd is the received buffer and nodeid is the node it was received from.
+   A buffer with an unknown h_cmd, with an h_nodeid that differs from
+   nodeid, or for a lockspace that can't be found is logged and dropped. */
+
+void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
+{
+       struct dlm_message *ms = (struct dlm_message *) hd;
+       struct dlm_rcom *rc = (struct dlm_rcom *) hd;
+       struct dlm_ls *ls;
+       int type = 0;
+
+       /* the same buffer is viewed as a message or an rcom based on h_cmd */
+       switch (hd->h_cmd) {
+       case DLM_MSG:
+               dlm_message_in(ms);
+               type = ms->m_type;
+               break;
+       case DLM_RCOM:
+               dlm_rcom_in(rc);
+               type = rc->rc_type;
+               break;
+       default:
+               /* nodeid is a signed int, so it is printed with %d like
+                  in the other messages of this function */
+               log_print("invalid h_cmd %d from %d", hd->h_cmd, nodeid);
+               return;
+       }
+
+       /* the nodeid in the header must match the node we received from */
+       if (hd->h_nodeid != nodeid) {
+               log_print("invalid h_nodeid %d from %d lockspace %x",
+                         hd->h_nodeid, nodeid, hd->h_lockspace);
+               return;
+       }
+
+       ls = dlm_find_lockspace_global(hd->h_lockspace);
+       if (!ls) {
+               log_print("invalid h_lockspace %x from %d cmd %d type %d",
+                         hd->h_lockspace, nodeid, hd->h_cmd, type);
+
+               /* an rcom status message still gets a reply even though
+                  the lockspace wasn't found */
+               if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
+                       dlm_send_ls_not_ready(nodeid, rc);
+               return;
+       }
+
+       /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
+          be inactive (in this ls) before transitioning to recovery mode */
+
+       down_read(&ls->ls_recv_active);
+       if (hd->h_cmd == DLM_MSG)
+               dlm_receive_message(ls, ms, nodeid);
+       else
+               dlm_receive_rcom(ls, rc, nodeid);
+       up_read(&ls->ls_recv_active);
+
+       /* drops the reference taken by dlm_find_lockspace_global() */
+       dlm_put_lockspace(ls);
+}
 
 static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
 {
@@ -3739,6 +3883,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
                ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
                ls->ls_stub_ms.m_result = -EINPROGRESS;
                ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+               ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
                _receive_convert_reply(lkb, &ls->ls_stub_ms);
 
                /* Same special case as in receive_rcom_lock_args() */
@@ -3780,6 +3925,7 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
 void dlm_recover_waiters_pre(struct dlm_ls *ls)
 {
        struct dlm_lkb *lkb, *safe;
+       int wait_type, stub_unlock_result, stub_cancel_result;
 
        mutex_lock(&ls->ls_waiters_mutex);
 
@@ -3798,7 +3944,33 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
                if (!waiter_needs_recovery(ls, lkb))
                        continue;
 
-               switch (lkb->lkb_wait_type) {
+               wait_type = lkb->lkb_wait_type;
+               stub_unlock_result = -DLM_EUNLOCK;
+               stub_cancel_result = -DLM_ECANCEL;
+
+               /* Main reply may have been received leaving a zero wait_type,
+                  but a reply for the overlapping op may not have been
+                  received.  In that case we need to fake the appropriate
+                  reply for the overlap op. */
+
+               if (!wait_type) {
+                       if (is_overlap_cancel(lkb)) {
+                               wait_type = DLM_MSG_CANCEL;
+                               if (lkb->lkb_grmode == DLM_LOCK_IV)
+                                       stub_cancel_result = 0;
+                       }
+                       if (is_overlap_unlock(lkb)) {
+                               wait_type = DLM_MSG_UNLOCK;
+                               if (lkb->lkb_grmode == DLM_LOCK_IV)
+                                       stub_unlock_result = -ENOENT;
+                       }
+
+                       log_debug(ls, "rwpre overlap %x %x %d %d %d",
+                                 lkb->lkb_id, lkb->lkb_flags, wait_type,
+                                 stub_cancel_result, stub_unlock_result);
+               }
+
+               switch (wait_type) {
 
                case DLM_MSG_REQUEST:
                        lkb->lkb_flags |= DLM_IFL_RESEND;
@@ -3811,8 +3983,9 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
                case DLM_MSG_UNLOCK:
                        hold_lkb(lkb);
                        ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
-                       ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+                       ls->ls_stub_ms.m_result = stub_unlock_result;
                        ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+                       ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
                        _receive_unlock_reply(lkb, &ls->ls_stub_ms);
                        dlm_put_lkb(lkb);
                        break;
@@ -3820,15 +3993,16 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
                case DLM_MSG_CANCEL:
                        hold_lkb(lkb);
                        ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
-                       ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+                       ls->ls_stub_ms.m_result = stub_cancel_result;
                        ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+                       ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
                        _receive_cancel_reply(lkb, &ls->ls_stub_ms);
                        dlm_put_lkb(lkb);
                        break;
 
                default:
-                       log_error(ls, "invalid lkb wait_type %d",
-                                 lkb->lkb_wait_type);
+                       log_error(ls, "invalid lkb wait_type %d %d",
+                                 lkb->lkb_wait_type, wait_type);
                }
                schedule();
        }
@@ -4117,7 +4291,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
        lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);
 
        if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
-               lkb->lkb_lvbptr = allocate_lvb(ls);
+               lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
                if (!lkb->lkb_lvbptr)
                        return -ENOMEM;
                lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
@@ -4192,7 +4366,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
        put_rsb(r);
  out:
        if (error)
-               log_print("recover_master_copy %d %x", error, rl->rl_lkid);
+               log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid);
        rl->rl_result = error;
        return error;
 }
@@ -4275,7 +4449,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
                }
        }
 
-       /* After ua is attached to lkb it will be freed by free_lkb().
+       /* After ua is attached to lkb it will be freed by dlm_free_lkb().
           When DLM_IFL_USER is set, the dlm knows that this is a userspace
           lock and that lkb_astparam is the dlm_user_args structure. */
 
@@ -4388,7 +4562,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 
        if (lvb_in && ua->lksb.sb_lvbptr)
                memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
-       ua->castparam = ua_tmp->castparam;
+       if (ua_tmp->castparam)
+               ua->castparam = ua_tmp->castparam;
        ua->user_lksb = ua_tmp->user_lksb;
 
        error = set_unlock_args(flags, ua, &args);
@@ -4433,7 +4608,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
                goto out;
 
        ua = (struct dlm_user_args *)lkb->lkb_astparam;
-       ua->castparam = ua_tmp->castparam;
+       if (ua_tmp->castparam)
+               ua->castparam = ua_tmp->castparam;
        ua->user_lksb = ua_tmp->user_lksb;
 
        error = set_unlock_args(flags, ua, &args);
@@ -4610,6 +4786,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
        }
 
        list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+               lkb->lkb_ast_type = 0;
                list_del(&lkb->lkb_astqueue);
                dlm_put_lkb(lkb);
        }