/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void del_timeout(struct dlm_lkb *lkb);
-void dlm_timeout_warn(struct dlm_lkb *lkb);
/*
* Lock compatibilty matrix - thanks Steve
rv = -ETIMEDOUT;
}
+ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+ lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+ rv = -EDEADLK;
+ }
+
lkb->lkb_lksb->sb_status = rv;
lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
{
struct dlm_rsb *r;
- r = allocate_rsb(ls, len);
+ r = dlm_allocate_rsb(ls, len);
if (!r)
return NULL;
{
struct dlm_rsb *r, *tmp;
uint32_t hash, bucket;
- int error = 0;
+ int error = -EINVAL;
+
+ if (namelen > DLM_RESNAME_MAXLEN)
+ goto out;
if (dlm_no_directory(ls))
flags |= R_CREATE;
+ error = 0;
hash = jhash(name, namelen, 0);
bucket = hash & (ls->ls_rsbtbl_size - 1);
error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
if (!error) {
write_unlock(&ls->ls_rsbtbl[bucket].lock);
- free_rsb(r);
+ dlm_free_rsb(r);
r = tmp;
goto out;
}
return error;
}
-int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
- unsigned int flags, struct dlm_rsb **r_ret)
-{
- return find_rsb(ls, name, namelen, flags, r_ret);
-}
-
/* This is only called to add a reference when the code already holds
a valid reference to the rsb, so there's no need for locking. */
list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
r->res_toss_time = jiffies;
if (r->res_lvbptr) {
- free_lvb(r->res_lvbptr);
+ dlm_free_lvb(r->res_lvbptr);
r->res_lvbptr = NULL;
}
}
uint32_t lkid = 0;
uint16_t bucket;
- lkb = allocate_lkb(ls);
+ lkb = dlm_allocate_lkb(ls);
if (!lkb)
return -ENOMEM;
/* for local/process lkbs, lvbptr points to caller's lksb */
if (lkb->lkb_lvbptr && is_master_copy(lkb))
- free_lvb(lkb->lkb_lvbptr);
- free_lkb(lkb);
+ dlm_free_lvb(lkb->lkb_lvbptr);
+ dlm_free_lkb(lkb);
return 1;
} else {
write_unlock(&ls->ls_lkbtbl[bucket].lock);
if (is_master(r))
dir_remove(r);
- free_rsb(r);
+ dlm_free_rsb(r);
count++;
} else {
write_unlock(&ls->ls_rsbtbl[b].lock);
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- if (is_master_copy(lkb))
+ if (is_master_copy(lkb)) {
+ lkb->lkb_timestamp = jiffies;
return;
-
- if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
- goto add_it;
+ }
if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
goto add_it;
}
+ if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+ goto add_it;
return;
add_it:
}
if (do_cancel) {
- log_debug(r->res_ls, "timeout cancel %x node %d %s",
+ log_debug(ls, "timeout cancel %x node %d %s",
lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
return;
if (!r->res_lvbptr)
- r->res_lvbptr = allocate_lvb(r->res_ls);
+ r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
return;
return;
if (!r->res_lvbptr)
- r->res_lvbptr = allocate_lvb(r->res_ls);
+ r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
return;
b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
if (b == 1) {
int len = receive_extralen(ms);
+ if (len > DLM_RESNAME_MAXLEN)
+ len = DLM_RESNAME_MAXLEN;
memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
lkb->lkb_lvbseq = ms->m_lvbseq;
}
with a deadlk here, we'd have to generate something like grant_lock with
the deadlk error.) */
-/* returns the highest requested mode of all blocked conversions */
+/* Returns the highest requested mode of all blocked conversions; sets
+ cw if there's a blocked conversion to DLM_LOCK_CW. */
-static int grant_pending_convert(struct dlm_rsb *r, int high)
+static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw)
{
struct dlm_lkb *lkb, *s;
int hi, demoted, quit, grant_restart, demote_restart;
}
hi = max_t(int, lkb->lkb_rqmode, hi);
+
+ if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
+ *cw = 1;
}
if (grant_restart)
return max_t(int, high, hi);
}
-static int grant_pending_wait(struct dlm_rsb *r, int high)
+static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw) /* Walks
+ r->res_waitqueue and grants every lock that can_be_granted() accepts.
+ Returns the largest rqmode among high and the locks left blocked, and
+ sets *cw to 1 when one of those blocked locks requests DLM_LOCK_CW. */
{
struct dlm_lkb *lkb, *s;
list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
if (can_be_granted(r, lkb, 0, NULL))
grant_lock_pending(r, lkb);
- else
+ else { /* lkb stays blocked: fold its rqmode into high */
high = max_t(int, lkb->lkb_rqmode, high);
+ if (lkb->lkb_rqmode == DLM_LOCK_CW) /* cw is not NULL-checked here, unlike in grant_pending_convert() */
+ *cw = 1;
+ }
}
return high;
}
+/* cw of 1 means there's a lock with a rqmode of DLM_LOCK_CW that's blocked
+ on either the convert or waiting queue.
+ high is the largest rqmode of all locks blocked on the convert or
+ waiting queue.
+
+ Returns 1 if the granted lock gr needs a blocking AST, 0 if not. The
+ visible caller, grant_pending_locks(), checks lkb_bastfn before calling.
+
+ A granted PR lock with a blocked CW request is decided first, because
+ the plain "highbast < high" comparison is not valid for PR/CW: such a
+ lock needs a bast unless its highbast is already DLM_LOCK_EX or above.
+ Every other case needs a bast when highbast is below high and the
+ __dlm_compat_matrix entry for (grmode, high) is zero. */
+
+static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw)
+{
+ if (gr->lkb_grmode == DLM_LOCK_PR && cw) {
+ if (gr->lkb_highbast < DLM_LOCK_EX)
+ return 1;
+ return 0;
+ }
+
+ if (gr->lkb_highbast < high &&
+ !__dlm_compat_matrix[gr->lkb_grmode+1][high+1])
+ return 1;
+ return 0;
+}
+
static void grant_pending_locks(struct dlm_rsb *r)
{
struct dlm_lkb *lkb, *s;
int high = DLM_LOCK_IV;
+ int cw = 0; /* set to 1 by the two calls below when a blocked lock requests DLM_LOCK_CW */
DLM_ASSERT(is_master(r), dlm_dump_rsb(r););
- high = grant_pending_convert(r, high);
- high = grant_pending_wait(r, high);
+ high = grant_pending_convert(r, high, &cw);
+ high = grant_pending_wait(r, high, &cw);
if (high == DLM_LOCK_IV)
return;
/*
* If there are locks left on the wait/convert queue then send blocking
* ASTs to granted locks based on the largest requested mode (high)
- * found above. FIXME: highbast < high comparison not valid for PR/CW.
+ * found above. The PR/CW case, for which a plain highbast < high
+ * comparison is not valid, is decided in lock_requires_bast().
*/
list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
- if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
- !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
- queue_bast(r, lkb, high);
+ if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
+ if (cw && high == DLM_LOCK_PR) /* a CW request is blocked and high is PR: report CW as the blocking mode */
+ queue_bast(r, lkb, DLM_LOCK_CW); /* note: lkb_highbast below still records high, not CW */
+ else
+ queue_bast(r, lkb, high);
lkb->lkb_highbast = high;
}
}
}
+static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq)
+{ /* Returns 1 if lock gr should be sent a blocking AST because of the
+ request rq, 0 if not. The visible caller, send_bast_queue(), checks
+ gr->lkb_bastfn before calling. */
+ if ((gr->lkb_grmode == DLM_LOCK_PR && rq->lkb_rqmode == DLM_LOCK_CW) ||
+ (gr->lkb_grmode == DLM_LOCK_CW && rq->lkb_rqmode == DLM_LOCK_PR)) { /* PR/CW pair, either way round */
+ if (gr->lkb_highbast < DLM_LOCK_EX) /* compared against EX here, not against rq's rqmode */
+ return 1;
+ return 0;
+ }
+
+ if (gr->lkb_highbast < rq->lkb_rqmode && !modes_compat(gr, rq)) /* all other mode pairs */
+ return 1;
+ return 0;
+}
+
static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
struct dlm_lkb *lkb)
{
struct dlm_lkb *gr;
list_for_each_entry(gr, head, lkb_statequeue) {
- if (gr->lkb_bastaddr &&
- gr->lkb_highbast < lkb->lkb_rqmode &&
- !modes_compat(gr, lkb)) {
+ if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) {
queue_bast(r, gr, lkb->lkb_rqmode);
gr->lkb_highbast = lkb->lkb_rqmode;
}
static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
struct dlm_ls *ls = r->res_ls;
- int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
+ int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
return 1;
}
- for (;;) {
+ for (i = 0; i < 2; i++) {
/* It's possible for dlm_scand to remove an old rsb for
this same resource from the toss list, us to create
a new one, look up the master locally, and find it
log_debug(ls, "dir_lookup error %d %s", error, r->res_name);
schedule();
}
+ if (error && error != -EEXIST)
+ return error;
if (ret_nodeid == our_nodeid) {
r->res_first_lkid = 0;
break;
case -EAGAIN:
- /* the remote master didn't queue our NOQUEUE request;
- make a waiting lkb the first_lkid */
+ case -EBADR:
+ case -ENOTBLK:
+ /* the remote request failed and won't be retried (it was
+ a NOQUEUE, or has been canceled/unlocked); make a waiting
+ lkb the first_lkid */
r->res_first_lkid = 0;
}
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
- int namelen, unsigned long timeout_cs, void *ast,
- void *astarg, void *bast, struct dlm_args *args)
+ int namelen, unsigned long timeout_cs,
+ void (*ast) (void *astparam),
+ void *astparam,
+ void (*bast) (void *astparam, int mode),
+ struct dlm_args *args)
{
int rv = -EINVAL;
an active lkb cannot be modified before locking the rsb */
args->flags = flags;
- args->astaddr = ast;
- args->astparam = (long) astarg;
- args->bastaddr = bast;
+ args->astfn = ast;
+ args->astparam = astparam;
+ args->bastfn = bast;
args->timeout = timeout_cs;
args->mode = mode;
args->lksb = lksb;
return -EINVAL;
args->flags = flags;
- args->astparam = (long) astarg;
+ args->astparam = astarg;
return 0;
}
lkb->lkb_exflags = args->flags;
lkb->lkb_sbflags = 0;
- lkb->lkb_astaddr = args->astaddr;
+ lkb->lkb_astfn = args->astfn;
lkb->lkb_astparam = args->astparam;
- lkb->lkb_bastaddr = args->bastaddr;
+ lkb->lkb_bastfn = args->bastfn;
lkb->lkb_rqmode = args->mode;
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
/* an lkb may be waiting for an rsb lookup to complete where the
lookup was initiated by another lock */
- if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
- if (!list_empty(&lkb->lkb_rsb_lookup)) {
+ if (!list_empty(&lkb->lkb_rsb_lookup)) {
+ if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
list_del_init(&lkb->lkb_rsb_lookup);
queue_cast(lkb->lkb_resource, lkb,
args->flags & DLM_LKF_CANCEL ?
-DLM_ECANCEL : -DLM_EUNLOCK);
unhold_lkb(lkb); /* undoes create_lkb() */
- rv = -EBUSY;
- goto out;
}
+ /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
+ rv = -EBUSY;
+ goto out;
}
/* cancel not allowed with another cancel/unlock in progress */
before we try again to grant this one. */
if (is_demoted(lkb)) {
- grant_pending_convert(r, DLM_LOCK_IV);
+ grant_pending_convert(r, DLM_LOCK_IV, NULL);
if (_can_be_granted(r, lkb, 1)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
pass into lowcomms_commit and a message buffer (mb) that we
write our data into */
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
if (!mh)
return -ENOBUFS;
/* m_result and m_bastmode are set from function args,
not from lkb fields */
- if (lkb->lkb_bastaddr)
+ if (lkb->lkb_bastfn)
ms->m_asts |= AST_BAST;
- if (lkb->lkb_astaddr)
+ if (lkb->lkb_astfn)
ms->m_asts |= AST_COMP;
/* compare with switch in create_message; send_remove() doesn't
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
if (!lkb->lkb_lvbptr)
- lkb->lkb_lvbptr = allocate_lvb(ls);
+ lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
if (!lkb->lkb_lvbptr)
return -ENOMEM;
len = receive_extralen(ms);
+ if (len > DLM_RESNAME_MAXLEN)
+ len = DLM_RESNAME_MAXLEN;
memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
}
return 0;
}
+static void fake_bastfn(void *astparam, int mode)
+{ /* Placeholder blocking-AST callback. Its address is stored in
+ lkb_bastfn (or passed to set_lock_args()) where a non-NULL function
+ pointer is wanted; if it is ever invoked it only logs a message.
+ Both parameters are unused. */
+ log_print("fake_bastfn should not be called");
+}
+
+static void fake_astfn(void *astparam)
+{ /* Placeholder completion-AST callback. Its address is stored in
+ lkb_astfn (or passed to set_lock_args()) where a non-NULL function
+ pointer is wanted; if it is ever invoked it only logs a message.
+ astparam is unused. */
+ log_print("fake_astfn should not be called");
+}
+
static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
lkb->lkb_remid = ms->m_lkid;
lkb->lkb_grmode = DLM_LOCK_IV;
lkb->lkb_rqmode = ms->m_rqmode;
- lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
- lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);
- DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
+ lkb->lkb_bastfn = (ms->m_asts & AST_BAST) ? &fake_bastfn : NULL;
+ lkb->lkb_astfn = (ms->m_asts & AST_COMP) ? &fake_astfn : NULL;
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
/* lkb was just created so there won't be an lvb yet */
- lkb->lkb_lvbptr = allocate_lvb(ls);
+ lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
if (!lkb->lkb_lvbptr)
return -ENOMEM;
}
static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
- if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
- log_error(ls, "convert_args nodeid %d %d lkid %x %x",
- lkb->lkb_nodeid, ms->m_header.h_nodeid,
- lkb->lkb_id, lkb->lkb_remid);
- return -EINVAL;
- }
-
- if (!is_master_copy(lkb))
- return -EINVAL;
-
if (lkb->lkb_status != DLM_LKSTS_GRANTED)
return -EBUSY;
static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
- if (!is_master_copy(lkb))
- return -EINVAL;
if (receive_lvb(ls, lkb, ms))
return -ENOMEM;
return 0;
lkb->lkb_remid = ms->m_lkid;
}
+/* This is called after the rsb is locked so that we can safely inspect
+ fields in the lkb.
+
+ Checks whether message ms is acceptable for lkb, based on ms->m_type,
+ the sending node (ms->m_header.h_nodeid) and whether lkb is a master
+ copy or a process copy. Returns 0 if the message may be processed, or
+ -EINVAL (after logging an error) if it should be ignored. */
+
+static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
+{
+ int from = ms->m_header.h_nodeid;
+ int error = 0;
+
+ switch (ms->m_type) {
+ case DLM_MSG_CONVERT:
+ case DLM_MSG_UNLOCK:
+ case DLM_MSG_CANCEL:
+ if (!is_master_copy(lkb) || lkb->lkb_nodeid != from) /* lkb must be a master copy whose lkb_nodeid is the sender */
+ error = -EINVAL;
+ break;
+
+ case DLM_MSG_CONVERT_REPLY:
+ case DLM_MSG_UNLOCK_REPLY:
+ case DLM_MSG_CANCEL_REPLY:
+ case DLM_MSG_GRANT:
+ case DLM_MSG_BAST:
+ if (!is_process_copy(lkb) || lkb->lkb_nodeid != from) /* lkb must be a process copy whose lkb_nodeid is the sender */
+ error = -EINVAL;
+ break;
+
+ case DLM_MSG_REQUEST_REPLY:
+ if (!is_process_copy(lkb))
+ error = -EINVAL;
+ else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from) /* an lkb_nodeid of -1 accepts any sender; presumably the master is not yet known -- confirm */
+ error = -EINVAL;
+ break;
+
+ default: /* any message type not listed above is rejected */
+ error = -EINVAL;
+ }
+
+ if (error)
+ log_error(lkb->lkb_resource->res_ls,
+ "ignore invalid message %d from %d %x %x %x %d",
+ ms->m_type, from, lkb->lkb_id, lkb->lkb_remid,
+ lkb->lkb_flags, lkb->lkb_nodeid);
+ return error;
+}
+
static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
{
struct dlm_lkb *lkb;
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
receive_flags(lkb, ms);
error = receive_convert_args(ls, lkb, ms);
if (error)
- goto out;
+ goto out_reply;
reply = !down_conversion(lkb);
error = do_convert(r, lkb);
- out:
+ out_reply:
if (reply)
send_convert_reply(r, lkb, error);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
receive_flags(lkb, ms);
error = receive_unlock_args(ls, lkb, ms);
if (error)
- goto out;
+ goto out_reply;
error = do_unlock(r, lkb);
- out:
+ out_reply:
send_unlock_reply(r, lkb, error);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
error = do_cancel(r, lkb);
send_cancel_reply(r, lkb, error);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_grant no lkb");
+ log_debug(ls, "receive_grant from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
r = lkb->lkb_resource;
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
receive_flags_reply(lkb, ms);
if (is_altmode(lkb))
munge_altmode(lkb, ms);
grant_lock_pc(r, lkb, ms);
queue_cast(r, lkb, 0);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_bast no lkb");
+ log_debug(ls, "receive_bast from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
r = lkb->lkb_resource;
hold_rsb(r);
lock_rsb(r);
- queue_bast(r, lkb, ms->m_bastmode);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+ queue_bast(r, lkb, ms->m_bastmode);
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_request_reply no lkb");
+ log_debug(ls, "receive_request_reply from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
r = lkb->lkb_resource;
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
mstype = lkb->lkb_wait_type;
error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
if (error)
if (is_overlap(lkb)) {
/* we'll ignore error in cancel/unlock reply */
queue_cast_overlap(r, lkb);
+ confirm_master(r, result);
unhold_lkb(lkb); /* undoes create_lkb() */
} else
_request_lock(r, lkb);
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
/* stub reply can happen with waiters_mutex held */
error = remove_from_waiters_ms(lkb, ms);
if (error)
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_convert_reply no lkb");
+ log_debug(ls, "receive_convert_reply from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
_receive_convert_reply(lkb, ms);
dlm_put_lkb(lkb);
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
/* stub reply can happen with waiters_mutex held */
error = remove_from_waiters_ms(lkb, ms);
if (error)
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_unlock_reply no lkb");
+ log_debug(ls, "receive_unlock_reply from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
_receive_unlock_reply(lkb, ms);
dlm_put_lkb(lkb);
hold_rsb(r);
lock_rsb(r);
+ error = validate_message(lkb, ms);
+ if (error)
+ goto out;
+
/* stub reply can happen with waiters_mutex held */
error = remove_from_waiters_ms(lkb, ms);
if (error)
case -DLM_ECANCEL:
receive_flags_reply(lkb, ms);
revert_lock_pc(r, lkb);
- if (ms->m_result)
- queue_cast(r, lkb, -DLM_ECANCEL);
+ queue_cast(r, lkb, -DLM_ECANCEL);
break;
case 0:
break;
error = find_lkb(ls, ms->m_remid, &lkb);
if (error) {
- log_error(ls, "receive_cancel_reply no lkb");
+ log_debug(ls, "receive_cancel_reply from %d no lkb %x",
+ ms->m_header.h_nodeid, ms->m_remid);
return;
}
- DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
_receive_cancel_reply(lkb, ms);
dlm_put_lkb(lkb);
dlm_put_lkb(lkb);
}
-int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
+static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
{
- struct dlm_message *ms = (struct dlm_message *) hd;
- struct dlm_ls *ls;
- int error = 0;
-
- if (!recovery)
- dlm_message_in(ms);
-
- ls = dlm_find_lockspace_global(hd->h_lockspace);
- if (!ls) {
- log_print("drop message %d from %d for unknown lockspace %d",
- ms->m_type, nodeid, hd->h_lockspace);
- return -EINVAL;
- }
-
- /* recovery may have just ended leaving a bunch of backed-up requests
- in the requestqueue; wait while dlm_recoverd clears them */
-
- if (!recovery)
- dlm_wait_requestqueue(ls);
-
- /* recovery may have just started while there were a bunch of
- in-flight requests -- save them in requestqueue to be processed
- after recovery. we can't let dlm_recvd block on the recovery
- lock. if dlm_recoverd is calling this function to clear the
- requestqueue, it needs to be interrupted (-EINTR) if another
- recovery operation is starting. */
-
- while (1) {
- if (dlm_locking_stopped(ls)) {
- if (recovery) {
- error = -EINTR;
- goto out;
- }
- error = dlm_add_requestqueue(ls, nodeid, hd);
- if (error == -EAGAIN)
- continue;
- else {
- error = -EINTR;
- goto out;
- }
- }
-
- if (dlm_lock_recovery_try(ls))
- break;
- schedule();
+ if (!dlm_is_member(ls, ms->m_header.h_nodeid)) {
+ log_debug(ls, "ignore non-member message %d from %d %x %x %d",
+ ms->m_type, ms->m_header.h_nodeid, ms->m_lkid,
+ ms->m_remid, ms->m_result);
+ return;
}
switch (ms->m_type) {
log_error(ls, "unknown message type %d", ms->m_type);
}
- dlm_unlock_recovery(ls);
- out:
- dlm_put_lockspace(ls);
dlm_astd_wake();
- return error;
}
+/* If the lockspace is in recovery mode (locking stopped), then normal
+ messages are saved on the requestqueue for processing after recovery is
+ done. When not in recovery mode, we wait for dlm_recoverd to drain saved
+ messages off the requestqueue before we process new ones. This occurs right
+ after recovery completes when we transition from saving all messages on
+ requestqueue, to processing all the saved messages, to processing new
+ messages as they arrive.
+
+ ms is a message received from node nodeid for lockspace ls. The return
+ value of dlm_add_requestqueue() is not checked here. */
-/*
- * Recovery related
- */
+static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
+ int nodeid)
+{
+ if (dlm_locking_stopped(ls)) {
+ dlm_add_requestqueue(ls, nodeid, ms);
+ } else {
+ dlm_wait_requestqueue(ls);
+ _receive_message(ls, ms);
+ }
+}
+
+/* This is called by dlm_recoverd to process messages that were saved on
+ the requestqueue. Unlike dlm_receive_message(), it does not test
+ dlm_locking_stopped() or wait for the requestqueue; it passes ms
+ straight to _receive_message(). */
+
+void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
+{
+ _receive_message(ls, ms);
+}
+
+/* This is called by the midcomms layer when something is received for
+ the lockspace. It could be either a MSG (normal message sent as part of
+ standard locking activity) or an RCOM (recovery message sent as part of
+ lockspace recovery).
+
+ p is the received packet and nodeid the node it was received from.
+ The packet is dropped (with a log message) when h_cmd is neither
+ DLM_MSG nor DLM_RCOM, or when the header's h_nodeid differs from nodeid.
+ It is also dropped when no lockspace matches h_lockspace; that case is
+ logged only if dlm_config.ci_log_debug is set, and an RCOM of type
+ DLM_RCOM_STATUS is answered with dlm_send_ls_not_ready(). */
+
+void dlm_receive_buffer(union dlm_packet *p, int nodeid)
+{
+ struct dlm_header *hd = &p->header;
+ struct dlm_ls *ls;
+ int type = 0; /* message/rcom type, used only for logging and the RCOM status check below */
+
+ switch (hd->h_cmd) {
+ case DLM_MSG:
+ dlm_message_in(&p->message);
+ type = p->message.m_type;
+ break;
+ case DLM_RCOM:
+ dlm_rcom_in(&p->rcom);
+ type = p->rcom.rc_type;
+ break;
+ default:
+ log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
+ return;
+ }
+
+ if (hd->h_nodeid != nodeid) { /* the sender named in the header must match the node we received from */
+ log_print("invalid h_nodeid %d from %d lockspace %x",
+ hd->h_nodeid, nodeid, hd->h_lockspace);
+ return;
+ }
+
+ ls = dlm_find_lockspace_global(hd->h_lockspace);
+ if (!ls) {
+ if (dlm_config.ci_log_debug)
+ log_print("invalid lockspace %x from %d cmd %d type %d",
+ hd->h_lockspace, nodeid, hd->h_cmd, type);
+
+ if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
+ dlm_send_ls_not_ready(nodeid, &p->rcom);
+ return;
+ }
+
+ /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
+ be inactive (in this ls) before transitioning to recovery mode */
+
+ down_read(&ls->ls_recv_active);
+ if (hd->h_cmd == DLM_MSG)
+ dlm_receive_message(ls, &p->message, nodeid);
+ else
+ dlm_receive_rcom(ls, &p->rcom, nodeid);
+ up_read(&ls->ls_recv_active);
+
+ dlm_put_lockspace(ls); /* presumably drops the reference taken by dlm_find_lockspace_global() -- confirm */
+}
static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
ls->ls_stub_ms.m_result = -EINPROGRESS;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+ ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
_receive_convert_reply(lkb, &ls->ls_stub_ms);
/* Same special case as in receive_rcom_lock_args() */
void dlm_recover_waiters_pre(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
+ int wait_type, stub_unlock_result, stub_cancel_result;
mutex_lock(&ls->ls_waiters_mutex);
if (!waiter_needs_recovery(ls, lkb))
continue;
- switch (lkb->lkb_wait_type) {
+ wait_type = lkb->lkb_wait_type;
+ stub_unlock_result = -DLM_EUNLOCK;
+ stub_cancel_result = -DLM_ECANCEL;
+
+ /* Main reply may have been received leaving a zero wait_type,
+ but a reply for the overlapping op may not have been
+ received. In that case we need to fake the appropriate
+ reply for the overlap op. */
+
+ if (!wait_type) {
+ if (is_overlap_cancel(lkb)) {
+ wait_type = DLM_MSG_CANCEL;
+ if (lkb->lkb_grmode == DLM_LOCK_IV)
+ stub_cancel_result = 0;
+ }
+ if (is_overlap_unlock(lkb)) {
+ wait_type = DLM_MSG_UNLOCK;
+ if (lkb->lkb_grmode == DLM_LOCK_IV)
+ stub_unlock_result = -ENOENT;
+ }
+
+ log_debug(ls, "rwpre overlap %x %x %d %d %d",
+ lkb->lkb_id, lkb->lkb_flags, wait_type,
+ stub_cancel_result, stub_unlock_result);
+ }
+
+ switch (wait_type) {
case DLM_MSG_REQUEST:
lkb->lkb_flags |= DLM_IFL_RESEND;
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
- ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+ ls->ls_stub_ms.m_result = stub_unlock_result;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+ ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
_receive_unlock_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
break;
case DLM_MSG_CANCEL:
hold_lkb(lkb);
ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
- ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+ ls->ls_stub_ms.m_result = stub_cancel_result;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+ ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
_receive_cancel_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
break;
default:
- log_error(ls, "invalid lkb wait_type %d",
- lkb->lkb_wait_type);
+ log_error(ls, "invalid lkb wait_type %d %d",
+ lkb->lkb_wait_type, wait_type);
}
schedule();
}
return NULL;
}
+/* needs at least dlm_rcom + rcom_lock */
static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_rsb *r, struct dlm_rcom *rc)
{
struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
- int lvblen;
lkb->lkb_nodeid = rc->rc_header.h_nodeid;
- lkb->lkb_ownpid = rl->rl_ownpid;
- lkb->lkb_remid = rl->rl_lkid;
- lkb->lkb_exflags = rl->rl_exflags;
- lkb->lkb_flags = rl->rl_flags & 0x0000FFFF;
+ lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid);
+ lkb->lkb_remid = le32_to_cpu(rl->rl_lkid);
+ lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags);
+ lkb->lkb_flags = le32_to_cpu(rl->rl_flags) & 0x0000FFFF;
lkb->lkb_flags |= DLM_IFL_MSTCPY;
- lkb->lkb_lvbseq = rl->rl_lvbseq;
+ lkb->lkb_lvbseq = le32_to_cpu(rl->rl_lvbseq);
lkb->lkb_rqmode = rl->rl_rqmode;
lkb->lkb_grmode = rl->rl_grmode;
/* don't set lkb_status because add_lkb wants to itself */
- lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST);
- lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);
+ lkb->lkb_bastfn = (rl->rl_asts & AST_BAST) ? &fake_bastfn : NULL;
+ lkb->lkb_astfn = (rl->rl_asts & AST_COMP) ? &fake_astfn : NULL;
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
- lkb->lkb_lvbptr = allocate_lvb(ls);
+ int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
+ sizeof(struct rcom_lock);
+ if (lvblen > ls->ls_lvblen)
+ return -EINVAL;
+ lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
if (!lkb->lkb_lvbptr)
return -ENOMEM;
- lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
- sizeof(struct rcom_lock);
memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
}
The real granted mode of these converting locks cannot be determined
until all locks have been rebuilt on the rsb (recover_conversion) */
- if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) {
+ if (rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT) &&
+ middle_conversion(lkb)) {
rl->rl_status = DLM_LKSTS_CONVERT;
lkb->lkb_grmode = DLM_LOCK_IV;
rsb_set_flag(r, RSB_RECOVER_CONVERT);
the given values and send back our lkid. We send back our lkid by sending
back the rcom_lock struct we got but with the remid field filled in. */
+/* needs at least dlm_rcom + rcom_lock */
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
{
struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
goto out;
}
- error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r);
+ error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen),
+ R_MASTER, &r);
if (error)
goto out;
lock_rsb(r);
- lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid);
+ lkb = search_remid(r, rc->rc_header.h_nodeid, le32_to_cpu(rl->rl_lkid));
if (lkb) {
error = -EEXIST;
goto out_remid;
out_remid:
/* this is the new value returned to the lock holder for
saving in its process-copy lkb */
- rl->rl_remid = lkb->lkb_id;
+ rl->rl_remid = cpu_to_le32(lkb->lkb_id);
out_unlock:
unlock_rsb(r);
put_rsb(r);
out:
if (error)
- log_print("recover_master_copy %d %x", error, rl->rl_lkid);
- rl->rl_result = error;
+ log_debug(ls, "recover_master_copy %d %x", error,
+ le32_to_cpu(rl->rl_lkid));
+ rl->rl_result = cpu_to_le32(error);
return error;
}
+/* needs at least dlm_rcom + rcom_lock */
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
{
struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
struct dlm_lkb *lkb;
int error;
- error = find_lkb(ls, rl->rl_lkid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(rl->rl_lkid), &lkb);
if (error) {
- log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid);
+ log_error(ls, "recover_process_copy no lkid %x",
+ le32_to_cpu(rl->rl_lkid));
return error;
}
DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
- error = rl->rl_result;
+ error = le32_to_cpu(rl->rl_result);
r = lkb->lkb_resource;
hold_rsb(r);
log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* fall through */
case 0:
- lkb->lkb_remid = rl->rl_remid;
+ lkb->lkb_remid = le32_to_cpu(rl->rl_remid);
break;
default:
log_error(ls, "dlm_recover_process_copy unknown error %d %x",
}
}
- /* After ua is attached to lkb it will be freed by free_lkb().
+ /* After ua is attached to lkb it will be freed by dlm_free_lkb().
When DLM_IFL_USER is set, the dlm knows that this is a userspace
lock and that lkb_astparam is the dlm_user_args structure. */
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
- DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
+ fake_astfn, ua, fake_bastfn, &args);
lkb->lkb_flags |= DLM_IFL_USER;
ua->old_mode = DLM_LOCK_IV;
/* user can change the params on its lock when it converts it, or
add an lvb that didn't exist before */
- ua = (struct dlm_user_args *)lkb->lkb_astparam;
+ ua = lkb->lkb_ua;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
ua->old_mode = lkb->lkb_grmode;
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
- DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
+ fake_astfn, ua, fake_bastfn, &args);
if (error)
goto out_put;
if (error)
goto out;
- ua = (struct dlm_user_args *)lkb->lkb_astparam;
+ ua = lkb->lkb_ua;
if (lvb_in && ua->lksb.sb_lvbptr)
memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
- ua->castparam = ua_tmp->castparam;
+ if (ua_tmp->castparam)
+ ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);
if (error)
goto out;
- ua = (struct dlm_user_args *)lkb->lkb_astparam;
- ua->castparam = ua_tmp->castparam;
+ ua = lkb->lkb_ua;
+ if (ua_tmp->castparam)
+ ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);
return error;
}
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{ /* Cancels the lock with id lkid in lockspace ls, setting
+ DLM_IFL_DEADLOCK_CANCEL on it first. flags is passed on to
+ set_unlock_args(). Returns 0 on success, or the error from
+ find_lkb(), set_unlock_args(), validate_unlock_args() or
+ _cancel_lock(); -DLM_ECANCEL and -EBUSY are turned into 0. */
+ struct dlm_lkb *lkb;
+ struct dlm_args args;
+ struct dlm_user_args *ua;
+ struct dlm_rsb *r;
+ int error;
+
+ dlm_lock_recovery(ls);
+
+ error = find_lkb(ls, lkid, &lkb);
+ if (error)
+ goto out;
+
+ ua = lkb->lkb_ua;
+
+ error = set_unlock_args(flags, ua, &args);
+ if (error)
+ goto out_put;
+
+ /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+ r = lkb->lkb_resource;
+ hold_rsb(r);
+ lock_rsb(r);
+
+ error = validate_unlock_args(lkb, &args);
+ if (error)
+ goto out_r;
+ lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL; /* with this flag set, a -DLM_ECANCEL completion status is reported as -EDEADLK where sb_status is filled in */
+
+ error = _cancel_lock(r, lkb);
+ out_r:
+ unlock_rsb(r);
+ put_rsb(r);
+
+ if (error == -DLM_ECANCEL)
+ error = 0;
+ /* from validate_unlock_args() */
+ if (error == -EBUSY)
+ error = 0;
+ out_put:
+ dlm_put_lkb(lkb);
+ out:
+ dlm_unlock_recovery(ls);
+ return error;
+}
+
/* lkb's that are removed from the waiters list by revert are just left on the
orphans list with the granted orphan locks, to be freed by purge */
static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
- struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam;
struct dlm_args args;
int error;
list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
mutex_unlock(&ls->ls_orphans_mutex);
- set_unlock_args(0, ua, &args);
+ set_unlock_args(0, lkb->lkb_ua, &args);
error = cancel_lock(ls, lkb, &args);
if (error == -DLM_ECANCEL)
static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
- struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam;
struct dlm_args args;
int error;
- set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args);
+ set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args);
error = unlock_lock(ls, lkb, &args);
if (error == -DLM_EUNLOCK)
lkb = del_proc_lock(ls, proc);
if (!lkb)
break;
+ del_timeout(lkb);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
orphan_proc_lock(ls, lkb);
else
}
list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ lkb->lkb_ast_type = 0;
list_del(&lkb->lkb_astqueue);
dlm_put_lkb(lkb);
}