/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
L: receive_xxxx_reply() <- R: send_xxxx_reply()
*/
#include <linux/types.h>
+#include <linux/slab.h>
#include "dlm_internal.h"
#include <linux/dlm_device.h>
#include "memory.h"
lkb->lkb_lksb->sb_status = rv;
lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
- dlm_add_ast(lkb, AST_COMP);
+ dlm_add_ast(lkb, AST_COMP, lkb->lkb_grmode);
}
static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
{
- if (is_master_copy(lkb))
+ lkb->lkb_time_bast = ktime_get();
+
+ if (is_master_copy(lkb)) {
+ lkb->lkb_bastmode = rqmode; /* printed by debugfs */
send_bast(r, lkb, rqmode);
- else {
- lkb->lkb_bastmode = rqmode;
- dlm_add_ast(lkb, AST_BAST);
+ } else {
+ dlm_add_ast(lkb, AST_BAST, rqmode);
}
}
unsigned int flags, struct dlm_rsb **r_ret)
{
int error;
- write_lock(&ls->ls_rsbtbl[b].lock);
+ spin_lock(&ls->ls_rsbtbl[b].lock);
error = _search_rsb(ls, name, len, b, flags, r_ret);
- write_unlock(&ls->ls_rsbtbl[b].lock);
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
return error;
}
static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret)
{
- struct dlm_rsb *r, *tmp;
+ struct dlm_rsb *r = NULL, *tmp;
uint32_t hash, bucket;
int error = -EINVAL;
r->res_nodeid = nodeid;
}
- write_lock(&ls->ls_rsbtbl[bucket].lock);
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
if (!error) {
- write_unlock(&ls->ls_rsbtbl[bucket].lock);
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
dlm_free_rsb(r);
r = tmp;
goto out;
}
list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
- write_unlock(&ls->ls_rsbtbl[bucket].lock);
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
error = 0;
out:
*r_ret = r;
struct dlm_ls *ls = r->res_ls;
uint32_t bucket = r->res_bucket;
- write_lock(&ls->ls_rsbtbl[bucket].lock);
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
kref_put(&r->res_ref, toss_rsb);
- write_unlock(&ls->ls_rsbtbl[bucket].lock);
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}
void dlm_put_rsb(struct dlm_rsb *r)
if (lkb->lkb_rqmode < mode)
break;
- if (!lkb)
- list_add_tail(new, head);
- else
- __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
+ __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
}
/* add/remove lkb to rsb's grant/convert/wait queue */
DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
+ lkb->lkb_timestamp = ktime_get();
+
lkb->lkb_status = status;
switch (status) {
lkb->lkb_wait_count++;
hold_lkb(lkb);
- log_debug(ls, "add overlap %x cur %d new %d count %d flags %x",
+ log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
lkb->lkb_id, lkb->lkb_wait_type, mstype,
lkb->lkb_wait_count, lkb->lkb_flags);
goto out;
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
out:
if (error)
- log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s",
+ log_error(ls, "addwait error %x %d flags %x %d %d %s",
lkb->lkb_id, error, lkb->lkb_flags, mstype,
lkb->lkb_wait_type, lkb->lkb_resource->res_name);
mutex_unlock(&ls->ls_waiters_mutex);
request reply on the requestqueue) between dlm_recover_waiters_pre() which
set RESEND and dlm_recover_waiters_post() */
-static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype)
+static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
+ struct dlm_message *ms)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int overlap_done = 0;
if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) {
+ log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id);
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
overlap_done = 1;
goto out_del;
}
if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) {
+ log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id);
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
overlap_done = 1;
goto out_del;
}
+ /* Cancel state was preemptively cleared by a successful convert,
+ see next comment, nothing to do. */
+
+ if ((mstype == DLM_MSG_CANCEL_REPLY) &&
+ (lkb->lkb_wait_type != DLM_MSG_CANCEL)) {
+ log_debug(ls, "remwait %x cancel_reply wait_type %d",
+ lkb->lkb_id, lkb->lkb_wait_type);
+ return -1;
+ }
+
+ /* Remove for the convert reply, and premptively remove for the
+ cancel reply. A convert has been granted while there's still
+ an outstanding cancel on it (the cancel is moot and the result
+ in the cancel reply should be 0). We preempt the cancel reply
+ because the app gets the convert result and then can follow up
+ with another op, like convert. This subsequent op would see the
+ lingering state of the cancel and fail with -EBUSY. */
+
+ if ((mstype == DLM_MSG_CONVERT_REPLY) &&
+ (lkb->lkb_wait_type == DLM_MSG_CONVERT) &&
+ is_overlap_cancel(lkb) && ms && !ms->m_result) {
+ log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
+ lkb->lkb_id);
+ lkb->lkb_wait_type = 0;
+ lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
+ lkb->lkb_wait_count--;
+ goto out_del;
+ }
+
/* N.B. type of reply may not always correspond to type of original
msg due to lookup->request optimization, verify others? */
goto out_del;
}
- log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d",
- lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type);
+ log_error(ls, "remwait error %x reply %d flags %x no wait_type",
+ lkb->lkb_id, mstype, lkb->lkb_flags);
return -1;
out_del:
this would happen */
if (overlap_done && lkb->lkb_wait_type) {
- log_error(ls, "remove_from_waiters %x reply %d give up on %d",
+ log_error(ls, "remwait error %x reply %d wait_type %d overlap",
lkb->lkb_id, mstype, lkb->lkb_wait_type);
lkb->lkb_wait_count--;
lkb->lkb_wait_type = 0;
int error;
mutex_lock(&ls->ls_waiters_mutex);
- error = _remove_from_waiters(lkb, mstype);
+ error = _remove_from_waiters(lkb, mstype, NULL);
mutex_unlock(&ls->ls_waiters_mutex);
return error;
}
if (ms != &ls->ls_stub_ms)
mutex_lock(&ls->ls_waiters_mutex);
- error = _remove_from_waiters(lkb, ms->m_type);
+ error = _remove_from_waiters(lkb, ms->m_type, ms);
if (ms != &ls->ls_stub_ms)
mutex_unlock(&ls->ls_waiters_mutex);
return error;
for (;;) {
found = 0;
- write_lock(&ls->ls_rsbtbl[b].lock);
+ spin_lock(&ls->ls_rsbtbl[b].lock);
list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
res_hashchain) {
if (!time_after_eq(jiffies, r->res_toss_time +
}
if (!found) {
- write_unlock(&ls->ls_rsbtbl[b].lock);
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
break;
}
if (kref_put(&r->res_ref, kill_rsb)) {
list_del(&r->res_hashchain);
- write_unlock(&ls->ls_rsbtbl[b].lock);
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
if (is_master(r))
dir_remove(r);
dlm_free_rsb(r);
count++;
} else {
- write_unlock(&ls->ls_rsbtbl[b].lock);
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
log_error(ls, "tossed rsb in use %s", r->res_name);
}
}
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- if (is_master_copy(lkb)) {
- lkb->lkb_timestamp = jiffies;
+ if (is_master_copy(lkb))
return;
- }
if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
mutex_lock(&ls->ls_timeout_mutex);
hold_lkb(lkb);
- lkb->lkb_timestamp = jiffies;
list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
mutex_unlock(&ls->ls_timeout_mutex);
}
struct dlm_rsb *r;
struct dlm_lkb *lkb;
int do_cancel, do_warn;
+ s64 wait_us;
for (;;) {
if (dlm_locking_stopped(ls))
mutex_lock(&ls->ls_timeout_mutex);
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+ wait_us = ktime_to_us(ktime_sub(ktime_get(),
+ lkb->lkb_timestamp));
+
if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
- time_after_eq(jiffies, lkb->lkb_timestamp +
- lkb->lkb_timeout_cs * HZ/100))
+ wait_us >= (lkb->lkb_timeout_cs * 10000))
do_cancel = 1;
if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
- time_after_eq(jiffies, lkb->lkb_timestamp +
- dlm_config.ci_timewarn_cs * HZ/100))
+ wait_us >= dlm_config.ci_timewarn_cs * 10000)
do_warn = 1;
if (!do_cancel && !do_warn)
void dlm_adjust_timeouts(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
- long adj = jiffies - ls->ls_recover_begin;
+ u64 adj_us = jiffies_to_usecs(jiffies - ls->ls_recover_begin);
ls->ls_recover_begin = 0;
mutex_lock(&ls->ls_timeout_mutex);
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
- lkb->lkb_timestamp += adj;
+ lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
}
lkb->lkb_timeout_cs = args->timeout;
rv = 0;
out:
+ if (rv)
+ log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
+ rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
+ lkb->lkb_status, lkb->lkb_wait_type,
+ lkb->lkb_resource->res_name);
return rv;
}
goto out;
}
+ /* there's nothing to cancel */
+ if (lkb->lkb_status == DLM_LKSTS_GRANTED &&
+ !lkb->lkb_wait_type) {
+ rv = -EBUSY;
+ goto out;
+ }
+
switch (lkb->lkb_wait_type) {
case DLM_MSG_LOOKUP:
case DLM_MSG_REQUEST:
if (can_be_queued(lkb)) {
error = -EINPROGRESS;
add_lkb(r, lkb, DLM_LKSTS_WAITING);
- send_blocking_asts(r, lkb);
add_timeout(lkb);
goto out;
}
error = -EAGAIN;
- if (force_blocking_asts(lkb))
- send_blocking_asts_all(r, lkb);
queue_cast(r, lkb, -EAGAIN);
-
out:
return error;
}
+static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ int error)
+{
+ switch (error) {
+ case -EAGAIN:
+ if (force_blocking_asts(lkb))
+ send_blocking_asts_all(r, lkb);
+ break;
+ case -EINPROGRESS:
+ send_blocking_asts(r, lkb);
+ break;
+ }
+}
+
static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
int error = 0;
if (can_be_granted(r, lkb, 1, &deadlk)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
- grant_pending_locks(r);
goto out;
}
if (_can_be_granted(r, lkb, 1)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
- grant_pending_locks(r);
goto out;
}
/* else fall through and move to convert queue */
error = -EINPROGRESS;
del_lkb(r, lkb);
add_lkb(r, lkb, DLM_LKSTS_CONVERT);
- send_blocking_asts(r, lkb);
add_timeout(lkb);
goto out;
}
error = -EAGAIN;
- if (force_blocking_asts(lkb))
- send_blocking_asts_all(r, lkb);
queue_cast(r, lkb, -EAGAIN);
-
out:
return error;
}
+static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ int error)
+{
+ switch (error) {
+ case 0:
+ grant_pending_locks(r);
+ /* grant_pending_locks also sends basts */
+ break;
+ case -EAGAIN:
+ if (force_blocking_asts(lkb))
+ send_blocking_asts_all(r, lkb);
+ break;
+ case -EINPROGRESS:
+ send_blocking_asts(r, lkb);
+ break;
+ }
+}
+
static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
remove_lock(r, lkb);
queue_cast(r, lkb, -DLM_EUNLOCK);
- grant_pending_locks(r);
return -DLM_EUNLOCK;
}
+static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ int error)
+{
+ grant_pending_locks(r);
+}
+
/* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
error = revert_lock(r, lkb);
if (error) {
queue_cast(r, lkb, -DLM_ECANCEL);
- grant_pending_locks(r);
return -DLM_ECANCEL;
}
return 0;
}
+static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ int error)
+{
+ if (error)
+ grant_pending_locks(r);
+}
+
/*
* Four stage 3 varieties:
* _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
goto out;
}
- if (is_remote(r))
+ if (is_remote(r)) {
/* receive_request() calls do_request() on remote node */
error = send_request(r, lkb);
- else
+ } else {
error = do_request(r, lkb);
+ /* for remote locks the request_reply is sent
+ between do_request and do_request_effects */
+ do_request_effects(r, lkb, error);
+ }
out:
return error;
}
{
int error;
- if (is_remote(r))
+ if (is_remote(r)) {
/* receive_convert() calls do_convert() on remote node */
error = send_convert(r, lkb);
- else
+ } else {
error = do_convert(r, lkb);
+ /* for remote locks the convert_reply is sent
+ between do_convert and do_convert_effects */
+ do_convert_effects(r, lkb, error);
+ }
return error;
}
{
int error;
- if (is_remote(r))
+ if (is_remote(r)) {
/* receive_unlock() calls do_unlock() on remote node */
error = send_unlock(r, lkb);
- else
+ } else {
error = do_unlock(r, lkb);
+ /* for remote locks the unlock_reply is sent
+ between do_unlock and do_unlock_effects */
+ do_unlock_effects(r, lkb, error);
+ }
return error;
}
{
int error;
- if (is_remote(r))
+ if (is_remote(r)) {
/* receive_cancel() calls do_cancel() on remote node */
error = send_cancel(r, lkb);
- else
+ } else {
error = do_cancel(r, lkb);
+ /* for remote locks the cancel_reply is sent
+ between do_cancel and do_cancel_effects */
+ do_cancel_effects(r, lkb, error);
+ }
return error;
}
pass into lowcomms_commit and a message buffer (mb) that we
write our data into */
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
+ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
if (!mh)
return -ENOBUFS;
attach_lkb(r, lkb);
error = do_request(r, lkb);
send_request_reply(r, lkb, error);
+ do_request_effects(r, lkb, error);
unlock_rsb(r);
put_rsb(r);
goto out;
receive_flags(lkb, ms);
+
error = receive_convert_args(ls, lkb, ms);
- if (error)
- goto out_reply;
+ if (error) {
+ send_convert_reply(r, lkb, error);
+ goto out;
+ }
+
reply = !down_conversion(lkb);
error = do_convert(r, lkb);
- out_reply:
if (reply)
send_convert_reply(r, lkb, error);
+ do_convert_effects(r, lkb, error);
out:
unlock_rsb(r);
put_rsb(r);
goto out;
receive_flags(lkb, ms);
+
error = receive_unlock_args(ls, lkb, ms);
- if (error)
- goto out_reply;
+ if (error) {
+ send_unlock_reply(r, lkb, error);
+ goto out;
+ }
error = do_unlock(r, lkb);
- out_reply:
send_unlock_reply(r, lkb, error);
+ do_unlock_effects(r, lkb, error);
out:
unlock_rsb(r);
put_rsb(r);
error = do_cancel(r, lkb);
send_cancel_reply(r, lkb, error);
+ do_cancel_effects(r, lkb, error);
out:
unlock_rsb(r);
put_rsb(r);
{
struct dlm_rsb *r, *r_ret = NULL;
- read_lock(&ls->ls_rsbtbl[bucket].lock);
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) {
if (!rsb_flag(r, RSB_LOCKS_PURGED))
continue;
r_ret = r;
break;
}
- read_unlock(&ls->ls_rsbtbl[bucket].lock);
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
return r_ret;
}
}
if (flags & DLM_LKF_VALBLK) {
- ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
__put_lkb(ls, lkb);
ua = lkb->lkb_ua;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
- ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
if (!ua->lksb.sb_lvbptr) {
error = -ENOMEM;
goto out_put;