/*
- * Copyright (C) 2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
#include "lockspace.h"
#include "lock.h"
#include "lvb_table.h"
+#include "user.h"
-static const char *name_prefix="dlm";
-static struct miscdevice ctl_device;
-static struct file_operations device_fops;
+static const char name_prefix[] = "dlm";
+static const struct file_operations device_fops;
+static atomic_t dlm_monitor_opened;
+static int dlm_monitor_unused = 1;
#ifdef CONFIG_COMPAT
struct dlm_lock_params32 {
__u8 mode;
__u8 namelen;
- __u16 flags;
+ __u16 unused;
+ __u32 flags;
__u32 lkid;
__u32 parent;
-
+ __u64 xid;
+ __u64 timeout;
__u32 castparam;
__u32 castaddr;
__u32 bastparam;
__u32 bastaddr;
__u32 lksb;
-
char lvb[DLM_USER_LVB_LEN];
char name[0];
};
union {
struct dlm_lock_params32 lock;
struct dlm_lspace_params lspace;
+ struct dlm_purge_params purge;
} i;
};
};
struct dlm_lock_result32 {
+ __u32 version[3];
__u32 length;
__u32 user_astaddr;
__u32 user_astparam;
};
static void compat_input(struct dlm_write_request *kb,
- struct dlm_write_request32 *kb32)
+ struct dlm_write_request32 *kb32,
+ int namelen)
{
kb->version[0] = kb32->version[0];
kb->version[1] = kb32->version[1];
kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
kb->i.lspace.flags = kb32->i.lspace.flags;
kb->i.lspace.minor = kb32->i.lspace.minor;
- strcpy(kb->i.lspace.name, kb32->i.lspace.name);
+ memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen);
+ } else if (kb->cmd == DLM_USER_PURGE) {
+ kb->i.purge.nodeid = kb32->i.purge.nodeid;
+ kb->i.purge.pid = kb32->i.purge.pid;
} else {
kb->i.lock.mode = kb32->i.lock.mode;
kb->i.lock.namelen = kb32->i.lock.namelen;
kb->i.lock.flags = kb32->i.lock.flags;
kb->i.lock.lkid = kb32->i.lock.lkid;
kb->i.lock.parent = kb32->i.lock.parent;
+ kb->i.lock.xid = kb32->i.lock.xid;
+ kb->i.lock.timeout = kb32->i.lock.timeout;
kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
- memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
+ memcpy(kb->i.lock.name, kb32->i.lock.name, namelen);
}
}
static void compat_output(struct dlm_lock_result *res,
struct dlm_lock_result32 *res32)
{
- res32->length = res->length - (sizeof(struct dlm_lock_result) -
- sizeof(struct dlm_lock_result32));
+ res32->version[0] = res->version[0];
+ res32->version[1] = res->version[1];
+ res32->version[2] = res->version[2];
+
res32->user_astaddr = (__u32)(long)res->user_astaddr;
res32->user_astparam = (__u32)(long)res->user_astparam;
res32->user_lksb = (__u32)(long)res->user_lksb;
}
#endif
+/* Figure out if this lock is at the end of its life and no longer
+ available for the application to use. The lkb still exists until
+ the final ast is read. A lock becomes EOL in three situations:
+ 1. a noqueue request fails with EAGAIN
+ 2. an unlock completes with EUNLOCK
+ 3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+ An EOL lock needs to be removed from the process's list of locks.
+ And we can't allow any new operation on an EOL lock. This is
+ not related to the lifetime of the lkb struct which is managed
+ entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+ switch (sb_status) {
+ case -DLM_EUNLOCK:
+ return 1;
+ case -DLM_ECANCEL:
+ case -ETIMEDOUT:
+ case -EDEADLK:
+ if (lkb->lkb_grmode == DLM_LOCK_IV)
+ return 1;
+ break;
+ case -EAGAIN:
+ if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+ return 1;
+ break;
+ }
+ return 0;
+}
+
+/* we could possibly check if the cancel of an orphan has resulted in the lkb
+ being removed and then remove that lkb from the orphans list and free it */
-void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
+void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
{
struct dlm_ls *ls;
struct dlm_user_args *ua;
struct dlm_user_proc *proc;
- int remove_ownqueue = 0;
+ int eol = 0, ast_type;
- /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each
- lkb before dealing with it. We need to check this
- flag before taking ls_clear_proc_locks mutex because if
- it's set, dlm_clear_proc_locks() holds the mutex. */
-
- if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
- /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */
+ if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
return;
- }
ls = lkb->lkb_resource->res_ls;
mutex_lock(&ls->ls_clear_proc_locks);
/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
- lkb->ua so we can't try to use it. */
+ lkb->ua so we can't try to use it. This second check is necessary
+ for cases where a completion ast is received for an operation that
+ began before clear_proc_locks did its cancel/unlock. */
- if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
- /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */
+ if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
goto out;
- }
- DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
- ua = (struct dlm_user_args *)lkb->lkb_astparam;
+ DLM_ASSERT(lkb->lkb_ua, dlm_print_lkb(lkb););
+ ua = lkb->lkb_ua;
proc = ua->proc;
if (type == AST_BAST && ua->bastaddr == NULL)
goto out;
spin_lock(&proc->asts_spin);
- if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
+
+ ast_type = lkb->lkb_ast_type;
+ lkb->lkb_ast_type |= type;
+ if (bastmode)
+ lkb->lkb_bastmode = bastmode;
+
+ if (!ast_type) {
kref_get(&lkb->lkb_ref);
list_add_tail(&lkb->lkb_astqueue, &proc->asts);
- lkb->lkb_ast_type |= type;
wake_up_interruptible(&proc->wait);
}
+ if (type == AST_COMP && (ast_type & AST_COMP))
+ log_debug(ls, "ast overlap %x status %x %x",
+ lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
- /* noqueue requests that fail may need to be removed from the
- proc's locks list, there should be a better way of detecting
- this situation than checking all these things... */
-
- if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV &&
- ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
- remove_ownqueue = 1;
+ eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
+ if (eol) {
+ lkb->lkb_ast_type &= ~AST_BAST;
+ lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
+ }
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
spin_unlock(&proc->asts_spin);
- if (remove_ownqueue) {
- spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
- spin_unlock(&ua->proc->locks_spin);
- dlm_put_lkb(lkb);
+ if (eol) {
+ spin_lock(&proc->locks_spin);
+ if (!list_empty(&lkb->lkb_ownqueue)) {
+ list_del_init(&lkb->lkb_ownqueue);
+ dlm_put_lkb(lkb);
+ }
+ spin_unlock(&proc->locks_spin);
}
out:
mutex_unlock(&ls->ls_clear_proc_locks);
ua->castaddr = params->castaddr;
ua->bastparam = params->bastparam;
ua->bastaddr = params->bastaddr;
+ ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT)
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
- params->lkid, params->lvb);
+ params->lkid, params->lvb,
+ (unsigned long) params->timeout);
else {
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
- params->parent);
+ (unsigned long) params->timeout);
if (!error)
error = ua->lksb.sb_lkid;
}
return error;
}
-static int device_create_lockspace(struct dlm_lspace_params *params)
+static int device_user_deadlock(struct dlm_user_proc *proc,
+ struct dlm_lock_params *params)
{
- dlm_lockspace_t *lockspace;
struct dlm_ls *ls;
- int error, len;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- error = dlm_new_lockspace(params->name, strlen(params->name),
- &lockspace, 0, DLM_USER_LVB_LEN);
- if (error)
- return error;
+ int error;
- ls = dlm_find_lockspace_local(lockspace);
+ ls = dlm_find_lockspace_local(proc->lockspace);
if (!ls)
return -ENOENT;
+ error = dlm_user_deadlock(ls, params->flags, params->lkid);
+
+ dlm_put_lockspace(ls);
+ return error;
+}
+
+static int dlm_device_register(struct dlm_ls *ls, char *name)
+{
+ int error, len;
+
+ /* The device is already registered. This happens when the
+ lockspace is created multiple times from userspace. */
+ if (ls->ls_device.name)
+ return 0;
+
error = -ENOMEM;
- len = strlen(params->name) + strlen(name_prefix) + 2;
+ len = strlen(name) + strlen(name_prefix) + 2;
ls->ls_device.name = kzalloc(len, GFP_KERNEL);
if (!ls->ls_device.name)
goto fail;
+
snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
- params->name);
+ name);
ls->ls_device.fops = &device_fops;
ls->ls_device.minor = MISC_DYNAMIC_MINOR;
error = misc_register(&ls->ls_device);
if (error) {
kfree(ls->ls_device.name);
- goto fail;
}
+fail:
+ return error;
+}
+
+int dlm_device_deregister(struct dlm_ls *ls)
+{
+ int error;
+
+ /* The device is not registered. This happens when the lockspace
+ was never used from userspace, or when device_create_lockspace()
+ calls dlm_release_lockspace() after the register fails. */
+ if (!ls->ls_device.name)
+ return 0;
+
+ error = misc_deregister(&ls->ls_device);
+ if (!error)
+ kfree(ls->ls_device.name);
+ return error;
+}
+
+static int device_user_purge(struct dlm_user_proc *proc,
+ struct dlm_purge_params *params)
+{
+ struct dlm_ls *ls;
+ int error;
+
+ ls = dlm_find_lockspace_local(proc->lockspace);
+ if (!ls)
+ return -ENOENT;
+
+ error = dlm_user_purge(ls, proc, params->nodeid, params->pid);
- error = ls->ls_device.minor;
dlm_put_lockspace(ls);
return error;
+}
+
+static int device_create_lockspace(struct dlm_lspace_params *params)
+{
+ dlm_lockspace_t *lockspace;
+ struct dlm_ls *ls;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ error = dlm_new_lockspace(params->name, strlen(params->name),
+ &lockspace, params->flags, DLM_USER_LVB_LEN);
+ if (error)
+ return error;
+
+ ls = dlm_find_lockspace_local(lockspace);
+ if (!ls)
+ return -ENOENT;
- fail:
+ error = dlm_device_register(ls, params->name);
dlm_put_lockspace(ls);
- dlm_release_lockspace(lockspace, 0);
+
+ if (error)
+ dlm_release_lockspace(lockspace, 0);
+ else
+ error = ls->ls_device.minor;
+
return error;
}
{
dlm_lockspace_t *lockspace;
struct dlm_ls *ls;
- int error;
+ int error, force = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!ls)
return -ENOENT;
- error = misc_deregister(&ls->ls_device);
- if (error) {
- dlm_put_lockspace(ls);
- goto out;
- }
- kfree(ls->ls_device.name);
+ if (params->flags & DLM_USER_LSFLG_FORCEFREE)
+ force = 2;
lockspace = ls->ls_local_handle;
+ dlm_put_lockspace(ls);
- /* dlm_release_lockspace waits for references to go to zero,
- so all processes will need to close their device for the ls
- before the release will procede */
+ /* The final dlm_release_lockspace waits for references to go to
+ zero, so all processes will need to close their device for the
+ ls before the release will proceed. release also calls the
+ device_deregister above. Converting a positive return value
+ from release to zero means that userspace won't know when its
+ release was the final one, but it shouldn't need to know. */
- dlm_put_lockspace(ls);
- error = dlm_release_lockspace(lockspace, 0);
-out:
+ error = dlm_release_lockspace(lockspace, force);
+ if (error > 0)
+ error = 0;
return error;
}
printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
"user (%d.%d.%d) kernel (%d.%d.%d)\n",
current->comm,
- current->pid,
+ task_pid_nr(current),
req->version[0],
req->version[1],
req->version[2],
#endif
return -EINVAL;
- kbuf = kmalloc(count, GFP_KERNEL);
+ kbuf = kzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
#ifdef CONFIG_COMPAT
if (!kbuf->is64bit) {
struct dlm_write_request32 *k32buf;
+ int namelen = 0;
+
+ if (count > sizeof(struct dlm_write_request32))
+ namelen = count - sizeof(struct dlm_write_request32);
+
k32buf = (struct dlm_write_request32 *)kbuf;
- kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
- sizeof(struct dlm_write_request32)), GFP_KERNEL);
- if (!kbuf)
+
+ /* add 1 after namelen so that the name string is terminated */
+ kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
+ GFP_KERNEL);
+ if (!kbuf) {
+ kfree(k32buf);
return -ENOMEM;
+ }
if (proc)
set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
- compat_input(kbuf, k32buf);
+
+ compat_input(kbuf, k32buf, namelen);
kfree(k32buf);
}
#endif
/* do we really need this? can a write happen after a close? */
if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
- test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
- return -EINVAL;
+ (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
+ error = -EINVAL;
+ goto out_free;
+ }
sigfillset(&allsigs);
sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
error = device_user_unlock(proc, &kbuf->i.lock);
break;
+ case DLM_USER_DEADLOCK:
+ if (!proc) {
+ log_print("no locking on control device");
+ goto out_sig;
+ }
+ error = device_user_deadlock(proc, &kbuf->i.lock);
+ break;
+
case DLM_USER_CREATE_LOCKSPACE:
if (proc) {
log_print("create/remove only on control device");
error = device_remove_lockspace(&kbuf->i.lspace);
break;
+ case DLM_USER_PURGE:
+ if (!proc) {
+ log_print("no locking on control device");
+ goto out_sig;
+ }
+ error = device_user_purge(proc, &kbuf->i.purge);
+ break;
+
default:
log_print("Unknown command passed to DLM device : %d\n",
kbuf->cmd);
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
+ INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
int struct_len;
memset(&result, 0, sizeof(struct dlm_lock_result));
+ result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+ result.version[1] = DLM_DEVICE_VERSION_MINOR;
+ result.version[2] = DLM_DEVICE_VERSION_PATCH;
memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
result.user_lksb = ua->user_lksb;
return error;
}
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+ struct dlm_device_version ver;
+
+ memset(&ver, 0, sizeof(struct dlm_device_version));
+ ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+ ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+ ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+
+ if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+ return -EFAULT;
+ return sizeof(struct dlm_device_version);
+}
+
/* a read returns a single ast described in a struct dlm_lock_result */
static ssize_t device_read(struct file *file, char __user *buf, size_t count,
{
struct dlm_user_proc *proc = file->private_data;
struct dlm_lkb *lkb;
- struct dlm_user_args *ua;
DECLARE_WAITQUEUE(wait, current);
int error, type=0, bmode=0, removed = 0;
+ if (count == sizeof(struct dlm_device_version)) {
+ error = copy_version_to_user(buf, count);
+ return error;
+ }
+
+ if (!proc) {
+ log_print("non-version read from control device %zu", count);
+ return -EINVAL;
+ }
+
#ifdef CONFIG_COMPAT
if (count < sizeof(struct dlm_lock_result32))
#else
}
}
- if (list_empty(&proc->asts)) {
- spin_unlock(&proc->asts_spin);
- return -EAGAIN;
- }
-
/* there may be both completion and blocking asts to return for
the lkb, don't remove lkb from asts list unless no asts remain */
}
spin_unlock(&proc->asts_spin);
- ua = (struct dlm_user_args *)lkb->lkb_astparam;
- error = copy_result_to_user(ua,
+ error = copy_result_to_user(lkb->lkb_ua,
test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
type, bmode, buf, count);
return 0;
}
+int dlm_user_daemon_available(void)
+{
+ /* dlm_controld hasn't started (or, has started, but not
+ properly populated configfs) */
+
+ if (!dlm_our_nodeid())
+ return 0;
+
+ /* This is to deal with versions of dlm_controld that don't
+ know about the monitor device. We assume that if the
+ dlm_controld was started (above), but the monitor device
+ was never opened, that it's an old version. dlm_controld
+ should open the monitor device before populating configfs. */
+
+ if (dlm_monitor_unused)
+ return 1;
+
+ return atomic_read(&dlm_monitor_opened) ? 1 : 0;
+}
+
static int ctl_device_open(struct inode *inode, struct file *file)
{
file->private_data = NULL;
return 0;
}
-static struct file_operations device_fops = {
+static int monitor_device_open(struct inode *inode, struct file *file)
+{
+ atomic_inc(&dlm_monitor_opened);
+ dlm_monitor_unused = 0;
+ return 0;
+}
+
+static int monitor_device_close(struct inode *inode, struct file *file)
+{
+ if (atomic_dec_and_test(&dlm_monitor_opened))
+ dlm_stop_lockspaces();
+ return 0;
+}
+
+static const struct file_operations device_fops = {
.open = device_open,
.release = device_close,
.read = device_read,
.owner = THIS_MODULE,
};
-static struct file_operations ctl_device_fops = {
+static const struct file_operations ctl_device_fops = {
.open = ctl_device_open,
.release = ctl_device_close,
+ .read = device_read,
.write = device_write,
.owner = THIS_MODULE,
};
-int dlm_user_init(void)
+static struct miscdevice ctl_device = {
+ .name = "dlm-control",
+ .fops = &ctl_device_fops,
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static const struct file_operations monitor_device_fops = {
+ .open = monitor_device_open,
+ .release = monitor_device_close,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice monitor_device = {
+ .name = "dlm-monitor",
+ .fops = &monitor_device_fops,
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+int __init dlm_user_init(void)
{
int error;
- ctl_device.name = "dlm-control";
- ctl_device.fops = &ctl_device_fops;
- ctl_device.minor = MISC_DYNAMIC_MINOR;
+ atomic_set(&dlm_monitor_opened, 0);
error = misc_register(&ctl_device);
- if (error)
+ if (error) {
log_print("misc_register failed for control device");
+ goto out;
+ }
+ error = misc_register(&monitor_device);
+ if (error) {
+ log_print("misc_register failed for monitor device");
+ misc_deregister(&ctl_device);
+ }
+ out:
return error;
}
void dlm_user_exit(void)
{
misc_deregister(&ctl_device);
+ misc_deregister(&monitor_device);
}