ocfs2_dlm: Check for migrateable lockres in dlm_empty_lockres()
[safe/jmp/linux-2.6] / fs / ocfs2 / dlm / dlmdomain.c
index a818fde..c558442 100644 (file)
@@ -41,7 +41,6 @@
 #include "dlmapi.h"
 #include "dlmcommon.h"
 
-#include "dlmdebug.h"
 #include "dlmdomain.h"
 
 #include "dlmver.h"
 #include "cluster/masklog.h"
 
 /*
+ * ocfs2 node maps are array of long int, which limits to send them freely
+ * across the wire due to endianness issues. To workaround this, we convert
+ * long ints to byte arrays. Following 3 routines are helper functions to
+ * set/test/copy bits within those array of bytes
+ */
+static inline void byte_set_bit(u8 nr, u8 map[])
+{
+       map[nr >> 3] |= (1UL << (nr & 7));
+}
+
+static inline int byte_test_bit(u8 nr, u8 map[])
+{
+       return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
+}
+
+static inline void byte_copymap(u8 dmap[], unsigned long smap[],
+                       unsigned int sz)
+{
+       unsigned int nn;
+
+       if (!sz)
+               return;
+
+       memset(dmap, 0, ((sz + 7) >> 3));
+       for (nn = 0 ; nn < sz; nn++)
+               if (test_bit(nn, smap))
+                       byte_set_bit(nn, dmap);
+}
+
+static void dlm_free_pagevec(void **vec, int pages)
+{
+       while (pages--)
+               free_page((unsigned long)vec[pages]);
+       kfree(vec);
+}
+
+static void **dlm_alloc_pagevec(int pages)
+{
+       void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL);
+       int i;
+
+       if (!vec)
+               return NULL;
+
+       for (i = 0; i < pages; i++)
+               if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL)))
+                       goto out_free;
+
+       mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
+            pages, (unsigned long)DLM_HASH_PAGES,
+            (unsigned long)DLM_BUCKETS_PER_PAGE);
+       return vec;
+out_free:
+       dlm_free_pagevec(vec, i);
+       return NULL;
+}
+
+/*
  *
  * spinlock lock ordering: if multiple locks are needed, obey this ordering:
  *    dlm_domain_lock
  *
  */
 
-spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(dlm_domain_lock);
 LIST_HEAD(dlm_domains);
 static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
 
 #define DLM_DOMAIN_BACKOFF_MS 200
 
-static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data);
+static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
+                                 void **ret_data);
+static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
+                                    void **ret_data);
+static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
+                                  void **ret_data);
+static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
+                                  void **ret_data);
 
 static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
 
 void __dlm_unhash_lockres(struct dlm_lock_resource *lockres)
 {
-       hlist_del_init(&lockres->hash_node);
-       dlm_lockres_put(lockres);
+       if (!hlist_unhashed(&lockres->hash_node)) {
+               hlist_del_init(&lockres->hash_node);
+               dlm_lockres_put(lockres);
+       }
 }
 
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
@@ -90,7 +153,7 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
        assert_spin_locked(&dlm->spinlock);
 
        q = &res->lockname;
-       bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
+       bucket = dlm_lockres_hash(dlm, q->hash);
 
        /* get a reference for our hashtable */
        dlm_lockres_get(res);
@@ -98,33 +161,64 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
        hlist_add_head(&res->hash_node, bucket);
 }
 
+struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
+                                                    const char *name,
+                                                    unsigned int len,
+                                                    unsigned int hash)
+{
+       struct hlist_head *bucket;
+       struct hlist_node *list;
+
+       mlog_entry("%.*s\n", len, name);
+
+       assert_spin_locked(&dlm->spinlock);
+
+       bucket = dlm_lockres_hash(dlm, hash);
+
+       hlist_for_each(list, bucket) {
+               struct dlm_lock_resource *res = hlist_entry(list,
+                       struct dlm_lock_resource, hash_node);
+               if (res->lockname.name[0] != name[0])
+                       continue;
+               if (unlikely(res->lockname.len != len))
+                       continue;
+               if (memcmp(res->lockname.name + 1, name + 1, len - 1))
+                       continue;
+               dlm_lockres_get(res);
+               return res;
+       }
+       return NULL;
+}
+
+/* intended to be called by functions which do not care about lock
+ * resources which are being purged (most net _handler functions).
+ * this will return NULL for any lock resource which is found but
+ * currently in the process of dropping its mastery reference.
+ * use __dlm_lookup_lockres_full when you need the lock resource
+ * regardless (e.g. dlm_get_lock_resource) */
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
                                                const char *name,
                                                unsigned int len,
                                                unsigned int hash)
 {
-       struct hlist_node *iter;
-       struct dlm_lock_resource *tmpres=NULL;
-       struct hlist_head *bucket;
+       struct dlm_lock_resource *res = NULL;
 
        mlog_entry("%.*s\n", len, name);
 
        assert_spin_locked(&dlm->spinlock);
 
-       bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
-
-       /* check for pre-existing lock */
-       hlist_for_each(iter, bucket) {
-               tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
-               if (tmpres->lockname.len == len &&
-                   memcmp(tmpres->lockname.name, name, len) == 0) {
-                       dlm_lockres_get(tmpres);
-                       break;
+       res = __dlm_lookup_lockres_full(dlm, name, len, hash);
+       if (res) {
+               spin_lock(&res->spinlock);
+               if (res->state & DLM_LOCK_RES_DROPPING_REF) {
+                       spin_unlock(&res->spinlock);
+                       dlm_lockres_put(res);
+                       return NULL;
                }
-
-               tmpres = NULL;
+               spin_unlock(&res->spinlock);
        }
-       return tmpres;
+
+       return res;
 }
 
 struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
@@ -192,7 +286,7 @@ static int dlm_wait_on_domain_helper(const char *domain)
 static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
 {
        if (dlm->lockres_hash)
-               free_page((unsigned long) dlm->lockres_hash);
+               dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
 
        if (dlm->name)
                kfree(dlm->name);
@@ -276,11 +370,21 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
        return ret;
 }
 
+static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
+{
+       if (dlm->dlm_worker) {
+               flush_workqueue(dlm->dlm_worker);
+               destroy_workqueue(dlm->dlm_worker);
+               dlm->dlm_worker = NULL;
+       }
+}
+
 static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
 {
        dlm_unregister_domain_handlers(dlm);
        dlm_complete_thread(dlm);
        dlm_complete_recovery_thread(dlm);
+       dlm_destroy_dlm_worker(dlm);
 
        /* We've left the domain. Now we can take ourselves out of the
         * list and allow the kref stuff to help us free the
@@ -293,43 +397,60 @@ static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
        wake_up(&dlm_domain_events);
 }
 
-static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
+static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
 {
-       int i;
+       int i, num, n, ret = 0;
        struct dlm_lock_resource *res;
+       struct hlist_node *iter;
+       struct hlist_head *bucket;
+       int dropped;
 
        mlog(0, "Migrating locks from domain %s\n", dlm->name);
-restart:
+
+       num = 0;
        spin_lock(&dlm->spinlock);
        for (i = 0; i < DLM_HASH_BUCKETS; i++) {
-               while (!hlist_empty(&dlm->lockres_hash[i])) {
-                       res = hlist_entry(dlm->lockres_hash[i].first,
-                                         struct dlm_lock_resource, hash_node);
-                       /* need reference when manually grabbing lockres */
+redo_bucket:
+               n = 0;
+               bucket = dlm_lockres_hash(dlm, i);
+               iter = bucket->first;
+               while (iter) {
+                       n++;
+                       res = hlist_entry(iter, struct dlm_lock_resource,
+                                         hash_node);
                        dlm_lockres_get(res);
-                       /* this should unhash the lockres
-                        * and exit with dlm->spinlock */
-                       mlog(0, "purging res=%p\n", res);
-                       if (dlm_lockres_is_dirty(dlm, res)) {
-                               /* HACK!  this should absolutely go.
-                                * need to figure out why some empty
-                                * lockreses are still marked dirty */
-                               mlog(ML_ERROR, "lockres %.*s dirty!\n",
-                                    res->lockname.len, res->lockname.name);
-
-                               spin_unlock(&dlm->spinlock);
-                               dlm_kick_thread(dlm, res);
-                               wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
-                               dlm_lockres_put(res);
-                               goto restart;
-                       }
-                       dlm_purge_lockres(dlm, res);
+                       /* migrate, if necessary.  this will drop the dlm
+                        * spinlock and retake it if it does migration. */
+                       dropped = dlm_empty_lockres(dlm, res);
+
+                       spin_lock(&res->spinlock);
+                       __dlm_lockres_calc_usage(dlm, res);
+                       iter = res->hash_node.next;
+                       spin_unlock(&res->spinlock);
+
                        dlm_lockres_put(res);
+
+                       cond_resched_lock(&dlm->spinlock);
+
+                       if (dropped)
+                               goto redo_bucket;
                }
+               num += n;
+               mlog(0, "%s: touched %d lockreses in bucket %d "
+                    "(tot=%d)\n", dlm->name, n, i, num);
        }
        spin_unlock(&dlm->spinlock);
-
+       wake_up(&dlm->dlm_thread_wq);
+
+       /* let the dlm thread take care of purging, keep scanning until
+        * nothing remains in the hash */
+       if (num) {
+               mlog(0, "%s: %d lock resources in hash last pass\n",
+                    dlm->name, num);
+               ret = -EAGAIN;
+       }
        mlog(0, "DONE Migrating locks from domain %s\n", dlm->name);
+       return ret;
 }
 
 static int dlm_no_joining_node(struct dlm_ctxt *dlm)
@@ -372,15 +493,17 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
 
        assert_spin_locked(&dlm->spinlock);
 
-       mlog(ML_NOTICE, "Nodes in my domain (\"%s\"):\n", dlm->name);
+       printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name);
 
        while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
                                     node + 1)) < O2NM_MAX_NODES) {
-               mlog(ML_NOTICE, " node %d\n", node);
+               printk("%d ", node);
        }
+       printk("\n");
 }
 
-static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
+                                  void **ret_data)
 {
        struct dlm_ctxt *dlm = data;
        unsigned int node;
@@ -393,7 +516,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data)
 
        node = exit_msg->node_idx;
 
-       mlog(0, "Node %u leaves domain %s\n", node, dlm->name);
+       printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name);
 
        spin_lock(&dlm->spinlock);
        clear_bit(node, dlm->domain_map);
@@ -533,7 +656,11 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
                /* We changed dlm state, notify the thread */
                dlm_kick_thread(dlm, NULL);
 
-               dlm_migrate_all_locks(dlm);
+               while (dlm_migrate_all_locks(dlm)) {
+                       /* Give dlm_thread time to purge the lockres' */
+                       msleep(500);
+                       mlog(0, "%s: more migration to do\n", dlm->name);
+               }
                dlm_mark_domain_leaving(dlm);
                dlm_leave_domain(dlm);
                dlm_complete_dlm_shutdown(dlm);
@@ -542,11 +669,13 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 }
 EXPORT_SYMBOL_GPL(dlm_unregister_domain);
 
-static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
+                                 void **ret_data)
 {
        struct dlm_query_join_request *query;
        enum dlm_query_join_response response;
        struct dlm_ctxt *dlm = NULL;
+       u8 nodenum;
 
        query = (struct dlm_query_join_request *) msg->buf;
 
@@ -570,6 +699,28 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
 
        spin_lock(&dlm_domain_lock);
        dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
+       if (!dlm)
+               goto unlock_respond;
+
+       /*
+        * There is a small window where the joining node may not see the
+        * node(s) that just left but still part of the cluster. DISALLOW
+        * join request if joining node has different node map.
+        */
+       nodenum=0;
+       while (nodenum < O2NM_MAX_NODES) {
+               if (test_bit(nodenum, dlm->domain_map)) {
+                       if (!byte_test_bit(nodenum, query->node_map)) {
+                               mlog(0, "disallow join as node %u does not "
+                                    "have node %u in its nodemap\n",
+                                    query->node_idx, nodenum);
+                               response = JOIN_DISALLOW;
+                               goto unlock_respond;
+                       }
+               }
+               nodenum++;
+       }
+
        /* Once the dlm ctxt is marked as leaving then we don't want
         * to be put in someone's domain map. 
         * Also, explicitly disallow joining at certain troublesome
@@ -588,15 +739,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
                        /* Disallow parallel joins. */
                        response = JOIN_DISALLOW;
                } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
-                       mlog(ML_NOTICE, "node %u trying to join, but recovery "
+                       mlog(0, "node %u trying to join, but recovery "
                             "is ongoing.\n", bit);
                        response = JOIN_DISALLOW;
                } else if (test_bit(bit, dlm->recovery_map)) {
-                       mlog(ML_NOTICE, "node %u trying to join, but it "
+                       mlog(0, "node %u trying to join, but it "
                             "still needs recovery.\n", bit);
                        response = JOIN_DISALLOW;
                } else if (test_bit(bit, dlm->domain_map)) {
-                       mlog(ML_NOTICE, "node %u trying to join, but it "
+                       mlog(0, "node %u trying to join, but it "
                             "is still in the domain! needs recovery?\n",
                             bit);
                        response = JOIN_DISALLOW;
@@ -611,6 +762,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
 
                spin_unlock(&dlm->spinlock);
        }
+unlock_respond:
        spin_unlock(&dlm_domain_lock);
 
 respond:
@@ -619,7 +771,8 @@ respond:
        return response;
 }
 
-static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
+                                    void **ret_data)
 {
        struct dlm_assert_joined *assert;
        struct dlm_ctxt *dlm = NULL;
@@ -642,6 +795,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
                set_bit(assert->node_idx, dlm->domain_map);
                __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
 
+               printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n",
+                      assert->node_idx, dlm->name);
                __dlm_print_nodes(dlm);
 
                /* notify anything attached to the heartbeat events */
@@ -654,7 +809,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
        return 0;
 }
 
-static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
+                                  void **ret_data)
 {
        struct dlm_cancel_join *cancel;
        struct dlm_ctxt *dlm = NULL;
@@ -756,6 +912,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
        join_msg.name_len = strlen(dlm->name);
        memcpy(join_msg.domain, dlm->name, join_msg.name_len);
 
+       /* copy live node map to join message */
+       byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
+
        status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
                                    sizeof(join_msg), node, &retval);
        if (status < 0 && status != -ENOPROTOOPT) {
@@ -880,7 +1039,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
 
        mlog_entry("%p", dlm);
 
-       ctxt = kcalloc(1, sizeof(*ctxt), GFP_KERNEL);
+       ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (!ctxt) {
                status = -ENOMEM;
                mlog_errno(status);
@@ -996,98 +1155,106 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
        status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key,
                                        sizeof(struct dlm_master_request),
                                        dlm_master_request_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key,
                                        sizeof(struct dlm_assert_master),
                                        dlm_assert_master_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, dlm_assert_master_post_handler,
+                                       &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key,
                                        sizeof(struct dlm_create_lock),
                                        dlm_create_lock_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key,
                                        DLM_CONVERT_LOCK_MAX_LEN,
                                        dlm_convert_lock_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key,
                                        DLM_UNLOCK_LOCK_MAX_LEN,
                                        dlm_unlock_lock_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key,
                                        DLM_PROXY_AST_MAX_LEN,
                                        dlm_proxy_ast_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key,
                                        sizeof(struct dlm_exit_domain),
                                        dlm_exit_domain_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
+       if (status)
+               goto bail;
+
+       status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key,
+                                       sizeof(struct dlm_deref_lockres),
+                                       dlm_deref_lockres_handler,
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key,
                                        sizeof(struct dlm_migrate_request),
                                        dlm_migrate_request_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key,
                                        DLM_MIG_LOCKRES_MAX_LEN,
                                        dlm_mig_lockres_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key,
                                        sizeof(struct dlm_master_requery),
                                        dlm_master_requery_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key,
                                        sizeof(struct dlm_lock_request),
                                        dlm_request_all_locks_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key,
                                        sizeof(struct dlm_reco_data_done),
                                        dlm_reco_data_done_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key,
                                        sizeof(struct dlm_begin_reco),
                                        dlm_begin_reco_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key,
                                        sizeof(struct dlm_finalize_reco),
                                        dlm_finalize_reco_handler,
-                                       dlm, &dlm->dlm_domain_handlers);
+                                       dlm, NULL, &dlm->dlm_domain_handlers);
        if (status)
                goto bail;
 
@@ -1101,6 +1268,8 @@ bail:
 static int dlm_join_domain(struct dlm_ctxt *dlm)
 {
        int status;
+       unsigned int backoff;
+       unsigned int total_backoff = 0;
 
        BUG_ON(!dlm);
 
@@ -1124,19 +1293,35 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
                goto bail;
        }
 
+       dlm->dlm_worker = create_singlethread_workqueue("dlm_wq");
+       if (!dlm->dlm_worker) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               goto bail;
+       }
+
        do {
-               unsigned int backoff;
                status = dlm_try_to_join_domain(dlm);
 
                /* If we're racing another node to the join, then we
                 * need to back off temporarily and let them
                 * complete. */
+#define        DLM_JOIN_TIMEOUT_MSECS  90000
                if (status == -EAGAIN) {
                        if (signal_pending(current)) {
                                status = -ERESTARTSYS;
                                goto bail;
                        }
 
+                       if (total_backoff >
+                           msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+                               status = -ERESTARTSYS;
+                               mlog(ML_NOTICE, "Timed out joining dlm domain "
+                                    "%s after %u msecs\n", dlm->name,
+                                    jiffies_to_msecs(total_backoff));
+                               goto bail;
+                       }
+
                        /*
                         * <chip> After you!
                         * <dale> No, after you!
@@ -1146,6 +1331,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
                         */
                        backoff = (unsigned int)(jiffies & 0x3);
                        backoff *= DLM_DOMAIN_BACKOFF_MS;
+                       total_backoff += backoff;
                        mlog(0, "backoff %d\n", backoff);
                        msleep(backoff);
                }
@@ -1164,6 +1350,7 @@ bail:
                dlm_unregister_domain_handlers(dlm);
                dlm_complete_thread(dlm);
                dlm_complete_recovery_thread(dlm);
+               dlm_destroy_dlm_worker(dlm);
        }
 
        return status;
@@ -1175,7 +1362,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
        int i;
        struct dlm_ctxt *dlm = NULL;
 
-       dlm = kcalloc(1, sizeof(*dlm), GFP_KERNEL);
+       dlm = kzalloc(sizeof(*dlm), GFP_KERNEL);
        if (!dlm) {
                mlog_errno(-ENOMEM);
                goto leave;
@@ -1189,7 +1376,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
                goto leave;
        }
 
-       dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL);
+       dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES);
        if (!dlm->lockres_hash) {
                mlog_errno(-ENOMEM);
                kfree(dlm->name);
@@ -1198,8 +1385,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
                goto leave;
        }
 
-       for (i=0; i<DLM_HASH_BUCKETS; i++)
-               INIT_HLIST_HEAD(&dlm->lockres_hash[i]);
+       for (i = 0; i < DLM_HASH_BUCKETS; i++)
+               INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
 
        strcpy(dlm->name, domain);
        dlm->key = key;
@@ -1229,6 +1416,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
        dlm->dlm_thread_task = NULL;
        dlm->dlm_reco_thread_task = NULL;
+       dlm->dlm_worker = NULL;
        init_waitqueue_head(&dlm->dlm_thread_wq);
        init_waitqueue_head(&dlm->dlm_reco_thread_wq);
        init_waitqueue_head(&dlm->reco.event);
@@ -1248,7 +1436,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
        spin_lock_init(&dlm->work_lock);
        INIT_LIST_HEAD(&dlm->work_list);
-       INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work, dlm);
+       INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work);
 
        kref_init(&dlm->dlm_refs);
        dlm->dlm_state = DLM_CTXT_NEW;
@@ -1372,21 +1560,21 @@ static int dlm_register_net_handlers(void)
        status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
                                        sizeof(struct dlm_query_join_request),
                                        dlm_query_join_handler,
-                                       NULL, &dlm_join_handlers);
+                                       NULL, NULL, &dlm_join_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
                                        sizeof(struct dlm_assert_joined),
                                        dlm_assert_joined_handler,
-                                       NULL, &dlm_join_handlers);
+                                       NULL, NULL, &dlm_join_handlers);
        if (status)
                goto bail;
 
        status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
                                        sizeof(struct dlm_cancel_join),
                                        dlm_cancel_join_handler,
-                                       NULL, &dlm_join_handlers);
+                                       NULL, NULL, &dlm_join_handlers);
 
 bail:
        if (status < 0)