[PATCH] ocfs2: add dlm_wait_for_node_death
author Kurt Hackel <kurt.hackel@oracle.com>
Thu, 19 Jan 2006 01:05:38 +0000 (17:05 -0800)
committer Mark Fasheh <mark.fasheh@oracle.com>
Thu, 16 Feb 2006 20:01:38 +0000 (12:01 -0800)
* Add dlm_wait_for_node_death function to be used after receiving a network
  error.  This will wait up to the given timeout to allow the heartbeat
  callbacks to update the domain map.  Without this, some paths may spin
  and consume enough CPU that the heartbeat gets starved and never updates.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlm/dlmconvert.c
fs/ocfs2/dlm/dlmlock.c
fs/ocfs2/dlm/dlmrecovery.c

index 42eb53b..23ceaa7 100644 (file)
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
 
+/* max milliseconds to wait to sync up a network failure with a node death */
+#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
+
 #define DLM_PURGE_INTERVAL_MS   (8 * 1000)
 
 struct dlm_lock_resource
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
index f5c2f19..f66e2d8 100644 (file)
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
        } else {
                mlog_errno(tmpret);
                if (dlm_is_host_down(tmpret)) {
+                       /* instead of logging the same network error over
+                        * and over, sleep here and wait for the heartbeat
+                        * to notice the node is dead.  times out after 5s. */
+                       dlm_wait_for_node_death(dlm, res->owner, 
+                                               DLM_NODE_DEATH_WAIT_MAX);
                        ret = DLM_RECOVERING;
                        mlog(0, "node %u died so returning DLM_RECOVERING "
                             "from convert message!\n", res->owner);
index d1a0038..e709412 100644 (file)
@@ -646,7 +646,19 @@ retry_lock:
                        mlog(0, "retrying lock with migration/"
                             "recovery/in progress\n");
                        msleep(100);
-                       dlm_wait_for_recovery(dlm);
+                       /* no waiting for dlm_reco_thread */
+                       if (recovery) {
+                               if (status == DLM_RECOVERING) {
+                                       mlog(0, "%s: got RECOVERING "
+                                            "for $REOCVERY lock, master "
+                                            "was %u\n", dlm->name, 
+                                            res->owner);
+                                       dlm_wait_for_node_death(dlm, res->owner, 
+                                                       DLM_NODE_DEATH_WAIT_MAX);
+                               }
+                       } else {
+                               dlm_wait_for_recovery(dlm);
+                       }
                        goto retry_lock;
                }
 
index f9ce864..ed76bda 100644 (file)
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
        return dead;
 }
 
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+{      /* sleep on dlm_reco_thread_wq until dlm_is_node_dead(dlm, node) holds */
+       if (timeout) { /* nonzero: bounded wait, timeout is in milliseconds */
+               mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+                    "death of node %u\n", dlm->name, timeout, node);
+               wait_event_timeout(dlm->dlm_reco_thread_wq,
+                          dlm_is_node_dead(dlm, node),
+                          msecs_to_jiffies(timeout)); /* result is discarded */
+       } else { /* timeout == 0: wait with no time limit */
+               mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+                    "of death of node %u\n", dlm->name, node);
+               wait_event(dlm->dlm_reco_thread_wq,
+                          dlm_is_node_dead(dlm, node));
+       }
+       /* for now, return 0 -- callers cannot tell a timeout from a node death */
+       return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
  * block on the dlm->reco.event when recovery is in progress.
  * the dlm recovery thread will set this state when it begins