Gitweb:
http://git.fedorahosted.org/git/?p=dlm.git;a=commitdiff;h=16ddbe2d61b4edbed4d911c523a82bcf5f325b70
Commit: 16ddbe2d61b4edbed4d911c523a82bcf5f325b70
Parent: f65f3f67b0f87f52398ff1100aaede36e254abda
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Feb 6 14:11:15 2013 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Feb 6 14:11:15 2013 -0600
dlm_controld: ignore fence_ack for member
When a node is still a member of the daemon cpg,
ignore a fence_ack for it. Either it never failed
and the ack is pointless, or it merged after a
partition, fencing hasn't completed, and it's still
a merged member that needs to be reset.
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/daemon_cpg.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index af9b91d..641b189 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -1251,6 +1251,7 @@ static void receive_fence_result(struct dlm_header *hd, int len)
if (count) {
log_debug("receive_fence_result %d from %d clear startup",
fr->nodeid, hd->nodeid);
+ return;
}
node = get_node_daemon(fr->nodeid);
@@ -1274,6 +1275,22 @@ static void receive_fence_result(struct dlm_header *hd, int len)
/* should we ignore and return here? */
}
+ if (!fr->result && node->daemon_member) {
+
+ /*
+ * the only time I think this can happen is if there is a
+ * manual dlm_tool fence_ack for a node that is a member,
+ * e.g. partition, merge, fence_ack while it's a merged member.
+ * Ideally it would be killed after merging with state, but
+ * not necessarily, i.e. its start message can't be sent or
+ * received.
+ */
+
+ log_error("receive_fence_result %d from %d result %d node not dead",
+ fr->nodeid, hd->nodeid, fr->result);
+ return;
+ }
+
if ((hd->nodeid == our_nodeid) && (fr->result != -ECANCELED))
node->fence_result_wait = 0;