Gitweb:
http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=7cdf5a89712...
Commit: 7cdf5a89712994a06e1bf95879e580e81e6a9b9a
Parent: 87135d56ee90e7bb1a87b6d12fdd69a63aee747f
Author: Christine Caulfield <ccaulfie(a)redhat.com>
AuthorDate: Fri Jan 11 13:32:17 2013 +0000
Committer: Christine Caulfield <ccaulfie(a)redhat.com>
CommitterDate: Fri Jan 11 13:32:17 2013 +0000
cman|fenced: Fix node killing in case of a 2node cluster that suffers brief network
outage
This patch fixes a rare but nasty condition in cman and fenced. In a two-node
cluster, if the network splits for a period longer than the token timeout but
shorter than the time needed to fence a node, then each node can send a 'kill'
packet to the other, with the effect that cman quits on both nodes,
leaving no operational cluster.
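This patch adds a check for a two-node cluster. In cman, a 'kill' packet is
only sent to the node with the higher nodeid; in fenced, the node with the
higher nodeid waits 5 seconds before asking cman to kill the other node.
This ensures a predictable response to such events and ensures that services
can continue to run.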
The cman part of this patch is only active if Disallowed is enabled,
which is rare; the fenced part is more likely to be invoked.
Signed-off-by: David Teigland <teigland(a)redhat.com>
Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com>
Acked-By: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/daemon/commands.c | 23 +++++++++++++++++++++--
fence/fenced/config.c | 3 +++
fence/fenced/fd.h | 1 +
fence/fenced/main.c | 1 +
fence/fenced/member_cman.c | 11 +++++++++++
5 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index e7df2f3..bbbc460 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -2054,9 +2054,28 @@ static void do_process_transition(int nodeid, char *data)
/* Don't duplicate messages */
if (node->state != NODESTATE_AISONLY) {
if (cluster_is_quorate) {
- log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with
existing state", node->name);
node->state = NODESTATE_AISONLY;
- send_kill(nodeid, CLUSTER_KILL_REJOIN);
+
+ /* Oh, this gets even more complicated. Don't send a KILL message if we are in a
two_node
+ * cluster and that node has a lower node ID than us.
+ * This allows fencing time to start up and caters for the situation where
+ * a node rejoins REALLY quickly, before fencing has had time to work.
+ * I've split this up a bit, partly for clarity but mainly to allow us to
+ * print out helpful messages as to what we are up to here.
+ */
+ if (two_node) {
+ if (node->node_id > us->node_id) {
+ log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster
with existing state and has higher node ID", node->name);
+ send_kill(nodeid, CLUSTER_KILL_REJOIN);
+ }
+ else {
+ log_printf(LOG_CRIT, "Not killing node %s despite it rejoining the cluster
with existing state, it has a lower node ID", node->name);
+ }
+ }
+ else {
+ log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with
existing state", node->name);
+ send_kill(nodeid, CLUSTER_KILL_REJOIN);
+ }
}
else {
log_printf(LOG_CRIT, "Node %s not joined to cman because it has existing
state", node->name);
diff --git a/fence/fenced/config.c b/fence/fenced/config.c
index 66610ef..651ea8b 100644
--- a/fence/fenced/config.c
+++ b/fence/fenced/config.c
@@ -95,6 +95,7 @@ void read_ccs_int(const char *path, int *config_val)
#define OVERRIDE_PATH_PATH "/cluster/fence_daemon/@override_path"
#define OVERRIDE_TIME_PATH "/cluster/fence_daemon/@override_time"
#define METHOD_NAME_PATH
"/cluster/clusternodes/clusternode[@name=\"%s\"]/fence/method[%d]/@name"
+#define TWO_NODE_PATH "/cluster/cman/two_node"
static int count_methods(char *victim)
{
@@ -139,6 +140,8 @@ int read_ccs(struct fd *fd)
if (!optd_clean_start)
read_ccs_int(CLEAN_START_PATH, &cfgd_clean_start);
+ read_ccs_int(TWO_NODE_PATH, &two_node_mode);
+
reread_ccs();
if (!optd_override_path) {
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 8423d9d..3032369 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -75,6 +75,7 @@ extern char dump_buf[FENCED_DUMP_SIZE];
extern int dump_point;
extern int dump_wrap;
extern int group_mode;
+extern int two_node_mode;
extern void daemon_dump_save(void);
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index c7f6269..360e73e 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -1088,4 +1088,5 @@ char dump_buf[FENCED_DUMP_SIZE];
int dump_point;
int dump_wrap;
int group_mode;
+int two_node_mode;
diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index 061136e..859bbf0 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -27,6 +27,17 @@ void kick_node_from_cluster(int nodeid)
log_error("telling cman to shut down cluster locally");
cman_shutdown(ch_admin, CMAN_SHUTDOWN_ANYWAY);
} else {
+
+ /* in a two_node cluster where both nodes maintain quorum
+ * by themselves during a partition+merge, both will kick
+ * the other, which can leave both dead and unfenced.
+ * this delay should help */
+
+ if (two_node_mode && our_nodeid > nodeid) {
+ log_debug("kick_node_from_cluster %d delay", nodeid);
+ sleep(5);
+ }
+
log_error("telling cman to remove nodeid %d from cluster",
nodeid);
cman_kill_node(ch_admin, nodeid);