Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 83fa41cb23b81108a6ffc6fe79c2656238a0ffb7
Parent: 1f283367656fdad0ae5fd66c2cd58ec0fc08f9f4
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Mon Dec 21 17:54:37 2009 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Dec 22 10:53:47 2009 -0500
rgmanager: Fix event generation with central_processing
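This patch fixes event generation and processing when
a node dies. Previously, when a node failed and was
fenced, no events were generated for the services that
had been running on that host, so dependent services
were often not restarted correctly.
To address this, _svc_stop() now broadcasts an
RG_STATE_STOPPED event in the case that used to fall
through silently, and the default event script calls
service_stop() on a service whose owner is the node that
just went offline before deciding where to start it.
The patch also adds a global cluster_timeout (initially
10) and passes 2 * cluster_timeout as the final
msg_open() argument in place of the hard-coded 10 and 2.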
Resolves: rhbz#523999
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/include/resgroup.h | 2 ++
rgmanager/src/daemons/main.c | 1 +
rgmanager/src/daemons/rg_forward.c | 4 ++--
rgmanager/src/daemons/rg_state.c | 9 ++++++---
rgmanager/src/daemons/service_op.c | 2 +-
rgmanager/src/resources/default_event_script.sl | 8 ++++++++
6 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index 7011a0c..5a13fcf 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -180,6 +180,8 @@ int get_rg_state_local(const char *servicename, rg_state_t *svcblk);
uint32_t best_target_node(cluster_member_list_t *allowed, uint32_t owner,
const char *rg_name, int lock);
+extern int cluster_timeout;
+
#ifdef DEBUG
int _rg_lock(const char *name, struct dlm_lksb *p);
int _rg_lock_dbg(const char *, struct dlm_lksb *, const char *, int);
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 04d9961..883266a 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -34,6 +34,7 @@ void flag_shutdown(int sig);
int watchdog_init(void);
+int cluster_timeout = 10;
int shutdown_pending = 0, running = 1, need_reconfigure = 0;
char debug = 0; /* XXX* */
static int signalled = 0;
diff --git a/rgmanager/src/daemons/rg_forward.c b/rgmanager/src/daemons/rg_forward.c
index bb42922..48649b8 100644
--- a/rgmanager/src/daemons/rg_forward.c
+++ b/rgmanager/src/daemons/rg_forward.c
@@ -85,7 +85,7 @@ forwarding_thread(void *arg)
build_message(&msg, req->rr_request, req->rr_group, req->rr_target,
req->rr_arg0, req->rr_arg1);
- if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0) {
+ if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 2 * cluster_timeout) < 0) {
logt_print(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
rgs.rs_owner, ctx);
goto out_fail;
@@ -184,7 +184,7 @@ forwarding_thread_v2(void *arg)
strerror(errno));
goto out_fail;
}
- if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 10) < 0) {
+ if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 2 * cluster_timeout) < 0) {
logt_print(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
target, ctx);
goto out_fail;
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 6f80047..029100e 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -969,7 +969,7 @@ get_new_owner(const char *svcName)
continue;
if (msg_open(MSG_CLUSTER, membership->cml_members[x].cn_nodeid,
- RG_PORT, &ctx, 10) < 0) {
+ RG_PORT, &ctx, 2 * cluster_timeout) < 0) {
/* failed to open: better to claim false successful
status rather than claim a failure and possibly
end up with a service on >1 node */
@@ -1254,7 +1254,10 @@ _svc_stop(const char *svcName, int req, int recover, uint32_t newstate)
rg_unlock(&lockp);
return RG_EFAIL;
}
- /* FALLTHROUGH */
+ rg_unlock(&lockp);
+ broadcast_event(svcName, RG_STATE_STOPPED,
+ -1, svcStatus.rs_last_owner);
+ return RG_ESUCCESS;
case 2:
rg_unlock(&lockp);
return RG_ESUCCESS;
@@ -1553,7 +1556,7 @@ svc_start_remote(const char *svcName, int request, uint32_t target)
msg_relo.sm_data.d_svcOwner = target;
/* Open a connection to the other node */
- if (msg_open(MSG_CLUSTER, target, RG_PORT, &ctx, 2)< 0) {
+ if (msg_open(MSG_CLUSTER, target, RG_PORT, &ctx, 2 * cluster_timeout)< 0) {
logt_print(LOG_ERR,
"#58: Failed opening connection to member #%d\n",
target);
diff --git a/rgmanager/src/daemons/service_op.c b/rgmanager/src/daemons/service_op.c
index a508f1e..112b267 100644
--- a/rgmanager/src/daemons/service_op.c
+++ b/rgmanager/src/daemons/service_op.c
@@ -142,7 +142,7 @@ service_op_stop(char *svcName, int do_disable, int event_type)
}
}
- if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2)< 0) {
+ if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2 * cluster_timeout)< 0) {
logt_print(LOG_ERR,
"#58: Failed opening connection to member #%d\n",
my_id());
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index 84e6d72..fad22ac 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -157,6 +157,14 @@ define move_or_start(service, node_list)
(,,, owner, state) = service_status(service);
debug("Evaluating ", service, " state=", state, " owner=",
owner);
+ if ((event_type == EVENT_NODE) and (node_id == owner) and
+ (node_state == NODE_OFFLINE)) {
+ info("Marking service ", service, " on down member ",
+ owner, " as stopped");
+ if (service_stop(service) < 0) {
+ return ERR_ABORT;
+ }
+ }
len = length(node_list);
if (len == 0) {