cluster: RHEL56 - cman: Clean shutdown_con if the controlling process is killed.
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 1671c39acedc665dc827165d0aa0a1c696c2da8f
Parent: 36fd94d68003389d9585c87a649726742e06badd
Author: Christine Caulfield <ccaulfie(a)redhat.com>
AuthorDate: Fri Sep 19 13:02:40 2008 +0100
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Mon Feb 27 15:54:57 2012 +0100
cman: Clean shutdown_con if the controlling process is killed.
If a shutdown is initiated by a process that is then killed, the
shutdown_con isn't cleared. So if another process replies to the
shutdown request, cman could segfault.
Resolves: rhbz#795814
Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com>
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/daemon/commands.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 6ffe9e5..781e564 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1482,6 +1482,11 @@ void unbind_con(struct connection *con)
check_shutdown_status();
}
+
+ /* If the controlling shutdown process has quit, then cancel the
+ shutdown session */
+ if (con == shutdown_con)
+ shutdown_con = NULL;
}
/* Post a PORT OPEN/CLOSE event to anyone listening on this end */
12 years, 1 month
dlm: master - dlm_controld: fix stonith calls
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: eeb80cd73160856a16f73fc45d1ab48904aa55f0
Parent: 2ad89c869cbd6338361ccf25f7c84c719d88e5a9
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Feb 22 14:37:52 2012 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Feb 22 14:38:56 2012 -0600
dlm_controld: fix stonith calls
and handle errors
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/cpg.c | 21 +++++++++++++++------
dlm_controld/dlm_daemon.h | 2 +-
dlm_controld/fence.c | 20 +++++++++-----------
3 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/dlm_controld/cpg.c b/dlm_controld/cpg.c
index ced912f..1360493 100644
--- a/dlm_controld/cpg.c
+++ b/dlm_controld/cpg.c
@@ -618,8 +618,10 @@ static int check_fencing_done(struct lockspace *ls)
fenced since node->start_time */
rv = fence_node_time(node->nodeid, &last_fenced_time);
- if (rv < 0)
- log_error("fenced_node_info error %d", rv);
+ if (rv < 0) {
+ log_error("fenced_node_time error %d", rv);
+ continue;
+ }
/* fenced gives us real time */
@@ -653,8 +655,10 @@ static int check_fencing_done(struct lockspace *ls)
}
}
- if (wait_count)
+ if (wait_count) {
+ log_group(ls, "check_fencing wait_count %d", wait_count);
return 0;
+ }
/* now check if there are any outstanding fencing ops (for nodes
we may not have seen in any lockspace), and return 0 if there
@@ -666,8 +670,10 @@ static int check_fencing_done(struct lockspace *ls)
return 0;
}
- if (in_progress)
+ if (in_progress) {
+ log_group(ls, "check_fencing in progress %d", in_progress);
return 0;
+ }
log_group(ls, "check_fencing done");
return 1;
@@ -694,12 +700,15 @@ static int need_fencing(struct lockspace *ls)
static void request_fencing(struct lockspace *ls)
{
struct node *node;
+ int rv;
list_for_each_entry(node, &ls->node_history, list) {
if (!node->request_fencing)
continue;
- fence_request(node->nodeid);
- node->request_fencing = 0;
+ log_group(ls, "fence_request %d", node->nodeid);
+ rv = fence_request(node->nodeid);
+ if (!rv)
+ node->request_fencing = 0;
}
}
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 3899550..5fca041 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -348,7 +348,7 @@ void process_cluster_cfg(int ci);
void kick_node_from_cluster(int nodeid);
/* fence.c */
-void fence_request(int nodeid);
+int fence_request(int nodeid);
int fence_node_time(int nodeid, uint64_t *last_fenced_time);
int fence_in_progress(int *count);
diff --git a/dlm_controld/fence.c b/dlm_controld/fence.c
index f91d9de..50b7d4d 100644
--- a/dlm_controld/fence.c
+++ b/dlm_controld/fence.c
@@ -7,30 +7,28 @@
*/
#include "dlm_daemon.h"
-#ifdef STONITH
#include <pacemaker/crm/stonith-ng.h>
-#endif
-void fence_request(int nodeid)
+int fence_request(int nodeid)
{
-#ifdef STONITH
int rv;
- rv = stonith_api_kick_cs_helper(nodeid, 300, 1);
- if (rv)
- log_error("stonith_api_kick_cs_helper %d error %d", nodeid, rv);
-#endif
+ rv = stonith_api_kick_helper(nodeid, 300, 1);
+ if (rv) {
+ log_error("stonith_api_kick_helper %d error %d", nodeid, rv);
+ return rv;
+ }
+ return 0;
}
int fence_node_time(int nodeid, uint64_t *last_fenced_time)
{
-#ifdef STONITH
- *last_fenced_time = stonith_api_time_cs_helper(nodeid, 0);
-#endif
+ *last_fenced_time = stonith_api_time_helper(nodeid, 0);
return 0;
}
int fence_in_progress(int *count)
{
+ *count = 0;
return 0;
}
12 years, 1 month
cluster: STABLE32 - rgmanager: Retry when config is out of sync
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: b2012d8fe8b6a30f16091a8c96b5665e34892160
Parent: b4ca5bbb924ef9ef8e574a171e178f9440a28932
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Aug 5 16:53:22 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Wed Feb 22 10:31:44 2012 -0500
rgmanager: Retry when config is out of sync
If you add a service to rgmanager v1 or v2 and that
service fails to start on the first node but succeeds
in its initial stop operation, there is a chance that
the remote instance of rgmanager has not yet reread
the configuration, causing the service to be placed
into the 'recovering' state without further action.
This patch causes the originator of the request to
retry the operation.
Later versions of rgmanager (ex STABLE3 branch and
derivatives) are unlikely to have this problem since
configuration updates are not polled, but rather
delivered to clients.
Update 22-Feb-2012: The above is incorrect; this was
reproduced on an rgmanager v3 installation.
Resolves: rhbz#568126
Resolves: rhbz#796272
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/daemons/rg_state.c | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 8b5dcaa..80b992a 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1809,6 +1809,7 @@ handle_relocate_req(char *svcName, int orig_request, int preferred_target,
rg_state_t svcStatus;
int target = preferred_target, me = my_id();
int ret, x, request = orig_request;
+ int retries;
get_rg_state_local(svcName, &svcStatus);
if (svcStatus.rs_state == RG_STATE_DISABLED ||
@@ -1941,6 +1942,8 @@ handle_relocate_req(char *svcName, int orig_request, int preferred_target,
if (target == me)
goto exhausted;
+ retries = 0;
+retry:
ret = svc_start_remote(svcName, request, target);
switch (ret) {
case RG_ERUN:
@@ -1950,6 +1953,22 @@ handle_relocate_req(char *svcName, int orig_request, int preferred_target,
*new_owner = svcStatus.rs_owner;
free_member_list(allowed_nodes);
return 0;
+ case RG_ENOSERVICE:
+ /*
+ * Configuration update pending on remote node? Give it
+ * a few seconds to sync up. rhbz#568126
+ *
+ * Configuration updates are synchronized in later releases
+ * of rgmanager; this should not be needed.
+ */
+ if (retries++ < 4) {
+ sleep(3);
+ goto retry;
+ }
+ logt_print(LOG_WARNING, "Member #%d has a different "
+ "configuration than I do; trying next "
+ "member.", target);
+ /* Deliberate */
case RG_EDEPEND:
case RG_EFAIL:
/* Uh oh - we failed to relocate to this node.
12 years, 1 month
cluster: RHEL6 - rgmanager: Fix dependency restart bug in CP mode
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 4fbd15844e11e3677e865779e57b5943ca6f3c99
Parent: 4758c35d2a9862bd7a8ac79d5c2239b6a79ca0bf
Author: Adam Drew <adrew(a)redhat.com>
AuthorDate: Fri Oct 7 12:31:16 2011 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:37:11 2012 -0500
rgmanager: Fix dependency restart bug in CP mode
If we:
- had central_processing mode enabled,
- had a service with 'depend' set on another
service,
- we attempted to restart the service with the
dependency, and
- the dependency was missing
rgmanager would leave the state in 'recovering'.
This is the corrected patch from Adam.
Resolves: rhbz#744824
Signed-off-by: Adam Drew <adrew(a)redhat.com>
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/default_event_script.sl | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index b14f466..c54c068 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -180,6 +180,7 @@ define move_or_start(service, node_list)
((event_type == EVENT_NODE) and (owner == node_id) and
(node_state == 0))) {
debug(service, " is not runnable; dependency not met");
+ ()=service_stop(service);
return ERR_DEPEND;
}
}
12 years, 1 month
cluster: RHEL6 - rgmanager: Fix dependency issue related to ordering
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 4758c35d2a9862bd7a8ac79d5c2239b6a79ca0bf
Parent: 2543119ffd1ee25ff8e6f205f696f1f53a5292cf
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Jan 5 10:43:12 2012 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:34:21 2012 -0500
rgmanager: Fix dependency issue related to ordering
If you ordered two services the opposite way in cluster.conf
('b' first, which depends on 'a'), then rgmanager would fail
over 'b' despite the fact that 'a' was not running yet - this
is because the state of 'a' was not set to 'stopped' - so we
need to check for a node-death event in move_or_start().
Resolves: rhbz#743218
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/default_event_script.sl | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index f3b35cd..b14f466 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -176,7 +176,9 @@ define move_or_start(service, node_list)
depends = service_property(service, "depend");
if (depends != "") {
(,,, owner, state) = service_status(depends);
- if (owner < 0) {
+ if ((owner < 0) or
+ ((event_type == EVENT_NODE) and (owner == node_id) and
+ (node_state == 0))) {
debug(service, " is not runnable; dependency not met");
return ERR_DEPEND;
}
12 years, 1 month
cluster: RHEL6 - rgmanager: Add -F to clusvcadm man page
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: fdcec853307c5d9ce0517cb0088de2e970f76ae7
Parent: 4fbd15844e11e3677e865779e57b5943ca6f3c99
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Fri Jul 29 13:30:38 2011 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:38:13 2012 -0500
rgmanager: Add -F to clusvcadm man page
Resolves: rhbz#745226
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/man/clusvcadm.8 | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/rgmanager/man/clusvcadm.8 b/rgmanager/man/clusvcadm.8
index d5e09eb..055a0bf 100644
--- a/rgmanager/man/clusvcadm.8
+++ b/rgmanager/man/clusvcadm.8
@@ -8,7 +8,7 @@ clusvcadm \- Cluster User Service Administration Utility
.B ]
.B [\-e
.I <service>
-.B ]
+.B [\-F] ]
.B [\-l]
.B [\-u]
.B [\-S]
@@ -52,10 +52,15 @@ on which the command is invoked.
Stops and disables the user service named
.I
service
-.IP "\-e <service>"
+.IP "\-e <service> [\-F]"
Enables and starts the user service named
.I
service
+
+Normally, rgmanager starts the service locally (i.e. on
+the host where clusvcadm was run). However, if the \fB-F\fP
+option is specified, rgmanager will use the assigned failover
+domain rules as hints on where to start the service.
.IP \-l
Lock services in preparation for cluster shutdown. This should only
be used if the administrator intends to perform a global, cluster
12 years, 1 month
cluster: RHEL6 - rgmanager: Fix call to service_status()
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 2353fc5a8b4f590d97e10e0c6cd2c91506eb853a
Parent: 6563d1529e4929ad4094326da06ba1eb6b73c7c3
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Jan 5 09:03:28 2012 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:33:03 2012 -0500
rgmanager: Fix call to service_status()
This patch simply fixes the calls to service_status(); the
s_trans variable was being incorrectly set.
Resolves: rhbz#743218
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/default_event_script.sl | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index 763201d..f3b35cd 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -519,7 +519,7 @@ define default_service_event_handler()
continue;
}
- (d_trans,,,, owner, state) = service_status(services[x], 1);
+ (d_trans,,,,, owner, state) = service_status(services[x], 1);
if ((service_state == "started") and (owner < 0) and
(state == "stopped")) {
info("Dependency met; starting ", services[x]);
@@ -537,9 +537,12 @@ define default_service_event_handler()
% as above is running and the dependent service was
% started at or after the service, then stopping it
% will result in unwanted service outage.
- (s_trans,,,, s_state) = service_status(service_name);
+ (s_trans,,,,,, s_state) = service_status(service_name, 1);
if ((s_state == "started") and (state == "started") and
(d_trans >= s_trans)) {
+ %debug("S:", service_name, " trans ", s_trans);
+ %debug("D:", services[x], " trans ", d_trans);
+
debug("Skipping ", services[x],
"; restart not needed");
continue;
12 years, 1 month
cluster: RHEL6 - rgmanager: Fix tiny memory leak in sl_service_status
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 2543119ffd1ee25ff8e6f205f696f1f53a5292cf
Parent: 2353fc5a8b4f590d97e10e0c6cd2c91506eb853a
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Jan 5 10:12:05 2012 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:33:45 2012 -0500
rgmanager: Fix tiny memory leak in sl_service_status
Resolves: rhbz#743218
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/daemons/slang_event.c | 23 ++++++++++++++---------
1 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index c302de5..45110a3 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -209,7 +209,8 @@ get_service_state_internal(const char *svcName, rg_state_t *svcStatus)
service_status(servicename)
For extra information (flags, transition time)
- (transition_time, flags, rte, restarts, owner, state) =
+ (transition_time, flags, rte, restarts, last_owner,
+ owner, state) =
service_status(servicename, 1);
*/
static void
@@ -273,7 +274,7 @@ sl_service_status(void)
(char *)"%s: Failed to get status for %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
if (extra) {
@@ -285,7 +286,7 @@ sl_service_status(void)
(char *)"%s: Failed to push mtime %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
flags = (int)svcStatus.rs_flags;
@@ -294,7 +295,7 @@ sl_service_status(void)
(char *)"%s: Failed to push flags %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
}
@@ -304,7 +305,7 @@ sl_service_status(void)
(char *)"%s: Failed to push restarts_exceeded %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
@@ -312,7 +313,7 @@ sl_service_status(void)
(char *)"%s: Failed to push restarts for %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
if (SLang_push_integer(svcStatus.rs_last_owner) < 0) {
@@ -320,7 +321,7 @@ sl_service_status(void)
(char *)"%s: Failed to push last owner of %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
switch(svcStatus.rs_state) {
@@ -338,7 +339,7 @@ sl_service_status(void)
(char *)"%s: Failed to push owner of %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
if (svcStatus.rs_flags & RG_FLAG_FROZEN) {
@@ -354,7 +355,7 @@ sl_service_status(void)
(char *)"%s: Failed to duplicate state of %s",
__FUNCTION__,
svcName);
- return;
+ goto out_free;
}
if (SLang_push_malloced_string(state_str) < 0) {
@@ -364,6 +365,10 @@ sl_service_status(void)
svcName);
free(state_str);
}
+
+out_free:
+ if (svcName)
+ free(svcName);
}
12 years, 1 month
cluster: RHEL6 - rgmanager: Fix clusvcadm message when run with -F
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 6563d1529e4929ad4094326da06ba1eb6b73c7c3
Parent: 8168796b4f1d4e7435b2212a18d4cffbd86e1fa4
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Mon Aug 1 16:31:57 2011 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:27:22 2012 -0500
rgmanager: Fix clusvcadm message when run with -F
The new_owner was not being correctly set when enabling a service with
-F when run without central processing enabled.
Resolves: rhbz#727326
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/daemons/rg_state.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 5501b3f..23a4bec 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -2293,6 +2293,7 @@ handle_fd_start_req(char *svcName, int request, int *new_owner)
switch(ret) {
case RG_ESUCCESS:
+ *new_owner = target;
ret = RG_ESUCCESS;
goto out;
case RG_ERUN:
12 years, 1 month
cluster: RHEL6 - rgmanager: Resolve rare deadlock
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 8168796b4f1d4e7435b2212a18d4cffbd86e1fa4
Parent: 3624e3e0deed9eb3e6b4325a9ac3249dcb0a110a
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Tue Sep 6 15:42:41 2011 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Feb 21 13:26:33 2012 -0500
rgmanager: Resolve rare deadlock
In very rare cases, it was possible to fill up a pipe used for
IPC between threads during reconfiguration, causing a deadlock.
This patch resolves the issue.
Resolves: rhbz#635152
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/daemons/groups.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index e9413cd..ee0fc81 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -1683,8 +1683,6 @@ init_resource_groups(int reconfigure, int do_init)
free(val);
}
- /* Wait for any pending requests */
- rg_wait_threads();
/* Block operations that would break during configuration
changes */
rg_clear_initialized(FL_CONFIG);
12 years, 1 month