cluster: RHEL55 - fencing: New option --retry-on <N>
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: d51f90a0d58780c18671708c7c18483b1ef25b9c
Parent: 670629a1d944a4084258bca807d0152c246d2d4e
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Fri Nov 6 14:13:02 2009 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 14:41:49 2009 +0100
fencing: New option --retry-on <N>
New firmware (v1.70+) for ilo2 is much slower than before. Option --retry-on=X (on stdin retry_on=X)
is introduced; it attempts to send the power-on command to the machine and wait for the result up to X times.
Default for fence agents is 1 so they won't be affected; fence_ilo default value is 3.
Resolves: #bz507514 (timeout options are needed before applying this patch separately)
---
fence/agents/ilo/fence_ilo.py | 4 +++-
fence/agents/lib/fencing.py.py | 35 ++++++++++++++++++++++++++++-------
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/fence/agents/ilo/fence_ilo.py b/fence/agents/ilo/fence_ilo.py
index ae94017..3e45c84 100755
--- a/fence/agents/ilo/fence_ilo.py
+++ b/fence/agents/ilo/fence_ilo.py
@@ -58,11 +58,13 @@ def main():
device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
"action", "ipaddr", "login", "passwd", "passwd_script",
"ssl", "ribcl", "inet4_only", "inet6_only", "ipport",
- "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
+ "power_timeout", "shell_timeout", "login_timeout", "power_wait",
+ "retry_on" ]
atexit.register(atexit_handler)
all_opt["login_timeout"]["default"] = "10"
+ all_opt["retry_on"]["default"] = "3"
pinput = process_input(device_opt)
pinput["-z"] = 1
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index cb9bfde..3dd9763 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -346,7 +346,13 @@ all_opt = {
"getopt" : "M",
"longopt" : "missing-as-off",
"help" : "--missing-as-off Missing port returns OFF instead of failure",
- "order" : 200}
+ "order" : 200 },
+ "retry_on" : {
+ "getopt" : "r:",
+ "longopt" : "retry-on",
+ "help" : "--retry-on <attempts> Count of attempts to retry power on",
+ "default" : "1",
+ "order" : 200 }
}
class fspawn(pexpect.spawn):
@@ -725,9 +731,15 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if status == "on":
print "Success: Already ON"
else:
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn):
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn):
+ power_on = True
+ break
+
+ if power_on:
print "Success: Powered ON"
else:
fail(EC_WAITING_ON)
@@ -749,10 +761,19 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if wait_power_status(tn, options, get_power_fn) == 0:
fail(EC_WAITING_OFF)
options["-o"] = "on"
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn) == 0:
+
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn) == 1:
+ power_on = True
+ break
+
+ if power_on == False:
+ # this should not fail as not was fenced succesfully
sys.stderr.write('Timed out waiting to power ON\n')
+
print "Success: Rebooted"
elif options["-o"] == "status":
print "Status: " + status.upper()
14 years, 6 months
cluster: RHEL55 - fencing: New option '--missing-as-off' to return OFF if machine is missing
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 670629a1d944a4084258bca807d0152c246d2d4e
Parent: 2d6c88823e2f2e663d4499769152cc0d21644d34
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Mon Oct 19 15:28:23 2009 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 14:41:23 2009 +0100
fencing: New option '--missing-as-off' to return OFF if machine is missing
If a blade is not present (e.g. removed for maintenance), fence_bladecenter
cannot check its state because the slot is reported as empty.
Resolves: bz#248006
---
fence/agents/bladecenter/fence_bladecenter.py | 7 +++++--
fence/agents/lib/fencing.py.py | 7 ++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fence/agents/bladecenter/fence_bladecenter.py b/fence/agents/bladecenter/fence_bladecenter.py
index ef7ed53..ccb7436 100755
--- a/fence/agents/bladecenter/fence_bladecenter.py
+++ b/fence/agents/bladecenter/fence_bladecenter.py
@@ -31,7 +31,10 @@ def get_power_status(conn, options):
i = conn.log_expect(options, [ node_cmd, "system>" ] , int(options["-Y"]))
if i == 1:
## Given blade number does not exist
- fail(EC_STATUS)
+ if options.has_key("-M"):
+ return "off"
+ else:
+ fail(EC_STATUS)
conn.send("power -state\r\n")
conn.log_expect(options, node_cmd, int(options["-Y"]))
status = conn.before.splitlines()[-1]
@@ -93,7 +96,7 @@ def main():
"action", "ipaddr", "login", "passwd", "passwd_script",
"cmd_prompt", "secure", "port", "identity_file", "separator",
"inet4_only", "inet6_only", "ipport",
- "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
+ "power_timeout", "shell_timeout", "login_timeout", "power_wait", "missing_as_off" ]
atexit.register(atexit_handler)
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index 19644c2..cb9bfde 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -341,7 +341,12 @@ all_opt = {
"longopt" : "power-wait",
"help" : "--power-wait <seconds> Wait X seconds after issuing ON/OFF",
"default" : "0",
- "order" : 200 }
+ "order" : 200 },
+ "missing_as_off" : {
+ "getopt" : "M",
+ "longopt" : "missing-as-off",
+ "help" : "--missing-as-off Missing port returns OFF instead of failure",
+ "order" : 200}
}
class fspawn(pexpect.spawn):
14 years, 6 months
cluster: STABLE3 - fencing: fence_bladecenter needs longer timeout
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 07ad4fabbc55d7c58e427c2065bdbe8ecc7d80eb
Parent: 45ed9fd03c0718252f2ae418ffa7161198aa4688
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Fri Nov 6 14:16:20 2009 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 14:16:20 2009 +0100
fencing: fence_bladecenter needs longer timeout
Fence bladecenter needs longer power_wait 5 -> 10 seconds. Found during
tests for RHEV-H
Resolves: #bz526806
---
fence/agents/bladecenter/fence_bladecenter.py | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fence/agents/bladecenter/fence_bladecenter.py b/fence/agents/bladecenter/fence_bladecenter.py
index 64b7351..708fc84 100644
--- a/fence/agents/bladecenter/fence_bladecenter.py
+++ b/fence/agents/bladecenter/fence_bladecenter.py
@@ -99,7 +99,7 @@ def main():
atexit.register(atexit_handler)
- all_opt["power_wait"]["default"] = "5"
+ all_opt["power_wait"]["default"] = "10"
all_opt["cmd_prompt"]["default"] = "system>"
options = check_input(device_opt, process_input(device_opt))
14 years, 6 months
cluster: STABLE3 - fencing: New option --retry-on <N>
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 45ed9fd03c0718252f2ae418ffa7161198aa4688
Parent: e8c2ab811f02e891d6bc374b0d9aa43408d90456
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Fri Nov 6 14:13:02 2009 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 14:13:02 2009 +0100
fencing: New option --retry-on <N>
New firmware (v1.70+) for ilo2 is much slower than before. Option --retry-on=X (on stdin retry_on=X)
is introduced; it attempts to send the power-on command to the machine and wait for the result up to X times.
Default for fence agents is 1 so they won't be affected; fence_ilo default value is 3.
Resolves: #bz507514 (timeout options are needed before applying this patch separately)
---
fence/agents/ilo/fence_ilo.py | 4 +++-
fence/agents/lib/fencing.py.py | 35 ++++++++++++++++++++++++++++-------
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/fence/agents/ilo/fence_ilo.py b/fence/agents/ilo/fence_ilo.py
index 9207ec1..f0c7165 100755
--- a/fence/agents/ilo/fence_ilo.py
+++ b/fence/agents/ilo/fence_ilo.py
@@ -56,11 +56,13 @@ def main():
device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
"action", "ipaddr", "login", "passwd", "passwd_script",
"ssl", "ribcl", "inet4_only", "inet6_only", "ipport",
- "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
+ "power_timeout", "shell_timeout", "login_timeout", "power_wait",
+ "retry_on" ]
atexit.register(atexit_handler)
all_opt["login_timeout"]["default"] = "10"
+ all_opt["retry_on"]["default"] = "3"
pinput = process_input(device_opt)
pinput["-z"] = 1
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index e30a87a..fb87b13 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -347,7 +347,13 @@ all_opt = {
"getopt" : "M",
"longopt" : "missing-as-off",
"help" : "--missing-as-off Missing port returns OFF instead of failure",
- "order" : 200}
+ "order" : 200 },
+ "retry_on" : {
+ "getopt" : "r:",
+ "longopt" : "retry-on",
+ "help" : "--retry-on <attempts> Count of attempts to retry power on",
+ "default" : "1",
+ "order" : 200 }
}
class fspawn(pexpect.spawn):
@@ -715,9 +721,15 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if status == "on":
print "Success: Already ON"
else:
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn):
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn):
+ power_on = True
+ break
+
+ if power_on:
print "Success: Powered ON"
else:
fail(EC_WAITING_ON)
@@ -739,10 +751,19 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if wait_power_status(tn, options, get_power_fn) == 0:
fail(EC_WAITING_OFF)
options["-o"] = "on"
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn) == 0:
+
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn) == 1:
+ power_on = True
+ break
+
+ if power_on == False:
+ # this should not fail as not was fenced succesfully
sys.stderr.write('Timed out waiting to power ON\n')
+
print "Success: Rebooted"
elif options["-o"] == "status":
print "Status: " + status.upper()
14 years, 6 months
fence-agents: master - fencing: fence_bladecenter needs longer timeout
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/fence-agents.git?p=fence-agents.git;a=com...
Commit: 2e1aedd179ba57fcc17f78cd5271a3e850e853f2
Parent: 8acc1a69d6695bc5d3a86f21f60910186053bac1
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Fri Nov 6 12:56:06 2009 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 12:56:06 2009 +0100
fencing: fence_bladecenter needs longer timeout
Fence bladecenter needs longer power_wait 5 -> 10 seconds. Found during
tests for RHEV-H
Resolves: #bz526806
---
fence/agents/bladecenter/fence_bladecenter.py | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fence/agents/bladecenter/fence_bladecenter.py b/fence/agents/bladecenter/fence_bladecenter.py
index 414c260..86346f6 100644
--- a/fence/agents/bladecenter/fence_bladecenter.py
+++ b/fence/agents/bladecenter/fence_bladecenter.py
@@ -99,7 +99,7 @@ def main():
atexit.register(atexit_handler)
- all_opt["power_wait"]["default"] = "5"
+ all_opt["power_wait"]["default"] = "10"
all_opt["cmd_prompt"]["default"] = "system>"
options = check_input(device_opt, process_input(device_opt))
14 years, 6 months
fence-agents: master - fencing: New option --retry-on <N>
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/fence-agents.git?p=fence-agents.git;a=com...
Commit: 8acc1a69d6695bc5d3a86f21f60910186053bac1
Parent: dd4badc487c104422a389e7b5a18921beb5459fa
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Fri Nov 6 12:42:52 2009 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Fri Nov 6 12:42:52 2009 +0100
fencing: New option --retry-on <N>
New firmware (v1.70+) for ilo2 is much slower than before. Option --retry-on=X (on stdin retry_on=X)
is introduced; it attempts to send the power-on command to the machine and wait for the result up to X times. Default for
fence agents is 1 so they won't be affected; fence_ilo default value is 3.
Resolves: #bz507514 (timeout options are needed before applying this patch separately)
---
fence/agents/ilo/fence_ilo.py | 4 +++-
fence/agents/lib/fencing.py.py | 35 ++++++++++++++++++++++++++++-------
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/fence/agents/ilo/fence_ilo.py b/fence/agents/ilo/fence_ilo.py
index 0ff40d7..a55c989 100755
--- a/fence/agents/ilo/fence_ilo.py
+++ b/fence/agents/ilo/fence_ilo.py
@@ -57,11 +57,13 @@ def main():
device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
"action", "ipaddr", "login", "passwd", "passwd_script",
"ssl", "ribcl", "inet4_only", "inet6_only", "ipport",
- "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
+ "power_timeout", "shell_timeout", "login_timeout", "power_wait",
+ "retry_on" ]
atexit.register(atexit_handler)
all_opt["login_timeout"]["default"] = "10"
+ all_opt["retry_on"]["default"] = "3"
pinput = process_input(device_opt)
pinput["-z"] = 1
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index 566cebf..26f28f7 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -347,7 +347,13 @@ all_opt = {
"getopt" : "M",
"longopt" : "missing-as-off",
"help" : "--missing-as-off Missing port returns OFF instead of failure",
- "order" : 200}
+ "order" : 200 },
+ "retry_on" : {
+ "getopt" : "r:",
+ "longopt" : "retry-on",
+ "help" : "--retry-on <attempts> Count of attempts to retry power on",
+ "default" : "1",
+ "order" : 200 }
}
class fspawn(pexpect.spawn):
@@ -715,9 +721,15 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if status == "on":
print "Success: Already ON"
else:
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn):
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn):
+ power_on = True
+ break
+
+ if power_on:
print "Success: Powered ON"
else:
fail(EC_WAITING_ON)
@@ -739,10 +751,19 @@ def fence_action(tn, options, set_power_fn, get_power_fn, get_outlet_list = None
if wait_power_status(tn, options, get_power_fn) == 0:
fail(EC_WAITING_OFF)
options["-o"] = "on"
- set_power_fn(tn, options)
- time.sleep(int(options["-G"]))
- if wait_power_status(tn, options, get_power_fn) == 0:
+
+ power_on = False
+ for i in range(1,int(options["-r"])):
+ set_power_fn(tn, options)
+ time.sleep(int(options["-G"]))
+ if wait_power_status(tn, options, get_power_fn) == 1:
+ power_on = True
+ break
+
+ if power_on == False:
+ # this should not fail as not was fenced succesfully
sys.stderr.write('Timed out waiting to power ON\n')
+
print "Success: Rebooted"
elif options["-o"] == "status":
print "Status: " + status.upper()
14 years, 6 months
dlm: master - dlm_controld: detect lowcomms protocol
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=7ce1ce66...
Commit: 7ce1ce66232e3c15fcce95fea72b3490c4a7a4cd
Parent: 3fe2f1f087d64c52a4bdf7b86cc68699a2ac2a10
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Nov 5 12:24:23 2009 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Nov 5 12:24:23 2009 -0600
dlm_controld: detect lowcomms protocol
based on value of totem/rrp_mode in confdb.
Also allow protocol to be set on command line.
Based on initial patch from Jiaju Zhang <jjzhang.linux(a)gmail.com>
Signed-off-by: Jiaju Zhang <jjzhang.linux(a)gmail.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/action.c | 64 ++++++++++++++++++++++++++++++++++++++-
group/dlm_controld/config.c | 7 ++--
group/dlm_controld/dlm_daemon.h | 6 ++++
group/dlm_controld/main.c | 16 ++++++----
4 files changed, 82 insertions(+), 11 deletions(-)
diff --git a/group/dlm_controld/action.c b/group/dlm_controld/action.c
index 229823b..1069418 100644
--- a/group/dlm_controld/action.c
+++ b/group/dlm_controld/action.c
@@ -1,6 +1,9 @@
#include "dlm_daemon.h"
#include "config.h"
+#include <corosync/corotypes.h>
+#include <corosync/confdb.h>
+
static int dir_members[MAX_NODES];
static int dir_members_count;
static int comms_nodes[MAX_NODES];
@@ -12,6 +15,58 @@ static char mg_name[DLM_LOCKSPACE_LEN+1];
#define SPACES_DIR "/sys/kernel/config/dlm/cluster/spaces"
#define COMMS_DIR "/sys/kernel/config/dlm/cluster/comms"
+static int detect_protocol(void)
+{
+ confdb_handle_t handle;
+ hdb_handle_t totem_handle;
+ char key_value[256];
+ size_t value_len;
+ int rv, proto = -1;
+ confdb_callbacks_t callbacks = {
+ .confdb_key_change_notify_fn = NULL,
+ .confdb_object_create_change_notify_fn = NULL,
+ .confdb_object_delete_change_notify_fn = NULL
+ };
+
+ rv = confdb_initialize(&handle, &callbacks);
+ if (rv != CS_OK) {
+ log_error("confdb_initialize error %d", rv);
+ return -1;
+ }
+
+ rv = confdb_object_find_start(handle, OBJECT_PARENT_HANDLE);
+ if (rv != CS_OK) {
+ log_error("confdb_object_find_start error %d", rv);
+ goto out;
+ }
+
+ rv = confdb_object_find(handle, OBJECT_PARENT_HANDLE,
+ "totem", strlen("totem"), &totem_handle);
+ if (rv != CS_OK) {
+ log_error("confdb_object_find error %d", rv);
+ goto out;
+ }
+
+ rv = confdb_key_get(handle, totem_handle,
+ "rrp_mode", strlen("rrp_mode"),
+ key_value, &value_len);
+ if (rv != CS_OK) {
+ log_error("confdb_key_get error %d", rv);
+ goto out;
+ }
+
+ key_value[value_len] = '\0';
+ log_debug("totem/rrp_mode = '%s'", key_value);
+
+ if (!strcmp(key_value, "none"))
+ proto = PROTO_TCP;
+ else
+ proto = PROTO_SCTP;
+ out:
+ confdb_finalize(handle);
+ return proto;
+}
+
/* look for an id that matches in e.g. /sys/fs/gfs/bull\:x/lock_module/id
and then extract the "x" as the name */
@@ -824,7 +879,14 @@ int setup_configfs(void)
set_configfs_debug(cfgk_debug);
if (cfgk_timewarn != -1)
set_configfs_timewarn(cfgk_timewarn);
- if (cfgk_protocol != -1)
+
+ if (cfgk_protocol == PROTO_DETECT) {
+ rv = detect_protocol();
+ if (rv == PROTO_TCP || rv == PROTO_SCTP)
+ cfgk_protocol = rv;
+ }
+
+ if (cfgk_protocol == PROTO_TCP || cfgk_protocol == PROTO_SCTP)
set_configfs_protocol(cfgk_protocol);
return 0;
diff --git a/group/dlm_controld/config.c b/group/dlm_controld/config.c
index 04f1987..5633538 100644
--- a/group/dlm_controld/config.c
+++ b/group/dlm_controld/config.c
@@ -26,9 +26,6 @@
#include "config.h"
#include "ccs.h"
-#define PROTO_TCP 0
-#define PROTO_SCTP 1
-
int ccs_handle;
/* when not set in cluster.conf, a node's default weight is 1 */
@@ -242,6 +239,8 @@ static void read_ccs_protocol(const char *path, int *config_val)
val = PROTO_TCP;
else if (!strncasecmp(str, "sctp", 4))
val = PROTO_SCTP;
+ else if (!strncasecmp(str, "detect", 6))
+ val = PROTO_DETECT;
else {
log_error("ignore invalid value %s for %s", str, path);
return;
@@ -277,6 +276,7 @@ int setup_ccs(void)
{
int cd, rv;
+ /* skip things that cannot be changed while running */
if (ccs_handle)
goto update;
@@ -310,7 +310,6 @@ int setup_ccs(void)
read_ccs_int(GFS_PLOCK_OWNERSHIP_PATH, &cfgd_plock_ownership);
}
-
/* The following can be changed while running */
update:
if (!optd_plock_debug) {
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index acd1c52..84a3d0e 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -63,6 +63,12 @@
#define MAXLINE 256
+/* cfgk_protocol */
+
+#define PROTO_TCP 0
+#define PROTO_SCTP 1
+#define PROTO_DETECT 2
+
extern int daemon_debug_opt;
extern int daemon_quit;
extern int cluster_down;
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index f90cd21..12ffd79 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -1039,6 +1039,9 @@ static void print_usage(void)
printf(" -D Enable debugging to stderr and don't fork\n");
printf(" -L Enable debugging to log file\n");
printf(" -K Enable kernel dlm debugging messages\n");
+ printf(" -r <num> dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect\n");
+ printf(" 2 selects tcp if corosync rrp_mode is \"none\", otherwise sctp\n");
+ printf(" Default is 2\n");
printf(" -f <num> Enable (1) or disable (0) fencing recovery dependency\n");
printf(" Default is %d\n", DEFAULT_ENABLE_FENCING);
printf(" -q <num> Enable (1) or disable (0) quorum recovery dependency\n");
@@ -1062,17 +1065,13 @@ static void print_usage(void)
printf(" -V Print program version information, then exit\n");
}
-#define OPTION_STRING "LDKf:q:d:p:Pl:o:t:c:a:hV"
+#define OPTION_STRING "LDKf:q:d:p:Pl:o:t:c:a:hVr:"
static void read_arguments(int argc, char **argv)
{
int cont = 1;
int optchar;
- /* we don't allow these to be set on command line, should we? */
- optk_timewarn = 0;
- optk_timewarn = 0;
-
while (cont) {
optchar = getopt(argc, argv, OPTION_STRING);
@@ -1092,6 +1091,11 @@ static void read_arguments(int argc, char **argv)
cfgk_debug = 1;
break;
+ case 'r':
+ optk_protocol = 1;
+ cfgk_protocol = atoi(optarg);
+ break;
+
case 'f':
optd_enable_fencing = 1;
cfgd_enable_fencing = atoi(optarg);
@@ -1295,7 +1299,7 @@ int optd_drop_resources_age;
int cfgk_debug = -1;
int cfgk_timewarn = -1;
-int cfgk_protocol = -1;
+int cfgk_protocol = PROTO_DETECT;
int cfgd_debug_logfile = DEFAULT_DEBUG_LOGFILE;
int cfgd_enable_fencing = DEFAULT_ENABLE_FENCING;
int cfgd_enable_quorum = DEFAULT_ENABLE_QUORUM;
14 years, 6 months
cluster: STABLE3 - dlm_controld: detect lowcomms protocol
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: e8c2ab811f02e891d6bc374b0d9aa43408d90456
Parent: 5e62d76ae6b653e89036a96247d317b98ef03fc9
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Nov 4 15:20:47 2009 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Nov 4 15:20:47 2009 -0600
dlm_controld: detect lowcomms protocol
based on value of totem/rrp_mode in confdb.
Also allow protocol to be set on command line.
Based on initial patch from Jiaju Zhang <jjzhang.linux(a)gmail.com>
Signed-off-by: Jiaju Zhang <jjzhang.linux(a)gmail.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/action.c | 64 ++++++++++++++++++++++++++++++++++++++-
group/dlm_controld/config.c | 7 ++--
group/dlm_controld/dlm_daemon.h | 6 ++++
group/dlm_controld/main.c | 16 ++++++----
4 files changed, 82 insertions(+), 11 deletions(-)
diff --git a/group/dlm_controld/action.c b/group/dlm_controld/action.c
index 229823b..1069418 100644
--- a/group/dlm_controld/action.c
+++ b/group/dlm_controld/action.c
@@ -1,6 +1,9 @@
#include "dlm_daemon.h"
#include "config.h"
+#include <corosync/corotypes.h>
+#include <corosync/confdb.h>
+
static int dir_members[MAX_NODES];
static int dir_members_count;
static int comms_nodes[MAX_NODES];
@@ -12,6 +15,58 @@ static char mg_name[DLM_LOCKSPACE_LEN+1];
#define SPACES_DIR "/sys/kernel/config/dlm/cluster/spaces"
#define COMMS_DIR "/sys/kernel/config/dlm/cluster/comms"
+static int detect_protocol(void)
+{
+ confdb_handle_t handle;
+ hdb_handle_t totem_handle;
+ char key_value[256];
+ size_t value_len;
+ int rv, proto = -1;
+ confdb_callbacks_t callbacks = {
+ .confdb_key_change_notify_fn = NULL,
+ .confdb_object_create_change_notify_fn = NULL,
+ .confdb_object_delete_change_notify_fn = NULL
+ };
+
+ rv = confdb_initialize(&handle, &callbacks);
+ if (rv != CS_OK) {
+ log_error("confdb_initialize error %d", rv);
+ return -1;
+ }
+
+ rv = confdb_object_find_start(handle, OBJECT_PARENT_HANDLE);
+ if (rv != CS_OK) {
+ log_error("confdb_object_find_start error %d", rv);
+ goto out;
+ }
+
+ rv = confdb_object_find(handle, OBJECT_PARENT_HANDLE,
+ "totem", strlen("totem"), &totem_handle);
+ if (rv != CS_OK) {
+ log_error("confdb_object_find error %d", rv);
+ goto out;
+ }
+
+ rv = confdb_key_get(handle, totem_handle,
+ "rrp_mode", strlen("rrp_mode"),
+ key_value, &value_len);
+ if (rv != CS_OK) {
+ log_error("confdb_key_get error %d", rv);
+ goto out;
+ }
+
+ key_value[value_len] = '\0';
+ log_debug("totem/rrp_mode = '%s'", key_value);
+
+ if (!strcmp(key_value, "none"))
+ proto = PROTO_TCP;
+ else
+ proto = PROTO_SCTP;
+ out:
+ confdb_finalize(handle);
+ return proto;
+}
+
/* look for an id that matches in e.g. /sys/fs/gfs/bull\:x/lock_module/id
and then extract the "x" as the name */
@@ -824,7 +879,14 @@ int setup_configfs(void)
set_configfs_debug(cfgk_debug);
if (cfgk_timewarn != -1)
set_configfs_timewarn(cfgk_timewarn);
- if (cfgk_protocol != -1)
+
+ if (cfgk_protocol == PROTO_DETECT) {
+ rv = detect_protocol();
+ if (rv == PROTO_TCP || rv == PROTO_SCTP)
+ cfgk_protocol = rv;
+ }
+
+ if (cfgk_protocol == PROTO_TCP || cfgk_protocol == PROTO_SCTP)
set_configfs_protocol(cfgk_protocol);
return 0;
diff --git a/group/dlm_controld/config.c b/group/dlm_controld/config.c
index 16c4efb..1720e7a 100644
--- a/group/dlm_controld/config.c
+++ b/group/dlm_controld/config.c
@@ -25,9 +25,6 @@
#include "config.h"
#include "ccs.h"
-#define PROTO_TCP 0
-#define PROTO_SCTP 1
-
int ccs_handle;
/* when not set in cluster.conf, a node's default weight is 1 */
@@ -199,6 +196,8 @@ static void read_ccs_protocol(const char *path, int *config_val)
val = PROTO_TCP;
else if (!strncasecmp(str, "sctp", 4))
val = PROTO_SCTP;
+ else if (!strncasecmp(str, "detect", 6))
+ val = PROTO_DETECT;
else {
log_error("ignore invalid value %s for %s", str, path);
return;
@@ -235,6 +234,7 @@ int setup_ccs(void)
{
int cd, rv;
+ /* skip things that cannot be changed while running */
if (ccs_handle)
goto update;
@@ -270,7 +270,6 @@ int setup_ccs(void)
read_ccs_int(GFS_PLOCK_OWNERSHIP_PATH, &cfgd_plock_ownership);
}
-
/* The following can be changed while running */
update:
if (!optd_plock_debug) {
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index 0ca895a..dd6c7cc 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -66,6 +66,12 @@
#define GROUP_LIBGROUP 2
#define GROUP_LIBCPG 3
+/* cfgk_protocol */
+
+#define PROTO_TCP 0
+#define PROTO_SCTP 1
+#define PROTO_DETECT 2
+
extern int daemon_debug_opt;
extern int daemon_quit;
extern int cluster_down;
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index 712bed6..af96527 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -1089,6 +1089,9 @@ static void print_usage(void)
printf(" -D Enable debugging to stderr and don't fork\n");
printf(" -L Enable debugging to log file\n");
printf(" -K Enable kernel dlm debugging messages\n");
+ printf(" -r <num> dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect\n");
+ printf(" 2 selects tcp if corosync rrp_mode is \"none\", otherwise sctp\n");
+ printf(" Default is 2\n");
printf(" -g <num> groupd compatibility mode, 0 off, 1 on, 2 detect\n");
printf(" 0: use libcpg, no backward compat, best performance\n");
printf(" 1: use libgroup for compat with cluster2/rhel5\n");
@@ -1118,17 +1121,13 @@ static void print_usage(void)
printf(" -V Print program version information, then exit\n");
}
-#define OPTION_STRING "LDKg:f:q:d:p:Pl:o:t:c:a:hV"
+#define OPTION_STRING "LDKg:f:q:d:p:Pl:o:t:c:a:hVr:"
static void read_arguments(int argc, char **argv)
{
int cont = 1;
int optchar;
- /* we don't allow these to be set on command line, should we? */
- optk_timewarn = 0;
- optk_timewarn = 0;
-
while (cont) {
optchar = getopt(argc, argv, OPTION_STRING);
@@ -1153,6 +1152,11 @@ static void read_arguments(int argc, char **argv)
cfgk_debug = 1;
break;
+ case 'r':
+ optk_protocol = 1;
+ cfgk_protocol = atoi(optarg);
+ break;
+
case 'f':
optd_enable_fencing = 1;
cfgd_enable_fencing = atoi(optarg);
@@ -1359,7 +1363,7 @@ int optd_drop_resources_age;
int cfgk_debug = -1;
int cfgk_timewarn = -1;
-int cfgk_protocol = -1;
+int cfgk_protocol = PROTO_DETECT;
int cfgd_groupd_compat = DEFAULT_GROUPD_COMPAT;
int cfgd_debug_logfile = DEFAULT_DEBUG_LOGFILE;
int cfgd_enable_fencing = DEFAULT_ENABLE_FENCING;
14 years, 6 months
cluster: RHEL55 - cman: Make master-wins mode work
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 2d6c88823e2f2e663d4499769152cc0d21644d34
Parent: a52ad31c2140655e327c27caa0fef8d23adb3bef
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Tue Nov 3 15:50:59 2009 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Nov 3 15:50:59 2009 -0500
cman: Make master-wins mode work
Resolves: bz372901
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/man/qdisk.5 | 18 ++++++++++++++++++
cman/qdisk/disk.h | 3 ++-
cman/qdisk/main.c | 19 ++++++++++++++++++-
3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index 7d28f90..0bca22a 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -277,6 +277,24 @@ as a result in a change in score (see section 2.2). The default for
this value is 1 (on).
.in 9
+\fImaster_wins\fP\fB="\fP0\fB"\fP
+.in 12
+If set to 1 (on), only the qdiskd master will advertise its votes
+to CMAN. In a network partition, only the qdisk master will provide
+votes to CMAN. Consequently, that node will automatically "win" in
+a fence race.
+
+This option requires careful tuning of the CMAN timeout, the qdiskd
+timeout, and CMAN's quorum_dev_poll value. As a rule of thumb,
+CMAN's quorum_dev_poll value should be equal to Totem's token timeout
+and qdiskd's timeout (interval*tko) should be less than half of
+Totem's token timeout.
+
+This option only takes effect if there are no heuristics
+configured. Usage of this option in configurations with more than
+two cluster nodes is undefined and should not be done.
+
+.in 9
\fIallow_kill\fP\fB="\fP1\fB"\fP
.in 12
If set to 0 (off), qdiskd will *not* instruct to kill nodes it thinks
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index 0b652b2..b784220 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -74,7 +74,8 @@ typedef enum {
RF_ALLOW_KILL = 0x10,
RF_UPTIME = 0x20,
RF_CMAN_LABEL = 0x40,
- RF_IOTIMEOUT = 0x80
+ RF_IOTIMEOUT = 0x80,
+ RF_MASTER_WINS = 0x100
} run_flag_t;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index cc18a05..81c6545 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -1061,7 +1061,8 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
"Halting qdisk operations\n");
return -1;
}
- if (!errors)
+ if (!errors &&
+ (!(ctx->qc_flags & RF_MASTER_WINS)))
cman_poll_quorum_device(ctx->qc_ch, 1);
}
}
@@ -1365,6 +1366,15 @@ get_config_data(char *cluster_name, qd_ctx *ctx, struct h_data *h, int maxh,
free(val);
}
+ /* Get master-wins flag for when we transition -> offline */
+ /* default = off, so, 1 to turn on */
+ snprintf(query, sizeof(query), "/cluster/quorumd/@master_wins");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ if (atoi(val))
+ ctx->qc_flags |= RF_MASTER_WINS;
+ free(val);
+ }
+
/* Get cman_label */
snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
if (ccs_get(ccsfd, query, &val) == 0) {
@@ -1460,6 +1470,13 @@ get_config_data(char *cluster_name, qd_ctx *ctx, struct h_data *h, int maxh,
*cfh = configure_heuristics(ccsfd, h, maxh);
+ if (*cfh) {
+ if (ctx->qc_flags & RF_MASTER_WINS) {
+ clulog(LOG_WARNING, "Master-wins mode disabled\n");
+ ctx->qc_flags &= ~RF_MASTER_WINS;
+ }
+ }
+
clulog(LOG_DEBUG,
"Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n",
*cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
14 years, 6 months
resource-agents: master - resource-agents: Add "path" support to virsh mode
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/resource-agents.git?p=resource-agents.git...
Commit: 1e42add4f2ec63d4c796d544118dfcccbd8a042b
Parent: 655c17612390283deed576751f0dfea04603e4c1
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Tue Nov 3 12:46:56 2009 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Tue Nov 3 12:48:05 2009 -0500
resource-agents: Add "path" support to virsh mode
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/vm.sh | 96 ++++++++++++++++++++++++++++++++---------
1 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh
index 4f2d0d8..a372d53 100644
--- a/rgmanager/src/resources/vm.sh
+++ b/rgmanager/src/resources/vm.sh
@@ -563,6 +563,64 @@ do_status()
}
+#
+# virsh "path" attribute support
+#
+check_config_file()
+{
+ declare path=$1
+
+ if [ -f "$path/$OCF_RESKEY_name" ]; then
+ echo $path/$OCF_RESKEY_name
+ return 2
+ elif [ -f "$path/$OCF_RESKEY_name.xml" ]; then
+ echo $path/$OCF_RESKEY_name.xml
+ return 2
+ fi
+
+ return 0
+}
+
+
+parse_input()
+{
+ declare delim=$1
+ declare input=$2
+ declare func=$3
+ declare inp
+ declare value
+
+ while [ -n "$input" ]; do
+ value=${input/$delim*/}
+ if [ -n "$value" ]; then
+ eval $func $value
+ if [ $? -eq 2 ]; then
+ return 0
+ fi
+ fi
+ inp=${input/$value$delim/}
+ if [ "$input" = "$inp" ]; then
+ inp=${input/$value/}
+ fi
+ input=$inp
+ done
+}
+
+
+search_config_path()
+{
+ declare config_file=$(parse_input ":" "$OCF_RESKEY_path" check_config_file)
+
+ if [ -n "$config_file" ]; then
+ export OCF_RESKEY_xmlfile=$config_file
+ return 0
+ fi
+
+ return 1
+}
+
+
+
validate_all()
{
if [ "$(id -u)" != "0" ]; then
@@ -592,29 +650,25 @@ validate_all()
ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh"
return $OCF_ERR_ARGS
fi
+ echo "Management tool: xm"
else
-
- #
- # If no path is set, use virsh. Otherwise, use xm.
- # xm only works with Xen.
- #
- if [ -z "$OCF_RESKEY_path" ] ||
- [ "$OCF_RESKEY_path" = "/etc/xen" ]; then
- echo "Management tool: virsh"
- export OCF_RESKEY_use_virsh=1
- else
- if [ -n "$OCF_RESKEY_use_virsh" ]; then
- ocf_log warning "Cannot use virsh with 'path' attribute set"
- ocf_log warning "Setting use_virsh to 0."
- fi
-
- if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then
- ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor with 'path' attribute"
- return $OCF_ERR_ARGS
+ export OCF_RESKEY_use_virsh="1"
+ echo "Management tool: virsh"
+
+ if [ -n "$OCF_RESKEY_path" ]; then
+ if [ -n "$OCF_RESKEY_xmlfile" ]; then
+ ocf_log warning "Using $OCF_RESKEY_xmlfile instead of searching $OCF_RESKEY_path"
+ else
+ search_config_path
+ if [ $? -ne 0 ]; then
+ ocf_log warning "Could not find $OCF_RESKEY_name or $OCF_RESKEY_name.xml in search path $OCF_RESKEY_path"
+ unset OCF_RESKEY_xmlfile
+ else
+ ocf_log debug "Using $OCF_RESKEY_xmlfile"
+ # No longer needed :)
+ unset OCF_RESKEY_path
+ fi
fi
-
- echo "Management tool: xm"
- export OCF_RESKEY_use_virsh=0
fi
fi
14 years, 6 months