cluster: STABLE32 - qdiskd: allow master to failover quickly when using master_wins
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=e8af462b753...
Commit: e8af462b7531f87b5cb20f7204eeb4b520591da9
Parent: 03e2215bd277fd79b8a6ee70a49de711e0f343ad
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 10:27:57 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 10:27:57 2012 +0200
qdiskd: allow master to failover quickly when using master_wins
in case of master_wins and we are shutting down the master qdiskd,
there is a small window in which the other node is not quorate
because qdiskd has not become master yet.
this patch allows the master qdiskd to communicate to the other
nodes that it is going away and gives enough time to elect
a new master before dying.
the process itself is safe: in the worst-case scenario the cluster
will behave as it does now (temporary loss of quorum); otherwise a fast
switch will take place.
Resolves: rhbz#814807
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/qdisk/disk.h | 5 +++-
cman/qdisk/main.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 75 insertions(+), 2 deletions(-)
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index fd80fa6..6bed41d 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -24,9 +24,12 @@ typedef enum {
S_INIT = 0x2, // Initializing. Hold your fire.
/* vvv Fencing will kill a node */
S_RUN = 0x5, // I think I'm running.
- S_MASTER= 0x6 // I know I'm running, and have advertised to
+ S_MASTER= 0x6, // I know I'm running, and have advertised to
// CMAN the availability of the disk vote for my
// partition.
+ S_EXIT = 0x7 // trigger master re-election before exit
+ // status is set only by master in master-win | auto-masterwin
+ // and next status _must_ be S_NONE
} disk_node_state_t;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 32677a2..16c26e4 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -196,7 +196,8 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
continue;
/* Unchanged timestamp: miss */
- if (sb->ps_timestamp == ni[x].ni_last_seen) {
+ if ((sb->ps_timestamp == ni[x].ni_last_seen) &&
+ (ni[x].ni_state != S_EXIT)) {
/* XXX check for average + allow grace */
ni[x].ni_misses++;
if (ni[x].ni_misses > 1) {
@@ -231,6 +232,22 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
for (x = 0; x < max; x++) {
/*
+ Case 0: check if master node is about to leave
+ */
+ if (ni[x].ni_state == S_EXIT) {
+ logt_print(LOG_NOTICE, "Node %d is about to leave\n", ni[x].ni_status.ps_nodeid);
+ ni[x].ni_evil_incarnation = 0;
+ ni[x].ni_incarnation = 0;
+ ni[x].ni_seen = 0;
+ ni[x].ni_misses = 0;
+ ni[x].ni_state = S_NONE;
+ if (mask)
+ clear_bit(mask, (ni[x].ni_status.ps_nodeid-1),
+ sizeof(memb_mask_t));
+ continue;
+ }
+
+ /*
Case 1: check to see if the node is still up
according to our internal state, but has been
evicted by the master or cleanly shut down
@@ -1269,6 +1286,50 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
/**
+ Tell the other nodes to elect a new master != me.
+ */
+static int
+quorum_reelect_master(qd_ctx *ctx, node_info_t *ni, int max)
+{
+ if (qd_write_status(ctx, ctx->qc_my_id, S_EXIT,
+ NULL, NULL, NULL) != 0) {
+ logt_print(LOG_WARNING,
+ "Error writing to quorum disk during reelect_master\n");
+ }
+
+ while (1) {
+ int master, x;
+ int found = 0;
+ int low_id, count;
+
+ read_node_blocks(ctx, ni, max);
+
+ for (x = 0; x < max; x++) {
+ if (ni[x].ni_state >= S_RUN) {
+ found = 1;
+ }
+ }
+
+ if (!found) {
+ logt_print(LOG_DEBUG, "No other nodes are active. Exiting\n");
+ break;
+ }
+
+ master = master_exists(ctx, ni, max, &low_id, &count);
+ if (master) {
+ logt_print(LOG_DEBUG, "New master elected: %d\n", master);
+ break;
+ }
+ /*
+ * give time for message to be read
+ */
+ sleep(1);
+ }
+
+ return 0;
+}
+
+/**
Tell the other nodes we're done (safely!).
*/
static int
@@ -2173,6 +2234,15 @@ main(int argc, char **argv)
io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0) {
+ /*
+ * if we are master and we are in master-win mode,
+ * request other qdiskd to elect a new one
+ */
+ if ((ctx.qc_status == S_MASTER) &&
+ ((ctx.qc_flags & RF_MASTER_WINS) ||
+ (ctx.qc_flags & RF_AUTO_MASTER_WINS))) {
+ quorum_reelect_master(&ctx, ni, MAX_NODES_DISK);
+ }
/* Only clean up if we're exiting w/o error) */
logt_print(LOG_NOTICE, "Unregistering quorum device.\n");
cman_unregister_quorum_device(ctx.qc_cman_admin);
11 years, 9 months
cluster: RHEL59 - cman init: allow dlm hash table sizes to be tunable at startup
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=332db7754d6...
Commit: 332db7754d6210991c34be056789aa1b33b26a0a
Parent: 77bb92ad9dbad9c8755e45211214ab1e099cdb0e
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 08:55:27 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 08:55:27 2012 +0200
cman init: allow dlm hash table sizes to be tunable at startup
Resolves: rhbz#836963
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/init.d/cman | 43 ++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 42 insertions(+), 1 deletions(-)
diff --git a/cman/init.d/cman b/cman/init.d/cman
index 4727d22..2101eb7 100755
--- a/cman/init.d/cman
+++ b/cman/init.d/cman
@@ -297,6 +297,37 @@ fence_join_enabled()
fi
}
+tune_dlm_hash_sizes()
+{
+ dlmdir="/sys/kernel/config/dlm/cluster"
+ maxloop=21
+
+ if [ -z "$DLM_LKBTBL_SIZE" ] && \
+ [ -z "$DLM_RSBTBL_SIZE" ] && \
+ [ -z "$DLM_DIRTBL_SIZE" ]; then
+ return 0
+ fi
+ for i in $(seq 1 $maxloop); do
+ if [ -d $dlmdir ]; then
+ break
+ fi
+ sleep 0.5
+ done
+ if [ "$i" = "$maxloop" ]; then
+ return 1
+ fi
+ if [ -n "$DLM_LKBTBL_SIZE" ]; then
+ echo $DLM_LKBTBL_SIZE > $dlmdir/lkbtbl_size
+ fi
+ if [ -n "$DLM_RSBTBL_SIZE" ]; then
+ echo $DLM_RSBTBL_SIZE > $dlmdir/rsbtbl_size
+ fi
+ if [ -n "$DLM_DIRTBL_SIZE" ]; then
+ echo $DLM_DIRTBL_SIZE > $dlmdir/dirtbl_size
+ fi
+ return 0
+}
+
start()
{
echo "Starting cluster: "
@@ -391,7 +422,17 @@ start()
return 1
fi
fi
-
+
+ echo -n " Tuning DLM... "
+ tune_dlm_hash_sizes
+ if [ $? -eq 0 ]
+ then
+ echo "done"
+ else
+ echo "failed"
+ return 1
+ fi
+
return 0
}
11 years, 9 months
cluster: RHEL59 - fenced: fix double free when second method has no devices
by David Teigland
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=77bb92ad9db...
Commit: 77bb92ad9dbad9c8755e45211214ab1e099cdb0e
Parent: 6c9717cbbca38ee94911cbbc481c310f6375d560
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Mon Jul 23 17:01:57 2012 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Mon Jul 23 17:01:57 2012 -0500
fenced: fix double free when second method has no devices
When the second fence method is empty, the already
freed device is freed again, causing a segfault.
bz 809390
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
fence/fenced/agent.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/fence/fenced/agent.c b/fence/fenced/agent.c
index 753b915..de4467c 100644
--- a/fence/fenced/agent.c
+++ b/fence/fenced/agent.c
@@ -361,8 +361,10 @@ int dispatch_fence_agent(char *victim, int force)
device = NULL;
}
- if (device)
+ if (device) {
free(device);
+ device = NULL;
+ }
free(method);
if (!error) {
11 years, 9 months
cluster: RHEL59 - fenced: increase MAX_DEVICES to 8
by David Teigland
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=6c9717cbbca...
Commit: 6c9717cbbca38ee94911cbbc481c310f6375d560
Parent: b5253b0c74175ecda233f573fb0afa733b60eda2
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Mon Jul 23 10:23:56 2012 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Mon Jul 23 16:36:44 2012 -0500
fenced: increase MAX_DEVICES to 8
Make this match rhel6.
bz 821857
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
fence/fenced/agent.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fence/fenced/agent.c b/fence/fenced/agent.c
index e3aab9a..753b915 100644
--- a/fence/fenced/agent.c
+++ b/fence/fenced/agent.c
@@ -27,7 +27,7 @@
#include "ccs.h"
#define MAX_METHODS 8
-#define MAX_DEVICES 4
+#define MAX_DEVICES 8
#define MAX_AGENT_ARGS_LEN 512
#define METHOD_NAME_PATH "/cluster/clusternodes/clusternode[@name=\"%s\"]/fence/method[%d]/@name"
11 years, 9 months
cluster: RHEL59 - fencing: Automatic detection of EOL during login process
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=b5253b0c741...
Commit: b5253b0c74175ecda233f573fb0afa733b60eda2
Parent: 7d64a32254aac1c4bbeb7d7d684c606893d9fda1
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Mon Jul 23 17:23:39 2012 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Mon Jul 23 17:23:39 2012 +0200
fencing: Automatic detection of EOL during login process
Resolves: rhbz#810949
---
fence/agents/apc/fence_apc.py | 44 +++++++++++-----------
fence/agents/bladecenter/fence_bladecenter.py | 18 +++++-----
fence/agents/drac/fence_drac5.py | 12 +++---
fence/agents/ilo_mp/fence_ilo_mp.py | 12 +++---
fence/agents/lib/fencing.py.py | 48 +++++++++++++++---------
fence/agents/rsa/fence_rsa.py | 6 ++--
fence/agents/sanbox2/fence_sanbox2.py | 32 ++++++++--------
fence/agents/wti/fence_wti.py | 2 +-
8 files changed, 93 insertions(+), 81 deletions(-)
diff --git a/fence/agents/apc/fence_apc.py b/fence/agents/apc/fence_apc.py
index 0a3ce58..4125d39 100755
--- a/fence/agents/apc/fence_apc.py
+++ b/fence/agents/apc/fence_apc.py
@@ -29,7 +29,7 @@ def get_power_status(conn, options):
exp_result = 0
outlets = {}
try:
- conn.send("1\r\n")
+ conn.send_eol("1")
conn.log_expect(options, options["-c"], int(options["-Y"]))
version = 0
@@ -58,15 +58,15 @@ def get_power_status(conn, options):
if switch == 0:
if version == 2:
if admin == 0:
- conn.send("2\r\n")
+ conn.send_eol("2")
else:
- conn.send("3\r\n")
+ conn.send_eol("3")
else:
- conn.send("2\r\n")
+ conn.send_eol("2")
conn.log_expect(options, options["-c"], int(options["-Y"]))
- conn.send("1\r\n")
+ conn.send_eol("1")
else:
- conn.send(options["-s"]+"\r\n")
+ conn.send_eol(options["-s"])
while True:
exp_result = conn.log_expect(options, [ options["-c"], "Press <ENTER>" ], int(options["-Y"]))
@@ -76,7 +76,7 @@ def get_power_status(conn, options):
res = show_re.search(x)
if (res != None):
outlets[res.group(2)] = (res.group(3), res.group(4))
- conn.send("\r\n")
+ conn.send_eol("")
if exp_result == 0:
break
conn.send(chr(03))
@@ -103,7 +103,7 @@ def set_power_status(conn, options):
}[options["-o"]]
try:
- conn.send("1\r\n")
+ conn.send_eol("1")
conn.log_expect(options, options["-c"], int(options["-Y"]))
version = 0
@@ -138,41 +138,41 @@ def set_power_status(conn, options):
if switch == 0:
if version == 2:
if admin2 == 0:
- conn.send("2\r\n")
+ conn.send_eol("2")
else:
- conn.send("3\r\n")
+ conn.send_eol("3")
else:
- conn.send("2\r\n")
+ conn.send_eol("2")
conn.log_expect(options, options["-c"], int(options["-Y"]))
if (None == re.compile('.*2- Outlet Restriction.*', re.IGNORECASE | re.S).match(conn.before)):
admin3 = 0
else:
admin3 = 1
- conn.send("1\r\n")
+ conn.send_eol("1")
else:
- conn.send(options["-s"] + "\r\n")
+ conn.send_eol(options["-s"])
while 1 == conn.log_expect(options, [ options["-c"], "Press <ENTER>" ], int(options["-Y"])):
- conn.send("\r\n")
- conn.send(options["-n"]+"\r\n")
+ conn.send_eol("")
+ conn.send_eol(options["-n"]+"")
conn.log_expect(options, options["-c"], int(options["-Y"]))
if switch == 0:
if admin2 == 1:
- conn.send("1\r\n")
+ conn.send_eol("1")
conn.log_expect(options, options["-c"], int(options["-Y"]))
if admin3 == 1:
- conn.send("1\r\n")
+ conn.send_eol("1")
conn.log_expect(options, options["-c"], int(options["-Y"]))
else:
- conn.send("1\r\n")
+ conn.send_eol("1")
conn.log_expect(options, options["-c"], int(options["-Y"]))
- conn.send(action+"\r\n")
+ conn.send_eol(action)
conn.log_expect(options, "Enter 'YES' to continue or <ENTER> to cancel :", int(options["-Y"]))
- conn.send("YES\r\n")
+ conn.send_eol("YES")
conn.log_expect(options, "Press <ENTER> to continue...", int(options["-Y"]))
- conn.send("\r\n")
+ conn.send_eol("")
conn.log_expect(options, options["-c"], int(options["-Y"]))
conn.send(chr(03))
conn.log_expect(options, "- Logout", int(options["-Y"]))
@@ -231,7 +231,7 @@ will block any necessary fencing actions."
## a problem because everything is checked before.
######
try:
- conn.sendline("4")
+ conn.send_eol("4")
conn.close()
except exceptions.OSError:
pass
diff --git a/fence/agents/bladecenter/fence_bladecenter.py b/fence/agents/bladecenter/fence_bladecenter.py
index ac65f65..d97b9e4 100755
--- a/fence/agents/bladecenter/fence_bladecenter.py
+++ b/fence/agents/bladecenter/fence_bladecenter.py
@@ -27,7 +27,7 @@ def get_power_status(conn, options):
try:
node_cmd = "system:blade\[" + options["-n"] + "\]>"
- conn.send("env -T system:blade[" + options["-n"] + "]\r\n")
+ conn.send_eol("env -T system:blade[" + options["-n"] + "]")
i = conn.log_expect(options, [ node_cmd, "system>" ] , int(options["-Y"]))
if i == 1:
## Given blade number does not exist
@@ -35,10 +35,10 @@ def get_power_status(conn, options):
return "off"
else:
fail(EC_STATUS)
- conn.send("power -state\r\n")
+ conn.send_eol("power -state")
conn.log_expect(options, node_cmd, int(options["-Y"]))
status = conn.before.splitlines()[-1]
- conn.send("env -T system\r\n")
+ conn.send_eol("env -T system")
conn.log_expect(options, options["-c"], int(options["-Y"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
@@ -56,7 +56,7 @@ def set_power_status(conn, options):
try:
node_cmd = "system:blade\[" + options["-n"] + "\]>"
- conn.send("env -T system:blade[" + options["-n"] + "]\r\n")
+ conn.send_eol("env -T system:blade[" + options["-n"] + "]")
i = conn.log_expect(options, [ node_cmd, "system>" ] , int(options["-Y"]))
if i == 1:
## Given blade number does not exist
@@ -65,9 +65,9 @@ def set_power_status(conn, options):
else:
fail(EC_GENERIC_ERROR)
- conn.send("power -"+options["-o"]+"\r\n")
+ conn.send_eol("power -"+options["-o"])
conn.log_expect(options, node_cmd, int(options["-Y"]))
- conn.send("env -T system\r\n")
+ conn.send_eol("env -T system")
conn.log_expect(options, options["-c"], int(options["-Y"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
@@ -79,9 +79,9 @@ def get_blades_list(conn, options):
try:
node_cmd = "system>"
- conn.send("env -T system\r\n")
+ conn.send_eol("env -T system")
conn.log_expect(options, node_cmd, int(options["-Y"]))
- conn.send("list -l 2\r\n")
+ conn.send_eol("list -l 2")
conn.log_expect(options, node_cmd, int(options["-Y"]))
lines = conn.before.split("\r\n")
@@ -131,7 +131,7 @@ and uses the command line interface to power on and off blades."
## Logout from system
######
try:
- conn.send("exit\r\n")
+ conn.send_eol("exit")
conn.close()
except exceptions.OSError:
pass
diff --git a/fence/agents/drac/fence_drac5.py b/fence/agents/drac/fence_drac5.py
index 23cd7ca..298339f 100755
--- a/fence/agents/drac/fence_drac5.py
+++ b/fence/agents/drac/fence_drac5.py
@@ -25,9 +25,9 @@ BUILD_DATE=""
def get_power_status(conn, options):
try:
if options["model"] == "DRAC CMC":
- conn.sendline("racadm serveraction powerstatus -m " + options["-m"])
+ conn.send_eol("racadm serveraction powerstatus -m " + options["-m"])
elif options["model"] == "DRAC 5":
- conn.sendline("racadm serveraction powerstatus")
+ conn.send_eol("racadm serveraction powerstatus")
conn.log_expect(options, options["-c"], int(options["-Y"]))
except pexpect.EOF:
@@ -49,9 +49,9 @@ def set_power_status(conn, options):
try:
if options["model"] == "DRAC CMC":
- conn.sendline("racadm serveraction " + action + " -m " + options["-m"])
+ conn.send_eol("racadm serveraction " + action + " -m " + options["-m"])
elif options["model"] == "DRAC 5":
- conn.sendline("racadm serveraction " + action)
+ conn.send_eol("racadm serveraction " + action)
conn.log_expect(options, options["-c"], int(options["-g"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
@@ -63,7 +63,7 @@ def get_list_devices(conn, options):
try:
if options["model"] == "DRAC CMC":
- conn.sendline("getmodinfo")
+ conn.send_eol("getmodinfo")
list_re = re.compile("^([^\s]*?)\s+Present\s*(ON|OFF)\s*.*$")
conn.log_expect(options, options["-c"], int(options["-g"]))
@@ -133,7 +133,7 @@ By default, the telnet interface is not enabled."
## Logout from system
######
try:
- conn.sendline("exit")
+ conn.send_eol("exit")
time.sleep(1)
conn.close()
except exceptions.OSError:
diff --git a/fence/agents/ilo_mp/fence_ilo_mp.py b/fence/agents/ilo_mp/fence_ilo_mp.py
index 06e1358..40ac597 100644
--- a/fence/agents/ilo_mp/fence_ilo_mp.py
+++ b/fence/agents/ilo_mp/fence_ilo_mp.py
@@ -12,7 +12,7 @@ BUILD_DATE=""
def get_power_status(conn, options):
try:
- conn.send("show /system1\r\n")
+ conn.send_eol("show /system1")
re_state = re.compile('EnabledState=(.*)', re.IGNORECASE)
conn.log_expect(options, re_state, int(options["-Y"]))
@@ -31,9 +31,9 @@ def get_power_status(conn, options):
def set_power_status(conn, options):
try:
if options["-o"] == "on":
- conn.send("start /system1\r\n")
+ conn.send_eol("start /system1")
else:
- conn.send("stop -f /system1\r\n")
+ conn.send_eol("stop -f /system1")
conn.log_expect(options, options["-c"], int(options["-g"]))
@@ -46,7 +46,7 @@ def set_power_status(conn, options):
def main():
device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
"action", "ipaddr", "login", "passwd", "passwd_script",
- "secure", "identity_file", "cmd_prompt", "ipport", "login_eol_lf",
+ "secure", "identity_file", "cmd_prompt", "ipport",
"separator", "inet4_only", "inet6_only",
"power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
@@ -64,7 +64,7 @@ def main():
show_docs(options, docs)
conn = fence_login(options)
- conn.send("SMCLP\r\n")
+ conn.send_eol("SMCLP")
##
## Fence operations
@@ -72,7 +72,7 @@ def main():
result = fence_action(conn, options, set_power_status, get_power_status)
try:
- conn.send("exit\r\n")
+ conn.send_eol("exit")
except exceptions.OSError:
pass
except pexpect.ExceptionPexpect:
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index c4fe218..25a0f7e 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -178,11 +178,6 @@ all_opt = {
"required" : "0",
"shortdesc" : "Force ribcl version to use",
"order" : 1 },
- "login_eol_lf" : {
- "getopt" : "",
- "help" : "",
- "order" : 1
- },
"cmd_prompt" : {
"getopt" : "c:",
"longopt" : "command-prompt",
@@ -403,12 +398,20 @@ all_opt = {
common_opt = [ "retry_on", "delay" ]
class fspawn(pexpect.spawn):
+ def __init__(self, options, command):
+ pexpect.spawn.__init__(self, command)
+ self.opt = options
+
def log_expect(self, options, pattern, timeout):
result = self.expect(pattern, timeout)
if options["log"] >= LOG_MODE_VERBOSE:
options["debug_fh"].write(self.before + self.after)
return result
+ # send EOL according to what was detected in login process (telnet)
+ def send_eol(self, message):
+ self.send(message + self.opt["eol"])
+
def atexit_handler():
try:
sys.stdout.close()
@@ -859,10 +862,7 @@ def fence_login(options):
if (options.has_key("-4")):
force_ipvx="-4 "
- if (options["device_opt"].count("login_eol_lf")):
- login_eol = "\n"
- else:
- login_eol = "\r\n"
+ options["eol"] = "\r\n"
## Do the delay of the fence device before logging in
## Delay is important for two-node clusters fencing but we do not need to delay 'status' operations
@@ -876,7 +876,7 @@ def fence_login(options):
if options.has_key("-z"):
command = '%s %s %s %s' % (SSL_PATH, force_ipvx, options["-a"], options["-u"])
try:
- conn = fspawn(command)
+ conn = fspawn(options, command)
except pexpect.ExceptionPexpect, ex:
## SSL telnet is part of the fencing package
sys.stderr.write(str(ex) + "\n")
@@ -886,7 +886,7 @@ def fence_login(options):
if options.has_key("ssh_options"):
command += ' ' + options["ssh_options"]
try:
- conn = fspawn(command)
+ conn = fspawn(options, command)
except pexpect.ExceptionPexpect, ex:
sys.stderr.write(str(ex) + "\n")
sys.stderr.write("Due to limitations, binary dependencies on fence agents "
@@ -915,7 +915,7 @@ def fence_login(options):
if options.has_key("ssh_options"):
command += ' ' + options["ssh_options"]
try:
- conn = fspawn(command)
+ conn = fspawn(options, command)
except pexpect.ExceptionPexpect, ex:
sys.stderr.write(str(ex) + "\n")
sys.stderr.write("Due to limitations, binary dependencies on fence agents "
@@ -934,7 +934,7 @@ def fence_login(options):
fail_usage("Failed: You have to enter passphrase (-p) for identity file")
else:
try:
- conn = fspawn(TELNET_PATH)
+ conn = fspawn(options, TELNET_PATH)
conn.send("set binary\n")
conn.send("open %s -%s\n"%(options["-a"], options["-u"]))
except pexpect.ExceptionPexpect, ex:
@@ -943,11 +943,23 @@ def fence_login(options):
"are not in the spec file and must be installed separately." + "\n")
sys.exit(EC_GENERIC_ERROR)
- conn.log_expect(options, re_login, int(options["-y"]))
- conn.send(options["-l"] + login_eol)
- conn.log_expect(options, re_pass, int(options["-Y"]))
- conn.send(options["-p"] + login_eol)
- conn.log_expect(options, options["-c"], int(options["-Y"]))
+ result = conn.log_expect(options, re_login, int(options["-y"]))
+ conn.send_eol(options["-l"])
+
+ ## automatically change end of line separator
+ screen = conn.read_nonblocking(size=100, timeout=int(options["-Y"]))
+ if (re_login.search(screen) != None):
+ options["eol"] = "\n"
+ conn.send_eol(options["-l"])
+ result = conn.log_expect(options, re_pass, int(options["-y"]))
+ elif (re_pass.search(screen) == None):
+ conn.log_expect(options, re_pass, int(options["-Y"]))
+
+ try:
+ conn.send_eol(options["-p"])
+ conn.log_expect(options, options["-c"], int(options["-Y"]))
+ except KeyError:
+ fail(EC_PASSWORD_MISSING)
except pexpect.EOF:
fail(EC_LOGIN_DENIED)
except pexpect.TIMEOUT:
diff --git a/fence/agents/rsa/fence_rsa.py b/fence/agents/rsa/fence_rsa.py
index 33c7fe9..d82c448 100755
--- a/fence/agents/rsa/fence_rsa.py
+++ b/fence/agents/rsa/fence_rsa.py
@@ -19,7 +19,7 @@ BUILD_DATE="March, 2009"
def get_power_status(conn, options):
try:
- conn.send("power state\r\n")
+ conn.send_eol("power state")
conn.log_expect(options, options["-c"], int(options["-Y"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
@@ -36,7 +36,7 @@ def get_power_status(conn, options):
def set_power_status(conn, options):
try:
- conn.send("power " + options["-o"] + "\r\n")
+ conn.send_eol("power " + options["-o"])
conn.log_expect(options, options["-c"], int(options["-g"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
@@ -84,7 +84,7 @@ will block any necessary fencing actions."
## Logout from system
######
try:
- conn.sendline("exit")
+ conn.send_eol("exit")
conn.close()
except exceptions.OSError:
pass
diff --git a/fence/agents/sanbox2/fence_sanbox2.py b/fence/agents/sanbox2/fence_sanbox2.py
index c95e602..d20f7e2 100644
--- a/fence/agents/sanbox2/fence_sanbox2.py
+++ b/fence/agents/sanbox2/fence_sanbox2.py
@@ -24,14 +24,14 @@ def get_power_status(conn, options):
'offline' : "off"
}
try:
- conn.send("show port " + options["-n"] + "\n")
+ conn.send_eol("show port " + options["-n"])
conn.log_expect(options, options["-c"], int(options["-Y"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
except pexpect.TIMEOUT:
try:
- conn.send("admin end\n")
- conn.send("exit\n")
+ conn.send_eol("admin end")
+ conn.send_eol("exit")
conn.close()
except:
pass
@@ -51,28 +51,28 @@ def set_power_status(conn, options):
}[options["-o"]]
try:
- conn.send("set port " + options["-n"] + " state " + action + "\n")
+ conn.send_eol("set port " + options["-n"] + " state " + action)
conn.log_expect(options, options["-c"], int(options["-g"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
except pexpect.TIMEOUT:
try:
- conn.send("admin end\n")
- conn.send("exit\n")
+ conn.send_eol("admin end")
+ conn.send_eol("exit")
conn.close()
except:
pass
fail(EC_TIMED_OUT)
try:
- conn.send("set port " + options["-n"] + " state " + action + "\n")
+ conn.send_eol("set port " + options["-n"] + " state " + action)
conn.log_expect(options, options["-c"], int(options["-g"]))
except pexpect.EOF:
fail(EC_CONNECTION_LOST)
except pexpect.TIMEOUT:
try:
- conn.send("admin end\n")
- conn.send("exit\n")
+ conn.send_eol("admin end")
+ conn.send_eol("exit")
conn.close()
except:
pass
@@ -82,7 +82,7 @@ def get_list_devices(conn, options):
outlets = { }
try:
- conn.send("show port" + "\n")
+ conn.send_eol("show port")
conn.log_expect(options, options["-c"], int(options["-Y"]))
list_re = re.compile("^\s+(\d+?)\s+(Online|Offline)\s+", re.IGNORECASE)
@@ -98,8 +98,8 @@ def get_list_devices(conn, options):
fail(EC_CONNECTION_LOST)
except pexpect.TIMEOUT:
try:
- conn.send("admin end\n")
- conn.send("exit\n")
+ conn.send_eol("admin end")
+ conn.send_eol("exit")
conn.close()
except:
pass
@@ -110,7 +110,7 @@ def get_list_devices(conn, options):
def main():
device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
"io_fencing", "ipaddr", "login", "passwd", "passwd_script",
- "cmd_prompt", "port", "ipport", "login_eol_lf", "separator",
+ "cmd_prompt", "port", "ipport", "separator",
"power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
atexit.register(atexit_handler)
@@ -138,7 +138,7 @@ because the connection will block any necessary fencing actions."
##
conn = fence_login(options)
- conn.send("admin start\n")
+ conn.send_eol("admin start")
conn.log_expect(options, options["-c"], int(options["-Y"]))
if (re.search("\(admin\)", conn.before, re.MULTILINE) == None):
@@ -153,8 +153,8 @@ because the connection will block any necessary fencing actions."
## Logout from system
######
try:
- conn.send("admin end\n")
- conn.send("exit\n")
+ conn.send_eol("admin end")
+ conn.send_eol("exit\n")
conn.close()
except exceptions.OSError:
pass
diff --git a/fence/agents/wti/fence_wti.py b/fence/agents/wti/fence_wti.py
index c742e71..40b97e3 100755
--- a/fence/agents/wti/fence_wti.py
+++ b/fence/agents/wti/fence_wti.py
@@ -125,7 +125,7 @@ is running because the connection will block any necessary fencing actions."
if 0 == options.has_key("-x"):
try:
try:
- conn = fspawn(TELNET_PATH)
+ conn = fspawn(options, TELNET_PATH)
conn.send("set binary\n")
conn.send("open %s -%s\n"%(options["-a"], options["-u"]))
except pexpect.ExceptionPexpect, ex:
11 years, 9 months
cluster: RHEL59 - fence_apc: fence_apc fails for some port numbers
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=7d64a32254a...
Commit: 7d64a32254aac1c4bbeb7d7d684c606893d9fda1
Parent: 1ed8b72936fa7d871dbdc9b787e7e775d1ea3699
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Wed Jun 23 18:29:02 2010 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Mon Jul 23 17:20:28 2012 +0200
fence_apc: fence_apc fails for some port numbers
The problem is that the string we parse looks like:
'Press Enter to continue ...\r 23-bar1 ON' and we did not expect a text
message before our line.
Resolves: #606315
---
fence/agents/apc/fence_apc.py | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fence/agents/apc/fence_apc.py b/fence/agents/apc/fence_apc.py
index 1aa7bed..0a3ce58 100755
--- a/fence/agents/apc/fence_apc.py
+++ b/fence/agents/apc/fence_apc.py
@@ -71,11 +71,11 @@ def get_power_status(conn, options):
while True:
exp_result = conn.log_expect(options, [ options["-c"], "Press <ENTER>" ], int(options["-Y"]))
lines = conn.before.split("\n");
- show_re = re.compile('^\s*(\d+)- (.*?)\s+(ON|OFF)\s*')
+ show_re = re.compile('(^|\x0D)\s*(\d+)- (.*?)\s+(ON|OFF)\s*')
for x in lines:
res = show_re.search(x)
if (res != None):
- outlets[res.group(1)] = (res.group(2), res.group(3))
+ outlets[res.group(2)] = (res.group(3), res.group(4))
conn.send("\r\n")
if exp_result == 0:
break
11 years, 9 months
cluster: RHEL59 - fencing: Add iPDU fencing agent for model 46M4002
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=1ed8b72936f...
Commit: 1ed8b72936fa7d871dbdc9b787e7e775d1ea3699
Parent: 76741bb2a94ae94e493c609d50f570d02e2f3029
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Mon Jul 23 17:06:32 2012 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Mon Jul 23 17:06:32 2012 +0200
fencing: Add iPDU fencing agent for model 46M4002
Resolves: rhbz#741985
---
fence/agents/Makefile | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/fence/agents/Makefile b/fence/agents/Makefile
index 716530c..669e291 100644
--- a/fence/agents/Makefile
+++ b/fence/agents/Makefile
@@ -30,6 +30,7 @@ all:
${MAKE} -C ifmib all
${MAKE} -C ilo all
${MAKE} -C ilo_mp all
+ ${MAKE} -C ipdu all
${MAKE} -C ipmilan all
${MAKE} -C lpar all
${MAKE} -C manual all
@@ -65,6 +66,7 @@ install: all
${MAKE} -C ifmib install
${MAKE} -C ilo install
${MAKE} -C ilo_mp install
+ ${MAKE} -C ipdu install
${MAKE} -C ipmilan install
${MAKE} -C lpar install
${MAKE} -C manual install
@@ -100,6 +102,7 @@ clean:
${MAKE} -C ifmib clean
${MAKE} -C ilo clean
${MAKE} -C ilo_mp clean
+ ${MAKE} -C ipdu clean
${MAKE} -C ipmilan clean
${MAKE} -C lpar clean
${MAKE} -C manual clean
11 years, 9 months
cluster: RHEL59 - qdiskd: Make multipath issues go away
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=76741bb2a94...
Commit: 76741bb2a94ae94e493c609d50f570d02e2f3029
Parent: 3290dff9b88992e913b28dd57c6b64a3c2b75c8a
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Fri May 6 10:14:04 2011 -0400
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Mon Jul 23 14:54:07 2012 +0200
qdiskd: Make multipath issues go away
Qdiskd has historically required significant tuning to work around
delays which occur during multipath failover, overloaded I/O, and LUN
trespasses in both device-mapper-multipath and EMC PowerPath
environments.
This patch goes a very long way towards eliminating false evictions
when these conditions occur by making qdiskd whine to the other
cluster members when it detects hung system calls. When a cluster
member whines, it indicates the source of the problem (which system
call is hung), and the act of receiving a whine from a host indicates
that qdiskd is operational, but that I/O is hung. Hung I/O is different
from losing storage entirely (where you get I/O errors).
Possible problems:
- Receive queue getting very full, causing messages to become blocked on
a node where I/O is hung. 1) that would take a very long time, and 2)
node should get evicted at that point anyway.
Resolves: rhbz#782900
this version of the patch is a backport of:
e2937eb33f224f86904fead08499a6178868ca6a
34d2872fb7e60be1594158acaaeb8acd74f78d22
There is a minor change vs original patch based on how qdiskd
in RHEL5 handles cman connection. We add an extra call to cman_alive
in main qdisk_loop to make sure data are not stalled on the
cman port, and that the data_callback to qdiskd_whine is executed.
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/daemon/cnxman-socket.h | 1 +
cman/qdisk/Makefile | 2 +-
cman/qdisk/disk.h | 6 ++++
cman/qdisk/iostate.c | 17 +++++++++++--
cman/qdisk/iostate.h | 4 ++-
cman/qdisk/main.c | 54 +++++++++++++++++++++++++++++++++++++++----
6 files changed, 74 insertions(+), 10 deletions(-)
diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index 351c97c..1d01b44 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -79,6 +79,7 @@
#define CLUSTER_PORT_SERVICES 2
#define CLUSTER_PORT_SYSMAN 10 /* Remote execution daemon */
#define CLUSTER_PORT_CLVMD 11 /* Cluster LVM daemon */
+#define CLUSTER_PORT_QDISKD 178 /* Quorum disk daemon */
/* Port numbers above this will be blocked when the cluster is inquorate or in
* transition */
diff --git a/cman/qdisk/Makefile b/cman/qdisk/Makefile
index f58806b..9bfc486 100644
--- a/cman/qdisk/Makefile
+++ b/cman/qdisk/Makefile
@@ -32,7 +32,7 @@ qdiskd: disk.o crc32.o disk_util.o main.o score.o bitmap.o clulog.o \
gcc -o $@ $^ -lpthread -L../lib -L${ccslibdir} -lccs -lrt
mkqdisk: disk.o crc32.o disk_util.o iostate.o \
- proc.o mkqdisk.o scandisk.o clulog.o gettid.o
+ proc.o mkqdisk.o scandisk.o clulog.o gettid.o ../lib/libcman.a
gcc -o $@ $^ -lrt
%.o: %.c
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index b784220..d491de1 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -290,6 +290,12 @@ typedef struct {
status_block_t ni_status;
} node_info_t;
+typedef struct {
+ qd_ctx *ctx;
+ node_info_t *ni;
+ size_t ni_len;
+} qd_priv_t;
+
int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
int qd_read_print_status(target_info_t *disk, int nid);
diff --git a/cman/qdisk/iostate.c b/cman/qdisk/iostate.c
index 65b4d50..eb74ad2 100644
--- a/cman/qdisk/iostate.c
+++ b/cman/qdisk/iostate.c
@@ -1,10 +1,14 @@
#include <pthread.h>
+#include <libcman.h>
#include <iostate.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <clulog.h>
+#include <stdint.h>
+#include "platform.h"
#include "iostate.h"
+#include "../daemon/cnxman-socket.h"
static iostate_t main_state = 0;
static int main_incarnation = 0;
@@ -26,7 +30,7 @@ static struct state_table io_state_table[] = {
{ STATE_LSEEK, "seek" },
{ -1, NULL } };
-static const char *
+const char *
state_to_string(iostate_t state)
{
static const char *ret = "unknown";
@@ -65,6 +69,8 @@ io_nanny_thread(void *arg)
iostate_t last_main_state = 0, current_main_state = 0;
int last_main_incarnation = 0, current_main_incarnation = 0;
int logged_incarnation = 0;
+ cman_handle_t ch = (cman_handle_t)arg;
+ int32_t whine_state;
/* Start with wherever we're at now */
pthread_mutex_lock(&state_mutex);
@@ -96,6 +102,11 @@ io_nanny_thread(void *arg)
continue;
}
+ /* Whine on CMAN api */
+ whine_state = (int32_t)current_main_state;
+ swab32(whine_state);
+ cman_send_data(ch, &whine_state, sizeof(int32_t), 0, CLUSTER_PORT_QDISKD, 0);
+
/* Don't log things twice */
if (logged_incarnation == current_main_incarnation)
continue;
@@ -114,7 +125,7 @@ io_nanny_thread(void *arg)
int
-io_nanny_start(int timeout)
+io_nanny_start(cman_handle_t ch, int timeout)
{
int ret;
@@ -124,7 +135,7 @@ io_nanny_start(int timeout)
qdisk_timeout = timeout;
thread_active = 1;
- ret = pthread_create(&io_nanny_tid, NULL, io_nanny_thread, NULL);
+ ret = pthread_create(&io_nanny_tid, NULL, io_nanny_thread, ch);
pthread_mutex_unlock(&state_mutex);
return ret;
diff --git a/cman/qdisk/iostate.h b/cman/qdisk/iostate.h
index 7dd7bf6..a65b1d4 100644
--- a/cman/qdisk/iostate.h
+++ b/cman/qdisk/iostate.h
@@ -11,7 +11,9 @@ typedef enum {
void io_state(iostate_t state);
-int io_nanny_start(int timeout);
+int io_nanny_start(cman_handle_t ch, int timeout);
int io_nanny_stop(void);
+const char * state_to_string(iostate_t state);
+
#endif
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 0d7bb3d..90d00ab 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -48,6 +48,7 @@
(defined(LIBCMAN_VERSION) && LIBCMAN_VERSION < 2))
#include <cluster/cnxman-socket.h>
#endif
+#include "../daemon/cnxman-socket.h"
int daemon_init(char *);
int check_process_running(char *, pid_t *);
@@ -892,6 +893,11 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
_running = 1;
while (_running) {
+ /* perform a forceful cman dispatch */
+ if (cman_alive(ctx->qc_ch) < 0) {
+ clulog(LOG_ERR, "cman: %s\n", strerror(errno));
+ }
+
/* XXX this was getuptime() in clumanager */
get_time(&oldtime, (ctx->qc_flags&RF_UPTIME));
@@ -1514,6 +1520,31 @@ check_stop_cman(qd_ctx *ctx)
}
}
+static void
+qdisk_whine(cman_handle_t h, void *privdata, char *buf, int len,
+ uint8_t port, int nodeid)
+{
+ int32_t dstate;
+ qd_priv_t *qp = (qd_priv_t *)privdata;
+ node_info_t *ni = qp->ni;
+
+ if (len != sizeof(dstate)) {
+ return;
+ }
+
+ dstate = *((int32_t*)buf);
+
+ if (nodeid == (qp->ctx->qc_my_id))
+ return;
+
+ swab32(dstate);
+
+ if (dstate) {
+ clulog(LOG_CRIT, "qdiskd on node %d reports hung %s()\n",
+ state_to_string(dstate));
+ ni[nodeid-1].ni_misses = 0;
+ }
+}
int
main(int argc, char **argv)
@@ -1528,6 +1559,7 @@ main(int argc, char **argv)
char device[128];
pid_t pid;
quorum_header_t qh;
+ qd_priv_t qp;
if (check_process_running(argv[0], &pid) && pid !=getpid()) {
printf("QDisk services already running\n");
@@ -1559,10 +1591,16 @@ main(int argc, char **argv)
}
}
+ /* For cman notifications we need two sockets - one for events,
+ one for config change callbacks */
+ qp.ctx = &ctx;
+ qp.ni = &ni[0];
+ qp.ni_len = MAX_NODES_DISK;
+
#if (defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2)
- ch = cman_admin_init(NULL);
+ ch = cman_admin_init(&qp);
#else
- ch = cman_init(NULL);
+ ch = cman_init(&qp);
#endif
if (!ch) {
if (!foreground && !forked) {
@@ -1577,13 +1615,19 @@ main(int argc, char **argv)
do {
sleep(5);
#if (defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2)
- ch = cman_admin_init(NULL);
+ ch = cman_admin_init(&qp);
#else
- ch = cman_init(NULL);
+ ch = cman_init(&qp);
#endif
} while (!ch);
}
+ if (cman_start_recv_data(ch, qdisk_whine, CLUSTER_PORT_QDISKD) != 0) {
+ clulog_and_print(LOG_CRIT, "Could not register with CMAN: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
memset(&me, 0, sizeof(me));
while (cman_get_node(ch, CMAN_NODEID_US, &me) < 0) {
if (!foreground && !forked) {
@@ -1696,7 +1740,7 @@ main(int argc, char **argv)
}
}
- io_nanny_start(ctx.qc_tko * ctx.qc_interval);
+ io_nanny_start(ch, ctx.qc_tko * ctx.qc_interval);
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0)
cman_unregister_quorum_device(ctx.qc_ch);
11 years, 9 months
fence-agents: master - fence_ipdu: Minor fixes to fence agent
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=dff6b1...
Commit: dff6b12ab73c8c38fc579ec312f398d6711cfeb8
Parent: 74d1242d8b968d5886be4394c8792d8c8d93f321
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Mon Jul 23 11:24:43 2012 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Mon Jul 23 11:24:43 2012 +0200
fence_ipdu: Minor fixes to fence agent
* set the switch default via the option's 'default' value instead of assigning it by hand in main()
* remove code for backward compatibility with the old option notation (-n [switch]:[plug])
Resolves: rhbz#741985
---
fence/agents/ipdu/fence_ipdu.py | 13 ++-----------
1 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/fence/agents/ipdu/fence_ipdu.py b/fence/agents/ipdu/fence_ipdu.py
index 9c135e6..aac0b7d 100644
--- a/fence/agents/ipdu/fence_ipdu.py
+++ b/fence/agents/ipdu/fence_ipdu.py
@@ -49,7 +49,7 @@ def ipdu_set_device(conn,options):
agents_dir={'.1.3.6.1.4.1.2.6.223':IBMiPDU,
None:IBMiPDU}
- # First resolve type of APC
+ # First resolve type of PDU device
pdu_type=conn.walk(OID_SYS_OBJECT_ID)
if (not ((len(pdu_type)==1) and (agents_dir.has_key(pdu_type[0][1])))):
@@ -133,6 +133,7 @@ def get_outlets_status(conn, options):
def ipdu_snmp_define_defaults():
all_opt["snmp_version"]["default"]="3"
all_opt["community"]["default"]="private"
+ all_opt["switch"]["default"]="1"
device=IBMiPDU
# Main agent method
@@ -152,16 +153,6 @@ def main():
options=check_input(device_opt,process_input(device_opt))
- ## Support for -n [switch]:[plug] notation that was used before
- if ((options.has_key("-n")) and (-1 != options["-n"].find(":"))):
- (switch, plug) = options["-n"].split(":", 1)
- if ((switch.isdigit()) and (plug.isdigit())):
- options["-s"] = switch
- options["-n"] = plug
-
- if (not (options.has_key("-s"))):
- options["-s"]="1"
-
docs = { }
docs["shortdesc"] = "Fence agent for iPDU over SNMP"
docs["longdesc"] = "fence_ipdu is an I/O Fencing agent \
11 years, 9 months
fence-agents: master - Add iPDU fencing agent for model 46M4002
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=74d124...
Commit: 74d1242d8b968d5886be4394c8792d8c8d93f321
Parent: efcf34f5156385c397509f9fe40ae08ca74a96a0
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Fri Jul 20 15:43:14 2012 -0400
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Mon Jul 23 11:10:19 2012 +0200
Add iPDU fencing agent for model 46M4002
Resolves: rhbz#741985
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Marek 'marx' Grac <mgrac(a)redhat.com>
---
.gitignore | 1 +
configure.ac | 1 +
fence/agents/ipdu/Makefile.am | 17 ++++
fence/agents/ipdu/fence_ipdu.py | 179 +++++++++++++++++++++++++++++++++++++++
4 files changed, 198 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
index 3a182d8..c1b028c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,6 +49,7 @@ fence/agents/ifmib/fence_ifmib
fence/agents/ilo/fence_ilo
fence/agents/ilo_mp/fence_ilo_mp
fence/agents/intelmodular/fence_intelmodular
+fence/agents/ipdu/fence_ipdu
fence/agents/ipmilan/fence_ipmilan
fence/agents/ldom/fence_ldom
fence/agents/lib/fencing.py
diff --git a/configure.ac b/configure.ac
index 4797740..1b0ee22 100644
--- a/configure.ac
+++ b/configure.ac
@@ -263,6 +263,7 @@ AC_CONFIG_FILES([Makefile
fence/agents/eps/Makefile
fence/agents/hpblade/Makefile
fence/agents/ibmblade/Makefile
+ fence/agents/ipdu/Makefile
fence/agents/ifmib/Makefile
fence/agents/ilo/Makefile
fence/agents/ilo_mp/Makefile
diff --git a/fence/agents/ipdu/Makefile.am b/fence/agents/ipdu/Makefile.am
new file mode 100644
index 0000000..fb02cc8
--- /dev/null
+++ b/fence/agents/ipdu/Makefile.am
@@ -0,0 +1,17 @@
+MAINTAINERCLEANFILES = Makefile.in
+
+TARGET = fence_ipdu
+
+SRC = $(TARGET).py
+
+EXTRA_DIST = $(SRC)
+
+sbin_SCRIPTS = $(TARGET)
+
+man_MANS = $(TARGET).8
+
+include $(top_srcdir)/make/fencebuild.mk
+include $(top_srcdir)/make/fenceman.mk
+
+clean-local: clean-man
+ rm -f $(TARGET)
diff --git a/fence/agents/ipdu/fence_ipdu.py b/fence/agents/ipdu/fence_ipdu.py
new file mode 100644
index 0000000..9c135e6
--- /dev/null
+++ b/fence/agents/ipdu/fence_ipdu.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+
+# The Following agent has been tested on:
+# IBM iPDU model 46M4002
+# Firmware release OPDP_sIBM_v01.2_1
+#
+
+import sys, re, pexpect
+sys.path.append("/usr/share/fence")
+from fencing import *
+from fencing_snmp import *
+
+#BEGIN_VERSION_GENERATION
+RELEASE_VERSION="IBM iPDU SNMP fence agent"
+REDHAT_COPYRIGHT=""
+BUILD_DATE=""
+#END_VERSION_GENERATION
+
+### CONSTANTS ###
+# oid defining fence device
+OID_SYS_OBJECT_ID='.1.3.6.1.2.1.1.2.0'
+
+### GLOBAL VARIABLES ###
+# Device - see IBM iPDU
+device=None
+
+# Port ID
+port_id=None
+# Switch ID
+switch_id=None
+
+# Classes describing Device params
+class IBMiPDU:
+ # iPDU
+ status_oid= '.1.3.6.1.4.1.2.6.223.8.2.2.1.11.%d'
+ control_oid= '.1.3.6.1.4.1.2.6.223.8.2.2.1.11.%d'
+ outlet_table_oid='.1.3.6.1.4.1.2.6.223.8.2.2.1.2'
+ ident_str="IBM iPDU"
+ state_on=1
+ state_off=0
+ turn_on=1
+ turn_off=0
+ has_switches=False
+
+### FUNCTIONS ###
+def ipdu_set_device(conn,options):
+ global device
+
+ agents_dir={'.1.3.6.1.4.1.2.6.223':IBMiPDU,
+ None:IBMiPDU}
+
+ # First resolve type of APC
+ pdu_type=conn.walk(OID_SYS_OBJECT_ID)
+
+ if (not ((len(pdu_type)==1) and (agents_dir.has_key(pdu_type[0][1])))):
+ pdu_type=[[None,None]]
+
+ device=agents_dir[pdu_type[0][1]]
+
+ conn.log_command("Trying %s"%(device.ident_str))
+
+def ipdu_resolv_port_id(conn,options):
+ global port_id,switch_id,device
+
+ if (device==None):
+ ipdu_set_device(conn,options)
+
+ # Now we resolv port_id/switch_id
+ if ((options["-n"].isdigit()) and ((not device.has_switches) or (options["-s"].isdigit()))):
+ port_id=int(options["-n"])
+
+ if (device.has_switches):
+ switch_id=int(options["-s"])
+ else:
+ table=conn.walk(device.outlet_table_oid,30)
+
+ for x in table:
+ if (x[1].strip('"')==options["-n"]):
+ t=x[0].split('.')
+ if (device.has_switches):
+ port_id=int(t[len(t)-1])
+ switch_id=int(t[len(t)-3])
+ else:
+ port_id=int(t[len(t)-1])
+
+ if (port_id==None):
+ fail_usage("Can't find port with name %s!"%(options["-n"]))
+
+def get_power_status(conn,options):
+ global port_id,switch_id,device
+
+ if (port_id==None):
+ ipdu_resolv_port_id(conn,options)
+
+ oid=((device.has_switches) and device.status_oid%(switch_id,port_id) or device.status_oid%(port_id))
+
+ (oid,status)=conn.get(oid)
+ return (status==str(device.state_on) and "on" or "off")
+
+def set_power_status(conn, options):
+ global port_id,switch_id,device
+
+ if (port_id==None):
+ ipdu_resolv_port_id(conn,options)
+
+ oid=((device.has_switches) and device.control_oid%(switch_id,port_id) or device.control_oid%(port_id))
+
+ conn.set(oid,(options["-o"]=="on" and device.turn_on or device.turn_off))
+
+
+def get_outlets_status(conn, options):
+ global device
+
+ result={}
+
+ if (device==None):
+ ipdu_set_device(conn,options)
+
+ res_ports=conn.walk(device.outlet_table_oid,30)
+
+ for x in res_ports:
+ t=x[0].split('.')
+
+ port_num=((device.has_switches) and "%s:%s"%(t[len(t)-3],t[len(t)-1]) or "%s"%(t[len(t)-1]))
+
+ port_name=x[1].strip('"')
+ port_status=""
+ result[port_num]=(port_name,port_status)
+
+ return result
+
+# Define new options
+def ipdu_snmp_define_defaults():
+ all_opt["snmp_version"]["default"]="3"
+ all_opt["community"]["default"]="private"
+ device=IBMiPDU
+
+# Main agent method
+def main():
+ device_opt = [ "help", "version", "agent", "quiet", "verbose", "debug",
+ "action", "ipaddr", "login", "passwd", "passwd_script",
+ "test", "port", "separator", "no_login", "no_password",
+ "snmp_version", "community", "snmp_auth_prot", "snmp_sec_level",
+ "snmp_priv_prot", "snmp_priv_passwd", "snmp_priv_passwd_script",
+ "udpport","inet4_only","inet6_only",
+ "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]
+
+ atexit.register(atexit_handler)
+
+ snmp_define_defaults ()
+ ipdu_snmp_define_defaults()
+
+ options=check_input(device_opt,process_input(device_opt))
+
+ ## Support for -n [switch]:[plug] notation that was used before
+ if ((options.has_key("-n")) and (-1 != options["-n"].find(":"))):
+ (switch, plug) = options["-n"].split(":", 1)
+ if ((switch.isdigit()) and (plug.isdigit())):
+ options["-s"] = switch
+ options["-n"] = plug
+
+ if (not (options.has_key("-s"))):
+ options["-s"]="1"
+
+ docs = { }
+ docs["shortdesc"] = "Fence agent for iPDU over SNMP"
+ docs["longdesc"] = "fence_ipdu is an I/O Fencing agent \
+which can be used with the IBM iPDU network power switch. It logs \
+into a device via SNMP and reboots a specified outlet. It supports \
+SNMP v3 with all combinations of authenticity/privacy settings."
+ docs["vendorurl"] = "http://www.ibm.com"
+ show_docs(options, docs)
+
+ # Operate the fencing device
+ result = fence_action(FencingSnmp(options), options, set_power_status, get_power_status, get_outlets_status)
+
+ sys.exit(result)
+if __name__ == "__main__":
+ main()
11 years, 9 months