dlm: master - dlm_tool: show status of startup nodes
by David Teigland
Gitweb: http://git.fedorahosted.org/git/?p=dlm.git;a=commitdiff;h=cbc0685865996ce...
Commit: cbc0685865996ced65f5b52f037d32d838d63570
Parent: 87007d98a6f20bc4b923aa87937818e765bfb0a4
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Jan 29 13:58:14 2013 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Jan 29 14:03:47 2013 -0600
dlm_tool: show status of startup nodes
During startup fencing, the dlm_tool status
output did not make clear what was happening.
Now it displays the startup_nodes with
state 'U' (unknown).
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/daemon_cpg.c | 23 +++++++++++++++++++++++
dlm_controld/dlm_controld.h | 1 +
dlm_controld/dlm_daemon.h | 1 +
dlm_controld/lib.c | 12 +++++++++++-
dlm_controld/main.c | 1 +
5 files changed, 37 insertions(+), 1 deletions(-)
diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index 4002809..b6532a3 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -2133,6 +2133,29 @@ void send_state_daemon_nodes(int fd)
}
}
+void send_state_startup_nodes(int fd)
+{
+ struct node_daemon *node;
+ struct dlmc_state st;
+ char str[DLMC_STATE_MAXSTR];
+ int str_len;
+
+ list_for_each_entry(node, &startup_nodes, list) {
+ memset(&st, 0, sizeof(st));
+ st.type = DLMC_STATE_STARTUP_NODE;
+ st.nodeid = node->nodeid;
+
+ memset(str, 0, sizeof(str));
+ str_len = print_state_daemon_node(node, str);
+
+ st.str_len = str_len;
+
+ send(fd, &st, sizeof(st), MSG_NOSIGNAL);
+ if (str_len)
+ send(fd, str, str_len, MSG_NOSIGNAL);
+ }
+}
+
static int print_state_daemon(char *str)
{
snprintf(str, DLMC_STATE_MAXSTR-1,
diff --git a/dlm_controld/dlm_controld.h b/dlm_controld/dlm_controld.h
index d25bfc5..fc13795 100644
--- a/dlm_controld/dlm_controld.h
+++ b/dlm_controld/dlm_controld.h
@@ -50,6 +50,7 @@ struct dlmc_header {
#define DLMC_STATE_DAEMON 1
#define DLMC_STATE_DAEMON_NODE 2
+#define DLMC_STATE_STARTUP_NODE 3
struct dlmc_state {
uint32_t type; /* DLMC_STATE_ */
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 662cc35..60d3d46 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -349,6 +349,7 @@ void set_protocol_stateful(void);
int set_protocol(void);
void send_state_daemon_nodes(int fd);
void send_state_daemon(int fd);
+void send_state_startup_nodes(int fd);
void log_config(const struct cpg_name *group_name,
const struct cpg_address *member_list,
diff --git a/dlm_controld/lib.c b/dlm_controld/lib.c
index 1b78407..961626f 100644
--- a/dlm_controld/lib.c
+++ b/dlm_controld/lib.c
@@ -275,11 +275,20 @@ static void format_daemon_node(struct dlmc_state *st, char *str, char *bin, uint
char *node_line, char *fence_line)
{
unsigned int delay_fencing, result_wait, killed;
+ char letter;
+
+ if (st->type == DLMC_STATE_STARTUP_NODE)
+ letter = 'U';
+ else if (kv(str, "member"))
+ letter = 'M';
+ else
+ letter = 'X';
+
snprintf(node_line, DLMC_STATE_MAXSTR - 1,
"node %d %c add %u rem %u fail %u fence %u at %u %u\n",
st->nodeid,
- kv(str, "member") ? 'M' : 'X',
+ letter,
kv(str, "add_time"),
kv(str, "rem_time"),
kv(str, "fail_monotime"),
@@ -394,6 +403,7 @@ int dlmc_print_status(uint32_t flags)
break;
case DLMC_STATE_DAEMON_NODE:
+ case DLMC_STATE_STARTUP_NODE:
if (flags & DLMC_STATUS_VERBOSE) {
printf("nodeid %d\n", st->nodeid);
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index 953cf1b..8fb16ef 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -877,6 +877,7 @@ static void *process_queries(void *arg)
case DLMC_CMD_DUMP_STATUS:
send_state_daemon(f);
send_state_daemon_nodes(f);
+ send_state_startup_nodes(f);
break;
default:
break;
11 years, 2 months
dlm: master - dlm_controld: fix fencing retries
by David Teigland
Gitweb: http://git.fedorahosted.org/git/?p=dlm.git;a=commitdiff;h=87007d98a6f20bc...
Commit: 87007d98a6f20bc4b923aa87937818e765bfb0a4
Parent: 6a030a4efa08528c75ab5d045a3b3752a4b85fb2
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Jan 29 13:15:27 2013 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Jan 29 13:15:27 2013 -0600
dlm_controld: fix fencing retries
Fix the previous commit which caused fencing to
not be retried when daemon_fence_work was called
from anything but process_fencing_changes.
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/daemon_cpg.c | 25 +++++++++++++------------
1 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index 0eec5ce..4002809 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -731,7 +731,7 @@ static void fence_pid_cancel(int nodeid, int pid)
* later same as case B above
*/
-static int daemon_fence_work(void)
+static void daemon_fence_work(void)
{
struct node_daemon *node, *safe;
int rv, nodeid, pid, need, low, actor, result;
@@ -742,13 +742,13 @@ static int daemon_fence_work(void)
/* We've seen a nodedown confchg callback, but not the
corresponding ringid callback. */
log_retry(retry_fencing, "fence work wait for cpg ringid");
- return retry;
+ goto out;
}
if (cluster_ringid_seq != daemon_ringid.seq) {
/* wait for ringids to be in sync */
log_retry(retry_fencing, "fence work wait for cluster ringid");
- return retry;
+ goto out;
}
/* retry = 1; */
@@ -1105,21 +1105,22 @@ static int daemon_fence_work(void)
if (zombie_count)
clear_zombies();
- return retry;
-}
-
-void process_fencing_changes(void)
-{
- int retry;
-
- retry = daemon_fence_work();
-
+ /*
+ * setting retry_fencing will cause the main daemon poll loop
+ * to timeout in 1 second and call this function again.
+ */
+ out:
if (retry)
retry_fencing++;
else
retry_fencing = 0;
}
+void process_fencing_changes(void)
+{
+ daemon_fence_work();
+}
+
static void receive_fence_clear(struct dlm_header *hd, int len)
{
struct fence_result *fr;
11 years, 2 months
fence-agents: the annotated tag v4.0.0 has been created
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=09ee8c...
Commit: 09ee8c204b2736731587cbbdaaad07fea04a4638
Parent: 0000000000000000000000000000000000000000
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: 2013-01-29 15:41 +0000
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: 2013-01-29 15:41 +0000
annotated tag: v4.0.0 has been created
at 09ee8c204b2736731587cbbdaaad07fea04a4638 (tag)
tagging f450d953656d61f609d8874f515e9d4c48308d4b (commit)
replaces v3.1.11
v4.0.0 release
David Vossel (1):
fence_scsi: support unfence action in Pacemaker
Marek 'marx' Grac (27):
code cleanup: Fix minor warning according to pylint
code cleanup: Fix warnings according to pylint
code cleanup: Fix warnings according to pylint and typos
fence agents: Exceptions (EOF/TIMEOUT) should be handled in fencing library, not in every fence agent
fencing: Add support for usage of longopt keys in options
fence agents: Transfer non-SNMP fence agents to long-opts
fencing: Simplify exceptions catching when closing connection
fence agents: Transfer SNMP fence agents to long-opts + transfer script
fence_hds_cb: Style clean-up
fence_hds_cb: Port fence agent to longopts
fence_hds_cb: Push exception handling to fencing library
fencing: UUID can be entered also as port number (-n / --plug / port)
fencing: Replace common options with more flexible mechanism
fencing: Replace all short (getopt) options in code by their long variants
fencing: Operation 'reboot' is not working because fence_fabric was wrongly used
fence_lpar: Typo in definition of 'managed' option
fence_cisco_ucs: Simplify code by using rstrip()
fencing: Improve XML metadata output
fence_ilo_mp: Remove dependant device options like in rest of fence agents
fencing: Fix minor inconsistencies
fencing: Add new option --ssh-options
fencing: Move options which are used only once to appropriate fence agent
fencing: Fix usage of UUID if option --port/plug is not defined.
fencing: Do not check IP address if fence agent does not need any.
fence_lpar: Option 'partion' is moved to fence_lpar
fence_drac5: Fix regression on Dell CMC and Dell DRAC5
fence_drac5: Fix 'list' operation
Matt Clark (2):
New fencing script for Hitachi compute blade 2000
Fixed date of copyright. Copy paste error.
Ryan O'Hara (1):
fence_scsi: change on_target metadata attribute
11 years, 2 months
fence-agents: the annotated tag v3.1.12 has been updated
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=ab138e...
Commit: ab138e9885728f48c8bd67429059d9e52362a2a9
Parent: f450d953656d61f609d8874f515e9d4c48308d4b
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: 2013-01-29 15:43 +0000
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: 2013-01-29 15:43 +0000
annotated tag: v3.1.12 has been updated
to ab138e9885728f48c8bd67429059d9e52362a2a9 (tag)
from f450d953656d61f609d8874f515e9d4c48308d4b (which is now obsolete)
tagging f450d953656d61f609d8874f515e9d4c48308d4b (commit)
replaces v3.1.11
v3.1.12 release
David Vossel (1):
fence_scsi: support unfence action in Pacemaker
Marek 'marx' Grac (27):
code cleanup: Fix minor warning according to pylint
code cleanup: Fix warnings according to pylint
code cleanup: Fix warnings according to pylint and typos
fence agents: Exceptions (EOF/TIMEOUT) should be handled in fencing library, not in every fence agent
fencing: Add support for usage of longopt keys in options
fence agents: Transfer non-SNMP fence agents to long-opts
fencing: Simplify exceptions catching when closing connection
fence agents: Transfer SNMP fence agents to long-opts + transfer script
fence_hds_cb: Style clean-up
fence_hds_cb: Port fence agent to longopts
fence_hds_cb: Push exception handling to fencing library
fencing: UUID can be entered also as port number (-n / --plug / port)
fencing: Replace common options with more flexible mechanism
fencing: Replace all short (getopt) options in code by their long variants
fencing: Operation 'reboot' is not working because fence_fabric was wrongly used
fence_lpar: Typo in definition of 'managed' option
fence_cisco_ucs: Simplify code by using rstrip()
fencing: Improve XML metadata output
fence_ilo_mp: Remove dependant device options like in rest of fence agents
fencing: Fix minor inconsistencies
fencing: Add new option --ssh-options
fencing: Move options which are used only once to appropriate fence agent
fencing: Fix usage of UUID if option --port/plug is not defined.
fencing: Do not check IP address if fence agent does not need any.
fence_lpar: Option 'partion' is moved to fence_lpar
fence_drac5: Fix regression on Dell CMC and Dell DRAC5
fence_drac5: Fix 'list' operation
Matt Clark (2):
New fencing script for Hitachi compute blade 2000
Fixed date of copyright. Copy paste error.
Ryan O'Hara (1):
fence_scsi: change on_target metadata attribute
11 years, 2 months
fence-agents: master - fence_drac5: Fix 'list' operation
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=f450d9...
Commit: f450d953656d61f609d8874f515e9d4c48308d4b
Parent: 4bd62484e17cc63b27a103c744ec11fb00610b48
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Tue Jan 29 15:32:22 2013 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Tue Jan 29 15:32:22 2013 +0100
fence_drac5: Fix 'list' operation
Device option 'separator' is set automatically when 'port' occurs, but currently drac5 does not
use 'port / -n' but 'module_name / -m'.
---
fence/agents/lib/fencing.py.py | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index 6a5fd0b..12e7be3 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -381,6 +381,7 @@ DEPENDENCY_OPT = {
"secure" : [ "identity_file", "ssh_options" ],
"ipaddr" : [ "inet4_only", "inet6_only" ],
"port" : [ "separator" ],
+ "module_name" : [ "separator" ],
"community" : [ "snmp_auth_prot", "snmp_sec_level", "snmp_priv_prot", \
"snmp_priv_passwd", "snmp_priv_passwd_script" ]
}
11 years, 2 months
fence-agents: master - fence_drac5: Fix regression on Dell CMC and Dell DRAC5
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=4bd624...
Commit: 4bd62484e17cc63b27a103c744ec11fb00610b48
Parent: 81f9d0ac95b57791b5937ea96fac889e10782d53
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Tue Jan 29 15:24:20 2013 +0100
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Tue Jan 29 15:24:20 2013 +0100
fence_drac5: Fix regression on Dell CMC and Dell DRAC5
Standard EOL for agents connecting via ssh is CR/LF.
Some Dell devices represent CR/LF as a double-enter, which creates a problem when parsing the output.
This patch adds a check for double-enter. This can be detected in the function which powers the machine on/off,
because 'get power status' was run before, and if we find a line without any command we know that
there is a double-enter problem.
Resolves: rhbz#904195
Resolves: rhbz#904195
---
fence/agents/drac5/fence_drac5.py | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/fence/agents/drac5/fence_drac5.py b/fence/agents/drac5/fence_drac5.py
index 09157fe..3ec354e 100644
--- a/fence/agents/drac5/fence_drac5.py
+++ b/fence/agents/drac5/fence_drac5.py
@@ -46,7 +46,14 @@ def set_power_status(conn, options):
conn.send_eol("racadm serveraction " + action + " -m " + options["--module-name"])
elif options["model"] == "DRAC 5":
conn.send_eol("racadm serveraction " + action)
+
+ ## Fix issue with double-enter [CR/LF]
+ ## We need to read two additional command prompts (one from get + one from set command)
conn.log_expect(options, options["--command-prompt"], int(options["--power-timeout"]))
+ if len(conn.before.strip()) == 0:
+ options["eol"] = options["eol"][:-1]
+ conn.log_expect(options, options["--command-prompt"], int(options["--power-timeout"]))
+ conn.log_expect(options, options["--command-prompt"], int(options["--power-timeout"]))
def get_list_devices(conn, options):
outlets = { }
11 years, 2 months
dlm: master - dlm_controld: limit log debug
by David Teigland
Gitweb: http://git.fedorahosted.org/git/?p=dlm.git;a=commitdiff;h=6a030a4efa08528...
Commit: 6a030a4efa08528c75ab5d045a3b3752a4b85fb2
Parent: f15a2daed7137481c7161e1447b1796c308c7e08
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Jan 22 10:41:30 2013 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Jan 22 10:41:30 2013 -0600
dlm_controld: limit log debug
Avoid filling the debug buffer with log_debug
entries that occur within indefinite retries.
Limit to 60 log_debug repetitions, (once a
second for a minute.)
Also, after one minute, escalate these repeating
log_debugs to log_error messages. The log_error
messages repeat once an hour.
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/cpg.c | 44 ++++++++++++++++++++++++++++---
dlm_controld/daemon_cpg.c | 61 +++++++++++++++++++++++++++++++++------------
dlm_controld/dlm_daemon.h | 4 ++-
dlm_controld/main.c | 2 +-
4 files changed, 88 insertions(+), 23 deletions(-)
diff --git a/dlm_controld/cpg.c b/dlm_controld/cpg.c
index 0318b98..487887c 100644
--- a/dlm_controld/cpg.c
+++ b/dlm_controld/cpg.c
@@ -16,6 +16,16 @@
} \
})
+/* retries are once a second */
+#define log_retry(ls, fmt, args...) ({ \
+ if (ls->wait_retry < 60) \
+ log_group(ls, fmt, ##args); \
+ else if (ls->wait_retry == 60) \
+ log_erros(ls, fmt, ##args); \
+ else if (!(ls->wait_retry % 3600)) \
+ log_erros(ls, fmt, ##args); \
+})
+
/* per lockspace cpg: ls->node_history */
struct node {
@@ -591,30 +601,54 @@ static void stop_kernel(struct lockspace *ls, uint32_t seq)
static int wait_conditions_done(struct lockspace *ls)
{
if (!check_ringid_done(ls)) {
- ls->wait_debug = DLMC_LS_WAIT_RINGID;
+ if (ls->wait_debug != DLMC_LS_WAIT_RINGID) {
+ ls->wait_debug = DLMC_LS_WAIT_RINGID;
+ ls->wait_retry = 0;
+ }
+ ls->wait_retry++;
+ /* the check function logs a message */
+
return 0;
}
if (opt(enable_quorum_lockspace_ind) && !cluster_quorate) {
- log_group(ls, "wait for quorum");
- ls->wait_debug = DLMC_LS_WAIT_QUORUM;
+ if (ls->wait_debug != DLMC_LS_WAIT_QUORUM) {
+ ls->wait_debug = DLMC_LS_WAIT_QUORUM;
+ ls->wait_retry = 0;
+ }
+ ls->wait_retry++;
+ log_retry(ls, "wait for quorum");
+
poll_lockspaces++;
return 0;
}
if (!check_fencing_done(ls)) {
- ls->wait_debug = DLMC_LS_WAIT_FENCING;
+ if (ls->wait_debug != DLMC_LS_WAIT_FENCING) {
+ ls->wait_debug = DLMC_LS_WAIT_FENCING;
+ ls->wait_retry = 0;
+ }
+ ls->wait_retry++;
+ log_retry(ls, "wait for fencing");
+
poll_lockspaces++;
return 0;
}
if (!check_fs_done(ls)) {
- ls->wait_debug = DLMC_LS_WAIT_FSDONE;
+ if (ls->wait_debug != DLMC_LS_WAIT_FSDONE) {
+ ls->wait_debug = DLMC_LS_WAIT_FSDONE;
+ ls->wait_retry = 0;
+ }
+ ls->wait_retry++;
+ log_retry(ls, "wait for fsdone");
+
poll_fs++;
return 0;
}
ls->wait_debug = 0;
+ ls->wait_retry = 0;
return 1;
}
diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index 61d2da2..0eec5ce 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -11,6 +11,16 @@
/* protocol_version flags */
#define PV_STATEFUL 0x0001
+/* retries are once a second */
+#define log_retry(cur_count, fmt, args...) ({ \
+ if (cur_count < 60) \
+ log_debug(fmt, ##args); \
+ else if (cur_count == 60) \
+ log_error(fmt, ##args); \
+ else if (!(cur_count % 3600)) \
+ log_error(fmt, ##args); \
+})
+
struct protocol_version {
uint16_t major;
uint16_t minor;
@@ -106,6 +116,9 @@ static int fence_in_progress_unknown = 1;
static int zombie_pids[MAX_ZOMBIES];
static int zombie_count;
+static int fence_result_pid;
+static unsigned int fence_result_try;
+
static void send_fence_result(int nodeid, int result, uint32_t flags, uint64_t walltime);
static void send_fence_clear(int nodeid, int result, uint32_t flags, uint64_t walltime);
@@ -718,32 +731,33 @@ static void fence_pid_cancel(int nodeid, int pid)
* later same as case B above
*/
-static void daemon_fence_work(void)
+static int daemon_fence_work(void)
{
struct node_daemon *node, *safe;
int rv, nodeid, pid, need, low, actor, result;
+ int retry = 0;
uint32_t flags;
if (daemon_ringid_wait) {
/* We've seen a nodedown confchg callback, but not the
corresponding ringid callback. */
- log_debug("fence work wait for cpg ringid");
- return;
+ log_retry(retry_fencing, "fence work wait for cpg ringid");
+ return retry;
}
if (cluster_ringid_seq != daemon_ringid.seq) {
/* wait for ringids to be in sync */
- log_debug("fence work wait for cluster ringid");
- return;
+ log_retry(retry_fencing, "fence work wait for cluster ringid");
+ return retry;
}
- /* poll_fencing++; */
+ /* retry = 1; */
if (opt(enable_quorum_fencing_ind) && !cluster_quorate) {
/* wait for quorum before doing any fencing, but if there
is none, send_fence_clear below can unblock new nodes */
- log_debug("fence work wait for quorum");
- poll_fencing++;
+ log_retry(retry_fencing, "fence work wait for quorum");
+ retry = 1;
goto out_fipu;
}
@@ -766,7 +780,7 @@ static void daemon_fence_work(void)
log_debug("fence startup %d delay %d from %llu",
node->nodeid, opt(post_join_delay_ind),
(unsigned long long)daemon_last_join_monotime);
- poll_fencing++;
+ retry = 1;
continue;
}
@@ -837,10 +851,10 @@ static void daemon_fence_work(void)
if (!opt(enable_concurrent_fencing_ind) && daemon_fence_pid) {
/* run one agent at a time in case they need the same switch */
- log_debug("fence request %d delay for other pid %d",
+ log_retry(retry_fencing, "fence request %d delay for other pid %d",
node->nodeid, daemon_fence_pid);
node->delay_fencing = 1;
- poll_fencing++;
+ retry = 1;
continue;
}
@@ -849,7 +863,7 @@ static void daemon_fence_work(void)
node->nodeid, opt(post_join_delay_ind),
(unsigned long long)cluster_last_join_monotime);
node->delay_fencing = 1;
- poll_fencing++;
+ retry = 1;
continue;
}
node->delay_fencing = 0;
@@ -947,12 +961,19 @@ static void daemon_fence_work(void)
continue;
}
- poll_fencing++;
+ retry = 1;
rv = fence_result(nodeid, pid, &result);
if (rv == -EAGAIN) {
/* agent pid is still running */
- log_debug("fence wait %d pid %d running", nodeid, pid);
+
+ if (fence_result_pid != pid) {
+ fence_result_try = 0;
+ fence_result_pid = pid;
+ }
+ fence_result_try++;
+
+ log_retry(fence_result_try, "fence wait %d pid %d running", nodeid, pid);
continue;
}
@@ -1083,12 +1104,20 @@ static void daemon_fence_work(void)
if (zombie_count)
clear_zombies();
+
+ return retry;
}
void process_fencing_changes(void)
{
- poll_fencing = 0;
- daemon_fence_work();
+ int retry;
+
+ retry = daemon_fence_work();
+
+ if (retry)
+ retry_fencing++;
+ else
+ retry_fencing = 0;
}
static void receive_fence_clear(struct dlm_header *hd, int len)
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index ad99e17..662cc35 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -161,7 +161,7 @@ EXTERN struct dlm_option dlm_options[dlm_options_max];
EXTERN int daemon_quit;
EXTERN int cluster_down;
EXTERN int poll_lockspaces;
-EXTERN int poll_fencing;
+EXTERN unsigned int retry_fencing;
EXTERN int poll_fs;
EXTERN int poll_ignore_plock;
EXTERN int poll_drop_plock;
@@ -190,6 +190,7 @@ void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);
#define log_error(fmt, args...) log_level(NULL, LOG_ERR, fmt, ##args)
#define log_debug(fmt, args...) log_level(NULL, LOG_DEBUG, fmt, ##args)
+#define log_erros(ls, fmt, args...) log_level((ls)->name, LOG_ERR, fmt, ##args)
#define log_group(ls, fmt, args...) log_level((ls)->name, LOG_DEBUG, fmt, ##args)
#define log_plock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK|LOG_NONE, fmt, ##args)
@@ -258,6 +259,7 @@ struct lockspace {
int kernel_stopped;
int fs_registered;
int wait_debug; /* for status/debugging */
+ uint32_t wait_retry; /* for debug rate limiting */
uint32_t change_seq;
uint32_t started_count;
struct change *started_change;
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index a68e1b8..953cf1b 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -1059,7 +1059,7 @@ static void loop(void)
poll_timeout = -1;
- if (poll_fencing) {
+ if (retry_fencing) {
process_fencing_changes();
poll_timeout = 1000;
}
11 years, 2 months
cluster: STABLE32 - config: fix cluster.conf man page to reflect correct syslog_facility default
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=9c9a1d36ed6...
Commit: 9c9a1d36ed64d269b98124bfff9f79f58994397c
Parent: 7cdf5a89712994a06e1bf95879e580e81e6a9b9a
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Thu Jan 17 08:53:29 2013 +0100
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Thu Jan 17 08:53:29 2013 +0100
config: fix cluster.conf man page to reflect correct syslog_facility default
Resolves: rhbz#896191
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
config/man/cluster.conf.5 | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/config/man/cluster.conf.5 b/config/man/cluster.conf.5
index aa653be..ad2cfee 100644
--- a/config/man/cluster.conf.5
+++ b/config/man/cluster.conf.5
@@ -136,7 +136,7 @@ enable/disable messages to log file (yes/no), default "yes"
.TP 8
.B syslog_facility
-facility used for syslog messages, default "daemon"
+facility used for syslog messages, default "local4"
.TP 8
.B syslog_priority
@@ -158,7 +158,7 @@ a shortcut for logfile_priority="debug"
An explicit configuration for the default settings would be:
.P
.nf
-<logging to_syslog="yes" to_logfile="yes" syslog_facility="daemon"
+<logging to_syslog="yes" to_logfile="yes" syslog_facility="local4"
syslog_priority="info" logfile_priority="info">
<logging_daemon name="qdiskd"
logfile="/var/log/cluster/qdiskd.log"/>
11 years, 2 months