Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 704fd5bb382ff452fc8403b9a2322e73b918619d
Parent: e3fdd916da5ad54e37bbc457f336fb482dbe3b14
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Fri Feb 19 09:45:17 2010 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Fri Feb 19 11:56:47 2010 -0500
qdisk: Autoconfigure default timings
Qdiskd has historically had bad default timings in STABLE3
and STABLE2. This patch makes qdiskd scale automatically
with the Totem token timeout.
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/man/qdisk.5 | 30 +++++++++++++++++++++++++++---
cman/qdisk/disk.h | 2 +-
cman/qdisk/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 70 insertions(+), 7 deletions(-)
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index f578e92..1a74470 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -80,7 +80,8 @@ capacity. If CMAN is not running, qdisk will wait for it.
* CMAN's eviction timeout should be at least 2x the quorum daemon's
to give the quorum daemon adequate time to converge on a master during a
-failure + load spike situation.
+failure + load spike situation. See section 3.3.1 for specific
+details.
* For 'all-but-one' failure operation, the total number of votes assigned
to the quorum device should be equal to or greater than the total number
@@ -211,6 +212,7 @@ This is the frequency of read/write cycles, in seconds.
\fItko\fP\fB="\fP10\fB"\fP
.in 12
This is the number of cycles a node must miss in order to be declared dead.
+The default for this number is dependent on the configured token timeout.
.in 9
\fItko_up\fP\fB="\fPX\fB"\fP
@@ -289,7 +291,7 @@ This option requires careful tuning of the CMAN timeout, the qdiskd
timeout, and CMAN's quorum_dev_poll value. As a rule of thumb,
CMAN's quorum_dev_poll value should be equal to Totem's token timeout
and qdiskd's timeout (interval*tko) should be less than half of
-Totem's token timeout.
+Totem's token timeout. See section 3.3.1 for more information.
This option only takes effect if there are no heuristics
configured. Usage of this option in configurations with more than
@@ -372,7 +374,7 @@ label. This is useful in configurations where the block device name
differs on a per-node basis.
.in 9
-\fIcman_label\fP\fB="\fPmylabel\fB"/>\fP
+\fIcman_label\fP\fB="\fPmylabel\fB"\fP
.in 12
This overrides the label advertised to CMAN if present. If specified,
the quorum daemon will register with this name instead of the actual
@@ -391,6 +393,28 @@ qdiskd is running. This option is ignored if io_timeout is set to
1.
\fB/>\fP
.in 0
+.SH "3.3.1. Quorum Disk Timings"
+Qdiskd should not be used in environments requiring failure detection
+times of less than approximately 10 seconds.
+
+Qdiskd will attempt to automatically configure timings based on the
+totem timeout and the TKO. If configuring manually, Totem's token
+timeout \fBmust\fP be set to a value at least 1 interval greater than
+the following function:
+
+ interval * (tko + master_wait + upgrade_wait)
+
+So, if you have an interval of 2, a tko of 7, master_wait of 2 and
+upgrade_wait of 2, the token timeout should be at least 24 seconds
+(24000 msec).
+
+It is recommended to have at least 3 intervals to reduce the risk of
+quorum loss during heavy I/O load. As a rule of thumb, using a totem
+timeout more than 2x of qdiskd's timeout will result in good behavior.
+
+An improper timing configuration will cause CMAN to give up on qdiskd,
+causing a temporary loss of quorum during master transition.
+
.SH "3.2. The <heuristic> tag"
This tag is a child of the <quorumd> tag. Heuristics may not be changed
while qdiskd is running.
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index c5b3d18..8678ca7 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -246,7 +246,7 @@ typedef struct {
int qc_max_error_cycles;
int qc_master; /* Master?! */
int qc_config;
- int qc_pad;
+ int qc_token_timeout;
disk_node_state_t qc_disk_status;
disk_node_state_t qc_status;
run_flag_t qc_flags;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index eb3ab3c..85a0563 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -24,6 +24,7 @@
#include <ccs.h>
#include <liblogthread.h>
#include "score.h"
+#include "../daemon/cman.h"
#include <sys/syslog.h>
#define LOG_DAEMON_NAME "qdiskd"
@@ -1472,6 +1473,7 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
{
char *val = NULL;
char query[256];
+ int qdisk_fo;
if (ccsfd < 0)
return -1;
@@ -1486,14 +1488,36 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
if (ctx->qc_interval < 1)
ctx->qc_interval = 1;
}
+
+ snprintf(query, sizeof(query), "/cluster/totem/@token");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ ctx->qc_token_timeout = atoi(val);
+ free(val);
+ if (ctx->qc_token_timeout < 10000) {
+ logt_print(LOG_DEBUG, "Token timeout %d is too fast "
+ "to use with qdiskd!\n",
+ ctx->qc_token_timeout);
+ }
+ } else {
+ ctx->qc_token_timeout = DEFAULT_TOKEN_TIMEOUT;
+ }
/* Get tko */
snprintf(query, sizeof(query), "/cluster/quorumd/@tko");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko = atoi(val);
free(val);
- if (ctx->qc_tko < 3)
- ctx->qc_tko = 3;
+ } else {
+ ctx->qc_tko = ((ctx->qc_token_timeout / 1000) -
+ ctx->qc_interval) / 2;
+ logt_print(LOG_DEBUG, "Auto-configured TKO as %d based on "
+ "token=%d interval=%d\n", ctx->qc_tko,
+ ctx->qc_token_timeout, ctx->qc_interval);
+ }
+
+ if (ctx->qc_tko < 4) {
+ logt_print(LOG_ERR, "Quorum disk TKO (%d) is too low!\n",
+ ctx->qc_tko);
}
/* Get up-tko (transition off->online) */
@@ -1527,7 +1551,22 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
}
if (ctx->qc_master_wait <= ctx->qc_tko_up)
ctx->qc_master_wait = ctx->qc_tko_up + 1;
-
+
+ qdisk_fo = ctx->qc_interval * (ctx->qc_master_wait +
+ ctx->qc_upgrade_wait +
+ ctx->qc_tko) * 1000;
+ if (qdisk_fo >= ctx->qc_token_timeout) {
+ logt_print(LOG_WARNING, "Quorum disk timings are too slow for "
+ "configured token timeout\n");
+ logt_print(LOG_WARNING, " * Totem Token timeout: %dms\n",
+ ctx->qc_token_timeout);
+ logt_print(LOG_WARNING, " * Min. Master recovery time: %dms\n",
+ qdisk_fo);
+ logt_print(LOG_WARNING,
+ "Please set token timeout to at least %dms\n",
+ qdisk_fo + (ctx->qc_interval * 1000));
+ }
+
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
if (ccs_get(ccsfd, query, &val) == 0) {