cluster: RHEL4 - gfs-kernel: bz245264 - gfs_tool: page allocation failure. order:4, mode:0xd0
by Abhijith Das
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: a9b72c1a181c640f1a469eeb13fe479fc9758cc9
Parent: db5c670bfecfeb3cdf748651181e7e82438e601d
Author: Abhijith Das <adas(a)redhat.com>
AuthorDate: Tue Sep 22 09:54:41 2009 -0500
Committer: Abhijith Das <adas(a)redhat.com>
CommitterDate: Tue Sep 22 09:54:41 2009 -0500
gfs-kernel: bz245264 - gfs_tool: page allocation failure. order:4, mode:0xd0
This patch changes the allocation in gi_skeleton from kmalloc to vmalloc.
This allows larger buffers to be allocated without tripping page allocation
failures.
---
gfs-kernel/src/gfs/ioctl.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/gfs-kernel/src/gfs/ioctl.c b/gfs-kernel/src/gfs/ioctl.c
index d73926a..588bf89 100644
--- a/gfs-kernel/src/gfs/ioctl.c
+++ b/gfs-kernel/src/gfs/ioctl.c
@@ -20,6 +20,7 @@
#include <linux/buffer_head.h>
#include <asm/uaccess.h>
#include <linux/gfs_ioctl.h>
+#include <linux/vmalloc.h>
#include "gfs.h"
#include "bmap.h"
@@ -66,7 +67,7 @@ gi_skeleton(struct gfs_inode *ip, struct gfs_ioctl *gi,
if (size > gi->gi_size)
size = gi->gi_size;
- buf = kmalloc(size, GFP_KERNEL);
+ buf = vmalloc(size);
if (!buf)
return -ENOMEM;
@@ -80,7 +81,7 @@ gi_skeleton(struct gfs_inode *ip, struct gfs_ioctl *gi,
error = count + 1;
out:
- kfree(buf);
+ vfree(buf);
return error;
}
14 years, 7 months
cluster: STABLE3 - cman: Allow re-registering of a quorum disk
by Christine Caulfield
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 161b6d3d7db0b34046c6e4b8c6c5818e655e549e
Parent: 9bf4612bceb34beeb9302d0cdcfa2a0dadb90a2c
Author: Christine Caulfield <ccaulfie(a)redhat.com>
AuthorDate: Tue Sep 22 09:10:19 2009 +0100
Committer: Christine Caulfield <ccaulfie(a)redhat.com>
CommitterDate: Tue Sep 22 09:10:19 2009 +0100
cman: Allow re-registering of a quorum disk
cman_register_quorum_device now allows the quorum device to be
re-registered, with different votes, provided the name stays the same.
This should make it easier for qdiskd to handle configuration changes
without the cluster losing quorum.
Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/daemon/commands.c | 55 ++++++++++++++++++++++++++++-------------------
1 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index c2169f0..cc3c8c9 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -488,7 +488,7 @@ static int do_cmd_set_version(char *cmdbuf, int *retlen)
/* If the passed-in version number is 0 then read the file now, then
* tell the other nodes to look for that version number.
* That means we also have to send the notification here, because it will
- * beskipped when we get our own RECONFIGURE message back as the version
+ * be skipped when we get our own RECONFIGURE message back, as the version
* number will match.
*/
if (!version->config) {
@@ -1106,38 +1106,49 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
if (!we_are_a_cluster_member)
return -ENOENT;
- if (quorum_device)
- return -EBUSY;
-
if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
return -EINVAL;
+ /* Allow re-registering of a quorum device if the name is the same */
+ if (quorum_device && strcmp(name, quorum_device->name))
+ return -EBUSY;
+
if (find_node_by_name(name))
return -EALREADY;
memcpy(&votes, cmdbuf, sizeof(int));
- quorum_device = malloc(sizeof(struct cluster_node));
- if (!quorum_device)
- return -ENOMEM;
- memset(quorum_device, 0, sizeof(struct cluster_node));
-
- quorum_device->name = malloc(strlen(name) + 1);
- if (!quorum_device->name) {
- free(quorum_device);
- quorum_device = NULL;
- return -ENOMEM;
- }
+ /* A new quorum device */
+ if (!quorum_device)
+ {
+ quorum_device = malloc(sizeof(struct cluster_node));
+ if (!quorum_device)
+ return -ENOMEM;
+ memset(quorum_device, 0, sizeof(struct cluster_node));
- strcpy(quorum_device->name, name);
- quorum_device->votes = votes;
- quorum_device->state = NODESTATE_DEAD;
- gettimeofday(&quorum_device->join_time, NULL);
+ quorum_device->name = malloc(strlen(name) + 1);
+ if (!quorum_device->name) {
+ free(quorum_device);
+ quorum_device = NULL;
+ return -ENOMEM;
+ }
+
+ strcpy(quorum_device->name, name);
+ quorum_device->state = NODESTATE_DEAD;
+ gettimeofday(&quorum_device->join_time, NULL);
+
+ /* Keep this list valid so it doesn't confuse other code */
+ list_init(&quorum_device->addr_list);
+ log_printf(LOG_INFO, "quorum device registered\n");
+ }
+ else
+ {
+ log_printf(LOG_INFO, "quorum device re-registered\n");
+ }
- /* Keep this list valid so it doesn't confuse other code */
- list_init(&quorum_device->addr_list);
+ /* Update votes even if it existed before */
+ quorum_device->votes = votes;
- log_printf(LOG_INFO, "quorum device registered\n");
return 0;
}
14 years, 7 months
cluster: RHEL55 - rgmanager: Fix missing path attribute handling
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: ca7c9e3bc4dafe47303e90a4a74b76ad29050e39
Parent: f847d934b59f3d085bd3923039b3064d2511c25c
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Aug 27 15:12:46 2009 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Thu Sep 17 18:30:09 2009 -0400
rgmanager: Fix missing path attribute handling
When using the Xen hypervisor with VM configs in a non-standard
location (e.g. not /etc/xen), rgmanager was
ignoring the path attribute, preventing VM management.
Resolves: rhbz#519786
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/vm.sh | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh
index 24e286d..df0c9b7 100755
--- a/rgmanager/src/resources/vm.sh
+++ b/rgmanager/src/resources/vm.sh
@@ -141,6 +141,17 @@ meta_data()
<content type="string" default="live"/>
</parameter>
+ <parameter name="path">
+ <longdesc lang="en">
+ Path specification 'xm create' will search for the specified
+ VM configuration file
+ </longdesc>
+ <shortdesc lang="en">
+ Path to virtual machine configuration files
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
<parameter name="snapshot">
<longdesc lang="en">
Path to the snapshot directory where the virtual machine
14 years, 7 months
dlm: master - dlm_controld: fix build failure
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=aeee6d80...
Commit: aeee6d80225cc0dfa8fa8a567603d31dfee5bc84
Parent: fff520b757bfd56307ed5a1a5b7192fea714f8a7
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Fri Sep 18 03:57:55 2009 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Sep 18 03:57:55 2009 +0200
dlm_controld: fix build failure
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
group/dlm_controld/member_cman.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
index ad0ff8e..880e3c1 100644
--- a/group/dlm_controld/member_cman.c
+++ b/group/dlm_controld/member_cman.c
@@ -83,7 +83,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
for (i = 0; i < old_node_count; i++) {
if (!is_cluster_member(old_nodes[i])) {
log_debug("cluster node %u removed", old_nodes[i]);
- node_history_cluster_remove(old_nodes[i].cn_nodeid);
+ node_history_cluster_remove(old_nodes[i]);
del_configfs_node(old_nodes[i]);
}
}
@@ -91,7 +91,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
for (i = 0; i < quorum_node_count; i++) {
if (!is_old_member(quorum_nodes[i])) {
log_debug("cluster node %u added", quorum_nodes[i]);
- node_history_cluster_add(cman_nodes[i].cn_nodeid);
+ node_history_cluster_add(quorum_nodes[i]);
err = corosync_cfg_get_node_addrs(ch, quorum_nodes[i],
MAX_NODE_ADDRESSES,
14 years, 7 months
fence: master - fenced: fix build failure
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/fence.git?p=fence.git;a=commitdiff;h=12ee...
Commit: 12eec752e6c0c1f3873915b5faa7d234c035537e
Parent: 89ee8af733c7030bcfce7457a2a4c4032e6bd74a
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Fri Sep 18 03:51:01 2009 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Sep 18 03:51:01 2009 +0200
fenced: fix build failure
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
fence/fenced/recover.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index 27a1c16..25e719c 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -83,7 +83,7 @@ static int reduce_victims(struct fd *fd)
list_for_each_entry_safe(node, safe, &fd->victims, list) {
if (is_cluster_member_reread(node->nodeid) &&
- in_clean_daemon_member(node->nodeid)) {
+ is_clean_daemon_member(node->nodeid)) {
log_debug("reduce victim %s", node->name);
victim_done(fd, node->nodeid, VIC_DONE_MEMBER);
list_del(&node->list);
@@ -328,7 +328,7 @@ void fence_victims(struct fd *fd)
fd->current_victim = node->nodeid;
cluster_member = is_cluster_member_reread(node->nodeid);
- cpg_member = in_clean_daemon_member(node->nodeid);
+ cpg_member = is_clean_daemon_member(node->nodeid);
ext = is_fenced_external(fd, node->nodeid);
if ((cluster_member && cpg_member) || ext) {
14 years, 7 months
cluster: STABLE3 - dlm_controld: fix start matching for partition+merge changes
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 9bf4612bceb34beeb9302d0cdcfa2a0dadb90a2c
Parent: 174f3a4cc1f5b46a90786fbacaa9d2b4a6296e44
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Sep 17 14:53:25 2009 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Sep 17 14:53:25 2009 -0500
dlm_controld: fix start matching for partition+merge changes
When a node is removed, added, then removed again due to a partition+merge,
the start messages for the second removal are mistakenly matched
to the first removal (since the change descriptions are identical).
To prevent this, detect when there are identical outstanding
changes and send a start+nack for the first before sending the
regular start for the second.
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 57 ++++++++++++++++++++++++++++++++++----
group/dlm_controld/dlm_daemon.h | 2 +
2 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 4c5ed5c..cf249c2 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -904,6 +904,12 @@ static int match_change(struct lockspace *ls, struct change *cg,
return 0;
}
+ if (memb->start_flags & DLM_MFLG_NACK) {
+ log_group(ls, "match_change %d:%u skip %u is nacked",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
if (memb->start && hd->type == DLM_MSG_START) {
log_group(ls, "match_change %d:%u skip %u already start",
hd->nodeid, seq, cg->seq);
@@ -1052,6 +1058,11 @@ static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len)
return;
}
+ if (memb->start_flags & DLM_MFLG_NACK) {
+ log_group(ls, "receive_start %d:%u is NACK", hd->nodeid, seq);
+ return;
+ }
+
node_history_start(ls, hd->nodeid);
memb->start = 1;
}
@@ -1095,9 +1106,9 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd,
ls->save_plocks = 0;
}
-static void send_info(struct lockspace *ls, int type)
+static void send_info(struct lockspace *ls, struct change *cg, int type,
+ uint32_t flags)
{
- struct change *cg;
struct dlm_header *hd;
struct ls_info *li;
struct id_info *id;
@@ -1105,8 +1116,6 @@ static void send_info(struct lockspace *ls, int type)
char *buf;
int len, id_count;
- cg = list_first_entry(&ls->changes, struct change, list);
-
id_count = cg->member_count;
len = sizeof(struct dlm_header) + sizeof(struct ls_info) +
@@ -1127,6 +1136,8 @@ static void send_info(struct lockspace *ls, int type)
hd->type = type;
hd->msgdata = cg->seq;
+ hd->flags = flags;
+
if (ls->joining)
hd->flags |= DLM_MFLG_JOINING;
if (!ls->need_plocks)
@@ -1162,12 +1173,45 @@ static void send_info(struct lockspace *ls, int type)
static void send_start(struct lockspace *ls)
{
- send_info(ls, DLM_MSG_START);
+ struct change *cg = list_first_entry(&ls->changes, struct change, list);
+
+ send_info(ls, cg, DLM_MSG_START, 0);
}
static void send_plocks_stored(struct lockspace *ls)
{
- send_info(ls, DLM_MSG_PLOCKS_STORED);
+ struct change *cg = list_first_entry(&ls->changes, struct change, list);
+
+ send_info(ls, cg, DLM_MSG_PLOCKS_STORED, 0);
+}
+
+static int same_members(struct change *cg1, struct change *cg2)
+{
+ struct member *memb;
+
+ list_for_each_entry(memb, &cg1->members, list) {
+ if (!find_memb(cg2, memb->nodeid))
+ return 0;
+ }
+ return 1;
+}
+
+static void send_nacks(struct lockspace *ls, struct change *startcg)
+{
+ struct change *cg;
+
+ list_for_each_entry(cg, &ls->changes, list) {
+ if (cg->seq < startcg->seq &&
+ cg->member_count == startcg->member_count &&
+ cg->joined_count == startcg->joined_count &&
+ cg->remove_count == startcg->remove_count &&
+ cg->failed_count == startcg->failed_count &&
+ same_members(cg, startcg)) {
+ log_group(ls, "send nack old cg %u new cg %u",
+ cg->seq, startcg->seq);
+ send_info(ls, cg, DLM_MSG_START, DLM_MFLG_NACK);
+ }
+ }
}
static int nodes_added(struct lockspace *ls)
@@ -1260,6 +1304,7 @@ static void apply_changes(struct lockspace *ls)
case CGST_WAIT_CONDITIONS:
if (wait_conditions_done(ls)) {
+ send_nacks(ls, cg);
send_start(ls);
cg->state = CGST_WAIT_MESSAGES;
}
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index 8143848..0ca895a 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -142,6 +142,8 @@ enum {
/* dlm_header flags */
#define DLM_MFLG_JOINING 1 /* accompanies start, we are joining */
#define DLM_MFLG_HAVEPLOCK 2 /* accompanies start, we have plock state */
+#define DLM_MFLG_NACK 4 /* accompanies start, prevent wrong match when
+ two outstanding changes are the same */
struct dlm_header {
uint16_t version[3];
14 years, 7 months
Pb cman_tool : aisexec daemon didn't start /// with cman-2.0.115-1.el5 & openais-0.80.6-8.el5
by Alain.Moulle
Hi,
I have just installed the latest releases of cman & openais on RHEL5,
with rpm -Uvh (to replace the previous releases I had on my systems,
which were cman-2.0.98 and openais-0.80.3-22.el5).
I have the same cluster.conf as with previous releases, but unfortunately,
when I start cman, I got :
Starting cluster:
Loading modules... done
Mounting configfs... done
Starting ccsd... done
Starting cman... failed
/usr/sbin/cman_tool: aisexec daemon didn't start
[FAILED]
and in the syslog I can only see :
Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Starting ccsd 2.0.115:
Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Built: Sep 17 2009 14:44:05
Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Copyright (C) Red Hat, Inc.
2004 All rights reserved.
Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: cluster.conf (cluster name =
iocell4-ha1, version = 2) found.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] AIS Executive
Service RELEASE 'subrev 1887 version 0.80.6'
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Copyright (C)
2002-2006 MontaVista Software, Inc and contributors.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Copyright (C) 2006
Red Hat, Inc.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] AIS Executive
Service: started and ready to provide service.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Using default
multicast address of 239.192.15.176
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Token Timeout (21000
ms) retransmit timeout (1039 ms)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] token hold (821 ms)
retransmits before loss (20 retrans)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] join (60 ms)
send_join (0 ms) consensus (4800 ms) merge (200 ms)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] downcheck (1000 ms)
fail to recv const (50 msgs)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] seqno unchanged
const (30 rotations) Maximum network MTU 1500
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] window size per
rotation (50 messages) maximum messages per rotation (17 messages)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] send threads (0 threads)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP token expired
timeout (1039 ms)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP token problem
counter (2000 ms)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP threshold (10
problem count)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP mode set to none.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM]
heartbeat_failures_allowed (0)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] max_network_delay
(50 ms)
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] HeartBeat is
Disabled. To enable set heartbeat_failures_allowed > 0
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Receive multicast
socket recv buffer size (288000 bytes).
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Transmit multicast
socket send buffer size (288000 bytes).
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] The network
interface [172.19.1.62] is now up.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Created or loaded
sequence id 144.172.19.1.62 for this ring.
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] entering GATHER
state from 15.
Sep 17 16:32:43 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 30 seconds.
Sep 17 16:33:14 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 60 seconds.
Sep 17 16:33:44 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 90 seconds.
Sep 17 16:34:14 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 120 seconds.
Sep 17 16:34:44 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 150 seconds.
Sep 17 16:35:15 s_sys@xena2 ccsd[11097]: Unable to connect to cluster
infrastructure after 180 seconds.
etc.
I don't understand why it was working before the update, and why it is not
working anymore ...
Any idea ?
Many thanks.
Regards
Alain
14 years, 7 months
cluster: STABLE3 - dlm_controld: log_debug to log_group
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 174f3a4cc1f5b46a90786fbacaa9d2b4a6296e44
Parent: 844ff74c533b72f24789f78aa82acb321fadc5bb
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Sep 16 16:04:56 2009 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Sep 16 16:04:56 2009 -0500
dlm_controld: log_debug to log_group
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index eb46bec..4c5ed5c 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -921,7 +921,7 @@ static int match_change(struct lockspace *ls, struct change *cg,
}
if (node->cluster_add_time > cg->create_time) {
- log_debug("match_change %d:%u skip cg %u created %llu "
+ log_group(ls, "match_change %d:%u skip cg %u created %llu "
"cluster add %llu", hd->nodeid, seq, cg->seq,
(unsigned long long)cg->create_time,
(unsigned long long)node->cluster_add_time);
14 years, 7 months
cluster: STABLE3 - gfs_controld: copy some fenced changes
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 844ff74c533b72f24789f78aa82acb321fadc5bb
Parent: f1be533f910238ab5350e1a63b2ee18f548bff6b
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Sep 16 16:03:46 2009 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Sep 16 16:03:46 2009 -0500
gfs_controld: copy some fenced changes
from the fenced commit bcc5fdef8473d99399c624a7bc15423a2af645c1
. copy some naming changes
. copy some logging changes
. copy some new checks for accepting start messages,
check for a start already matched to a struct change,
check that a node was a cluster member prior to the
creation of the struct change
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/gfs_controld/cpg-new.c | 170 +++++++++++++++++++++++++++++++++----
group/gfs_controld/gfs_daemon.h | 16 ++--
group/gfs_controld/main.c | 14 ++--
group/gfs_controld/member_cman.c | 6 +-
4 files changed, 170 insertions(+), 36 deletions(-)
diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c
index d08ca03..b3f25ae 100644
--- a/group/gfs_controld/cpg-new.c
+++ b/group/gfs_controld/cpg-new.c
@@ -126,6 +126,9 @@ struct node {
int withdraw;
int send_withdraw_ack;
+ uint64_t cluster_add_time;
+ uint64_t cluster_remove_time;
+
struct protocol proto;
};
@@ -158,6 +161,7 @@ struct change {
int we_joined;
uint32_t seq; /* used as a reference for debugging, and for queries */
uint32_t combined_seq; /* for queries */
+ uint64_t create_time;
};
struct save_msg {
@@ -167,7 +171,7 @@ struct save_msg {
};
static int dlmcontrol_fd;
-static int daemon_cpg_fd;
+static int cpg_fd_daemon;
static struct protocol our_protocol;
static struct list_head daemon_nodes;
static struct cpg_address daemon_member[MAX_NODES];
@@ -261,6 +265,59 @@ static void apply_changes_recovery(struct mountgroup *mg);
static void send_withdraw_acks(struct mountgroup *mg);
static void leave_mountgroup(struct mountgroup *mg, int mnterr);
+static void log_config(const struct cpg_name *group_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ char m_buf[128];
+ char j_buf[32];
+ char l_buf[32];
+ size_t i, len, pos;
+ int ret;
+
+ memset(m_buf, 0, sizeof(m_buf));
+ memset(j_buf, 0, sizeof(j_buf));
+ memset(l_buf, 0, sizeof(l_buf));
+
+ len = sizeof(m_buf);
+ pos = 0;
+ for (i = 0; i < member_list_entries; i++) {
+ ret = snprintf(m_buf + pos, len - pos, " %d",
+ member_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(j_buf);
+ pos = 0;
+ for (i = 0; i < joined_list_entries; i++) {
+ ret = snprintf(j_buf + pos, len - pos, " %d",
+ joined_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(l_buf);
+ pos = 0;
+ for (i = 0; i < left_list_entries; i++) {
+ ret = snprintf(l_buf + pos, len - pos, " %d",
+ left_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
+ member_list_entries, joined_list_entries, left_list_entries,
+ m_buf, j_buf, l_buf);
+}
+
static const char *msg_name(int type)
{
switch (type) {
@@ -470,7 +527,45 @@ static void node_history_init(struct mountgroup *mg, int nodeid,
node->nodeid = nodeid;
node->add_time = 0;
list_add_tail(&node->list, &mg->node_history);
- node->added_seq = cg->seq; /* for queries */
+
+ if (cg)
+ node->added_seq = cg->seq; /* for queries */
+}
+
+void node_history_cluster_add(int nodeid)
+{
+ struct mountgroup *mg;
+ struct node *node;
+
+ list_for_each_entry(mg, &mountgroups, list) {
+ node_history_init(mg, nodeid, NULL);
+
+ node = get_node_history(mg, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_add no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_add_time = time(NULL);
+ }
+}
+
+void node_history_cluster_remove(int nodeid)
+{
+ struct mountgroup *mg;
+ struct node *node;
+
+ list_for_each_entry(mg, &mountgroups, list) {
+ node = get_node_history(mg, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_remove no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_remove_time = time(NULL);
+ }
}
static void node_history_start(struct mountgroup *mg, int nodeid)
@@ -857,6 +952,7 @@ static int match_change(struct mountgroup *mg, struct change *cg,
{
struct id_info *id;
struct member *memb;
+ struct node *node;
uint32_t seq = hd->msgdata;
int i, members_mismatch;
@@ -880,6 +976,30 @@ static int match_change(struct mountgroup *mg, struct change *cg,
return 0;
}
+ if (memb->start && hd->type == GFS_MSG_START) {
+ log_group(mg, "match_change %d:%u skip %u already start",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* a node's start can't match a change if the node joined the cluster
+ more recently than the change was created */
+
+ node = get_node_history(mg, hd->nodeid);
+ if (!node) {
+ log_group(mg, "match_change %d:%u skip cg %u no node history",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ if (node->cluster_add_time > cg->create_time) {
+ log_group(mg, "match_change %d:%u skip cg %u created %llu "
+ "cluster add %llu", hd->nodeid, seq, cg->seq,
+ (unsigned long long)cg->create_time,
+ (unsigned long long)node->cluster_add_time);
+ return 0;
+ }
+
/* verify this is the right change by matching the counts
and the nodeids of the current members */
@@ -1016,7 +1136,7 @@ static void receive_start(struct mountgroup *mg, struct gfs_header *hd, int len)
added = is_added(mg, hd->nodeid);
- if (added && mi->started_count) {
+ if (added && mi->started_count && mg->started_count) {
log_error("receive_start %d:%u add node with started_count %u",
hd->nodeid, seq, mi->started_count);
@@ -1687,11 +1807,11 @@ static void create_old_nodes(struct mountgroup *mg)
return;
}
- node->jid = id->jid;
+ node->jid = id->jid;
node->kernel_mount_done = !!(id->flags & IDI_MOUNT_DONE);
node->kernel_mount_error = !!(id->flags & IDI_MOUNT_ERROR);
- node->ro = !!(id->flags & IDI_MOUNT_RO);
- node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
+ node->ro = !!(id->flags & IDI_MOUNT_RO);
+ node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
j = malloc(sizeof(struct journal));
if (!j) {
@@ -1749,7 +1869,7 @@ static void create_new_nodes(struct mountgroup *mg)
}
node->jid = JID_NONE;
- node->ro = !!(id->flags & IDI_MOUNT_RO);
+ node->ro = !!(id->flags & IDI_MOUNT_RO);
node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
log_group(mg, "create_new_nodes %d ro %d spect %d",
@@ -2033,7 +2153,7 @@ static void sync_state(struct mountgroup *mg)
/* Normal case where nodes join an established group that completed
first recovery sometime in the past. Existing nodes that weren't
around during first recovery come through here, and new nodes
- being added in this cycle come through here. */
+ being added in this cycle come through here. */
if (mg->first_recovery_needed) {
/* shouldn't happen */
@@ -2349,6 +2469,7 @@ static int add_change(struct mountgroup *mg,
INIT_LIST_HEAD(&cg->removed);
INIT_LIST_HEAD(&cg->saved_messages);
cg->state = CGST_WAIT_CONDITIONS;
+ cg->create_time = time(NULL);
cg->seq = ++mg->change_seq;
if (!cg->seq)
cg->seq = ++mg->change_seq;
@@ -2432,7 +2553,8 @@ static int add_change(struct mountgroup *mg,
return error;
}
-static int we_left(const struct cpg_address *left_list, size_t left_list_entries)
+static int we_left(const struct cpg_address *left_list,
+ size_t left_list_entries)
{
int i;
@@ -2456,6 +2578,10 @@ static void confchg_cb(cpg_handle_t handle,
struct change *cg;
int rv;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
mg = find_mg_handle(handle);
if (!mg) {
log_error("confchg_cb no mountgroup for cpg %s",
@@ -2591,7 +2717,7 @@ static cpg_callbacks_t cpg_callbacks = {
.cpg_confchg_fn = confchg_cb,
};
-static void process_mountgroup_cpg(int ci)
+static void process_cpg_mountgroup(int ci)
{
struct mountgroup *mg;
cpg_error_t error;
@@ -2634,7 +2760,7 @@ int gfs_join_mountgroup(struct mountgroup *mg)
cpg_fd_get(h, &fd);
- ci = client_add(fd, process_mountgroup_cpg, NULL);
+ ci = client_add(fd, process_cpg_mountgroup, NULL);
mg->cpg_handle = h;
mg->cpg_client = ci;
@@ -3047,7 +3173,7 @@ int set_protocol(void)
int rv;
memset(&pollfd, 0, sizeof(pollfd));
- pollfd.fd = daemon_cpg_fd;
+ pollfd.fd = cpg_fd_daemon;
pollfd.events = POLLIN;
while (1) {
@@ -3093,7 +3219,7 @@ int set_protocol(void)
}
if (pollfd.revents & POLLIN)
- process_cpg(0);
+ process_cpg_daemon(0);
if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
log_error("set_protocol poll revents %u",
pollfd.revents);
@@ -3139,6 +3265,8 @@ int set_protocol(void)
our_protocol.kernel_max[0],
our_protocol.kernel_max[1],
our_protocol.kernel_max[2]);
+
+ send_protocol(&our_protocol);
return 0;
}
@@ -3182,6 +3310,10 @@ static void confchg_cb_daemon(cpg_handle_t handle,
{
int i;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
if (joined_list_entries)
send_protocol(&our_protocol);
@@ -3199,7 +3331,7 @@ static cpg_callbacks_t cpg_callbacks_daemon = {
.cpg_confchg_fn = confchg_cb_daemon,
};
-void process_cpg(int ci)
+void process_cpg_daemon(int ci)
{
cpg_error_t error;
@@ -3208,7 +3340,7 @@ void process_cpg(int ci)
log_error("daemon cpg_dispatch error %d", error);
}
-int setup_cpg(void)
+int setup_cpg_daemon(void)
{
cpg_error_t error;
cpg_handle_t h;
@@ -3231,7 +3363,7 @@ int setup_cpg(void)
return -1;
}
- cpg_fd_get(h, &daemon_cpg_fd);
+ cpg_fd_get(h, &cpg_fd_daemon);
cpg_handle_daemon = h;
@@ -3252,15 +3384,15 @@ int setup_cpg(void)
goto fail;
}
- log_debug("setup_cpg %d", daemon_cpg_fd);
- return daemon_cpg_fd;
+ log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
+ return cpg_fd_daemon;
fail:
cpg_finalize(h);
return -1;
}
-void close_cpg(void)
+void close_cpg_daemon(void)
{
struct mountgroup *mg;
cpg_error_t error;
diff --git a/group/gfs_controld/gfs_daemon.h b/group/gfs_controld/gfs_daemon.h
index 8880b42..a69385b 100644
--- a/group/gfs_controld/gfs_daemon.h
+++ b/group/gfs_controld/gfs_daemon.h
@@ -70,7 +70,7 @@ extern int poll_ignore_plock;
extern int plock_fd;
extern int plock_ci;
extern struct list_head mountgroups;
-extern int cman_quorate;
+extern int cluster_quorate;
extern int our_nodeid;
extern char *clustername;
extern char daemon_debug_buf[256];
@@ -221,9 +221,9 @@ int read_ccs_int(const char *path, int *config_val);
void read_ccs_nodir(struct mountgroup *mg, char *buf);
/* cpg-new.c */
-int setup_cpg(void);
-void close_cpg(void);
-void process_cpg(int ci);
+int setup_cpg_daemon(void);
+void close_cpg_daemon(void);
+void process_cpg_daemon(int ci);
int setup_dlmcontrol(void);
void process_dlmcontrol(int ci);
int set_protocol(void);
@@ -240,6 +240,8 @@ int set_mountgroups(int *count, struct gfsc_mountgroup **mgs_out);
int set_mountgroup_nodes(struct mountgroup *mg, int option, int *node_count,
struct gfsc_node **nodes_out);
void free_mg(struct mountgroup *mg);
+void node_history_cluster_add(int nodeid);
+void node_history_cluster_remove(int nodeid);
/* cpg-old.c */
int setup_cpg_old(void);
@@ -294,9 +296,9 @@ void process_connection(int ci);
void cluster_dead(int ci);
/* member_cman.c */
-int setup_cman(void);
-void close_cman(void);
-void process_cman(int ci);
+int setup_cluster(void);
+void close_cluster(void);
+void process_cluster(int ci);
void kick_node_from_cluster(int nodeid);
/* plock.c */
diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c
index ae59d80..30f44eb 100644
--- a/group/gfs_controld/main.c
+++ b/group/gfs_controld/main.c
@@ -1131,10 +1131,10 @@ static void loop(void)
goto out;
client_add(rv, process_listener, NULL);
- rv = setup_cman();
+ rv = setup_cluster();
if (rv < 0)
goto out;
- client_add(rv, process_cman, cluster_dead);
+ client_add(rv, process_cluster, cluster_dead);
rv = setup_ccs();
if (rv < 0)
@@ -1184,10 +1184,10 @@ static void loop(void)
* code in: cpg-new.c
*/
- rv = setup_cpg();
+ rv = setup_cpg_daemon();
if (rv < 0)
goto out;
- client_add(rv, process_cpg, cluster_dead);
+ client_add(rv, process_cpg_daemon, cluster_dead);
rv = set_protocol();
if (rv < 0)
@@ -1291,7 +1291,7 @@ static void loop(void)
}
out:
if (group_mode == GROUP_LIBCPG)
- close_cpg();
+ close_cpg_daemon();
else if (group_mode == GROUP_LIBGROUP) {
close_plocks();
close_cpg_old();
@@ -1300,7 +1300,7 @@ static void loop(void)
close_groupd();
close_logging();
close_ccs();
- close_cman();
+ close_cluster();
if (!list_empty(&mountgroups))
log_error("mountgroups abandoned");
@@ -1565,7 +1565,7 @@ int poll_dlm;
int plock_fd;
int plock_ci;
struct list_head mountgroups;
-int cman_quorate;
+int cluster_quorate;
int our_nodeid;
char *clustername;
char daemon_debug_buf[256];
diff --git a/group/gfs_controld/member_cman.c b/group/gfs_controld/member_cman.c
index f536d30..0f78097 100644
--- a/group/gfs_controld/member_cman.c
+++ b/group/gfs_controld/member_cman.c
@@ -36,7 +36,7 @@ static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
}
}
-void process_cman(int ci)
+void process_cluster(int ci)
{
int rv;
@@ -45,7 +45,7 @@ void process_cman(int ci)
cluster_dead(0);
}
-int setup_cman(void)
+int setup_cluster(void)
{
cman_node_t node;
int rv, fd;
@@ -119,7 +119,7 @@ int setup_cman(void)
return fd;
}
-void close_cman(void)
+void close_cluster(void)
{
cman_finish(ch);
cman_finish(ch_admin);
14 years, 7 months
cluster: STABLE3 - dlm_controld: copy some fenced changes
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=f1be533f910238ab5350e1a63b2ee18f548bff6b
Commit: f1be533f910238ab5350e1a63b2ee18f548bff6b
Parent: 36279bd1cbed1b4a3a9026df00326148b42e67f6
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Sep 16 14:44:14 2009 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Sep 16 14:44:14 2009 -0500
dlm_controld: copy some fenced changes
from the fenced commit bcc5fdef8473d99399c624a7bc15423a2af645c1
. copy some naming changes
. copy some logging changes
. copy some new checks for accepting start messages,
check for a start already matched to a struct change,
check that a node was a cluster member prior to the
creation of the struct change
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 184 ++++++++++++++++++++++++++++++++------
group/dlm_controld/dlm_daemon.h | 8 +-
group/dlm_controld/main.c | 6 +-
group/dlm_controld/member_cman.c | 10 ++-
4 files changed, 173 insertions(+), 35 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 1704986..eb46bec 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -46,6 +46,8 @@ struct node {
int fs_notified;
uint64_t add_time;
uint64_t fence_time; /* for debug */
+ uint64_t cluster_add_time;
+ uint64_t cluster_remove_time;
uint32_t fence_queries; /* for debug */
uint32_t added_seq; /* for queries */
uint32_t removed_seq; /* for queries */
@@ -71,6 +73,7 @@ struct change {
int we_joined;
uint32_t seq; /* used as a reference for debugging, and for queries */
uint32_t combined_seq; /* for queries */
+ uint64_t create_time;
};
struct ls_info {
@@ -91,13 +94,66 @@ struct id_info {
};
int message_flow_control_on;
-static int daemon_cpg_fd;
-static cpg_handle_t daemon_cpg_handle;
+static cpg_handle_t cpg_handle_daemon;
+static int cpg_fd_daemon;
static struct protocol our_protocol;
static struct list_head daemon_nodes;
static struct cpg_address daemon_member[MAX_NODES];
static int daemon_member_count;
+static void log_config(const struct cpg_name *group_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ char m_buf[128];
+ char j_buf[32];
+ char l_buf[32];
+ size_t i, len, pos;
+ int ret;
+
+ memset(m_buf, 0, sizeof(m_buf));
+ memset(j_buf, 0, sizeof(j_buf));
+ memset(l_buf, 0, sizeof(l_buf));
+
+ len = sizeof(m_buf);
+ pos = 0;
+ for (i = 0; i < member_list_entries; i++) {
+ ret = snprintf(m_buf + pos, len - pos, " %d",
+ member_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(j_buf);
+ pos = 0;
+ for (i = 0; i < joined_list_entries; i++) {
+ ret = snprintf(j_buf + pos, len - pos, " %d",
+ joined_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(l_buf);
+ pos = 0;
+ for (i = 0; i < left_list_entries; i++) {
+ ret = snprintf(l_buf + pos, len - pos, " %d",
+ left_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
+ member_list_entries, joined_list_entries, left_list_entries,
+ m_buf, j_buf, l_buf);
+}
+
static void ls_info_in(struct ls_info *li)
{
li->ls_info_size = le32_to_cpu(li->ls_info_size);
@@ -359,7 +415,44 @@ static void node_history_init(struct lockspace *ls, int nodeid,
node->add_time = 0;
list_add_tail(&node->list, &ls->node_history);
out:
- node->added_seq = cg->seq; /* for queries */
+ if (cg)
+ node->added_seq = cg->seq; /* for queries */
+}
+
+void node_history_cluster_add(int nodeid)
+{
+ struct lockspace *ls;
+ struct node *node;
+
+ list_for_each_entry(ls, &lockspaces, list) {
+ node_history_init(ls, nodeid, NULL);
+
+ node = get_node_history(ls, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_add no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_add_time = time(NULL);
+ }
+}
+
+void node_history_cluster_remove(int nodeid)
+{
+ struct lockspace *ls;
+ struct node *node;
+
+ list_for_each_entry(ls, &lockspaces, list) {
+ node = get_node_history(ls, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_remove no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_remove_time = time(NULL);
+ }
}
static void node_history_start(struct lockspace *ls, int nodeid)
@@ -766,7 +859,7 @@ static void set_plock_ckpt_node(struct lockspace *ls)
}
static struct id_info *get_id_struct(struct id_info *ids, int count, int size,
- int nodeid)
+ int nodeid)
{
struct id_info *id = ids;
int i;
@@ -787,6 +880,7 @@ static int match_change(struct lockspace *ls, struct change *cg,
{
struct id_info *id;
struct member *memb;
+ struct node *node;
uint32_t seq = hd->msgdata;
int i, members_mismatch;
@@ -800,7 +894,7 @@ static int match_change(struct lockspace *ls, struct change *cg,
if (!id) {
log_group(ls, "match_change %d:%u skip %u we are not in members",
hd->nodeid, seq, cg->seq);
- return 0;
+ return 0;
}
memb = find_memb(cg, hd->nodeid);
@@ -810,6 +904,30 @@ static int match_change(struct lockspace *ls, struct change *cg,
return 0;
}
+ if (memb->start && hd->type == DLM_MSG_START) {
+ log_group(ls, "match_change %d:%u skip %u already start",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* a node's start can't match a change if the node joined the cluster
+ more recently than the change was created */
+
+ node = get_node_history(ls, hd->nodeid);
+ if (!node) {
+ log_group(ls, "match_change %d:%u skip cg %u no node history",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ if (node->cluster_add_time > cg->create_time) {
+ log_debug("match_change %d:%u skip cg %u created %llu "
+ "cluster add %llu", hd->nodeid, seq, cg->seq,
+ (unsigned long long)cg->create_time,
+ (unsigned long long)node->cluster_add_time);
+ return 0;
+ }
+
/* verify this is the right change by matching the counts
and the nodeids of the current members */
@@ -837,6 +955,7 @@ static int match_change(struct lockspace *ls, struct change *cg,
}
id = (struct id_info *)((char *)id + li->id_info_size);
}
+
if (members_mismatch)
return 0;
@@ -924,7 +1043,7 @@ static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len)
added = is_added(ls, hd->nodeid);
- if (added && li->started_count) {
+ if (added && li->started_count && ls->started_count) {
log_error("receive_start %d:%u add node with started_count %u",
hd->nodeid, seq, li->started_count);
@@ -1166,6 +1285,7 @@ void process_lockspace_changes(void)
poll_fencing = 0;
poll_quorum = 0;
poll_fs = 0;
+
list_for_each_entry_safe(ls, safe, &lockspaces, list) {
if (!list_empty(&ls->changes))
apply_changes(ls);
@@ -1192,6 +1312,7 @@ static int add_change(struct lockspace *ls,
INIT_LIST_HEAD(&cg->members);
INIT_LIST_HEAD(&cg->removed);
cg->state = CGST_WAIT_CONDITIONS;
+ cg->create_time = time(NULL);
cg->seq = ++ls->change_seq;
if (!cg->seq)
cg->seq = ++ls->change_seq;
@@ -1275,7 +1396,8 @@ static int add_change(struct lockspace *ls,
return error;
}
-static int we_left(const struct cpg_address *left_list, size_t left_list_entries)
+static int we_left(const struct cpg_address *left_list,
+ size_t left_list_entries)
{
int i;
@@ -1300,6 +1422,10 @@ static void confchg_cb(cpg_handle_t handle,
struct member *memb;
int rv;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
ls = find_ls_handle(handle);
if (!ls) {
log_error("confchg_cb no lockspace for cpg %s",
@@ -1450,7 +1576,7 @@ void update_flow_control_status(void)
cpg_flow_control_state_t flow_control_state;
cpg_error_t error;
- error = cpg_flow_control_state_get(daemon_cpg_handle,
+ error = cpg_flow_control_state_get(cpg_handle_daemon,
&flow_control_state);
if (error != CPG_OK) {
log_error("cpg_flow_control_state_get %d", error);
@@ -1470,7 +1596,7 @@ void update_flow_control_status(void)
}
}
-static void process_lockspace_cpg(int ci)
+static void process_cpg_lockspace(int ci)
{
struct lockspace *ls;
cpg_error_t error;
@@ -1516,7 +1642,7 @@ int dlm_join_lockspace(struct lockspace *ls)
cpg_fd_get(h, &fd);
- ci = client_add(fd, process_lockspace_cpg, NULL);
+ ci = client_add(fd, process_cpg_lockspace, NULL);
list_add(&ls->list, &lockspaces);
@@ -1846,7 +1972,7 @@ static void send_protocol(struct protocol *proto)
memcpy(pr, proto, sizeof(struct protocol));
protocol_out(pr);
- _send_message(daemon_cpg_handle, buf, len, DLM_MSG_PROTOCOL);
+ _send_message(cpg_handle_daemon, buf, len, DLM_MSG_PROTOCOL);
}
int set_protocol(void)
@@ -1857,7 +1983,7 @@ int set_protocol(void)
int rv;
memset(&pollfd, 0, sizeof(pollfd));
- pollfd.fd = daemon_cpg_fd;
+ pollfd.fd = cpg_fd_daemon;
pollfd.events = POLLIN;
while (1) {
@@ -1903,7 +2029,7 @@ int set_protocol(void)
}
if (pollfd.revents & POLLIN)
- process_cpg(0);
+ process_cpg_daemon(0);
if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
log_error("set_protocol poll revents %u",
pollfd.revents);
@@ -1949,6 +2075,8 @@ int set_protocol(void)
our_protocol.kernel_max[0],
our_protocol.kernel_max[1],
our_protocol.kernel_max[2]);
+
+ send_protocol(&our_protocol);
return 0;
}
@@ -1987,6 +2115,10 @@ static void confchg_cb_daemon(cpg_handle_t handle,
{
int i;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
if (joined_list_entries)
send_protocol(&our_protocol);
@@ -2004,16 +2136,16 @@ static cpg_callbacks_t cpg_callbacks_daemon = {
.cpg_confchg_fn = confchg_cb_daemon,
};
-void process_cpg(int ci)
+void process_cpg_daemon(int ci)
{
cpg_error_t error;
- error = cpg_dispatch(daemon_cpg_handle, CPG_DISPATCH_ALL);
+ error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL);
if (error != CPG_OK)
log_error("daemon cpg_dispatch error %d", error);
}
-int setup_cpg(void)
+int setup_cpg_daemon(void)
{
cpg_error_t error;
struct cpg_name name;
@@ -2029,20 +2161,20 @@ int setup_cpg(void)
our_protocol.kernel_max[1] = 1;
our_protocol.kernel_max[2] = 1;
- error = cpg_initialize(&daemon_cpg_handle, &cpg_callbacks_daemon);
+ error = cpg_initialize(&cpg_handle_daemon, &cpg_callbacks_daemon);
if (error != CPG_OK) {
log_error("daemon cpg_initialize error %d", error);
return -1;
}
- cpg_fd_get(daemon_cpg_handle, &daemon_cpg_fd);
+ cpg_fd_get(cpg_handle_daemon, &cpg_fd_daemon);
memset(&name, 0, sizeof(name));
sprintf(name.value, "dlm:controld");
name.length = strlen(name.value) + 1;
retry:
- error = cpg_join(daemon_cpg_handle, &name);
+ error = cpg_join(cpg_handle_daemon, &name);
if (error == CPG_ERR_TRY_AGAIN) {
sleep(1);
if (!(++i % 10))
@@ -2054,22 +2186,22 @@ int setup_cpg(void)
goto fail;
}
- log_debug("setup_cpg %d", daemon_cpg_fd);
- return daemon_cpg_fd;
+ log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
+ return cpg_fd_daemon;
fail:
- cpg_finalize(daemon_cpg_handle);
+ cpg_finalize(cpg_handle_daemon);
return -1;
}
-void close_cpg(void)
+void close_cpg_daemon(void)
{
struct lockspace *ls;
cpg_error_t error;
struct cpg_name name;
int i = 0;
- if (!daemon_cpg_handle)
+ if (!cpg_handle_daemon)
return;
if (cluster_down)
goto fin;
@@ -2079,7 +2211,7 @@ void close_cpg(void)
name.length = strlen(name.value) + 1;
retry:
- error = cpg_leave(daemon_cpg_handle, &name);
+ error = cpg_leave(cpg_handle_daemon, &name);
if (error == CPG_ERR_TRY_AGAIN) {
sleep(1);
if (!(++i % 10))
@@ -2093,7 +2225,7 @@ void close_cpg(void)
if (ls->cpg_handle)
cpg_finalize(ls->cpg_handle);
}
- cpg_finalize(daemon_cpg_handle);
+ cpg_finalize(cpg_handle_daemon);
}
/* fs_controld has seen nodedown for nodeid; it's now ok for dlm to do
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index 65723d1..8143848 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -232,9 +232,9 @@ int read_ccs_int(const char *path, int *config_val);
int get_weight(int nodeid, char *lockspace);
/* cpg.c */
-int setup_cpg(void);
-void close_cpg(void);
-void process_cpg(int ci);
+int setup_cpg_daemon(void);
+void close_cpg_daemon(void);
+void process_cpg_daemon(int ci);
int set_protocol(void);
void process_lockspace_changes(void);
void dlm_send_message(struct lockspace *ls, char *buf, int len);
@@ -242,6 +242,8 @@ int dlm_join_lockspace(struct lockspace *ls);
int dlm_leave_lockspace(struct lockspace *ls);
const char *msg_name(int type);
void update_flow_control_status(void);
+void node_history_cluster_add(int nodeid);
+void node_history_cluster_remove(int nodeid);
int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node);
int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace);
int set_lockspaces(int *count, struct dlmc_lockspace **lss_out);
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index 75ee55d..12e2592 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -938,10 +938,10 @@ static void loop(void)
log_debug("group_mode %d compat %d", group_mode, cfgd_groupd_compat);
if (group_mode == GROUP_LIBCPG) {
- rv = setup_cpg();
+ rv = setup_cpg_daemon();
if (rv < 0)
goto out;
- client_add(rv, process_cpg, cluster_dead);
+ client_add(rv, process_cpg_daemon, cluster_dead);
rv = set_protocol();
if (rv < 0)
@@ -1024,7 +1024,7 @@ static void loop(void)
if (cfgd_groupd_compat)
close_groupd();
if (group_mode == GROUP_LIBCPG) {
- close_cpg();
+ close_cpg_daemon();
close_plocks();
}
clear_configfs();
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
index 4caaaa5..c6b7cc7 100644
--- a/group/dlm_controld/member_cman.c
+++ b/group/dlm_controld/member_cman.c
@@ -99,8 +99,10 @@ static void statechange(void)
if (old_nodes[i].cn_member &&
!is_cluster_member(old_nodes[i].cn_nodeid)) {
- log_debug("cman: node %d removed",
- old_nodes[i].cn_nodeid);
+ log_debug("cluster node %d removed",
+ old_nodes[i].cn_nodeid);
+
+ node_history_cluster_remove(old_nodes[i].cn_nodeid);
del_configfs_node(old_nodes[i].cn_nodeid);
}
@@ -119,9 +121,11 @@ static void statechange(void)
addrptr = &cman_nodes[i].cn_address;
}
- log_debug("cman: node %d added",
+ log_debug("cluster node %d added",
cman_nodes[i].cn_nodeid);
+ node_history_cluster_add(cman_nodes[i].cn_nodeid);
+
for (j = 0; j < num_addrs; j++) {
add_configfs_node(cman_nodes[i].cn_nodeid,
addrptr[j].cna_address,
14 years, 7 months