September 2009 - cluster-commits - Fedora Mailing-Lists

cluster: RHEL4 - gfs-kernel: bz245264 - gfs_tool: page allocation failure. order:4, mode:0xd0

by Abhijith Das

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: a9b72c1a181c640f1a469eeb13fe479fc9758cc9 Parent: db5c670bfecfeb3cdf748651181e7e82438e601d Author: Abhijith Das <adas(a)redhat.com> AuthorDate: Tue Sep 22 09:54:41 2009 -0500 Committer: Abhijith Das <adas(a)redhat.com> CommitterDate: Tue Sep 22 09:54:41 2009 -0500 gfs-kernel: bz245264 - gfs_tool: page allocation failure. order:4, mode:0xd0 This patch changes the allocation in gi_skeleton from kmalloc to vmalloc. Allows for larger buffers to be allocated without tripping page allocation failures. --- gfs-kernel/src/gfs/ioctl.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gfs-kernel/src/gfs/ioctl.c b/gfs-kernel/src/gfs/ioctl.c index d73926a..588bf89 100644 --- a/gfs-kernel/src/gfs/ioctl.c +++ b/gfs-kernel/src/gfs/ioctl.c @@ -20,6 +20,7 @@ #include <linux/buffer_head.h> #include <asm/uaccess.h> #include <linux/gfs_ioctl.h> +#include <linux/vmalloc.h> #include "gfs.h" #include "bmap.h" @@ -66,7 +67,7 @@ gi_skeleton(struct gfs_inode *ip, struct gfs_ioctl *gi, if (size > gi->gi_size) size = gi->gi_size; - buf = kmalloc(size, GFP_KERNEL); + buf = vmalloc(size); if (!buf) return -ENOMEM; @@ -80,7 +81,7 @@ gi_skeleton(struct gfs_inode *ip, struct gfs_ioctl *gi, error = count + 1; out: - kfree(buf); + vfree(buf); return error; }

14 years, 7 months

1
0
0 / 0

cluster: STABLE3 - cman: Allow re-registering of a quorum disk

by Christine Caulfield

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: 161b6d3d7db0b34046c6e4b8c6c5818e655e549e Parent: 9bf4612bceb34beeb9302d0cdcfa2a0dadb90a2c Author: Christine Caulfield <ccaulfie(a)redhat.com> AuthorDate: Tue Sep 22 09:10:19 2009 +0100 Committer: Christine Caulfield <ccaulfie(a)redhat.com> CommitterDate: Tue Sep 22 09:10:19 2009 +0100 cman: Allow re-registering of a quorum disk cman-register_quorum_device now allows the quorum device to be re-registered, with different votes, provided the name stays the same. This should make it easier for qdiskd to handle configuration changes without the cluster losing quorum. Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com> --- cman/daemon/commands.c | 55 ++++++++++++++++++++++++++++------------------- 1 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c index c2169f0..cc3c8c9 100644 --- a/cman/daemon/commands.c +++ b/cman/daemon/commands.c @@ -488,7 +488,7 @@ static int do_cmd_set_version(char *cmdbuf, int *retlen) /* If the passed-in version number is 0 then read the file now, then * tell the other nodes to look for that version number. * That means we also have to send the notification here, because it will - * beskipped when we get our own RECONFIGURE message back as the version + * be skipped when we get our own RECONFIGURE message back, as the version * number will match. 
*/ if (!version->config) { @@ -1106,38 +1106,49 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen) if (!we_are_a_cluster_member) return -ENOENT; - if (quorum_device) - return -EBUSY; - if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) return -EINVAL; + /* Allow re-registering of a quorum device if the name is the same */ + if (quorum_device && strcmp(name, quorum_device->name)) + return -EBUSY; + if (find_node_by_name(name)) return -EALREADY; memcpy(&votes, cmdbuf, sizeof(int)); - quorum_device = malloc(sizeof(struct cluster_node)); - if (!quorum_device) - return -ENOMEM; - memset(quorum_device, 0, sizeof(struct cluster_node)); - - quorum_device->name = malloc(strlen(name) + 1); - if (!quorum_device->name) { - free(quorum_device); - quorum_device = NULL; - return -ENOMEM; - } + /* A new quorum device */ + if (!quorum_device) + { + quorum_device = malloc(sizeof(struct cluster_node)); + if (!quorum_device) + return -ENOMEM; + memset(quorum_device, 0, sizeof(struct cluster_node)); - strcpy(quorum_device->name, name); - quorum_device->votes = votes; - quorum_device->state = NODESTATE_DEAD; - gettimeofday(&quorum_device->join_time, NULL); + quorum_device->name = malloc(strlen(name) + 1); + if (!quorum_device->name) { + free(quorum_device); + quorum_device = NULL; + return -ENOMEM; + } + + strcpy(quorum_device->name, name); + quorum_device->state = NODESTATE_DEAD; + gettimeofday(&quorum_device->join_time, NULL); + + /* Keep this list valid so it doesn't confuse other code */ + list_init(&quorum_device->addr_list); + log_printf(LOG_INFO, "quorum device registered\n"); + } + else + { + log_printf(LOG_INFO, "quorum device re-registered\n"); + } - /* Keep this list valid so it doesn't confuse other code */ - list_init(&quorum_device->addr_list); + /* Update votes even if it existed before */ + quorum_device->votes = votes; - log_printf(LOG_INFO, "quorum device registered\n"); return 0; }

14 years, 7 months

1
0
0 / 0

cluster: RHEL55 - rgmanager: Fix missing path attribute handling

by Lon Hohberger

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: ca7c9e3bc4dafe47303e90a4a74b76ad29050e39 Parent: f847d934b59f3d085bd3923039b3064d2511c25c Author: Lon Hohberger <lhh(a)redhat.com> AuthorDate: Thu Aug 27 15:12:46 2009 -0400 Committer: Lon Hohberger <lhh(a)redhat.com> CommitterDate: Thu Sep 17 18:30:09 2009 -0400 rgmanager: Fix missing path attribute handling If using the Xen hypervisor with vm configs in a non standard location (e.g. not /etc/xen), rgmanager was ignoring the path attribute, preventing VM management. Resolves: rhbz#519786 Signed-off-by: Lon Hohberger <lhh(a)redhat.com> --- rgmanager/src/resources/vm.sh | 11 +++++++++++ 1 files changed, 11 insertions(+), 0 deletions(-) diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh index 24e286d..df0c9b7 100755 --- a/rgmanager/src/resources/vm.sh +++ b/rgmanager/src/resources/vm.sh @@ -141,6 +141,17 @@ meta_data() <content type="string" default="live"/> </parameter> + <parameter name="path"> + <longdesc lang="en"> + Path specification 'xm create' will search for the specified + VM configuration file + </longdesc> + <shortdesc lang="en"> + Path to virtual machine configuration files + </shortdesc> + <content type="string"/> + </parameter> + <parameter name="snapshot"> <longdesc lang="en"> Path to the snapshot directory where the virtual machine

14 years, 7 months

1
0
0 / 0

dlm: master - dlm_controld: fix build failure

by Fabio M. Di Nitto

Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=aeee6d80... Commit: aeee6d80225cc0dfa8fa8a567603d31dfee5bc84 Parent: fff520b757bfd56307ed5a1a5b7192fea714f8a7 Author: Fabio M. Di Nitto <fdinitto(a)redhat.com> AuthorDate: Fri Sep 18 03:57:55 2009 +0200 Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com> CommitterDate: Fri Sep 18 03:57:55 2009 +0200 dlm_controld: fix build failure Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com> --- group/dlm_controld/member_cman.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c index ad0ff8e..880e3c1 100644 --- a/group/dlm_controld/member_cman.c +++ b/group/dlm_controld/member_cman.c @@ -83,7 +83,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, for (i = 0; i < old_node_count; i++) { if (!is_cluster_member(old_nodes[i])) { log_debug("cluster node %u removed", old_nodes[i]); - node_history_cluster_remove(old_nodes[i].cn_nodeid); + node_history_cluster_remove(old_nodes[i]); del_configfs_node(old_nodes[i]); } } @@ -91,7 +91,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, for (i = 0; i < quorum_node_count; i++) { if (!is_old_member(quorum_nodes[i])) { log_debug("cluster node %u added", quorum_nodes[i]); - node_history_cluster_add(cman_nodes[i].cn_nodeid); + node_history_cluster_add(quorum_nodes[i]); err = corosync_cfg_get_node_addrs(ch, quorum_nodes[i], MAX_NODE_ADDRESSES,

14 years, 7 months

1
0
0 / 0

fence: master - fenced: fix build failure

by Fabio M. Di Nitto

Gitweb: http://git.fedorahosted.org/git/fence.git?p=fence.git;a=commitdiff;h=12ee... Commit: 12eec752e6c0c1f3873915b5faa7d234c035537e Parent: 89ee8af733c7030bcfce7457a2a4c4032e6bd74a Author: Fabio M. Di Nitto <fdinitto(a)redhat.com> AuthorDate: Fri Sep 18 03:51:01 2009 +0200 Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com> CommitterDate: Fri Sep 18 03:51:01 2009 +0200 fenced: fix build failure Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com> --- fence/fenced/recover.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c index 27a1c16..25e719c 100644 --- a/fence/fenced/recover.c +++ b/fence/fenced/recover.c @@ -83,7 +83,7 @@ static int reduce_victims(struct fd *fd) list_for_each_entry_safe(node, safe, &fd->victims, list) { if (is_cluster_member_reread(node->nodeid) && - in_clean_daemon_member(node->nodeid)) { + is_clean_daemon_member(node->nodeid)) { log_debug("reduce victim %s", node->name); victim_done(fd, node->nodeid, VIC_DONE_MEMBER); list_del(&node->list); @@ -328,7 +328,7 @@ void fence_victims(struct fd *fd) fd->current_victim = node->nodeid; cluster_member = is_cluster_member_reread(node->nodeid); - cpg_member = in_clean_daemon_member(node->nodeid); + cpg_member = is_clean_daemon_member(node->nodeid); ext = is_fenced_external(fd, node->nodeid); if ((cluster_member && cpg_member) || ext) {

14 years, 7 months

1
0
0 / 0

cluster: STABLE3 - dlm_controld: fix start matching for partition+merge changes

by David Teigland

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: 9bf4612bceb34beeb9302d0cdcfa2a0dadb90a2c Parent: 174f3a4cc1f5b46a90786fbacaa9d2b4a6296e44 Author: David Teigland <teigland(a)redhat.com> AuthorDate: Thu Sep 17 14:53:25 2009 -0500 Committer: David Teigland <teigland(a)redhat.com> CommitterDate: Thu Sep 17 14:53:25 2009 -0500 dlm_controld: fix start matching for partition+merge changes When a node is removed, added, removed due to a partition+merge, the start messages for the second removal are mistakenly matched to the first removal (since the change descriptions are identical). To prevent this, detect when there are identical outstanding changes and send a start+nack for the first before sending the regular start for the second. Signed-off-by: David Teigland <teigland(a)redhat.com> --- group/dlm_controld/cpg.c | 57 ++++++++++++++++++++++++++++++++++---- group/dlm_controld/dlm_daemon.h | 2 + 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c index 4c5ed5c..cf249c2 100644 --- a/group/dlm_controld/cpg.c +++ b/group/dlm_controld/cpg.c @@ -904,6 +904,12 @@ static int match_change(struct lockspace *ls, struct change *cg, return 0; } + if (memb->start_flags & DLM_MFLG_NACK) { + log_group(ls, "match_change %d:%u skip %u is nacked", + hd->nodeid, seq, cg->seq); + return 0; + } + if (memb->start && hd->type == DLM_MSG_START) { log_group(ls, "match_change %d:%u skip %u already start", hd->nodeid, seq, cg->seq); @@ -1052,6 +1058,11 @@ static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len) return; } + if (memb->start_flags & DLM_MFLG_NACK) { + log_group(ls, "receive_start %d:%u is NACK", hd->nodeid, seq); + return; + } + node_history_start(ls, hd->nodeid); memb->start = 1; } @@ -1095,9 +1106,9 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd, ls->save_plocks = 0; } -static void send_info(struct lockspace *ls, int 
type) +static void send_info(struct lockspace *ls, struct change *cg, int type, + uint32_t flags) { - struct change *cg; struct dlm_header *hd; struct ls_info *li; struct id_info *id; @@ -1105,8 +1116,6 @@ static void send_info(struct lockspace *ls, int type) char *buf; int len, id_count; - cg = list_first_entry(&ls->changes, struct change, list); - id_count = cg->member_count; len = sizeof(struct dlm_header) + sizeof(struct ls_info) + @@ -1127,6 +1136,8 @@ static void send_info(struct lockspace *ls, int type) hd->type = type; hd->msgdata = cg->seq; + hd->flags = flags; + if (ls->joining) hd->flags |= DLM_MFLG_JOINING; if (!ls->need_plocks) @@ -1162,12 +1173,45 @@ static void send_info(struct lockspace *ls, int type) static void send_start(struct lockspace *ls) { - send_info(ls, DLM_MSG_START); + struct change *cg = list_first_entry(&ls->changes, struct change, list); + + send_info(ls, cg, DLM_MSG_START, 0); } static void send_plocks_stored(struct lockspace *ls) { - send_info(ls, DLM_MSG_PLOCKS_STORED); + struct change *cg = list_first_entry(&ls->changes, struct change, list); + + send_info(ls, cg, DLM_MSG_PLOCKS_STORED, 0); +} + +static int same_members(struct change *cg1, struct change *cg2) +{ + struct member *memb; + + list_for_each_entry(memb, &cg1->members, list) { + if (!find_memb(cg2, memb->nodeid)) + return 0; + } + return 1; +} + +static void send_nacks(struct lockspace *ls, struct change *startcg) +{ + struct change *cg; + + list_for_each_entry(cg, &ls->changes, list) { + if (cg->seq < startcg->seq && + cg->member_count == startcg->member_count && + cg->joined_count == startcg->joined_count && + cg->remove_count == startcg->remove_count && + cg->failed_count == startcg->failed_count && + same_members(cg, startcg)) { + log_group(ls, "send nack old cg %u new cg %u", + cg->seq, startcg->seq); + send_info(ls, cg, DLM_MSG_START, DLM_MFLG_NACK); + } + } } static int nodes_added(struct lockspace *ls) @@ -1260,6 +1304,7 @@ static void apply_changes(struct 
lockspace *ls) case CGST_WAIT_CONDITIONS: if (wait_conditions_done(ls)) { + send_nacks(ls, cg); send_start(ls); cg->state = CGST_WAIT_MESSAGES; } diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h index 8143848..0ca895a 100644 --- a/group/dlm_controld/dlm_daemon.h +++ b/group/dlm_controld/dlm_daemon.h @@ -142,6 +142,8 @@ enum { /* dlm_header flags */ #define DLM_MFLG_JOINING 1 /* accompanies start, we are joining */ #define DLM_MFLG_HAVEPLOCK 2 /* accompanies start, we have plock state */ +#define DLM_MFLG_NACK 4 /* accompanies start, prevent wrong match when + two outstanding changes are the same */ struct dlm_header { uint16_t version[3];

14 years, 7 months

1
0
0 / 0

Pb cman_tool : aisexec daemon didn't start /// with cman-2.0.115-1.el5 & openais-0.80.6-8.el5

by Alain.Moulle

Hi, I have just installed the latest releases of cman & openais on RHEL5, with rpm -Uvh (to replace the previous releases I had on my systems, which were cman-2.0.98 and openais-0.80.3-22.el5). I have the same cluster.conf as with the previous releases, but unfortunately, when I start cman, I get: Starting cluster: Loading modules... done Mounting configfs... done Starting ccsd... done Starting cman... failed /usr/sbin/cman_tool: aisexec daemon didn't start [FAILED] and in the syslog I can only see : Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Starting ccsd 2.0.115: Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Built: Sep 17 2009 14:44:05 Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: Copyright (C) Red Hat, Inc. 2004 All rights reserved. Sep 17 16:32:14 s_sys@xena2 ccsd[11097]: cluster.conf (cluster name = iocell4-ha1, version = 2) found. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] AIS Executive Service RELEASE 'subrev 1887 version 0.80.6' Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Copyright (C) 2002-2006 MontaVista Software, Inc and contributors. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Copyright (C) 2006 Red Hat, Inc. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] AIS Executive Service: started and ready to provide service. 
Sep 17 16:32:17 s_sys@xena2 openais[11103]: [MAIN ] Using default multicast address of 239.192.15.176 Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Token Timeout (21000 ms) retransmit timeout (1039 ms) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] token hold (821 ms) retransmits before loss (20 retrans) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] join (60 ms) send_join (0 ms) consensus (4800 ms) merge (200 ms) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] downcheck (1000 ms) fail to recv const (50 msgs) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] seqno unchanged const (30 rotations) Maximum network MTU 1500 Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] window size per rotation (50 messages) maximum messages per rotation (17 messages) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] send threads (0 threads) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP token expired timeout (1039 ms) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP token problem counter (2000 ms) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP threshold (10 problem count) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] RRP mode set to none. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] heartbeat_failures_allowed (0) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] max_network_delay (50 ms) Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0 Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Receive multicast socket recv buffer size (288000 bytes). Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Transmit multicast socket send buffer size (288000 bytes). Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] The network interface [172.19.1.62] is now up. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] Created or loaded sequence id 144.172.19.1.62 for this ring. Sep 17 16:32:17 s_sys@xena2 openais[11103]: [TOTEM] entering GATHER state from 15. 
Sep 17 16:32:43 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 30 seconds. Sep 17 16:33:14 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 60 seconds. Sep 17 16:33:44 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 90 seconds. Sep 17 16:34:14 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 120 seconds. Sep 17 16:34:44 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 150 seconds. Sep 17 16:35:15 s_sys@xena2 ccsd[11097]: Unable to connect to cluster infrastructure after 180 seconds. etc. I don't understand why it was working before the update and why it is not working anymore. Any ideas? Many thanks. Regards, Alain

14 years, 7 months

1
0
0 / 0

cluster: STABLE3 - dlm_controld: log_debug to log_group

by David Teigland

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: 174f3a4cc1f5b46a90786fbacaa9d2b4a6296e44 Parent: 844ff74c533b72f24789f78aa82acb321fadc5bb Author: David Teigland <teigland(a)redhat.com> AuthorDate: Wed Sep 16 16:04:56 2009 -0500 Committer: David Teigland <teigland(a)redhat.com> CommitterDate: Wed Sep 16 16:04:56 2009 -0500 dlm_controld: log_debug to log_group Signed-off-by: David Teigland <teigland(a)redhat.com> --- group/dlm_controld/cpg.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c index eb46bec..4c5ed5c 100644 --- a/group/dlm_controld/cpg.c +++ b/group/dlm_controld/cpg.c @@ -921,7 +921,7 @@ static int match_change(struct lockspace *ls, struct change *cg, } if (node->cluster_add_time > cg->create_time) { - log_debug("match_change %d:%u skip cg %u created %llu " + log_group(ls, "match_change %d:%u skip cg %u created %llu " "cluster add %llu", hd->nodeid, seq, cg->seq, (unsigned long long)cg->create_time, (unsigned long long)node->cluster_add_time);

14 years, 7 months

1
0
0 / 0

cluster: STABLE3 - gfs_controld: copy some fenced changes

by David Teigland

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: 844ff74c533b72f24789f78aa82acb321fadc5bb Parent: f1be533f910238ab5350e1a63b2ee18f548bff6b Author: David Teigland <teigland(a)redhat.com> AuthorDate: Wed Sep 16 16:03:46 2009 -0500 Committer: David Teigland <teigland(a)redhat.com> CommitterDate: Wed Sep 16 16:03:46 2009 -0500 gfs_controld: copy some fenced changes from the fenced commit bcc5fdef8473d99399c624a7bc15423a2af645c1 . copy some naming changes . copy some logging changes . copy some new checks for accepting start messages, check for a start already matched to a struct change, check that a node was a cluster member prior to the creation of the struct change Signed-off-by: David Teigland <teigland(a)redhat.com> --- group/gfs_controld/cpg-new.c | 170 +++++++++++++++++++++++++++++++++---- group/gfs_controld/gfs_daemon.h | 16 ++-- group/gfs_controld/main.c | 14 ++-- group/gfs_controld/member_cman.c | 6 +- 4 files changed, 170 insertions(+), 36 deletions(-) diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c index d08ca03..b3f25ae 100644 --- a/group/gfs_controld/cpg-new.c +++ b/group/gfs_controld/cpg-new.c @@ -126,6 +126,9 @@ struct node { int withdraw; int send_withdraw_ack; + uint64_t cluster_add_time; + uint64_t cluster_remove_time; + struct protocol proto; }; @@ -158,6 +161,7 @@ struct change { int we_joined; uint32_t seq; /* used as a reference for debugging, and for queries */ uint32_t combined_seq; /* for queries */ + uint64_t create_time; }; struct save_msg { @@ -167,7 +171,7 @@ struct save_msg { }; static int dlmcontrol_fd; -static int daemon_cpg_fd; +static int cpg_fd_daemon; static struct protocol our_protocol; static struct list_head daemon_nodes; static struct cpg_address daemon_member[MAX_NODES]; @@ -261,6 +265,59 @@ static void apply_changes_recovery(struct mountgroup *mg); static void send_withdraw_acks(struct mountgroup *mg); static void leave_mountgroup(struct mountgroup *mg, int 
mnterr); +static void log_config(const struct cpg_name *group_name, + const struct cpg_address *member_list, + size_t member_list_entries, + const struct cpg_address *left_list, + size_t left_list_entries, + const struct cpg_address *joined_list, + size_t joined_list_entries) +{ + char m_buf[128]; + char j_buf[32]; + char l_buf[32]; + size_t i, len, pos; + int ret; + + memset(m_buf, 0, sizeof(m_buf)); + memset(j_buf, 0, sizeof(j_buf)); + memset(l_buf, 0, sizeof(l_buf)); + + len = sizeof(m_buf); + pos = 0; + for (i = 0; i < member_list_entries; i++) { + ret = snprintf(m_buf + pos, len - pos, " %d", + member_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + len = sizeof(j_buf); + pos = 0; + for (i = 0; i < joined_list_entries; i++) { + ret = snprintf(j_buf + pos, len - pos, " %d", + joined_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + len = sizeof(l_buf); + pos = 0; + for (i = 0; i < left_list_entries; i++) { + ret = snprintf(l_buf + pos, len - pos, " %d", + left_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value, + member_list_entries, joined_list_entries, left_list_entries, + m_buf, j_buf, l_buf); +} + static const char *msg_name(int type) { switch (type) { @@ -470,7 +527,45 @@ static void node_history_init(struct mountgroup *mg, int nodeid, node->nodeid = nodeid; node->add_time = 0; list_add_tail(&node->list, &mg->node_history); - node->added_seq = cg->seq; /* for queries */ + + if (cg) + node->added_seq = cg->seq; /* for queries */ +} + +void node_history_cluster_add(int nodeid) +{ + struct mountgroup *mg; + struct node *node; + + list_for_each_entry(mg, &mountgroups, list) { + node_history_init(mg, nodeid, NULL); + + node = get_node_history(mg, nodeid); + if (!node) { + log_error("node_history_cluster_add no nodeid %d", + nodeid); + return; + } + + node->cluster_add_time = time(NULL); + } +} + +void 
node_history_cluster_remove(int nodeid) +{ + struct mountgroup *mg; + struct node *node; + + list_for_each_entry(mg, &mountgroups, list) { + node = get_node_history(mg, nodeid); + if (!node) { + log_error("node_history_cluster_remove no nodeid %d", + nodeid); + return; + } + + node->cluster_remove_time = time(NULL); + } } static void node_history_start(struct mountgroup *mg, int nodeid) @@ -857,6 +952,7 @@ static int match_change(struct mountgroup *mg, struct change *cg, { struct id_info *id; struct member *memb; + struct node *node; uint32_t seq = hd->msgdata; int i, members_mismatch; @@ -880,6 +976,30 @@ static int match_change(struct mountgroup *mg, struct change *cg, return 0; } + if (memb->start && hd->type == GFS_MSG_START) { + log_group(mg, "match_change %d:%u skip %u already start", + hd->nodeid, seq, cg->seq); + return 0; + } + + /* a node's start can't match a change if the node joined the cluster + more recently than the change was created */ + + node = get_node_history(mg, hd->nodeid); + if (!node) { + log_group(mg, "match_change %d:%u skip cg %u no node history", + hd->nodeid, seq, cg->seq); + return 0; + } + + if (node->cluster_add_time > cg->create_time) { + log_group(mg, "match_change %d:%u skip cg %u created %llu " + "cluster add %llu", hd->nodeid, seq, cg->seq, + (unsigned long long)cg->create_time, + (unsigned long long)node->cluster_add_time); + return 0; + } + /* verify this is the right change by matching the counts and the nodeids of the current members */ @@ -1016,7 +1136,7 @@ static void receive_start(struct mountgroup *mg, struct gfs_header *hd, int len) added = is_added(mg, hd->nodeid); - if (added && mi->started_count) { + if (added && mi->started_count && mg->started_count) { log_error("receive_start %d:%u add node with started_count %u", hd->nodeid, seq, mi->started_count); @@ -1687,11 +1807,11 @@ static void create_old_nodes(struct mountgroup *mg) return; } - node->jid = id->jid; + node->jid = id->jid; node->kernel_mount_done = 
!!(id->flags & IDI_MOUNT_DONE); node->kernel_mount_error = !!(id->flags & IDI_MOUNT_ERROR); - node->ro = !!(id->flags & IDI_MOUNT_RO); - node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR); + node->ro = !!(id->flags & IDI_MOUNT_RO); + node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR); j = malloc(sizeof(struct journal)); if (!j) { @@ -1749,7 +1869,7 @@ static void create_new_nodes(struct mountgroup *mg) } node->jid = JID_NONE; - node->ro = !!(id->flags & IDI_MOUNT_RO); + node->ro = !!(id->flags & IDI_MOUNT_RO); node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR); log_group(mg, "create_new_nodes %d ro %d spect %d", @@ -2033,7 +2153,7 @@ static void sync_state(struct mountgroup *mg) /* Normal case where nodes join an established group that completed first recovery sometime in the past. Existing nodes that weren't around during first recovery come through here, and new nodes - being added in this cycle come through here. */ + being added in this cycle come through here. */ if (mg->first_recovery_needed) { /* shouldn't happen */ @@ -2349,6 +2469,7 @@ static int add_change(struct mountgroup *mg, INIT_LIST_HEAD(&cg->removed); INIT_LIST_HEAD(&cg->saved_messages); cg->state = CGST_WAIT_CONDITIONS; + cg->create_time = time(NULL); cg->seq = ++mg->change_seq; if (!cg->seq) cg->seq = ++mg->change_seq; @@ -2432,7 +2553,8 @@ static int add_change(struct mountgroup *mg, return error; } -static int we_left(const struct cpg_address *left_list, size_t left_list_entries) +static int we_left(const struct cpg_address *left_list, + size_t left_list_entries) { int i; @@ -2456,6 +2578,10 @@ static void confchg_cb(cpg_handle_t handle, struct change *cg; int rv; + log_config(group_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); + mg = find_mg_handle(handle); if (!mg) { log_error("confchg_cb no mountgroup for cpg %s", @@ -2591,7 +2717,7 @@ static cpg_callbacks_t cpg_callbacks = { .cpg_confchg_fn = confchg_cb, }; -static void 
process_mountgroup_cpg(int ci) +static void process_cpg_mountgroup(int ci) { struct mountgroup *mg; cpg_error_t error; @@ -2634,7 +2760,7 @@ int gfs_join_mountgroup(struct mountgroup *mg) cpg_fd_get(h, &fd); - ci = client_add(fd, process_mountgroup_cpg, NULL); + ci = client_add(fd, process_cpg_mountgroup, NULL); mg->cpg_handle = h; mg->cpg_client = ci; @@ -3047,7 +3173,7 @@ int set_protocol(void) int rv; memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = daemon_cpg_fd; + pollfd.fd = cpg_fd_daemon; pollfd.events = POLLIN; while (1) { @@ -3093,7 +3219,7 @@ int set_protocol(void) } if (pollfd.revents & POLLIN) - process_cpg(0); + process_cpg_daemon(0); if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) { log_error("set_protocol poll revents %u", pollfd.revents); @@ -3139,6 +3265,8 @@ int set_protocol(void) our_protocol.kernel_max[0], our_protocol.kernel_max[1], our_protocol.kernel_max[2]); + + send_protocol(&our_protocol); return 0; } @@ -3182,6 +3310,10 @@ static void confchg_cb_daemon(cpg_handle_t handle, { int i; + log_config(group_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); + if (joined_list_entries) send_protocol(&our_protocol); @@ -3199,7 +3331,7 @@ static cpg_callbacks_t cpg_callbacks_daemon = { .cpg_confchg_fn = confchg_cb_daemon, }; -void process_cpg(int ci) +void process_cpg_daemon(int ci) { cpg_error_t error; @@ -3208,7 +3340,7 @@ void process_cpg(int ci) log_error("daemon cpg_dispatch error %d", error); } -int setup_cpg(void) +int setup_cpg_daemon(void) { cpg_error_t error; cpg_handle_t h; @@ -3231,7 +3363,7 @@ int setup_cpg(void) return -1; } - cpg_fd_get(h, &daemon_cpg_fd); + cpg_fd_get(h, &cpg_fd_daemon); cpg_handle_daemon = h; @@ -3252,15 +3384,15 @@ int setup_cpg(void) goto fail; } - log_debug("setup_cpg %d", daemon_cpg_fd); - return daemon_cpg_fd; + log_debug("setup_cpg_daemon %d", cpg_fd_daemon); + return cpg_fd_daemon; fail: cpg_finalize(h); return -1; } -void close_cpg(void) +void 
close_cpg_daemon(void) { struct mountgroup *mg; cpg_error_t error; diff --git a/group/gfs_controld/gfs_daemon.h b/group/gfs_controld/gfs_daemon.h index 8880b42..a69385b 100644 --- a/group/gfs_controld/gfs_daemon.h +++ b/group/gfs_controld/gfs_daemon.h @@ -70,7 +70,7 @@ extern int poll_ignore_plock; extern int plock_fd; extern int plock_ci; extern struct list_head mountgroups; -extern int cman_quorate; +extern int cluster_quorate; extern int our_nodeid; extern char *clustername; extern char daemon_debug_buf[256]; @@ -221,9 +221,9 @@ int read_ccs_int(const char *path, int *config_val); void read_ccs_nodir(struct mountgroup *mg, char *buf); /* cpg-new.c */ -int setup_cpg(void); -void close_cpg(void); -void process_cpg(int ci); +int setup_cpg_daemon(void); +void close_cpg_daemon(void); +void process_cpg_daemon(int ci); int setup_dlmcontrol(void); void process_dlmcontrol(int ci); int set_protocol(void); @@ -240,6 +240,8 @@ int set_mountgroups(int *count, struct gfsc_mountgroup **mgs_out); int set_mountgroup_nodes(struct mountgroup *mg, int option, int *node_count, struct gfsc_node **nodes_out); void free_mg(struct mountgroup *mg); +void node_history_cluster_add(int nodeid); +void node_history_cluster_remove(int nodeid); /* cpg-old.c */ int setup_cpg_old(void); @@ -294,9 +296,9 @@ void process_connection(int ci); void cluster_dead(int ci); /* member_cman.c */ -int setup_cman(void); -void close_cman(void); -void process_cman(int ci); +int setup_cluster(void); +void close_cluster(void); +void process_cluster(int ci); void kick_node_from_cluster(int nodeid); /* plock.c */ diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c index ae59d80..30f44eb 100644 --- a/group/gfs_controld/main.c +++ b/group/gfs_controld/main.c @@ -1131,10 +1131,10 @@ static void loop(void) goto out; client_add(rv, process_listener, NULL); - rv = setup_cman(); + rv = setup_cluster(); if (rv < 0) goto out; - client_add(rv, process_cman, cluster_dead); + client_add(rv, process_cluster, 
cluster_dead); rv = setup_ccs(); if (rv < 0) @@ -1184,10 +1184,10 @@ static void loop(void) * code in: cpg-new.c */ - rv = setup_cpg(); + rv = setup_cpg_daemon(); if (rv < 0) goto out; - client_add(rv, process_cpg, cluster_dead); + client_add(rv, process_cpg_daemon, cluster_dead); rv = set_protocol(); if (rv < 0) @@ -1291,7 +1291,7 @@ static void loop(void) } out: if (group_mode == GROUP_LIBCPG) - close_cpg(); + close_cpg_daemon(); else if (group_mode == GROUP_LIBGROUP) { close_plocks(); close_cpg_old(); @@ -1300,7 +1300,7 @@ static void loop(void) close_groupd(); close_logging(); close_ccs(); - close_cman(); + close_cluster(); if (!list_empty(&mountgroups)) log_error("mountgroups abandoned"); @@ -1565,7 +1565,7 @@ int poll_dlm; int plock_fd; int plock_ci; struct list_head mountgroups; -int cman_quorate; +int cluster_quorate; int our_nodeid; char *clustername; char daemon_debug_buf[256]; diff --git a/group/gfs_controld/member_cman.c b/group/gfs_controld/member_cman.c index f536d30..0f78097 100644 --- a/group/gfs_controld/member_cman.c +++ b/group/gfs_controld/member_cman.c @@ -36,7 +36,7 @@ static void cman_callback(cman_handle_t h, void *private, int reason, int arg) } } -void process_cman(int ci) +void process_cluster(int ci) { int rv; @@ -45,7 +45,7 @@ void process_cman(int ci) cluster_dead(0); } -int setup_cman(void) +int setup_cluster(void) { cman_node_t node; int rv, fd; @@ -119,7 +119,7 @@ int setup_cman(void) return fd; } -void close_cman(void) +void close_cluster(void) { cman_finish(ch); cman_finish(ch_admin);

14 years, 7 months

1
0
0 / 0

cluster: STABLE3 - dlm_controld: copy some fenced changes

by David Teigland

Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=... Commit: f1be533f910238ab5350e1a63b2ee18f548bff6b Parent: 36279bd1cbed1b4a3a9026df00326148b42e67f6 Author: David Teigland <teigland(a)redhat.com> AuthorDate: Wed Sep 16 14:44:14 2009 -0500 Committer: David Teigland <teigland(a)redhat.com> CommitterDate: Wed Sep 16 14:44:14 2009 -0500 dlm_controld: copy some fenced changes from the fenced commit bcc5fdef8473d99399c624a7bc15423a2af645c1 . copy some naming changes . copy some logging changes . copy some new checks for accepting start messages, check for a start already matched to a struct change, check that a node was a cluster member prior to the creation of the struct change Signed-off-by: David Teigland <teigland(a)redhat.com> --- group/dlm_controld/cpg.c | 184 ++++++++++++++++++++++++++++++++------ group/dlm_controld/dlm_daemon.h | 8 +- group/dlm_controld/main.c | 6 +- group/dlm_controld/member_cman.c | 10 ++- 4 files changed, 173 insertions(+), 35 deletions(-) diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c index 1704986..eb46bec 100644 --- a/group/dlm_controld/cpg.c +++ b/group/dlm_controld/cpg.c @@ -46,6 +46,8 @@ struct node { int fs_notified; uint64_t add_time; uint64_t fence_time; /* for debug */ + uint64_t cluster_add_time; + uint64_t cluster_remove_time; uint32_t fence_queries; /* for debug */ uint32_t added_seq; /* for queries */ uint32_t removed_seq; /* for queries */ @@ -71,6 +73,7 @@ struct change { int we_joined; uint32_t seq; /* used as a reference for debugging, and for queries */ uint32_t combined_seq; /* for queries */ + uint64_t create_time; }; struct ls_info { @@ -91,13 +94,66 @@ struct id_info { }; int message_flow_control_on; -static int daemon_cpg_fd; -static cpg_handle_t daemon_cpg_handle; +static cpg_handle_t cpg_handle_daemon; +static int cpg_fd_daemon; static struct protocol our_protocol; static struct list_head daemon_nodes; static struct cpg_address daemon_member[MAX_NODES]; static int 
daemon_member_count; +static void log_config(const struct cpg_name *group_name, + const struct cpg_address *member_list, + size_t member_list_entries, + const struct cpg_address *left_list, + size_t left_list_entries, + const struct cpg_address *joined_list, + size_t joined_list_entries) +{ + char m_buf[128]; + char j_buf[32]; + char l_buf[32]; + size_t i, len, pos; + int ret; + + memset(m_buf, 0, sizeof(m_buf)); + memset(j_buf, 0, sizeof(j_buf)); + memset(l_buf, 0, sizeof(l_buf)); + + len = sizeof(m_buf); + pos = 0; + for (i = 0; i < member_list_entries; i++) { + ret = snprintf(m_buf + pos, len - pos, " %d", + member_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + len = sizeof(j_buf); + pos = 0; + for (i = 0; i < joined_list_entries; i++) { + ret = snprintf(j_buf + pos, len - pos, " %d", + joined_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + len = sizeof(l_buf); + pos = 0; + for (i = 0; i < left_list_entries; i++) { + ret = snprintf(l_buf + pos, len - pos, " %d", + left_list[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + + log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value, + member_list_entries, joined_list_entries, left_list_entries, + m_buf, j_buf, l_buf); +} + static void ls_info_in(struct ls_info *li) { li->ls_info_size = le32_to_cpu(li->ls_info_size); @@ -359,7 +415,44 @@ static void node_history_init(struct lockspace *ls, int nodeid, node->add_time = 0; list_add_tail(&node->list, &ls->node_history); out: - node->added_seq = cg->seq; /* for queries */ + if (cg) + node->added_seq = cg->seq; /* for queries */ +} + +void node_history_cluster_add(int nodeid) +{ + struct lockspace *ls; + struct node *node; + + list_for_each_entry(ls, &lockspaces, list) { + node_history_init(ls, nodeid, NULL); + + node = get_node_history(ls, nodeid); + if (!node) { + log_error("node_history_cluster_add no nodeid %d", + nodeid); + return; + } + + node->cluster_add_time = time(NULL); + } +} + 
+void node_history_cluster_remove(int nodeid) +{ + struct lockspace *ls; + struct node *node; + + list_for_each_entry(ls, &lockspaces, list) { + node = get_node_history(ls, nodeid); + if (!node) { + log_error("node_history_cluster_remove no nodeid %d", + nodeid); + return; + } + + node->cluster_remove_time = time(NULL); + } } static void node_history_start(struct lockspace *ls, int nodeid) @@ -766,7 +859,7 @@ static void set_plock_ckpt_node(struct lockspace *ls) } static struct id_info *get_id_struct(struct id_info *ids, int count, int size, - int nodeid) + int nodeid) { struct id_info *id = ids; int i; @@ -787,6 +880,7 @@ static int match_change(struct lockspace *ls, struct change *cg, { struct id_info *id; struct member *memb; + struct node *node; uint32_t seq = hd->msgdata; int i, members_mismatch; @@ -800,7 +894,7 @@ static int match_change(struct lockspace *ls, struct change *cg, if (!id) { log_group(ls, "match_change %d:%u skip %u we are not in members", hd->nodeid, seq, cg->seq); - return 0; + return 0; } memb = find_memb(cg, hd->nodeid); @@ -810,6 +904,30 @@ static int match_change(struct lockspace *ls, struct change *cg, return 0; } + if (memb->start && hd->type == DLM_MSG_START) { + log_group(ls, "match_change %d:%u skip %u already start", + hd->nodeid, seq, cg->seq); + return 0; + } + + /* a node's start can't match a change if the node joined the cluster + more recently than the change was created */ + + node = get_node_history(ls, hd->nodeid); + if (!node) { + log_group(ls, "match_change %d:%u skip cg %u no node history", + hd->nodeid, seq, cg->seq); + return 0; + } + + if (node->cluster_add_time > cg->create_time) { + log_debug("match_change %d:%u skip cg %u created %llu " + "cluster add %llu", hd->nodeid, seq, cg->seq, + (unsigned long long)cg->create_time, + (unsigned long long)node->cluster_add_time); + return 0; + } + /* verify this is the right change by matching the counts and the nodeids of the current members */ @@ -837,6 +955,7 @@ static int 
match_change(struct lockspace *ls, struct change *cg, } id = (struct id_info *)((char *)id + li->id_info_size); } + if (members_mismatch) return 0; @@ -924,7 +1043,7 @@ static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len) added = is_added(ls, hd->nodeid); - if (added && li->started_count) { + if (added && li->started_count && ls->started_count) { log_error("receive_start %d:%u add node with started_count %u", hd->nodeid, seq, li->started_count); @@ -1166,6 +1285,7 @@ void process_lockspace_changes(void) poll_fencing = 0; poll_quorum = 0; poll_fs = 0; + list_for_each_entry_safe(ls, safe, &lockspaces, list) { if (!list_empty(&ls->changes)) apply_changes(ls); @@ -1192,6 +1312,7 @@ static int add_change(struct lockspace *ls, INIT_LIST_HEAD(&cg->members); INIT_LIST_HEAD(&cg->removed); cg->state = CGST_WAIT_CONDITIONS; + cg->create_time = time(NULL); cg->seq = ++ls->change_seq; if (!cg->seq) cg->seq = ++ls->change_seq; @@ -1275,7 +1396,8 @@ static int add_change(struct lockspace *ls, return error; } -static int we_left(const struct cpg_address *left_list, size_t left_list_entries) +static int we_left(const struct cpg_address *left_list, + size_t left_list_entries) { int i; @@ -1300,6 +1422,10 @@ static void confchg_cb(cpg_handle_t handle, struct member *memb; int rv; + log_config(group_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); + ls = find_ls_handle(handle); if (!ls) { log_error("confchg_cb no lockspace for cpg %s", @@ -1450,7 +1576,7 @@ void update_flow_control_status(void) cpg_flow_control_state_t flow_control_state; cpg_error_t error; - error = cpg_flow_control_state_get(daemon_cpg_handle, + error = cpg_flow_control_state_get(cpg_handle_daemon, &flow_control_state); if (error != CPG_OK) { log_error("cpg_flow_control_state_get %d", error); @@ -1470,7 +1596,7 @@ void update_flow_control_status(void) } } -static void process_lockspace_cpg(int ci) +static void 
process_cpg_lockspace(int ci) { struct lockspace *ls; cpg_error_t error; @@ -1516,7 +1642,7 @@ int dlm_join_lockspace(struct lockspace *ls) cpg_fd_get(h, &fd); - ci = client_add(fd, process_lockspace_cpg, NULL); + ci = client_add(fd, process_cpg_lockspace, NULL); list_add(&ls->list, &lockspaces); @@ -1846,7 +1972,7 @@ static void send_protocol(struct protocol *proto) memcpy(pr, proto, sizeof(struct protocol)); protocol_out(pr); - _send_message(daemon_cpg_handle, buf, len, DLM_MSG_PROTOCOL); + _send_message(cpg_handle_daemon, buf, len, DLM_MSG_PROTOCOL); } int set_protocol(void) @@ -1857,7 +1983,7 @@ int set_protocol(void) int rv; memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = daemon_cpg_fd; + pollfd.fd = cpg_fd_daemon; pollfd.events = POLLIN; while (1) { @@ -1903,7 +2029,7 @@ int set_protocol(void) } if (pollfd.revents & POLLIN) - process_cpg(0); + process_cpg_daemon(0); if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) { log_error("set_protocol poll revents %u", pollfd.revents); @@ -1949,6 +2075,8 @@ int set_protocol(void) our_protocol.kernel_max[0], our_protocol.kernel_max[1], our_protocol.kernel_max[2]); + + send_protocol(&our_protocol); return 0; } @@ -1987,6 +2115,10 @@ static void confchg_cb_daemon(cpg_handle_t handle, { int i; + log_config(group_name, member_list, member_list_entries, + left_list, left_list_entries, + joined_list, joined_list_entries); + if (joined_list_entries) send_protocol(&our_protocol); @@ -2004,16 +2136,16 @@ static cpg_callbacks_t cpg_callbacks_daemon = { .cpg_confchg_fn = confchg_cb_daemon, }; -void process_cpg(int ci) +void process_cpg_daemon(int ci) { cpg_error_t error; - error = cpg_dispatch(daemon_cpg_handle, CPG_DISPATCH_ALL); + error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL); if (error != CPG_OK) log_error("daemon cpg_dispatch error %d", error); } -int setup_cpg(void) +int setup_cpg_daemon(void) { cpg_error_t error; struct cpg_name name; @@ -2029,20 +2161,20 @@ int setup_cpg(void) our_protocol.kernel_max[1] = 1; 
our_protocol.kernel_max[2] = 1; - error = cpg_initialize(&daemon_cpg_handle, &cpg_callbacks_daemon); + error = cpg_initialize(&cpg_handle_daemon, &cpg_callbacks_daemon); if (error != CPG_OK) { log_error("daemon cpg_initialize error %d", error); return -1; } - cpg_fd_get(daemon_cpg_handle, &daemon_cpg_fd); + cpg_fd_get(cpg_handle_daemon, &cpg_fd_daemon); memset(&name, 0, sizeof(name)); sprintf(name.value, "dlm:controld"); name.length = strlen(name.value) + 1; retry: - error = cpg_join(daemon_cpg_handle, &name); + error = cpg_join(cpg_handle_daemon, &name); if (error == CPG_ERR_TRY_AGAIN) { sleep(1); if (!(++i % 10)) @@ -2054,22 +2186,22 @@ int setup_cpg(void) goto fail; } - log_debug("setup_cpg %d", daemon_cpg_fd); - return daemon_cpg_fd; + log_debug("setup_cpg_daemon %d", cpg_fd_daemon); + return cpg_fd_daemon; fail: - cpg_finalize(daemon_cpg_handle); + cpg_finalize(cpg_handle_daemon); return -1; } -void close_cpg(void) +void close_cpg_daemon(void) { struct lockspace *ls; cpg_error_t error; struct cpg_name name; int i = 0; - if (!daemon_cpg_handle) + if (!cpg_handle_daemon) return; if (cluster_down) goto fin; @@ -2079,7 +2211,7 @@ void close_cpg(void) name.length = strlen(name.value) + 1; retry: - error = cpg_leave(daemon_cpg_handle, &name); + error = cpg_leave(cpg_handle_daemon, &name); if (error == CPG_ERR_TRY_AGAIN) { sleep(1); if (!(++i % 10)) @@ -2093,7 +2225,7 @@ void close_cpg(void) if (ls->cpg_handle) cpg_finalize(ls->cpg_handle); } - cpg_finalize(daemon_cpg_handle); + cpg_finalize(cpg_handle_daemon); } /* fs_controld has seen nodedown for nodeid; it's now ok for dlm to do diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h index 65723d1..8143848 100644 --- a/group/dlm_controld/dlm_daemon.h +++ b/group/dlm_controld/dlm_daemon.h @@ -232,9 +232,9 @@ int read_ccs_int(const char *path, int *config_val); int get_weight(int nodeid, char *lockspace); /* cpg.c */ -int setup_cpg(void); -void close_cpg(void); -void process_cpg(int ci); +int 
setup_cpg_daemon(void); +void close_cpg_daemon(void); +void process_cpg_daemon(int ci); int set_protocol(void); void process_lockspace_changes(void); void dlm_send_message(struct lockspace *ls, char *buf, int len); @@ -242,6 +242,8 @@ int dlm_join_lockspace(struct lockspace *ls); int dlm_leave_lockspace(struct lockspace *ls); const char *msg_name(int type); void update_flow_control_status(void); +void node_history_cluster_add(int nodeid); +void node_history_cluster_remove(int nodeid); int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node); int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace); int set_lockspaces(int *count, struct dlmc_lockspace **lss_out); diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c index 75ee55d..12e2592 100644 --- a/group/dlm_controld/main.c +++ b/group/dlm_controld/main.c @@ -938,10 +938,10 @@ static void loop(void) log_debug("group_mode %d compat %d", group_mode, cfgd_groupd_compat); if (group_mode == GROUP_LIBCPG) { - rv = setup_cpg(); + rv = setup_cpg_daemon(); if (rv < 0) goto out; - client_add(rv, process_cpg, cluster_dead); + client_add(rv, process_cpg_daemon, cluster_dead); rv = set_protocol(); if (rv < 0) @@ -1024,7 +1024,7 @@ static void loop(void) if (cfgd_groupd_compat) close_groupd(); if (group_mode == GROUP_LIBCPG) { - close_cpg(); + close_cpg_daemon(); close_plocks(); } clear_configfs(); diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c index 4caaaa5..c6b7cc7 100644 --- a/group/dlm_controld/member_cman.c +++ b/group/dlm_controld/member_cman.c @@ -99,8 +99,10 @@ static void statechange(void) if (old_nodes[i].cn_member && !is_cluster_member(old_nodes[i].cn_nodeid)) { - log_debug("cman: node %d removed", - old_nodes[i].cn_nodeid); + log_debug("cluster node %d removed", + old_nodes[i].cn_nodeid); + + node_history_cluster_remove(old_nodes[i].cn_nodeid); del_configfs_node(old_nodes[i].cn_nodeid); } @@ -119,9 +121,11 @@ static void 
statechange(void) addrptr = &cman_nodes[i].cn_address; } - log_debug("cman: node %d added", + log_debug("cluster node %d added", cman_nodes[i].cn_nodeid); + node_history_cluster_add(cman_nodes[i].cn_nodeid); + for (j = 0; j < num_addrs; j++) { add_configfs_node(cman_nodes[i].cn_nodeid, addrptr[j].cna_address,

14 years, 7 months

1
0
0 / 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

cluster-commits September 2009