dlm: master - dlm_controld: fix plock signature in stored message
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=9ad6aa0d...
Commit: 9ad6aa0dadde3997e0fe8018af06db99fe13ba82
Parent: a83d1f5eeb9c01a346be32b8effb78553ccb7984
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Aug 13 12:00:58 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Aug 13 12:00:58 2010 -0500
dlm_controld: fix plock signature in stored message
When confchgs change the ckpt_node between plock checkpoints
being written, and the plocks_stored message being received,
the plocks_stored message is resent by the new ckpt_node that
didn't write the checkpoint.
The new ckpt_node needs to save the plock checkpoint signature
that was sent in the first plocks_stored message from the
checkpoint writer, and reuse it when sending the new
plocks_stored message. Otherwise the plock signature in the
stored message will be invalid.
bz 623816
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 8 +++++++-
group/dlm_controld/dlm_daemon.h | 1 +
2 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index a9cc61e..899d23e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -1099,6 +1099,8 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd,
"need_plocks %d", hd->nodeid, hd->msgdata, hd->flags,
hd->msgdata2, ls->need_plocks);
+ ls->last_plock_sig = hd->msgdata2;
+
if (!ls->need_plocks)
return;
@@ -1329,8 +1331,12 @@ static void prepare_plocks(struct lockspace *ls)
the previous stored message. They will read the ckpt from the
previous ckpt_node upon receiving the stored message from us. */
- if (nodes_added(ls))
+ if (nodes_added(ls)) {
store_plocks(ls, &sig);
+ ls->last_plock_sig = sig;
+ } else {
+ sig = ls->last_plock_sig;
+ }
send_plocks_stored(ls, sig);
}
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index 0b0e9f2..71e3bf4 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -216,6 +216,7 @@ struct lockspace {
uint64_t checkpoint_r_num_last;
uint32_t checkpoint_r_count;
uint32_t checkpoint_p_count;
+ uint32_t last_plock_sig;
/* deadlock stuff */
13 years, 8 months
dlm: master - dlm_controld: fix plock owner syncing
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=a83d1f5e...
Commit: a83d1f5eeb9c01a346be32b8effb78553ccb7984
Parent: cad14d81f7f4a6adbdf762d7775c52f95807eae2
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Aug 12 15:43:25 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Aug 13 12:00:12 2010 -0500
dlm_controld: fix plock owner syncing
- The R_GOT_UNOWN flag was not always being set on resources when
the owner was set to 0. This would cause subsequent syncing of
plock state to write the incorrect owner into the checkpoint.
- Plocks were being written into the checkpoint unnecessarily for
owned resources in cases where the owner written in the checkpoint
was not the same as r->owner.
- Set a few other flags in r->flags to help debug future problems.
- Do more error checking and verifying of checkpointed state when
checkpoints are being written and read.
- Handle errors during plock syncing by disabling plock operations,
instead of ignoring or running with incorrect state.
bz 617306
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 110 ++++++++++++++++++++++++++++++--------------
1 files changed, 76 insertions(+), 34 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index 15bb2f1..e292e95 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -32,7 +32,11 @@ struct pack_plock {
uint32_t pad;
};
-#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
+#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
+#define R_SEND_UNOWN 0x00000002 /* have sent owner=0 message */
+#define R_SEND_OWN 0x00000004 /* have sent owner=our_nodeid message */
+#define R_PURGE_UNOWN 0x00000008 /* set owner=0 in purge */
+#define R_SEND_DROP 0x00000010
struct resource {
struct list_head list; /* list of resources */
@@ -965,6 +969,11 @@ static void send_own(struct lockspace *ls, struct resource *r, int owner)
return;
}
+ if (!owner)
+ r->flags |= R_SEND_UNOWN;
+ else
+ r->flags |= R_SEND_OWN;
+
memset(&info, 0, sizeof(info));
info.number = r->number;
info.nodeid = owner;
@@ -1015,6 +1024,7 @@ static void send_drop(struct lockspace *ls, struct resource *r)
memset(&info, 0, sizeof(info));
info.number = r->number;
+ r->flags |= R_SEND_DROP;
send_struct_info(ls, &info, DLM_MSG_PLOCK_DROP);
}
@@ -1587,15 +1597,18 @@ void process_saved_plocks(struct lockspace *ls)
/* locks still marked SYNCING should not go into the ckpt; the new node
will get those locks by receiving PLOCK_SYNC messages */
-static void pack_section_buf(struct lockspace *ls, struct resource *r)
+static void pack_section_buf(struct lockspace *ls, struct resource *r,
+ int owner)
{
struct pack_plock *pp;
struct posix_lock *po;
struct lock_waiter *w;
int count = 0;
- /* plocks on owned resources are not replicated on other nodes */
- if (r->owner == our_nodeid)
+ /* plocks on owned resources are not replicated on other nodes;
+ N.B. owner not always equal to r->owner */
+
+ if (cfgd_plock_ownership && (owner == our_nodeid))
return;
pp = (struct pack_plock *) &section_buf;
@@ -1645,6 +1658,17 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
gettimeofday(&now, NULL);
+ sscanf(numbuf, "r%llu.%d", &num, &owner);
+
+#if 0
+ /* would be nice to always compile this, but it adds a lot of time */
+ r = search_resource(ls, num);
+ if (r) {
+ log_error("unpack %llu duplicate", num);
+ return -1;
+ }
+#endif
+
r = malloc(sizeof(struct resource));
if (!r)
return -ENOMEM;
@@ -1653,7 +1677,26 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
INIT_LIST_HEAD(&r->waiters);
INIT_LIST_HEAD(&r->pending);
- sscanf(numbuf, "r%llu.%d", &num, &owner);
+ if (!cfgd_plock_ownership) {
+ if (owner) {
+ log_error("unpack %llu bad owner %d count %d",
+ (unsigned long long)num, owner, count);
+ free(r);
+ return -1;
+ }
+ } else {
+ if (!owner)
+ r->flags |= R_GOT_UNOWN;
+
+ /* no locks should be included for owned resources */
+
+ if (owner && count) {
+ log_error("unpack %llu owner %d bad count %d",
+ (unsigned long long)num, owner, count);
+ free(r);
+ return -1;
+ }
+ }
r->number = num;
r->owner = owner;
@@ -1666,7 +1709,8 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
for (i = 0; i < count; i++) {
if (!pp->waiter) {
po = malloc(sizeof(struct posix_lock));
- // FIXME: handle failed malloc
+ if (!po)
+ return -ENOMEM;
po->start = le64_to_cpu(pp->start);
po->end = le64_to_cpu(pp->end);
po->owner = le64_to_cpu(pp->owner);
@@ -1676,7 +1720,8 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
list_add_tail(&po->list, &r->locks);
} else {
w = malloc(sizeof(struct lock_waiter));
- // FIXME: handle failed malloc
+ if (!w)
+ return -ENOMEM;
w->info.start = le64_to_cpu(pp->start);
w->info.end = le64_to_cpu(pp->end);
w->info.owner = le64_to_cpu(pp->owner);
@@ -1899,13 +1944,9 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
sleep(1);
goto open_retry;
}
- if (rv == SA_AIS_ERR_EXIST) {
- log_group(ls, "store_plocks ckpt already exists");
- return;
- }
if (rv != SA_AIS_OK) {
log_error("store_plocks ckpt open error %d %s", rv, ls->name);
- return;
+ goto fail;
}
log_group(ls, "store_plocks open ckpt handle %llx",
@@ -1953,14 +1994,14 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
memset(&section_buf, 0, sizeof(section_buf));
section_len = 0;
- pack_section_buf(ls, r);
+ pack_section_buf(ls, r, owner);
if (!r_num_first)
r_num_first = r->number;
r_num_last = r->number;
- log_plock(ls, "store_plocks section size %u id %u \"%s\"",
- section_len, section_id.idLen, buf);
+ log_plock(ls, "wr sect ro %d rf %x len %u \"%s\"",
+ r->owner, r->flags, section_len, buf);
create_retry:
rv = saCkptSectionCreate(h, &section_attr, &section_buf,
@@ -1970,19 +2011,10 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
sleep(1);
goto create_retry;
}
- if (rv == SA_AIS_ERR_EXIST) {
- /* this shouldn't happen in general */
- log_group(ls, "store_plocks clearing old ckpt");
- /* do we need this close or will the close in
- the unlink function be ok? */
- saCkptCheckpointClose(h);
- _unlink_checkpoint(ls, &name);
- goto open_retry;
- }
if (rv != SA_AIS_OK) {
log_error("store_plocks ckpt section create err %d %s",
rv, ls->name);
- break;
+ goto fail;
}
}
out:
@@ -2003,6 +2035,13 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
ls->checkpoint_r_num_last = r_num_last;
ls->checkpoint_r_count = r_count;
ls->checkpoint_p_count = p_count;
+ return;
+
+ fail:
+ ls->disable_plock = 1;
+ /* force the node receiving plocks to fail sig check and disable
+ plocks as well */
+ *sig = 0xF0000000;
}
/* called by a node that's just been added to the group to get existing plock
@@ -2017,7 +2056,7 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
SaNameT name;
SaAisErrorT rv;
char buf[SECTION_NAME_LEN];
- int len, lock_count;
+ int len, lock_count, error;
uint32_t r_count = 0, p_count = 0;
uint64_t r_num, r_num_first = 0, r_num_last = 0;
@@ -2085,10 +2124,6 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
memset(&buf, 0, sizeof(buf));
snprintf(buf, SECTION_NAME_LEN, "%s", desc.sectionId.id);
- log_plock(ls, "retrieve_plocks section size %llu id %u \"%s\"",
- (unsigned long long)iov.dataSize, iov.sectionId.idLen,
- buf);
-
read_retry:
rv = saCkptCheckpointRead(h, &iov, 1, NULL);
if (rv == SA_AIS_ERR_TRY_AGAIN) {
@@ -2106,8 +2141,9 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
no locks, which exist in ownership mode; the resource
name and owner come from the section id */
- log_plock(ls, "retrieve_plocks ckpt read %llu bytes",
- (unsigned long long)iov.readSize);
+ log_plock(ls, "rd sect len %llu \"%s\"",
+ (unsigned long long)iov.readSize, buf);
+
section_len = iov.readSize;
if (section_len % sizeof(struct pack_plock)) {
@@ -2119,8 +2155,12 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
r_num = 0;
lock_count = 0;
- unpack_section_buf(ls, (char *)desc.sectionId.id,
- desc.sectionId.idLen, &r_num, &lock_count);
+ error = unpack_section_buf(ls, (char *)desc.sectionId.id,
+ desc.sectionId.idLen, &r_num,
+ &lock_count);
+ if (error < 0)
+ continue;
+
r_count++;
p_count += lock_count;
@@ -2185,6 +2225,8 @@ void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
if (r->owner == nodeid) {
r->owner = 0;
+ r->flags |= R_GOT_UNOWN;
+ r->flags |= R_PURGE_UNOWN;
send_pending_plocks(ls, r);
}
13 years, 8 months
dlm: master - dlm_controld: fix save_plocks initialization
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=cad14d81...
Commit: cad14d81f7f4a6adbdf762d7775c52f95807eae2
Parent: 82afe0498f9176d0cc2a95c85437eea8f4ad965f
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Aug 12 15:37:23 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Aug 13 12:00:05 2010 -0500
dlm_controld: fix save_plocks initialization
When dlm_controld joins a cpg and begins receiving plock messages, it
needs to save all those plock messages for processing after it initializes
plock state from a checkpoint. Instead of being initialized to 1,
saved_plocks started as 0 and was initialized to 1 shortly after the join.
This left a short span of time where a plock message could arrive and be
processed immediately instead of saved, which would cause the node's plock
state to be out of sync with the other nodes, which could lead to any
number of different problems with plocks.
bz 623810
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 16 +++++++++++-----
1 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 93cf108..a9cc61e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -1135,9 +1135,9 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd,
return;
}
- process_saved_plocks(ls);
ls->need_plocks = 0;
ls->save_plocks = 0;
+ process_saved_plocks(ls);
}
static void send_info(struct lockspace *ls, struct change *cg, int type,
@@ -1282,6 +1282,10 @@ static void prepare_plocks(struct lockspace *ls)
}
ls->plock_ckpt_node = our_nodeid;
ls->need_plocks = 0;
+ if (ls->save_plocks) {
+ ls->save_plocks = 0;
+ process_saved_plocks(ls);
+ }
return;
}
@@ -1294,7 +1298,10 @@ static void prepare_plocks(struct lockspace *ls)
if (!ls->plock_ckpt_node) {
ls->need_plocks = 0;
- ls->save_plocks = 0;
+ if (ls->save_plocks) {
+ ls->save_plocks = 0;
+ process_saved_plocks(ls);
+ }
return;
}
@@ -1302,10 +1309,8 @@ static void prepare_plocks(struct lockspace *ls)
existing plock state in the ckpt to the time that we read that state
from the ckpt. */
- if (ls->need_plocks) {
- ls->save_plocks = 1;
+ if (ls->need_plocks)
return;
- }
if (ls->plock_ckpt_node != our_nodeid)
return;
@@ -1783,6 +1788,7 @@ int dlm_join_lockspace(struct lockspace *ls)
ls->cpg_fd = fd;
ls->kernel_stopped = 1;
ls->need_plocks = 1;
+ ls->save_plocks = 1;
ls->joining = 1;
memset(&name, 0, sizeof(name));
13 years, 8 months
dlm: master - dlm_controld: fix plock owner in checkpoints
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=82afe049...
Commit: 82afe0498f9176d0cc2a95c85437eea8f4ad965f
Parent: 9d70d7d6df3c2d66c6b78cfd5e54353ecbc54b3c
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Jul 27 14:06:53 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Aug 13 11:59:40 2010 -0500
dlm_controld: fix plock owner in checkpoints
The wrong plock resource owner is written into checkpoints
when plock_ownership is 0. This causes a node that mounts
the fs to have incorrect owner values, which cause the
plock operations to permanently hang.
This bug seems to have existed since the plock code was originally
copied into dlm_controld from gfs_controld. As part of the copy,
there were some small code changes. One was to always include the
resource owner in the checkpoint data, instead of only including it
when plock_ownership was 1. The owner was then written and read
incorrectly when plock_ownership was 0.
bz 618814
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index 92cecdc..15bb2f1 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -1923,7 +1923,9 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
(there should be no SYNCING plocks) */
list_for_each_entry(r, &ls->plock_resources, list) {
- if (r->owner == -1)
+ if (!cfgd_plock_ownership)
+ owner = 0;
+ else if (r->owner == -1)
continue;
else if (r->owner == our_nodeid)
owner = our_nodeid;
13 years, 8 months
dlm: master - dlm_controld: fix plock checkpoint signatures
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=9d70d7d6...
Commit: 9d70d7d6df3c2d66c6b78cfd5e54353ecbc54b3c
Parent: 10e53d7fae4cd576518864fb4309e88402cecc56
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Jul 27 13:50:14 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Aug 13 11:59:36 2010 -0500
dlm_controld: fix plock checkpoint signatures
Commit e2ccbf90543cf1d163d1a067bf5a8ce049a9c134 for bz 578625
was not correct to use "p_count" (a count of plocks) in the
signature calculation. When plock_ownership is on, the plocks
under an owned resource are not copied into the checkpoint.
However, the node writing the checkpoint counts all these
owned plocks and factors the count into the signature. The
node reading the checkpoint does not get the plocks, so its
count of plocks is different, causing the signature calculation
to be different. It will then disable plock operations.
It would be very common for this to occur in practice, so the
impact is very high.
bz 618806
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index 8b3eae2..92cecdc 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -1984,8 +1984,7 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
}
}
out:
- *sig = (0xFFFFFFFF & r_num_first) ^ (0xFFFFFFFF & r_num_last) ^
- r_count ^ p_count;
+ *sig = (0xFFFFFFFF & r_num_first) ^ (0xFFFFFFFF & r_num_last) ^ r_count;
log_group(ls, "store_plocks first %llu last %llu r_count %u "
"p_count %u sig %x",
@@ -2133,8 +2132,7 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
out:
saCkptCheckpointClose(h);
- *sig = (0xFFFFFFFF & r_num_first) ^ (0xFFFFFFFF & r_num_last)
- ^ r_count ^ p_count;
+ *sig = (0xFFFFFFFF & r_num_first) ^ (0xFFFFFFFF & r_num_last) ^ r_count;
log_group(ls, "retrieve_plocks first %llu last %llu r_count %u "
"p_count %u sig %x",
13 years, 8 months
cluster: RHEL60 - gfs2-utils: mkfs can't fsync device with 32MB RGs
by Bob Peterson
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: ba3ddbcd0a1123350774e75ac1845c4fae61f697
Parent: 734c1956e16c5afc6ac5f265e9b3ed396f9e4201
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Aug 11 13:31:14 2010 -0500
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Fri Aug 13 09:32:52 2010 -0500
gfs2-utils: mkfs can't fsync device with 32MB RGs
This patch periodically syncs rgrp data written to a GFS2
file system during mkfs.gfs2 to circumvent the problem
described in bug #605322.
rhbz#622844
---
gfs2/libgfs2/rgrp.c | 14 +++++++++++++-
1 files changed, 13 insertions(+), 1 deletions(-)
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index 8d2d405..d442633 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -5,6 +5,8 @@
#include "libgfs2.h"
+#define RG_SYNC_TOLERANCE 1000
+
/**
* gfs2_compute_bitstructs - Compute the bitmap sizes
* @rgd: The resource group descriptor
@@ -158,11 +160,21 @@ void gfs2_rgrp_relse(struct rgrp_list *rgd)
void gfs2_rgrp_free(osi_list_t *rglist)
{
struct rgrp_list *rgd;
+ int rgs_since_sync = 0;
+ struct gfs2_sbd *sdp = NULL;
while(!osi_list_empty(rglist->next)){
rgd = osi_list_entry(rglist->next, struct rgrp_list, list);
- if (rgd->bh && rgd->bh[0]) /* if a buffer exists */
+ if (rgd->bh && rgd->bh[0]) { /* if a buffer exists */
+ rgs_since_sync++;
+ if (rgs_since_sync >= RG_SYNC_TOLERANCE) {
+ if (!sdp)
+ sdp = rgd->bh[0]->sdp;
+ fsync(sdp->device_fd);
+ rgs_since_sync = 0;
+ }
gfs2_rgrp_relse(rgd); /* free them all. */
+ }
if(rgd->bits)
free(rgd->bits);
if(rgd->bh) {
13 years, 8 months
cluster: RHEL6 - gfs2-utils: mkfs can't fsync device with 32MB RGs
by Bob Peterson
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 85eb67286e00b4fd6e58d2c5f86ff67c5d58c0c5
Parent: 4528ab5583ae25910b82795884520033651fc405
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Aug 11 13:31:14 2010 -0500
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Fri Aug 13 09:30:44 2010 -0500
gfs2-utils: mkfs can't fsync device with 32MB RGs
This patch periodically syncs rgrp data written to a GFS2
file system during mkfs.gfs2 to circumvent the problem
described in bug #605322.
rhbz#622844
---
gfs2/libgfs2/rgrp.c | 14 +++++++++++++-
1 files changed, 13 insertions(+), 1 deletions(-)
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index 8d2d405..d442633 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -5,6 +5,8 @@
#include "libgfs2.h"
+#define RG_SYNC_TOLERANCE 1000
+
/**
* gfs2_compute_bitstructs - Compute the bitmap sizes
* @rgd: The resource group descriptor
@@ -158,11 +160,21 @@ void gfs2_rgrp_relse(struct rgrp_list *rgd)
void gfs2_rgrp_free(osi_list_t *rglist)
{
struct rgrp_list *rgd;
+ int rgs_since_sync = 0;
+ struct gfs2_sbd *sdp = NULL;
while(!osi_list_empty(rglist->next)){
rgd = osi_list_entry(rglist->next, struct rgrp_list, list);
- if (rgd->bh && rgd->bh[0]) /* if a buffer exists */
+ if (rgd->bh && rgd->bh[0]) { /* if a buffer exists */
+ rgs_since_sync++;
+ if (rgs_since_sync >= RG_SYNC_TOLERANCE) {
+ if (!sdp)
+ sdp = rgd->bh[0]->sdp;
+ fsync(sdp->device_fd);
+ rgs_since_sync = 0;
+ }
gfs2_rgrp_relse(rgd); /* free them all. */
+ }
if(rgd->bits)
free(rgd->bits);
if(rgd->bh) {
13 years, 8 months
cluster: STABLE3 - dlm_controld: fix plock signature in stored message
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 9c9a67d281402ad0611233da958a68c6a9ec6e5c
Parent: c2894ccc77d3b3efeb28d821950fad9f8b9ae1c8
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Aug 12 14:51:02 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Aug 12 16:43:00 2010 -0500
dlm_controld: fix plock signature in stored message
When confchgs change the ckpt_node between plock checkpoints
being written, and the plocks_stored message being received,
the plocks_stored message is resent by the new ckpt_node that
didn't write the checkpoint.
The new ckpt_node needs to save the plock checkpoint signature
that was sent in the first plocks_stored message from the
checkpoint writer, and reuse it when sending the new
plocks_stored message. Otherwise the plock signature in the
stored message will be invalid.
bz 623816
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 8 +++++++-
group/dlm_controld/dlm_daemon.h | 1 +
2 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index a9cc61e..899d23e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -1099,6 +1099,8 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd,
"need_plocks %d", hd->nodeid, hd->msgdata, hd->flags,
hd->msgdata2, ls->need_plocks);
+ ls->last_plock_sig = hd->msgdata2;
+
if (!ls->need_plocks)
return;
@@ -1329,8 +1331,12 @@ static void prepare_plocks(struct lockspace *ls)
the previous stored message. They will read the ckpt from the
previous ckpt_node upon receiving the stored message from us. */
- if (nodes_added(ls))
+ if (nodes_added(ls)) {
store_plocks(ls, &sig);
+ ls->last_plock_sig = sig;
+ } else {
+ sig = ls->last_plock_sig;
+ }
send_plocks_stored(ls, sig);
}
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index c2423d2..6f7711a 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -220,6 +220,7 @@ struct lockspace {
uint64_t checkpoint_r_num_last;
uint32_t checkpoint_r_count;
uint32_t checkpoint_p_count;
+ uint32_t last_plock_sig;
/* save copy of groupd member callback data for queries */
13 years, 8 months
cluster: STABLE3 - dlm_controld: fix plock owner syncing
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: c2894ccc77d3b3efeb28d821950fad9f8b9ae1c8
Parent: a85ea6fa4321504535ef8b6f60a914e094b5ede7
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Aug 12 15:43:25 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Aug 12 16:42:52 2010 -0500
dlm_controld: fix plock owner syncing
- The R_GOT_UNOWN flag was not always being set on resources when
the owner was set to 0. This would cause subsequent syncing of
plock state to write the incorrect owner into the checkpoint.
- Plocks were being written into the checkpoint unnecessarily for
owned resources in cases where the owner written in the checkpoint
was not the same as r->owner.
- Set a few other flags in r->flags to help debug future problems.
- Do more error checking and verifying of checkpointed state when
checkpoints are being written and read.
- Handle errors during plock syncing by disabling plock operations,
instead of ignoring or running with incorrect state.
bz 617306
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 110 ++++++++++++++++++++++++++++++--------------
1 files changed, 76 insertions(+), 34 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index d18d1f5..ca2d61a 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -32,7 +32,11 @@ struct pack_plock {
uint32_t pad;
};
-#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
+#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
+#define R_SEND_UNOWN 0x00000002 /* have sent owner=0 message */
+#define R_SEND_OWN 0x00000004 /* have sent owner=our_nodeid message */
+#define R_PURGE_UNOWN 0x00000008 /* set owner=0 in purge */
+#define R_SEND_DROP 0x00000010
struct resource {
struct list_head list; /* list of resources */
@@ -966,6 +970,11 @@ static void send_own(struct lockspace *ls, struct resource *r, int owner)
return;
}
+ if (!owner)
+ r->flags |= R_SEND_UNOWN;
+ else
+ r->flags |= R_SEND_OWN;
+
memset(&info, 0, sizeof(info));
info.number = r->number;
info.nodeid = owner;
@@ -1016,6 +1025,7 @@ static void send_drop(struct lockspace *ls, struct resource *r)
memset(&info, 0, sizeof(info));
info.number = r->number;
+ r->flags |= R_SEND_DROP;
send_struct_info(ls, &info, DLM_MSG_PLOCK_DROP);
}
@@ -1588,15 +1598,18 @@ void process_saved_plocks(struct lockspace *ls)
/* locks still marked SYNCING should not go into the ckpt; the new node
will get those locks by receiving PLOCK_SYNC messages */
-static void pack_section_buf(struct lockspace *ls, struct resource *r)
+static void pack_section_buf(struct lockspace *ls, struct resource *r,
+ int owner)
{
struct pack_plock *pp;
struct posix_lock *po;
struct lock_waiter *w;
int count = 0;
- /* plocks on owned resources are not replicated on other nodes */
- if (r->owner == our_nodeid)
+ /* plocks on owned resources are not replicated on other nodes;
+ N.B. owner not always equal to r->owner */
+
+ if (cfgd_plock_ownership && (owner == our_nodeid))
return;
pp = (struct pack_plock *) &section_buf;
@@ -1646,6 +1659,17 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
gettimeofday(&now, NULL);
+ sscanf(numbuf, "r%llu.%d", &num, &owner);
+
+#if 0
+ /* would be nice to always compile this, but it adds a lot of time */
+ r = search_resource(ls, num);
+ if (r) {
+ log_error("unpack %llu duplicate", num);
+ return -1;
+ }
+#endif
+
r = malloc(sizeof(struct resource));
if (!r)
return -ENOMEM;
@@ -1654,7 +1678,26 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
INIT_LIST_HEAD(&r->waiters);
INIT_LIST_HEAD(&r->pending);
- sscanf(numbuf, "r%llu.%d", &num, &owner);
+ if (!cfgd_plock_ownership) {
+ if (owner) {
+ log_error("unpack %llu bad owner %d count %d",
+ (unsigned long long)num, owner, count);
+ free(r);
+ return -1;
+ }
+ } else {
+ if (!owner)
+ r->flags |= R_GOT_UNOWN;
+
+ /* no locks should be included for owned resources */
+
+ if (owner && count) {
+ log_error("unpack %llu owner %d bad count %d",
+ (unsigned long long)num, owner, count);
+ free(r);
+ return -1;
+ }
+ }
r->number = num;
r->owner = owner;
@@ -1667,7 +1710,8 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
for (i = 0; i < count; i++) {
if (!pp->waiter) {
po = malloc(sizeof(struct posix_lock));
- // FIXME: handle failed malloc
+ if (!po)
+ return -ENOMEM;
po->start = le64_to_cpu(pp->start);
po->end = le64_to_cpu(pp->end);
po->owner = le64_to_cpu(pp->owner);
@@ -1677,7 +1721,8 @@ static int unpack_section_buf(struct lockspace *ls, char *numbuf, int buflen,
list_add_tail(&po->list, &r->locks);
} else {
w = malloc(sizeof(struct lock_waiter));
- // FIXME: handle failed malloc
+ if (!w)
+ return -ENOMEM;
w->info.start = le64_to_cpu(pp->start);
w->info.end = le64_to_cpu(pp->end);
w->info.owner = le64_to_cpu(pp->owner);
@@ -1900,13 +1945,9 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
sleep(1);
goto open_retry;
}
- if (rv == SA_AIS_ERR_EXIST) {
- log_group(ls, "store_plocks ckpt already exists");
- return;
- }
if (rv != SA_AIS_OK) {
log_error("store_plocks ckpt open error %d %s", rv, ls->name);
- return;
+ goto fail;
}
log_group(ls, "store_plocks open ckpt handle %llx",
@@ -1954,14 +1995,14 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
memset(&section_buf, 0, sizeof(section_buf));
section_len = 0;
- pack_section_buf(ls, r);
+ pack_section_buf(ls, r, owner);
if (!r_num_first)
r_num_first = r->number;
r_num_last = r->number;
- log_plock(ls, "store_plocks section size %u id %u \"%s\"",
- section_len, section_id.idLen, buf);
+ log_plock(ls, "wr sect ro %d rf %x len %u \"%s\"",
+ r->owner, r->flags, section_len, buf);
create_retry:
rv = saCkptSectionCreate(h, &section_attr, &section_buf,
@@ -1971,19 +2012,10 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
sleep(1);
goto create_retry;
}
- if (rv == SA_AIS_ERR_EXIST) {
- /* this shouldn't happen in general */
- log_group(ls, "store_plocks clearing old ckpt");
- /* do we need this close or will the close in
- the unlink function be ok? */
- saCkptCheckpointClose(h);
- _unlink_checkpoint(ls, &name);
- goto open_retry;
- }
if (rv != SA_AIS_OK) {
log_error("store_plocks ckpt section create err %d %s",
rv, ls->name);
- break;
+ goto fail;
}
}
out:
@@ -2004,6 +2036,13 @@ void store_plocks(struct lockspace *ls, uint32_t *sig)
ls->checkpoint_r_num_last = r_num_last;
ls->checkpoint_r_count = r_count;
ls->checkpoint_p_count = p_count;
+ return;
+
+ fail:
+ ls->disable_plock = 1;
+ /* force the node receiving plocks to fail sig check and disable
+ plocks as well */
+ *sig = 0xF0000000;
}
/* called by a node that's just been added to the group to get existing plock
@@ -2018,7 +2057,7 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
SaNameT name;
SaAisErrorT rv;
char buf[SECTION_NAME_LEN];
- int len, lock_count;
+ int len, lock_count, error;
uint32_t r_count = 0, p_count = 0;
uint64_t r_num, r_num_first = 0, r_num_last = 0;
@@ -2086,10 +2125,6 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
memset(&buf, 0, sizeof(buf));
snprintf(buf, SECTION_NAME_LEN, "%s", desc.sectionId.id);
- log_plock(ls, "retrieve_plocks section size %llu id %u \"%s\"",
- (unsigned long long)iov.dataSize, iov.sectionId.idLen,
- buf);
-
read_retry:
rv = saCkptCheckpointRead(h, &iov, 1, NULL);
if (rv == SA_AIS_ERR_TRY_AGAIN) {
@@ -2107,8 +2142,9 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
no locks, which exist in ownership mode; the resource
name and owner come from the section id */
- log_plock(ls, "retrieve_plocks ckpt read %llu bytes",
- (unsigned long long)iov.readSize);
+ log_plock(ls, "rd sect len %llu \"%s\"",
+ (unsigned long long)iov.readSize, buf);
+
section_len = iov.readSize;
if (section_len % sizeof(struct pack_plock)) {
@@ -2120,8 +2156,12 @@ void retrieve_plocks(struct lockspace *ls, uint32_t *sig)
r_num = 0;
lock_count = 0;
- unpack_section_buf(ls, (char *)desc.sectionId.id,
- desc.sectionId.idLen, &r_num, &lock_count);
+ error = unpack_section_buf(ls, (char *)desc.sectionId.id,
+ desc.sectionId.idLen, &r_num,
+ &lock_count);
+ if (error < 0)
+ continue;
+
r_count++;
p_count += lock_count;
@@ -2186,6 +2226,8 @@ void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
if (r->owner == nodeid) {
r->owner = 0;
+ r->flags |= R_GOT_UNOWN;
+ r->flags |= R_PURGE_UNOWN;
send_pending_plocks(ls, r);
}
13 years, 8 months
cluster: STABLE3 - dlm_controld: fix save_plocks initialization
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: a85ea6fa4321504535ef8b6f60a914e094b5ede7
Parent: f70311ab08a273982ba8f7bf55572d393506a852
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu Aug 12 15:37:23 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Aug 12 16:42:39 2010 -0500
dlm_controld: fix save_plocks initialization
When dlm_controld joins a cpg and begins receiving plock messages, it
needs to save all those plock messages for processing after it initializes
plock state from a checkpoint. Instead of being initialized to 1,
saved_plocks started as 0 and was initialized to 1 shortly after the join.
This left a short span of time where a plock message could arrive and be
processed immediately instead of saved, which would cause the node's plock
state to be out of sync with the other nodes, which could lead to any
number of different problems with plocks.
bz 623810
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 16 +++++++++++-----
1 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 93cf108..a9cc61e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -1135,9 +1135,9 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd,
return;
}
- process_saved_plocks(ls);
ls->need_plocks = 0;
ls->save_plocks = 0;
+ process_saved_plocks(ls);
}
static void send_info(struct lockspace *ls, struct change *cg, int type,
@@ -1282,6 +1282,10 @@ static void prepare_plocks(struct lockspace *ls)
}
ls->plock_ckpt_node = our_nodeid;
ls->need_plocks = 0;
+ if (ls->save_plocks) {
+ ls->save_plocks = 0;
+ process_saved_plocks(ls);
+ }
return;
}
@@ -1294,7 +1298,10 @@ static void prepare_plocks(struct lockspace *ls)
if (!ls->plock_ckpt_node) {
ls->need_plocks = 0;
- ls->save_plocks = 0;
+ if (ls->save_plocks) {
+ ls->save_plocks = 0;
+ process_saved_plocks(ls);
+ }
return;
}
@@ -1302,10 +1309,8 @@ static void prepare_plocks(struct lockspace *ls)
existing plock state in the ckpt to the time that we read that state
from the ckpt. */
- if (ls->need_plocks) {
- ls->save_plocks = 1;
+ if (ls->need_plocks)
return;
- }
if (ls->plock_ckpt_node != our_nodeid)
return;
@@ -1783,6 +1788,7 @@ int dlm_join_lockspace(struct lockspace *ls)
ls->cpg_fd = fd;
ls->kernel_stopped = 1;
ls->need_plocks = 1;
+ ls->save_plocks = 1;
ls->joining = 1;
memset(&name, 0, sizeof(name));
13 years, 8 months