cluster: STABLE32 - rgmanager: Fix for deadlock
by Ryan McCabe
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=b17d6ffc82e...
Commit: b17d6ffc82e75553cfee7c2fd21d3bbdac0d1e86
Parent: 05b6c764abb3e8e6f406bd8f33df8000206bd986
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Thu Sep 27 15:07:15 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Thu Sep 27 15:07:15 2012 -0400
rgmanager: Fix for deadlock
This patch fixes a deadlock in rgmanager that could occur when a node
starts rgmanager while a service is recovering.
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/daemons/rg_state.c | 1 +
rgmanager/src/daemons/rg_thread.c | 19 ++++++++++++++++++-
rgmanager/src/daemons/service_op.c | 1 +
3 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 30470ac..9f0c68e 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1979,6 +1979,7 @@ retry:
/* Deliberate */
case RG_EDEPEND:
case RG_EFAIL:
+ case RG_EDEADLCK:
/* Uh oh - we failed to relocate to this node.
ensure that we tell the next node to start it from
the 'recovering' state. */
diff --git a/rgmanager/src/daemons/rg_thread.c b/rgmanager/src/daemons/rg_thread.c
index a7bf3f9..31f289c 100644
--- a/rgmanager/src/daemons/rg_thread.c
+++ b/rgmanager/src/daemons/rg_thread.c
@@ -9,6 +9,8 @@
#include <rg_queue.h>
#include <assert.h>
#include <members.h>
+#include <liblogthread.h>
+
/**
* Resource thread list entry.
@@ -738,13 +740,28 @@ rt_enqueue_request(const char *resgroupname, int request,
ret = 0;
break;
}
- fprintf(stderr, "Failed to queue request: Would block\n");
/* EWOULDBLOCK */
pthread_mutex_unlock(resgroup->rt_queue_mutex);
pthread_mutex_unlock(&reslist_mutex);
+ logt_print(LOG_DEBUG,
+ "Failed to queue %d request for %s: Would block\n",
+ request, resgroupname);
return ret;
}
+ if (resgroup->rt_request == RG_START &&
+ (request == RG_START_REMOTE || request == RG_START_RECOVER)) {
+ send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
+ request, 0);
+ msg_free_ctx(response_ctx);
+ pthread_mutex_unlock(resgroup->rt_queue_mutex);
+ pthread_mutex_unlock(&reslist_mutex);
+ logt_print(LOG_DEBUG,
+ "Failed to queue %d request for %s: Would block\n",
+ request, resgroupname);
+ return -1;
+ }
+
ret = rq_queue_request(resgroup->rt_queue, resgroup->rt_name,
request, 0, 0, response_ctx, 0, target,
arg0, arg1);
diff --git a/rgmanager/src/daemons/service_op.c b/rgmanager/src/daemons/service_op.c
index f094129..4b74427 100644
--- a/rgmanager/src/daemons/service_op.c
+++ b/rgmanager/src/daemons/service_op.c
@@ -62,6 +62,7 @@ service_op_start(char *svcName,
++dep;
continue;
case RG_EFAIL:
+ case RG_EDEADLCK:
++fail;
continue;
case RG_EABORT:
11 years, 6 months
cluster: RHEL59 - fence_vmware_soap: Faster fencing, fix crash on VM without valid UUID
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=c404f3305d1...
Commit: c404f3305d156a89232c1f8294aadd23fdd2a15f
Parent: befffad0d0ad2554071c02f877033a49d47629b4
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Thu Sep 27 15:28:25 2012 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Thu Sep 27 15:28:25 2012 +0200
fence_vmware_soap: Faster fencing, fix crash on VM without valid UUID
Improve speed of fencing by removing requests for attributes that are not needed. This change is significant
when there are hundreds of VMs on a vSphere server. On systems with <10 VMs the improvement is still about 20%.
This patch also fixes the situation where there are VMs which do not have a valid UUID. This can happen when P2V (the physical
to virtual machine process) failed.
Patch was proposed by: Rodrigo A B Freire
Resolves: rhbz#836654
---
fence/agents/vmware_soap/fence_vmware_soap.py | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fence/agents/vmware_soap/fence_vmware_soap.py b/fence/agents/vmware_soap/fence_vmware_soap.py
index 9bebd08..0da7f0d 100644
--- a/fence/agents/vmware_soap/fence_vmware_soap.py
+++ b/fence/agents/vmware_soap/fence_vmware_soap.py
@@ -44,9 +44,12 @@ def process_results(results, machines, uuid, mappingToUUID):
info = {}
for i in m.propSet:
info[i.name] = i.val
- machines[info["name"]] = (info["config.uuid"], info["summary.runtime.powerState"])
- uuid[info["config.uuid"]] = info["summary.runtime.powerState"]
- mappingToUUID[m.obj.value] = info["config.uuid"]
+ # Prevent error KeyError: 'config.uuid' when reaching systems which P2V failed,
+ # since these systems don't have a valid UUID
+ if info.has_key("config.uuid"):
+ machines[info["name"]] = (info["config.uuid"], info["summary.runtime.powerState"])
+ uuid[info["config.uuid"]] = info["summary.runtime.powerState"]
+ mappingToUUID[m.obj.value] = info["config.uuid"]
return (machines, uuid, mappingToUUID)
@@ -77,7 +80,7 @@ def get_power_status(conn, options):
propSpec = conn.factory.create('ns0:PropertySpec')
propSpec.all = False
- propSpec.pathSet = ["name", "summary.runtime.powerState", "config.uuid", "summary", "config", "capability", "network"]
+ propSpec.pathSet = ["name", "summary.runtime.powerState", "config.uuid"]
propSpec.type = "VirtualMachine"
propFilterSpec = conn.factory.create('ns0:PropertyFilterSpec')
@@ -101,6 +104,9 @@ def get_power_status(conn, options):
machines.update(more_machines)
uuid.update(more_uuid)
mappingToUUID.update(more_mappingToUUID)
+ # Do not run unnecessary SOAP requests
+ if options.has_key("-U") and options["-U"] in uuid:
+ break
if ["list", "monitor"].count(options["-o"]) == 1:
return machines
11 years, 6 months
fence-agents: master - fence_vmware_soap: Faster fencing, fix crash on VM without valid UUID
by Marek Grác
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=582aa5...
Commit: 582aa5f7f285e225928765fff50979154eafa138
Parent: 37bf2c9eb75b3ed98edfec6991110af82a1c5efa
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: Thu Sep 27 13:00:37 2012 +0200
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: Thu Sep 27 13:00:37 2012 +0200
fence_vmware_soap: Faster fencing, fix crash on VM without valid UUID
Improve speed of fencing by removing requests for attributes that are not needed. This change is significant
when there are hundreds of VMs on a vSphere server. On systems with <10 VMs the improvement is still about 20%.
This patch also fixes the situation where there are VMs which do not have a valid UUID. This can happen when P2V (the physical
to virtual machine process) failed.
Patch was proposed by: Rodrigo A B Freire
Resolves: rhbz#836654
---
fence/agents/vmware_soap/fence_vmware_soap.py | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fence/agents/vmware_soap/fence_vmware_soap.py b/fence/agents/vmware_soap/fence_vmware_soap.py
index 9bebd08..0da7f0d 100644
--- a/fence/agents/vmware_soap/fence_vmware_soap.py
+++ b/fence/agents/vmware_soap/fence_vmware_soap.py
@@ -44,9 +44,12 @@ def process_results(results, machines, uuid, mappingToUUID):
info = {}
for i in m.propSet:
info[i.name] = i.val
- machines[info["name"]] = (info["config.uuid"], info["summary.runtime.powerState"])
- uuid[info["config.uuid"]] = info["summary.runtime.powerState"]
- mappingToUUID[m.obj.value] = info["config.uuid"]
+ # Prevent error KeyError: 'config.uuid' when reaching systems which P2V failed,
+ # since these systems don't have a valid UUID
+ if info.has_key("config.uuid"):
+ machines[info["name"]] = (info["config.uuid"], info["summary.runtime.powerState"])
+ uuid[info["config.uuid"]] = info["summary.runtime.powerState"]
+ mappingToUUID[m.obj.value] = info["config.uuid"]
return (machines, uuid, mappingToUUID)
@@ -77,7 +80,7 @@ def get_power_status(conn, options):
propSpec = conn.factory.create('ns0:PropertySpec')
propSpec.all = False
- propSpec.pathSet = ["name", "summary.runtime.powerState", "config.uuid", "summary", "config", "capability", "network"]
+ propSpec.pathSet = ["name", "summary.runtime.powerState", "config.uuid"]
propSpec.type = "VirtualMachine"
propFilterSpec = conn.factory.create('ns0:PropertyFilterSpec')
@@ -101,6 +104,9 @@ def get_power_status(conn, options):
machines.update(more_machines)
uuid.update(more_uuid)
mappingToUUID.update(more_mappingToUUID)
+ # Do not run unnecessary SOAP requests
+ if options.has_key("-U") and options["-U"] in uuid:
+ break
if ["list", "monitor"].count(options["-o"]) == 1:
return machines
11 years, 6 months
cluster: RHEL6 - fsck.gfs2: soften the messages when reclaiming freemeta blocks
by Bob Peterson
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=de3d3adfc52...
Commit: de3d3adfc52c0872e9576dc3119fc61d6fc414bb
Parent: 31165d9e1a31c058e48ae09255c05748a301cfdb
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Aug 15 15:04:44 2012 -0500
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Fri Sep 21 14:27:30 2012 -0500
fsck.gfs2: soften the messages when reclaiming freemeta blocks
Before the patch, fsck.gfs2 would reclaim "free metadata" blocks,
turning them all into truly "free" blocks, but then it would see
the discrepancy between the free space numbers in the rgrp and
complain bitterly. It gave users the impression that there was
a file system error when, in fact, there was none. This patch
now takes into account the reclaiming and adjusts the numbers
accordingly, then it adjusts the rgrp, assuming permission from
the earlier question about whether it should reclaim free meta.
rhbz#803477
---
gfs2/fsck/initialize.c | 120 ++++++++++++++++++++++++++++++++++++-----------
1 files changed, 92 insertions(+), 28 deletions(-)
diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
index 558fd03..a288ed2 100644
--- a/gfs2/fsck/initialize.c
+++ b/gfs2/fsck/initialize.c
@@ -30,6 +30,7 @@
static int was_mounted_ro = 0;
static uint64_t possible_root = HIGHEST_BLOCK;
static struct master_dir fix_md;
+static unsigned long long blks_2free = 0;
/**
* block_mounters
@@ -189,18 +190,21 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
*/
static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
int *fixit, int *this_rg_fixed,
- int *this_rg_bad)
+ int *this_rg_bad, int *this_rg_cleaned)
{
uint32_t rg_free, rg_reclaimed, rg_unlinked;
int rgb, x, y, off, bytes_to_check, total_bytes_to_check, asked = 0;
unsigned int state;
struct gfs_rgrp *gfs1rg = (struct gfs_rgrp *)&rgd->rg;
+ uint64_t diblock;
+ struct gfs2_buffer_head *bh;
rg_free = rg_reclaimed = rg_unlinked = 0;
total_bytes_to_check = rgd->ri.ri_bitbytes;
- *this_rg_fixed = *this_rg_bad = 0;
+ *this_rg_fixed = *this_rg_bad = *this_rg_cleaned = 0;
+ diblock = rgd->ri.ri_data0;
for (rgb = 0; rgb < rgd->ri.ri_length; rgb++){
/* Count up the free blocks in the bitmap */
off = (rgb) ? sizeof(struct gfs2_meta_header) :
@@ -214,32 +218,47 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
unsigned char *byte;
byte = (unsigned char *)&rgd->bh[rgb]->b_data[off + x];
- if (*byte == 0x55)
+ if (*byte == 0x55) {
+ diblock += GFS2_NBBY;
continue;
+ }
if (*byte == 0x00) {
+ diblock += GFS2_NBBY;
rg_free += GFS2_NBBY;
continue;
}
for (y = 0; y < GFS2_NBBY; y++) {
state = (*byte >>
(GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
- if (state == GFS2_BLKST_USED)
+ if (state == GFS2_BLKST_USED) {
+ diblock++;
continue;
- if (state == GFS2_BLKST_DINODE)
+ }
+ if (state == GFS2_BLKST_DINODE) {
+ diblock++;
continue;
+ }
if (state == GFS2_BLKST_FREE) {
+ diblock++;
rg_free++;
continue;
}
/* GFS2_BLKST_UNLINKED */
- *this_rg_bad = 1;
+ if (sdp->gfs1)
+ log_info(_("Free metadata block 0x%llx"
+ " found.\n"),
+ (unsigned long long)diblock);
+ else
+ log_info(_("Unlinked dinode 0x%llx "
+ "found.\n"),
+ (unsigned long long)diblock);
if (!asked) {
char msg[256];
asked = 1;
sprintf(msg,
- _("Okay to reclaim unlinked "
- "inodes in resource group "
+ _("Okay to reclaim free "
+ "metadata in resource group "
"%lld (0x%llx)? (y/n)"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr);
@@ -248,6 +267,7 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
}
if (!(*fixit)) {
rg_unlinked++;
+ diblock++;
continue;
}
*byte &= ~(GFS2_BIT_MASK <<
@@ -255,20 +275,58 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
bmodified(rgd->bh[rgb]);
rg_reclaimed++;
rg_free++;
- *this_rg_fixed = 1;
+ rgd->rg.rg_free++;
+ if (sdp->gfs1 && gfs1rg->rg_freemeta)
+ gfs1rg->rg_freemeta--;
+ log_info(_("Free metadata block %lld (0x%llx) "
+ "reclaimed.\n"),
+ (unsigned long long)diblock,
+ (unsigned long long)diblock);
+ bh = bread(sdp, diblock);
+ if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+ struct gfs2_inode *ip =
+ fsck_inode_get(sdp, bh);
+ if (ip->i_di.di_blocks > 1) {
+ blks_2free +=
+ ip->i_di.di_blocks - 1;
+ log_info(_("%lld blocks "
+ "(total) may need "
+ "to be freed in "
+ "pass 5.\n"),
+ blks_2free);
+ }
+ fsck_inode_put(&ip);
+ }
+ brelse(bh);
+ diblock++;
}
}
}
+ /* The unlinked blocks we reclaim shouldn't be considered errors,
+ since we're just reclaiming them as a courtesy. If we already
+ got permission to reclaim them, we adjust the rgrp counts
+ accordingly. That way, only "real" rgrp count inconsistencies
+ will be reported. */
+ if (rg_reclaimed && *fixit) {
+ if (sdp->gfs1)
+ gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]);
+ else
+ gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+ *this_rg_cleaned = 1;
+ log_info( _("The rgrp at %lld (0x%llx) was cleaned of %d "
+ "free metadata blocks.\n"),
+ (unsigned long long)rgd->ri.ri_addr,
+ (unsigned long long)rgd->ri.ri_addr,
+ rg_reclaimed);
+ }
if (rgd->rg.rg_free != rg_free) {
*this_rg_bad = 1;
+ *this_rg_cleaned = 0;
log_err( _("Error: resource group %lld (0x%llx): "
"free space (%d) does not match bitmap (%d)\n"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr,
rgd->rg.rg_free, rg_free);
- if (rg_reclaimed)
- log_err( _("(%d blocks were reclaimed)\n"),
- rg_reclaimed);
if (query( _("Fix the rgrp free blocks count? (y/n)"))) {
rgd->rg.rg_free = rg_free;
if (sdp->gfs1)
@@ -283,14 +341,12 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
}
if (sdp->gfs1 && gfs1rg->rg_freemeta != rg_unlinked) {
*this_rg_bad = 1;
+ *this_rg_cleaned = 0;
log_err( _("Error: resource group %lld (0x%llx): "
"free meta (%d) does not match bitmap (%d)\n"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr,
gfs1rg->rg_freemeta, rg_unlinked);
- if (rg_reclaimed)
- log_err( _("(%d blocks were reclaimed)\n"),
- rg_reclaimed);
if (query( _("Fix the rgrp free meta blocks count? (y/n)"))) {
gfs1rg->rg_freemeta = rg_unlinked;
gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]);
@@ -311,14 +367,16 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
/**
* check_rgrps_integrity - verify rgrp consistency
+ * Note: We consider an rgrp "cleaned" if the unlinked meta blocks are
+ * cleaned, so not quite "bad" and not quite "good" but rewritten anyway.
*
* Returns: 0 on success, 1 if errors were detected
*/
-static int check_rgrps_integrity(struct gfs2_sbd *sdp)
+static void check_rgrps_integrity(struct gfs2_sbd *sdp)
{
struct osi_node *n, *next = NULL;
- int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0;
- int was_bad = 0, was_fixed = 0, error = 0;
+ int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0, rgs_cleaned = 0;
+ int was_bad = 0, was_fixed = 0, was_cleaned = 0;
struct rgrp_tree *rgd;
int reclaim_unlinked = 0;
@@ -327,22 +385,28 @@ static int check_rgrps_integrity(struct gfs2_sbd *sdp)
next = osi_next(n);
rgd = (struct rgrp_tree *)n;
if (fsck_abort)
- return 0;
+ return;
check_rgrp_integrity(sdp, rgd, &reclaim_unlinked,
- &was_fixed, &was_bad);
+ &was_fixed, &was_bad, &was_cleaned);
if (was_fixed)
rgs_fixed++;
- if (was_bad) {
- error = 1;
+ if (was_cleaned)
+ rgs_cleaned++;
+ else if (was_bad)
rgs_bad++;
- } else
+ else
rgs_good++;
}
- if (rgs_bad)
- log_err( _("RGs: Consistent: %d Inconsistent: %d Fixed: %d"
- " Total: %d\n"),
- rgs_good, rgs_bad, rgs_fixed, rgs_good + rgs_bad);
- return error;
+ if (rgs_bad || rgs_cleaned) {
+ log_err( _("RGs: Consistent: %d Cleaned: %d Inconsistent: "
+ "%d Fixed: %d Total: %d\n"),
+ rgs_good, rgs_cleaned, rgs_bad, rgs_fixed,
+ rgs_good + rgs_bad + rgs_cleaned);
+ if (rgs_cleaned && blks_2free)
+ log_err(_("%lld blocks may need to be freed in pass 5 "
+ "due to the cleaned resource groups.\n"),
+ blks_2free);
+ }
}
/**
11 years, 7 months
gfs2-utils: master - fsck.gfs2: soften the messages when reclaiming freemeta blocks
by Bob Peterson
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=abe3d4b8...
Commit: abe3d4b88afe39d839692865b29bd800f09cba8a
Parent: e04ac4bed3cf5b2b3376fdcbf4485bc4fa2f2b36
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Fri Sep 21 15:17:20 2012 -0500
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Fri Sep 21 15:17:20 2012 -0500
fsck.gfs2: soften the messages when reclaiming freemeta blocks
Before the patch, fsck.gfs2 would reclaim "free metadata" blocks,
turning them all into truly "free" blocks, but then it would see
the discrepancy between the free space numbers in the rgrp and
complain bitterly. It gave users the impression that there was
a file system error when, in fact, there was none. This patch
now takes into account the reclaiming and adjusts the numbers
accordingly, then it adjusts the rgrp, assuming permission from
the earlier question about whether it should reclaim free meta.
---
gfs2/fsck/initialize.c | 120 ++++++++++++++++++++++++++++++++++++-----------
1 files changed, 92 insertions(+), 28 deletions(-)
diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
index a1047f3..e64ab3a 100644
--- a/gfs2/fsck/initialize.c
+++ b/gfs2/fsck/initialize.c
@@ -32,6 +32,7 @@
static int was_mounted_ro = 0;
static uint64_t possible_root = HIGHEST_BLOCK;
static struct master_dir fix_md;
+static unsigned long long blks_2free = 0;
/**
* block_mounters
@@ -192,18 +193,21 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
*/
static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
int *fixit, int *this_rg_fixed,
- int *this_rg_bad)
+ int *this_rg_bad, int *this_rg_cleaned)
{
uint32_t rg_free, rg_reclaimed, rg_unlinked;
int rgb, x, y, off, bytes_to_check, total_bytes_to_check, asked = 0;
unsigned int state;
struct gfs_rgrp *gfs1rg = (struct gfs_rgrp *)&rgd->rg;
+ uint64_t diblock;
+ struct gfs2_buffer_head *bh;
rg_free = rg_reclaimed = rg_unlinked = 0;
total_bytes_to_check = rgd->ri.ri_bitbytes;
- *this_rg_fixed = *this_rg_bad = 0;
+ *this_rg_fixed = *this_rg_bad = *this_rg_cleaned = 0;
+ diblock = rgd->ri.ri_data0;
for (rgb = 0; rgb < rgd->ri.ri_length; rgb++){
/* Count up the free blocks in the bitmap */
off = (rgb) ? sizeof(struct gfs2_meta_header) :
@@ -217,32 +221,47 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
unsigned char *byte;
byte = (unsigned char *)&rgd->bh[rgb]->b_data[off + x];
- if (*byte == 0x55)
+ if (*byte == 0x55) {
+ diblock += GFS2_NBBY;
continue;
+ }
if (*byte == 0x00) {
+ diblock += GFS2_NBBY;
rg_free += GFS2_NBBY;
continue;
}
for (y = 0; y < GFS2_NBBY; y++) {
state = (*byte >>
(GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
- if (state == GFS2_BLKST_USED)
+ if (state == GFS2_BLKST_USED) {
+ diblock++;
continue;
- if (state == GFS2_BLKST_DINODE)
+ }
+ if (state == GFS2_BLKST_DINODE) {
+ diblock++;
continue;
+ }
if (state == GFS2_BLKST_FREE) {
+ diblock++;
rg_free++;
continue;
}
/* GFS2_BLKST_UNLINKED */
- *this_rg_bad = 1;
+ if (sdp->gfs1)
+ log_info(_("Free metadata block 0x%llx"
+ " found.\n"),
+ (unsigned long long)diblock);
+ else
+ log_info(_("Unlinked dinode 0x%llx "
+ "found.\n"),
+ (unsigned long long)diblock);
if (!asked) {
char msg[256];
asked = 1;
sprintf(msg,
- _("Okay to reclaim unlinked "
- "inodes in resource group "
+ _("Okay to reclaim free "
+ "metadata in resource group "
"%lld (0x%llx)? (y/n)"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr);
@@ -251,6 +270,7 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
}
if (!(*fixit)) {
rg_unlinked++;
+ diblock++;
continue;
}
*byte &= ~(GFS2_BIT_MASK <<
@@ -258,20 +278,58 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
bmodified(rgd->bh[rgb]);
rg_reclaimed++;
rg_free++;
- *this_rg_fixed = 1;
+ rgd->rg.rg_free++;
+ if (sdp->gfs1 && gfs1rg->rg_freemeta)
+ gfs1rg->rg_freemeta--;
+ log_info(_("Free metadata block %lld (0x%llx) "
+ "reclaimed.\n"),
+ (unsigned long long)diblock,
+ (unsigned long long)diblock);
+ bh = bread(sdp, diblock);
+ if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+ struct gfs2_inode *ip =
+ fsck_inode_get(sdp, bh);
+ if (ip->i_di.di_blocks > 1) {
+ blks_2free +=
+ ip->i_di.di_blocks - 1;
+ log_info(_("%lld blocks "
+ "(total) may need "
+ "to be freed in "
+ "pass 5.\n"),
+ blks_2free);
+ }
+ fsck_inode_put(&ip);
+ }
+ brelse(bh);
+ diblock++;
}
}
}
+ /* The unlinked blocks we reclaim shouldn't be considered errors,
+ since we're just reclaiming them as a courtesy. If we already
+ got permission to reclaim them, we adjust the rgrp counts
+ accordingly. That way, only "real" rgrp count inconsistencies
+ will be reported. */
+ if (rg_reclaimed && *fixit) {
+ if (sdp->gfs1)
+ gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]);
+ else
+ gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+ *this_rg_cleaned = 1;
+ log_info( _("The rgrp at %lld (0x%llx) was cleaned of %d "
+ "free metadata blocks.\n"),
+ (unsigned long long)rgd->ri.ri_addr,
+ (unsigned long long)rgd->ri.ri_addr,
+ rg_reclaimed);
+ }
if (rgd->rg.rg_free != rg_free) {
*this_rg_bad = 1;
+ *this_rg_cleaned = 0;
log_err( _("Error: resource group %lld (0x%llx): "
"free space (%d) does not match bitmap (%d)\n"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr,
rgd->rg.rg_free, rg_free);
- if (rg_reclaimed)
- log_err( _("(%d blocks were reclaimed)\n"),
- rg_reclaimed);
if (query( _("Fix the rgrp free blocks count? (y/n)"))) {
rgd->rg.rg_free = rg_free;
if (sdp->gfs1)
@@ -286,14 +344,12 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
}
if (sdp->gfs1 && gfs1rg->rg_freemeta != rg_unlinked) {
*this_rg_bad = 1;
+ *this_rg_cleaned = 0;
log_err( _("Error: resource group %lld (0x%llx): "
"free meta (%d) does not match bitmap (%d)\n"),
(unsigned long long)rgd->ri.ri_addr,
(unsigned long long)rgd->ri.ri_addr,
gfs1rg->rg_freemeta, rg_unlinked);
- if (rg_reclaimed)
- log_err( _("(%d blocks were reclaimed)\n"),
- rg_reclaimed);
if (query( _("Fix the rgrp free meta blocks count? (y/n)"))) {
gfs1rg->rg_freemeta = rg_unlinked;
gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]);
@@ -314,14 +370,16 @@ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
/**
* check_rgrps_integrity - verify rgrp consistency
+ * Note: We consider an rgrp "cleaned" if the unlinked meta blocks are
+ * cleaned, so not quite "bad" and not quite "good" but rewritten anyway.
*
* Returns: 0 on success, 1 if errors were detected
*/
-static int check_rgrps_integrity(struct gfs2_sbd *sdp)
+static void check_rgrps_integrity(struct gfs2_sbd *sdp)
{
struct osi_node *n, *next = NULL;
- int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0;
- int was_bad = 0, was_fixed = 0, error = 0;
+ int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0, rgs_cleaned = 0;
+ int was_bad = 0, was_fixed = 0, was_cleaned = 0;
struct rgrp_tree *rgd;
int reclaim_unlinked = 0;
@@ -330,22 +388,28 @@ static int check_rgrps_integrity(struct gfs2_sbd *sdp)
next = osi_next(n);
rgd = (struct rgrp_tree *)n;
if (fsck_abort)
- return 0;
+ return;
check_rgrp_integrity(sdp, rgd, &reclaim_unlinked,
- &was_fixed, &was_bad);
+ &was_fixed, &was_bad, &was_cleaned);
if (was_fixed)
rgs_fixed++;
- if (was_bad) {
- error = 1;
+ if (was_cleaned)
+ rgs_cleaned++;
+ else if (was_bad)
rgs_bad++;
- } else
+ else
rgs_good++;
}
- if (rgs_bad)
- log_err( _("RGs: Consistent: %d Inconsistent: %d Fixed: %d"
- " Total: %d\n"),
- rgs_good, rgs_bad, rgs_fixed, rgs_good + rgs_bad);
- return error;
+ if (rgs_bad || rgs_cleaned) {
+ log_err( _("RGs: Consistent: %d Cleaned: %d Inconsistent: "
+ "%d Fixed: %d Total: %d\n"),
+ rgs_good, rgs_cleaned, rgs_bad, rgs_fixed,
+ rgs_good + rgs_bad + rgs_cleaned);
+ if (rgs_cleaned && blks_2free)
+ log_err(_("%lld blocks may need to be freed in pass 5 "
+ "due to the cleaned resource groups.\n"),
+ blks_2free);
+ }
}
/**
11 years, 7 months
cluster: RHEL58 - rgmanager: Don't use the resource name in tmp file names
by Ryan McCabe
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=9a98cfc8e11...
Commit: 9a98cfc8e11273ca14135245132d9adc90dbd754
Parent: bdf0650eb61104933d76a4b74adc12c37ac3511a
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Tue Sep 18 09:52:59 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Tue Sep 18 10:14:56 2012 -0400
rgmanager: Don't use the resource name in tmp file names
Patch from John Ruemker <jruemker(a)redhat.com>
This patch fixes a regression in the fs resource agent that caused
mktemp to fail to create a tmp file during status operations when
the resource name contains the '/' character.
Resolves: rhbz#858022
Acked-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/fs.sh | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh
index 7e6898c..225034b 100755
--- a/rgmanager/src/resources/fs.sh
+++ b/rgmanager/src/resources/fs.sh
@@ -560,7 +560,7 @@ mountInUse () {
dev=$1
mp=$2
- typeset proc_mounts=$(mktemp /tmp/fs-$OCF_RESKEY_name.proc.mounts.XXXXXX)
+ typeset proc_mounts=$(mktemp /tmp/fs.proc.mounts.XXXXXX)
cat /proc/mounts > $proc_mounts
while read tmp_dev tmp_mp junka junkb junkc junkd; do
if [ -n "$tmp_dev" -a "$tmp_dev" = "$dev" ]; then
@@ -612,7 +612,7 @@ isMounted () {
ret=$NO
- typeset proc_mounts=$(mktemp /tmp/fs-$OCF_RESKEY_name.proc.mounts.XXXXXX)
+ typeset proc_mounts=$(mktemp /tmp/fs.proc.mounts.XXXXXX)
cat /proc/mounts > $proc_mounts
while read tmp_dev tmp_mp junk_a junk_b junk_c junk_d
do
@@ -970,7 +970,7 @@ Unknown file system type '$fstype' for device $dev. Assuming fsck is required."
#
if [ -n "$fsck_needed" ] || [ "${OCF_RESKEY_force_fsck}" = "yes" ] ||\
[ "${OCF_RESKEY_force_fsck}" = "1" ]; then
- typeset fsck_log=$(mktemp /tmp/fs-$OCF_RESKEY_name.fsck.log.XXXXXX)
+ typeset fsck_log=$(mktemp /tmp/fs.fsck.log.XXXXXX)
ocf_log debug "Running fsck on $dev"
fsck -p $dev >> $fsck_log 2>&1
ret_val=$?
11 years, 7 months
cluster: RHEL59 - rgmanager: Don't use the resource name in tmp file names
by Ryan McCabe
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=befffad0d0a...
Commit: befffad0d0ad2554071c02f877033a49d47629b4
Parent: 6342b59f089475e679bbbd440ef8ae6277d173d8
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Tue Sep 18 09:52:59 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Tue Sep 18 10:14:20 2012 -0400
rgmanager: Don't use the resource name in tmp file names
Patch from John Ruemker <jruemker(a)redhat.com>
This patch fixes a regression in the fs resource agent that caused
mktemp to fail to create a tmp file during status operations when
the resource name contains the '/' character.
Resolves: rhbz#858022
Acked-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/fs.sh | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh
index 147f246..e55e77f 100755
--- a/rgmanager/src/resources/fs.sh
+++ b/rgmanager/src/resources/fs.sh
@@ -560,7 +560,7 @@ mountInUse () {
dev=$1
mp=$2
- typeset proc_mounts=$(mktemp /tmp/fs-$OCF_RESKEY_name.proc.mounts.XXXXXX)
+ typeset proc_mounts=$(mktemp /tmp/fs.proc.mounts.XXXXXX)
cat /proc/mounts > $proc_mounts
while read tmp_dev tmp_mp junka junkb junkc junkd; do
if [ -n "$tmp_dev" -a "$tmp_dev" = "$dev" ]; then
@@ -612,7 +612,7 @@ isMounted () {
ret=$NO
- typeset proc_mounts=$(mktemp /tmp/fs-$OCF_RESKEY_name.proc.mounts.XXXXXX)
+ typeset proc_mounts=$(mktemp /tmp/fs.proc.mounts.XXXXXX)
cat /proc/mounts > $proc_mounts
while read tmp_dev tmp_mp junk_a junk_b junk_c junk_d
do
@@ -970,7 +970,7 @@ Unknown file system type '$fstype' for device $dev. Assuming fsck is required."
#
if [ -n "$fsck_needed" ] || [ "${OCF_RESKEY_force_fsck}" = "yes" ] ||\
[ "${OCF_RESKEY_force_fsck}" = "1" ]; then
- typeset fsck_log=$(mktemp /tmp/fs-$OCF_RESKEY_name.fsck.log.XXXXXX)
+ typeset fsck_log=$(mktemp /tmp/fs.fsck.log.XXXXXX)
ocf_log debug "Running fsck on $dev"
fsck -p $dev >> $fsck_log 2>&1
ret_val=$?
11 years, 7 months
cluster: RHEL6 - cman init: increase default shutdown timeouts
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=31165d9e1a3...
Commit: 31165d9e1a31c058e48ae09255c05748a301cfdb
Parent: 33c58d437ad7f8c919ce7c56ef6826579a97fca7
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Fri Sep 14 14:06:34 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Sep 18 08:01:47 2012 +0200
cman init: increase default shutdown timeouts
In some conditions, especially those triggered when shutting down all nodes
at the same time, corosync takes a lot longer than 10 seconds
to stabilize membership. That means that daemons will not quit fast
enough before cman init declares a shutdown error.
Increase the default shutdown timeouts from 10 to 30 seconds.
Resolves: rhbz#854032
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/init.d/cman.in | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index 4f8db7a..b64bb17 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -276,7 +276,7 @@ stop_daemon()
shift
retryforsec=$1
- [ -z "$retryforsec" ] && retryforsec=1
+ [ -z "$retryforsec" ] && retryforsec=30
retries=0
if check_sleep; then
@@ -589,7 +589,7 @@ start_qdiskd()
stop_qdiskd()
{
- stop_daemon qdiskd 5
+ stop_daemon qdiskd
}
start_groupd()
@@ -708,7 +708,7 @@ join_fence_domain()
leave_fence_domain()
{
if status fenced > /dev/null 2>&1; then
- errmsg=$( fence_tool leave -w 10 2>&1 )
+ errmsg=$( fence_tool leave -w 30 2>&1 )
return $?
fi
}
11 years, 7 months
fence-agents: RHEL6 - Add metadata to list of accepted actions.
by rohara
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=fdb968...
Commit: fdb96874c75600864048a752c08aa1e45d2cc534
Parent: 693fdc9f79b3e3961fe1c562950c4ce76d274870
Author: Ryan O'Hara <rohara(a)redhat.com>
AuthorDate: Mon Sep 17 19:02:49 2012 -0500
Committer: Ryan O'Hara <rohara(a)redhat.com>
CommitterDate: Mon Sep 17 19:04:40 2012 -0500
Add metadata to list of accepted actions.
Resolves: rhbz#825667
Signed-off-by: Ryan O'Hara <rohara(a)redhat.com>
---
fence/agents/scsi/fence_scsi.8 | 16 +++++++++-------
1 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/fence/agents/scsi/fence_scsi.8 b/fence/agents/scsi/fence_scsi.8
index f1a29ea..0d70930 100644
--- a/fence/agents/scsi/fence_scsi.8
+++ b/fence/agents/scsi/fence_scsi.8
@@ -41,13 +41,15 @@ and created registrations for each path.
.SH OPTIONS
.TP
\fB-o\fP \fIaction\fR
-Fencing action. This value can be "on", "off", or "status". All
-actions require either a key (see -k option) or node name (see -n
-option). For "on", the agent will attempt to register with the
-device(s) and create a reservation if none exists. The "off" action
-will attempt to remove a node's key from the device(s). The "status"
-action will report whether or not a node's key is currently register
-with one or more of the devices. The default action if "off".
+Fencing action. This value can be "on", "off", "status", or
+"metadata". The "on", "off", and "status" actions require either a key
+(see -k option) or node name (see -n option). For "on", the agent will
+attempt to register with the device(s) and create a reservation if
+none exists. The "off" action will attempt to remove a node's key from
+the device(s). The "status" action will report whether or not a node's
+key is currently register with one or more of the devices. The
+"metadata" action will display the XML metadata. The default action if
+"off".
.TP
\fB-d\fP \fIdevices\fR
List of devices to use for current operation. Devices can be
11 years, 7 months
fence-agents: master - Add metadata to list of accepted actions.
by rohara
Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=37bf2c...
Commit: 37bf2c9eb75b3ed98edfec6991110af82a1c5efa
Parent: 36559eaf7102932df1e3250f018a587c12c5ce67
Author: Ryan O'Hara <rohara(a)redhat.com>
AuthorDate: Mon Sep 17 19:02:49 2012 -0500
Committer: Ryan O'Hara <rohara(a)redhat.com>
CommitterDate: Mon Sep 17 19:03:35 2012 -0500
Add metadata to list of accepted actions.
Resolves: rhbz#825667
Signed-off-by: Ryan O'Hara <rohara(a)redhat.com>
---
fence/agents/scsi/fence_scsi.8 | 16 +++++++++-------
1 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/fence/agents/scsi/fence_scsi.8 b/fence/agents/scsi/fence_scsi.8
index f1a29ea..0d70930 100644
--- a/fence/agents/scsi/fence_scsi.8
+++ b/fence/agents/scsi/fence_scsi.8
@@ -41,13 +41,15 @@ and created registrations for each path.
.SH OPTIONS
.TP
\fB-o\fP \fIaction\fR
-Fencing action. This value can be "on", "off", or "status". All
-actions require either a key (see -k option) or node name (see -n
-option). For "on", the agent will attempt to register with the
-device(s) and create a reservation if none exists. The "off" action
-will attempt to remove a node's key from the device(s). The "status"
-action will report whether or not a node's key is currently register
-with one or more of the devices. The default action if "off".
+Fencing action. This value can be "on", "off", "status", or
+"metadata". The "on", "off", and "status" actions require either a key
+(see -k option) or node name (see -n option). For "on", the agent will
+attempt to register with the device(s) and create a reservation if
+none exists. The "off" action will attempt to remove a node's key from
+the device(s). The "status" action will report whether or not a node's
+key is currently register with one or more of the devices. The
+"metadata" action will display the XML metadata. The default action if
+"off".
.TP
\fB-d\fP \fIdevices\fR
List of devices to use for current operation. Devices can be
11 years, 7 months