Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: de4850b3764331085a4657c57fd518b3586d5b6c
Parent: fd0a7e2b1ba68ef3e96fa10b54b1049cf963202e
Author: Bob Peterson <bob(a)ganesha.peterson>
AuthorDate: Mon Jan 25 09:03:16 2010 -0600
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Tue Jan 26 14:39:31 2010 -0600
fsck.gfs2: Check for massive amounts of pointer corruption
Sometimes, due to faulty hardware or whatever, a whole bunch of
random nonsense is written into a block. If that block happens
to be an indirect list of pointers, pass1 may not find the
corruption for a long time. This happens when the corruption
starts, for example, at offset 0x200, or if the corruption just
happens to look like valid pointers for a while, like low
blocks that correspond to system inodes, rgrps, or journals.
If pass1 marks a whole bunch of pointers as valid, then later
decides the whole inode is corrupt, it becomes a major pain to
undo what it has done. For example, if it had found one of the
"bad" pointers to be the statfs file's dinode and marked that as
a duplicate reference, it's a pain to undo that once it becomes
apparent that there's too much damage to recover.
This patch introduces a block range check function that pass1
can use to traverse the metadata tree initially, just checking
for lots of damage to pointers. If there are a lot of damaged
metadata pointers it's better to just mark the dinode as free
space and let pass5 clean up any blocks that it referenced.
If a bridge has too many damaged rungs to cross, it's better to
find that out first rather than to cross half-way and have to
tip-toe back to the start.
rhbz#455300
---
gfs2/fsck/fsck.h | 2 +
gfs2/fsck/pass1.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 122 insertions(+), 8 deletions(-)
diff --git a/gfs2/fsck/fsck.h b/gfs2/fsck/fsck.h
index 5948210..30eb223 100644
--- a/gfs2/fsck/fsck.h
+++ b/gfs2/fsck/fsck.h
@@ -23,6 +23,8 @@
#define FSCK_CANCELED 32 /* Aborted with a signal or ^C */
#define FSCK_LIBRARY 128 /* Shared library error */
+#define BAD_POINTER_TOLERANCE 10 /* Max bad pointers tolerated per inode; above this, handle_di() marks the dinode free */
+
struct inode_info
{
struct osi_node node;
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 04938db..4382ad9 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -554,6 +554,97 @@ static int clear_leaf(struct gfs2_inode *ip, uint64_t block,
return 0;
}
+/**
+ * rangecheck_block - tally one suspicious pointer during the pre-scan.
+ * @private: points to the caller's long count of bad pointers so far.
+ * A pointer is counted when gfs2_check_range() rejects it, or when
+ * block_type() says it is not gfs2_block_free (logged as a duplicate).
+ * This function only counts and logs: the total must be known BEFORE
+ * pass1 starts marking duplicates, since a stray value like 0x1d could
+ * otherwise cost us an important file (master or the root directory).
+ * Returns: 0 if the pointer passes; ENOENT if it is bad and the count is
+ * still <= BAD_POINTER_TOLERANCE; -ENOENT once the count exceeds that.
+ */
+static int rangecheck_block(struct gfs2_inode *ip, uint64_t block,
+ struct gfs2_buffer_head **bh, /* not referenced anywhere in this body */
+ const char *btype, void *private)
+{
+ long *bad_pointers = (long *)private; /* running count owned by the caller */
+ uint8_t q;
+
+ if (gfs2_check_range(ip->i_sbd, block) != 0) {
+ (*bad_pointers)++;
+ log_debug( _("Bad %s block pointer (out of range #%ld) "
+ "found in inode %lld (0x%llx).\n"), btype,
+ *bad_pointers,
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr); /* NOTE(review): %lld is fed an unsigned long long; confirm %llu was meant */
+ if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+ return ENOENT;
+ else
+ return -ENOENT; /* Exits check_metatree quicker */
+ }
+ /* See how many duplicate blocks it has */
+ q = block_type(block);
+ if (q != gfs2_block_free) { /* any non-free state is counted as a violation */
+ (*bad_pointers)++;
+ log_debug( _("Duplicated %s block pointer (violation #%ld) "
+ "found in inode %lld (0x%llx).\n"), btype,
+ *bad_pointers,
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr);
+ if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+ return ENOENT;
+ else
+ return -ENOENT; /* Exits check_metatree quicker */
+ }
+ return 0;
+}
+
+static int rangecheck_metadata(struct gfs2_inode *ip, uint64_t block,
+ struct gfs2_buffer_head **bh, void *private)
+{ /* check_metalist hook of rangecheck_fxns: range-check one metadata pointer */
+ return rangecheck_block(ip, block, bh, _("metadata"), private);
+}
+
+static int rangecheck_leaf(struct gfs2_inode *ip, uint64_t block,
+ struct gfs2_buffer_head *bh, void *private)
+{ /* check_leaf hook of rangecheck_fxns: bh arrives by value, so the address of the local copy is passed on */
+ return rangecheck_block(ip, block, &bh, _("leaf"), private);
+}
+
+static int rangecheck_data(struct gfs2_inode *ip, uint64_t block,
+ void *private)
+{ /* check_data hook of rangecheck_fxns: this signature has no buffer head, so NULL is passed on */
+ return rangecheck_block(ip, block, NULL, _("data"), private);
+}
+
+static int rangecheck_eattr_indir(struct gfs2_inode *ip, uint64_t block,
+ uint64_t parent,
+ struct gfs2_buffer_head **bh, void *private)
+{ /* check_eattr_indir hook of rangecheck_fxns: parent and bh are not forwarded */
+ return rangecheck_block(ip, block, NULL,
+ _("indirect extended attribute"),
+ private);
+}
+
+static int rangecheck_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
+ uint64_t parent, struct gfs2_buffer_head **bh,
+ void *private)
+{ /* check_eattr_leaf hook of rangecheck_fxns: parent and bh are not forwarded */
+ return rangecheck_block(ip, block, NULL, _("extended attribute"),
+ private);
+}
+
+struct metawalk_fxns rangecheck_fxns = { /* callback table for the pointer pre-scan; NOTE(review): not declared static - confirm external linkage is wanted */
+ .private = NULL, /* handle_di() stores the address of its bad_pointers counter here before walking */
+ .check_metalist = rangecheck_metadata,
+ .check_data = rangecheck_data,
+ .check_leaf = rangecheck_leaf,
+ .check_eattr_indir = rangecheck_eattr_indir,
+ .check_eattr_leaf = rangecheck_eattr_leaf,
+};
+
static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
uint64_t block)
{
@@ -562,10 +653,16 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head
*bh,
int error;
struct block_count bc = {0};
struct metawalk_fxns invalidate_metatree = {0};
+ long bad_pointers;
- invalidate_metatree.check_metalist = clear_metalist;
- invalidate_metatree.check_data = clear_data;
- invalidate_metatree.check_leaf = clear_leaf;
+ q = block_type(block);
+ if(q != gfs2_block_free) {
+ log_err( _("Found duplicate block referenced as an inode at "
+ "#%" PRIu64 " (0x%" PRIx64 ")\n"), block, block);
+ gfs2_dup_set(block);
+ fsck_inode_put(&ip);
+ return 0;
+ }
ip = fsck_inode_get(sdp, bh);
if (ip->i_di.di_num.no_addr != block) {
@@ -584,11 +681,22 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head
*bh,
" (0x%" PRIx64 ") not fixed\n"), block, block);
}
- q = block_type(block);
- if(q != gfs2_block_free) {
- log_err( _("Found duplicate block referenced as an inode at "
- "#%" PRIu64 " (0x%" PRIx64 ")\n"), block, block);
- gfs2_dup_set(block);
+ bad_pointers = 0L;
+
+ /* First, check the metadata for massive amounts of pointer corruption.
+ Such corruption can only lead us to ruin trying to clean it up,
+ so it's better to check it up front and delete the inode if
+ there is corruption. */
+ rangecheck_fxns.private = &bad_pointers;
+ error = check_metatree(ip, &rangecheck_fxns);
+ if (bad_pointers > BAD_POINTER_TOLERANCE) {
+ log_err( _("Error: inode %llu (0x%llx) has more than "
+ "%d bad pointers.\n"),
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ BAD_POINTER_TOLERANCE);
+ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+ _("badly corrupt"), gfs2_block_free);
fsck_inode_put(&ip);
return 0;
}
@@ -703,6 +811,10 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head
*bh,
"errors; invalidating.\n"),
(unsigned long long)ip->i_di.di_num.no_addr,
(unsigned long long)ip->i_di.di_num.no_addr);
+ invalidate_metatree.check_metalist = clear_metalist;
+ invalidate_metatree.check_data = clear_data;
+ invalidate_metatree.check_leaf = clear_leaf;
+
/* FIXME: Must set all leaves invalid as well */
check_metatree(ip, &invalidate_metatree);
fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,