Thread (49 messages) 49 messages, 4 authors, 2021-02-11

Re: [PATCH 07/10] xfs_repair: set NEEDSREPAIR when we deliberately corrupt directories

From: "Darrick J. Wong" <djwong@kernel.org>
Date: 2021-02-09 19:47:18

On Tue, Feb 09, 2021 at 02:14:22PM -0500, Brian Foster wrote:
On Tue, Feb 09, 2021 at 10:35:42AM -0800, Darrick J. Wong wrote:
quoted
On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote:
quoted
On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote:
quoted
From: Darrick J. Wong <djwong@kernel.org>

There are a few places in xfs_repair's directory checking code where we
deliberately corrupt a directory entry as a sentinel to trigger a
correction in a later repair phase.  In the meantime, the filesystem is
inconsistent, so set the needsrepair flag to force a re-run of repair if
the system goes down.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
Hmm.. this seems orthogonal to the rest of the series. I'm sure we can
come up with various additional uses for the bit, but it seems a little
odd to me that repair might set it in some cases after a crash but not
others (if the filesystem happens to already be corrupt, for example).
<nod> Another option I thought of is to add a hook to the buffer cache
so that the first time anyone tries to bwrite a buffer (either directly
or via a delwri list or normal buffer cache writeback) we'll also set
needsrepair on the ondisk primary super.  That would protect us against
other scenarios like crashing after writing a new AGF but before writing
the new AGI, where the fs is left in an indeterminate state.
Yeah, that _seems_ more appropriate to me. It's not immediately clear to
me what the implementation should look like, but in general behavior
that sets needsrepair on first modification and clears it as a final
step sounds more practical. Then the behavior can be easily explained as
"once repair starts on an fs, it must be completed before it is allowed
to mount." I do think this should be lifted off for a follow-on series so
we can make progress on the feature upgrade bits without growing more
requirements and complexity..
Oh, definitely. I'll withdraw this patch for now in the interests of
getting everything else going for Eric. :)

--D
Brian
quoted
Hmm, maybe I should pursue /that/ instead.

--D
quoted
Brian
quoted
 repair/agheader.h   |    2 ++
 repair/dir2.c       |    3 +++
 repair/phase6.c     |    7 +++++++
 repair/xfs_repair.c |   37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/repair/agheader.h b/repair/agheader.h
index a63827c8..fa6fe596 100644
--- a/repair/agheader.h
+++ b/repair/agheader.h
@@ -82,3 +82,5 @@ typedef struct fs_geo_list  {
 #define XR_AG_AGF	0x2
 #define XR_AG_AGI	0x4
 #define XR_AG_SB_SEC	0x8
+
+void force_needsrepair(struct xfs_mount *mp);
diff --git a/repair/dir2.c b/repair/dir2.c
index eabdb4f2..922b8a3e 100644
--- a/repair/dir2.c
+++ b/repair/dir2.c
@@ -15,6 +15,7 @@
 #include "da_util.h"
 #include "prefetch.h"
 #include "progress.h"
+#include "agheader.h"
 
 /*
  * Known bad inode list.  These are seen when the leaf and node
@@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64
 				do_warn(
 _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"),
 					(intptr_t)ptr - (intptr_t)d);
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				*dirty = 1;
 			} else {
@@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "),
 		 */
 		if (junkit) {
 			if (!no_modify) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				*dirty = 1;
 				do_warn(_("clearing entry\n"));
diff --git a/repair/phase6.c b/repair/phase6.c
index 14464bef..5ecbe9b2 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
 					fname, ip->i_ino, inum)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
 					fname, ip->i_ino, inum)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
 						ORPHANAGE, inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
 					fname, inum, ip->i_ino)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
 						inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
 						fname, inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 "
 				orphanage_ino = 0;
 			nbad++;
 			if (!no_modify)  {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 				if (verbose)
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index f607afcb..9dc73854 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -754,6 +754,43 @@ clear_needsrepair(
 		libxfs_buf_relse(bp);
 }
 
+/*
+ * Mark the filesystem as needing repair.  This should only be called by code
+ * that deliberately sets invalid sentinel values in the on-disk metadata to
+ * trigger a later reconstruction, and only after we've settled the primary
+ * super contents (i.e. after phase 1).
+ */
+void
+force_needsrepair(
+	struct xfs_mount	*mp)
+{
+	struct xfs_buf		*bp;
+	int			error;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+	    xfs_sb_version_needsrepair(&mp->m_sb))
+		return;
+
+	bp = libxfs_getsb(mp);
+	if (!bp || bp->b_error) {
+		do_log(
+	_("couldn't get superblock to set needsrepair, err=%d\n"),
+				bp ? bp->b_error : ENOMEM);
+		/* Fall through so that an errored buffer still gets released. */
+	} else {
+		mp->m_sb.sb_features_incompat |=
+				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+
+		/* Force the primary super to disk immediately. */
+		error = -libxfs_bwrite(bp);
+		if (error)
+			do_log(_("couldn't force needsrepair, err=%d\n"), error);
+	}
+	if (bp)
+		libxfs_buf_relse(bp);
+}
+
 int
 main(int argc, char **argv)
 {
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help