Thread (49 messages) 49 messages, 4 authors, 2021-02-11

Re: [PATCH 07/10] xfs_repair: set NEEDSREPAIR when we deliberately corrupt directories

From: "Darrick J. Wong" <djwong@kernel.org>
Date: 2021-02-09 18:50:37

On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote:
On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote:
quoted
From: Darrick J. Wong <djwong@kernel.org>

There are a few places in xfs_repair's directory checking code where we
deliberately corrupt a directory entry as a sentinel to trigger a
correction in later repair phase.  In the mean time, the filesystem is
inconsistent, so set the needsrepair flag to force a re-run of repair if
the system goes down.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
Hmm.. this seems orthogonal to the rest of the series. I'm sure we can
come up with various additional uses for the bit, but it seems a little
odd to me that repair might set it in some cases after a crash but not
others (if the filesystem happens to already be corrupt, for example).
<nod> Another option I thought of is to add a hook to the buffer cache
so that the first time anyone tries to bwrite a buffer (either directly
or via a delwri list or normal buffer cache writeback) we'll also set
needsrepair on the ondisk primary super.  That would protect us against
other scenarios like crashing after writing a new AGF but before writing
the new AGI, where the fs is left in an indeterminate state.

Hmm, maybe I should pursue /that/ instead.

--D
Brian
quoted
 repair/agheader.h   |    2 ++
 repair/dir2.c       |    3 +++
 repair/phase6.c     |    7 +++++++
 repair/xfs_repair.c |   37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/repair/agheader.h b/repair/agheader.h
index a63827c8..fa6fe596 100644
--- a/repair/agheader.h
+++ b/repair/agheader.h
@@ -82,3 +82,5 @@ typedef struct fs_geo_list  {
 #define XR_AG_AGF	0x2
 #define XR_AG_AGI	0x4
 #define XR_AG_SB_SEC	0x8
+
+void force_needsrepair(struct xfs_mount *mp);
diff --git a/repair/dir2.c b/repair/dir2.c
index eabdb4f2..922b8a3e 100644
--- a/repair/dir2.c
+++ b/repair/dir2.c
@@ -15,6 +15,7 @@
 #include "da_util.h"
 #include "prefetch.h"
 #include "progress.h"
+#include "agheader.h"
 
 /*
  * Known bad inode list.  These are seen when the leaf and node
@@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64
 				do_warn(
 _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"),
 					(intptr_t)ptr - (intptr_t)d);
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				*dirty = 1;
 			} else {
@@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "),
 		 */
 		if (junkit) {
 			if (!no_modify) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				*dirty = 1;
 				do_warn(_("clearing entry\n"));
diff --git a/repair/phase6.c b/repair/phase6.c
index 14464bef..5ecbe9b2 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
 					fname, ip->i_ino, inum)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
 					fname, ip->i_ino, inum)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
 						ORPHANAGE, inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data(
 			if (entry_junked(
 	_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
 					fname, inum, ip->i_ino)) {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 			}
@@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
 						inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data(
 				if (entry_junked(
 	_("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
 						fname, inum, ip->i_ino)) {
+					force_needsrepair(mp);
 					dep->name[0] = '/';
 					libxfs_dir2_data_log_entry(&da, bp, dep);
 				}
@@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 "
 				orphanage_ino = 0;
 			nbad++;
 			if (!no_modify)  {
+				force_needsrepair(mp);
 				dep->name[0] = '/';
 				libxfs_dir2_data_log_entry(&da, bp, dep);
 				if (verbose)
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index f607afcb..9dc73854 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -754,6 +754,43 @@ clear_needsrepair(
 		libxfs_buf_relse(bp);
 }
 
+/*
+ * Mark the filesystem as needing repair.  This should only be called by code
+ * that deliberately sets invalid sentinel values in the on-disk metadata to
+ * trigger a later reconstruction, and only after we've settled the primary
+ * super contents (i.e. after phase 1).
+ */
+void
+force_needsrepair(
+	struct xfs_mount	*mp)
+{
+	struct xfs_buf		*bp;
+	int			error;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+	    xfs_sb_version_needsrepair(&mp->m_sb))
+		return;
+
+	bp = libxfs_getsb(mp);
+	if (!bp || bp->b_error) {
+		do_log(
+	_("couldn't get superblock to set needsrepair, err=%d\n"),
+				bp ? bp->b_error : ENOMEM);
+		return;
+	} else {
+		mp->m_sb.sb_features_incompat |=
+				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+
+		/* Force the primary super to disk immediately. */
+		error = -libxfs_bwrite(bp);
+		if (error)
+			do_log(_("couldn't force needsrepair, err=%d\n"), error);
+	}
+	if (bp)
+		libxfs_buf_relse(bp);
+}
+
 int
 main(int argc, char **argv)
 {
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help