Re: Regular ext4 error warning with HD in USB dock
From: Amir Goldstein <amir73il@gmail.com>
Date: 2011-01-02 19:23:21
Subsystem:
ext4 file system, filesystems (vfs and infrastructure), the rest · Maintainers:
"Theodore Ts'o", Alexander Viro, Christian Brauner, Linus Torvalds
On Sat, Jan 1, 2011 at 7:20 PM, Ric Wheeler [off-list ref] wrote:
On 12/28/2010 05:30 AM, Amir Goldstein wrote:quoted
On Tue, Dec 28, 2010 at 10:19 AM, Rogier Wolff[off-list ref] wrote:quoted
On Mon, Dec 27, 2010 at 09:53:43PM -0500, Ted Ts'o wrote:quoted
On Tue, Dec 28, 2010 at 09:53:45AM +1100, Con Kolivas wrote:quoted
[1048401.773270] EXT4-fs (sde8): mounted filesystem with writeback data mode. Opts: (null) [1048702.736011] EXT4-fs (sde8): error count: 3 [1048702.736016] EXT4-fs (sde8): initial error at 1289053677: ext4_journal_start_sb:251 [1048702.736018] EXT4-fs (sde8): last error at 1289080948: ext4_put_super:719That's actually not an error. It's a report which is generated every 24 hours, indicating that there has been 3 errors since the last time the error count has been cleared, with the first error taking place at Sat Nov 6 10:27:57 2010 (US/Eastern) in the function ext4_journal_start_sb(), at line 251, and the most recent error taking place at Sat Nov 6 18:02:28 2010 (US/Eastern), in the function ext4_put_super() at line 719. This is a new feature which was added in 2.6.36.Nice. But the issue you're not mentioning is: What errors could have happened on the 6th of november? Should Con worry about those errors?Ted, I would like to use this opportunity to remind you about my record_journal_errstr() implementation, see: https://github.com/amir73il/ext4-snapshots/blob/next3-stable/fs/next3/super.c#L157 It records the initial errors messages (which I found to be the most interesting), in a message buffer on the unused space after the journal super block (3K on a 4K block fs). fsck prints out those messages and clears the buffer. In under a year of Next3 fs in beta and production, it has helped me many times to analyse bugs post-mortem and find the problem. If there is demand, I can post the patch for Ext4. Amir.I do think that this sounds like a useful addition - should be very useful in doing post mortem analysis... Thanks! Ric
Hi Ric, Attached are the patches for ext4 (against Ted's next branch) and e2fsck (against 1.41.14). e2fsck patch is running in production with e2fsprogs-1.41.9 and the porting to 1.41.14 was trivial. ext4 patch is a 'trivial+' port of the Next3 patch running in production. Do you think you guys can take the patches for a test drive? Thanks, Amir. ext4: record error messages in message buffer Ext4 error messages are stored in the free space after the journal super block. After journal recovery, the message buffer is copied from the journal super block to the file system super block. When error behavior is configured to remount-ro, the message buffer can be read and displayed at a later time by e2fsck. The messages from the time of the error are very helpful in post-mortem bug analysis. Signed-off-by: Amir Goldstein <redacted>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7728a4c..8888815 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c@@ -290,10 +290,53 @@ int __ext4_journal_stop(const char *where,unsigned int line, handle_t *handle)
return err;
}
+/* record error messages after journal super block */
+static void ext4_record_journal_err(struct super_block *sb, const char *where,
+ const char *function, const char *fmt, va_list args)
+{
+#define MSGLEN 256
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+ char *buf;
+ unsigned long offset;
+ int len;
+
+ if (!journal)
+ return;
+
+ buf = (char *)journal->j_superblock;
+ offset = (unsigned long)buf % sb->s_blocksize;
+ buf += sizeof(journal_superblock_t);
+ offset += sizeof(journal_superblock_t);
+
+ /* seek to end of message buffer */
+ while (offset < sb->s_blocksize && *buf) {
+ buf += MSGLEN;
+ offset += MSGLEN;
+ }
+
+ if (offset+MSGLEN > sb->s_blocksize)
+ /* no space left in message buffer */
+ return;
+
+ len = snprintf(buf, MSGLEN, "%s: %s: ", where, function);
+ len += vsnprintf(buf+len, MSGLEN-len, fmt, args);
+}
+
+static void ext4_record_journal_errstr(struct super_block *sb,
+ const char *where, const char *function, ...)
+{
+ va_list args;
+
+ va_start(args, function);
+ ext4_record_journal_err(sb, where, function, "%s\n", args);
+ va_end(args);
+}
+
void ext4_journal_abort_handle(const char *caller, unsigned int line,
const char *err_fn, struct buffer_head *bh,
handle_t *handle, int err)
{
+ struct super_block *sb = handle->h_transaction->t_journal->j_private;
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);
@@ -311,6 +354,9 @@ void ext4_journal_abort_handle(const char *caller,unsigned int line, printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", caller, line, errstr, err_fn); + /* record error message in journal super block */ + ext4_record_journal_errstr(sb, caller, err_fn, errstr); + jbd2_journal_abort_handle(handle); }
@@ -398,6 +444,11 @@ void __ext4_error(struct super_block *sb, constchar *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); + /* record error message in journal super block */ + va_start(args, fmt); + ext4_record_journal_err(sb, __func__, function, fmt, args); + va_end(args); + ext4_handle_error(sb); }
@@ -422,6 +473,11 @@ void ext4_error_inode(struct inode *inode, constchar *function, printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); va_end(args); + /* record error message in journal super block */ + va_start(args, fmt); + ext4_record_journal_err(inode->i_sb, __func__, function, fmt, args); + va_end(args); + ext4_handle_error(inode->i_sb); }
@@ -452,6 +508,11 @@ void ext4_error_file(struct file *file, constchar *function, printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); va_end(args); + /* record error message in journal super block */ + va_start(args, fmt); + ext4_record_journal_err(inode->i_sb, __func__, function, fmt, args); + va_end(args); + ext4_handle_error(inode->i_sb); }
@@ -510,6 +571,9 @@ void __ext4_std_error(struct super_block *sb,const char *function, sb->s_id, function, line, errstr); save_error_info(sb, function, line); + /* record error message in journal super block */ + ext4_record_journal_errstr(sb, __func__, function, errstr); + ext4_handle_error(sb); }
@@ -536,6 +600,11 @@ void __ext4_abort(struct super_block *sb, constchar *function,
printk("\n");
va_end(args);
+ /* record error message in journal super block */
+ va_start(args, fmt);
+ ext4_record_journal_err(sb, __func__, function, fmt, args);
+ va_end(args);
+
if ((sb->s_flags & MS_RDONLY) == 0) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;@@ -602,6 +671,11 @@ __acquires(bitlock) printk(KERN_CONT "%pV\n", &vaf); va_end(args); + /* record error message in journal super block */ + va_start(args, fmt); + ext4_record_journal_err(sb, __func__, function, fmt, args); + va_end(args); + if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); return;
@@ -4067,12 +4141,35 @@ static void ext4_clear_journal_err(structsuper_block *sb,
j_errno = jbd2_journal_errno(journal);
if (j_errno) {
char nbuf[16];
+ char *buf1, *buf2;
+ unsigned long offset1, offset2;
+ int len1, len2;
+
+ /* copy message buffer from journal to super block */
+ buf1 = (char *)journal->j_superblock;
+ offset1 = (unsigned long)buf1 % sb->s_blocksize;
+ buf1 += sizeof(journal_superblock_t);
+ offset1 += sizeof(journal_superblock_t);
+ len1 = sb->s_blocksize - offset1;
+ buf2 = (char *)EXT4_SB(sb)->s_es;
+ offset2 = (unsigned long)buf2 % sb->s_blocksize;
+ buf2 += sizeof(struct ext4_super_block);
+ offset2 += sizeof(struct ext4_super_block);
+ len2 = sb->s_blocksize - offset2;
+ if (len2 > len1)
+ len2 = len1;
+ if (len2 > 0 && *buf1)
+ memcpy(buf2, buf1, len2);
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
ext4_warning(sb, "Marking fs in need of filesystem check.");
+ /* clear journal message buffer */
+ if (len1 > 0)
+ memset(buf1, 0, len1);
+
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, 1); Attachments
- e2fsck_display_message_buffer.patch [application/octet-stream] 2818 bytes · preview
- ext4_record_error_messages.patch [application/octet-stream] 5882 bytes · preview