Re: [PATCH v2 2/5] ext4: add mballoc stats proc file
From: harshad shirwadkar <hidden>
Date: 2021-02-12 17:16:51
Thanks Artem for pointing that out. The reason I added s_bal_groups_considered inside ext4_mb_good_group() was that, in the next patches in the series, the new allocator structures call ext4_mb_good_group() first to check whether the group looked up using the cr 0 lists or the cr 1 tree is good or not. Having this counter incremented in ext4_mb_good_group() helps us measure the overhead of the new structures too. I'd like to keep that. But you're right, even without those optimizations, ext4_mb_good_group() gets called twice. Which means, with these optimizations it will get called 3 times. So, perhaps we can increment s_bal_groups_considered in ext4_mb_good_group_nolock() as you mentioned, and then we can also increment it in the following patches just before calling ext4_mb_good_group(). I'll note that for the next version, thanks! - Harshad On Fri, Feb 12, 2021 at 8:52 AM Благодаренко Артём [off-list ref] wrote:
Hello Harshad, Thanks for this great work. I am still testing these patches. Right now I have noticed this flaw - s_bal_groups_considered is counted twice. It is placed in ext4_mb_good_group(), which is called twice: once from ext4_mb_good_group_nolock() and again directly from ext4_mb_regular_allocator(). I’d rather put s_bal_groups_considered into ext4_mb_good_group_nolock(). Here is a simple execution (on a non-fragmented partition) with the old counter location [root@CO82 ~]# dd if=/dev/zero of=/mnt/file4 bs=2M count=2 conv=fsync 2+0 records in 2+0 records out 4194304 bytes (4.2 MB, 4.0 MiB) copied, 0.0137407 s, 305 MB/s [root@CO82 ~]# cat /proc/fs/ext4/sda2/mb_stats mballoc: reqs: 2 success: 1 groups_scanned: 2 groups_considered: 4 extents_scanned: 2 goal_hits: 0 2^n_hits: 2 breaks: 0 lost: 0 useless_c0_loops: 0 useless_c1_loops: 0 useless_c2_loops: 0 useless_c3_loops: 0 buddies_generated: 60/969 buddies_time_used: 9178136 preallocated: 5612 discarded: 0 [root@CO82 ~]# And with the suggested location [root@CO82 ~]# dd if=/dev/zero of=/mnt/file3 bs=2M count=2 conv=fsync 2+0 records in 2+0 records out 4194304 bytes (4.2 MB, 4.0 MiB) copied, 0.0155074 s, 270 MB/s [root@CO82 ~]# cat /proc/fs/ext4/sda2/mb_stats mballoc: reqs: 3 success: 2 groups_scanned: 2 groups_considered: 2 extents_scanned: 2 goal_hits: 1 2^n_hits: 2 breaks: 0 lost: 0 useless_c0_loops: 0 useless_c1_loops: 0 useless_c2_loops: 0 useless_c3_loops: 0 buddies_generated: 60/969 buddies_time_used: 2870959 preallocated: 5626 discarded: 0 [root@CO82 ~]# The second looks more rational. Best regards, Artem Blagodarenko.quoted
On 9 Feb 2021, at 23:28, Harshad Shirwadkar [off-list ref] wrote: Add new stats for measuring the performance of mballoc. This patch is forked from Artem Blagodarenko's work that can be found here: https://github.com/lustre/lustre-release/blob/master/ldiskfs/kernel_patches/patches/rhel8/ext4-simple-blockalloc.patch Signed-off-by: Harshad Shirwadkar <redacted> --- fs/ext4/ext4.h | 4 ++++ fs/ext4/mballoc.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++- fs/ext4/mballoc.h | 1 + fs/ext4/sysfs.c | 2 ++ 4 files changed, 57 insertions(+), 1 deletion(-)diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6dd127942208..317b43420ecf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h@@ -1549,6 +1549,8 @@ struct ext4_sb_info { atomic_t s_bal_success; /* we found long enough chunks */ atomic_t s_bal_allocated; /* in blocks */ atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_groups_considered; /* number of groups considered */ + atomic_t s_bal_groups_scanned; /* number of groups scanned */ atomic_t s_bal_goals; /* goal hits */ atomic_t s_bal_breaks; /* too long searches */ atomic_t s_bal_2orders; /* 2^order hits */@@ -1558,6 +1560,7 @@ struct ext4_sb_info { atomic_t s_mb_preallocated; atomic_t s_mb_discarded; atomic_t s_lock_busy; + atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */ /* locality groups */ struct ext4_locality_group __percpu *s_locality_groups;@@ -2808,6 +2811,7 @@ int __init ext4_fc_init_dentry_cache(void);extern const struct seq_operations ext4_mb_seq_groups_ops; extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; +extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); extern int ext4_mb_init(struct super_block *); extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 07b78a3cc421..fffd0770e930 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c@@ -2083,6 +2083,7 @@ static bool 
ext4_mb_good_group(struct ext4_allocation_context *ac, BUG_ON(cr < 0 || cr >= 4); + ac->ac_groups_considered++; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) return false;@@ -2420,6 +2421,9 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) if (ac->ac_status != AC_STATUS_CONTINUE) break; } + /* Processed all groups and haven't found blocks */ + if (sbi->s_mb_stats && i == ngroups) + atomic64_inc(&sbi->s_bal_cX_failed[cr]); } if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&@@ -2548,6 +2552,48 @@ const struct seq_operations ext4_mb_seq_groups_ops = { .show = ext4_mb_seq_groups_show,}; +int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = (struct super_block *)seq->private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + seq_puts(seq, "mballoc:\n"); + if (!sbi->s_mb_stats) { + seq_puts(seq, "\tmb stats collection turned off.\n"); + seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); + return 0; + } + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); + + seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); + seq_printf(seq, "\tgroups_considered: %u\n", atomic_read(&sbi->s_bal_groups_considered)); + seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); + + seq_printf(seq, "\tuseless_c0_loops: %llu\n", + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0])); + seq_printf(seq, "\tuseless_c1_loops: %llu\n", + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1])); + seq_printf(seq, "\tuseless_c2_loops: %llu\n", + (unsigned long 
long)atomic64_read(&sbi->s_bal_cX_failed[2])); + seq_printf(seq, "\tuseless_c3_loops: %llu\n", + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[3])); + seq_printf(seq, "\tbuddies_generated: %u/%u\n", + atomic_read(&sbi->s_mb_buddies_generated), + ext4_get_groups_count(sb)); + seq_printf(seq, "\tbuddies_time_used: %llu\n", + atomic64_read(&sbi->s_mb_generation_time)); + seq_printf(seq, "\tpreallocated: %u\n", + atomic_read(&sbi->s_mb_preallocated)); + seq_printf(seq, "\tdiscarded: %u\n", + atomic_read(&sbi->s_mb_discarded)); + return 0; +} + static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) { int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;@@ -2968,9 +3014,10 @@ int ext4_mb_release(struct super_block *sb) atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); ext4_msg(sb, KERN_INFO, - "mballoc: %u extents scanned, %u goal hits, " + "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " "%u 2^N hits, %u breaks, %u lost", atomic_read(&sbi->s_bal_ex_scanned), + atomic_read(&sbi->s_bal_groups_scanned), atomic_read(&sbi->s_bal_goals), atomic_read(&sbi->s_bal_2orders), atomic_read(&sbi->s_bal_breaks),@@ -3579,6 +3626,8 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) atomic_inc(&sbi->s_bal_success); atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); + atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned); + atomic_add(ac->ac_groups_considered, &sbi->s_bal_groups_considered); if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) atomic_inc(&sbi->s_bal_goals);diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index e75b4749aa1c..7597330dbdf8 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h@@ -161,6 +161,7 @@ struct ext4_allocation_context { /* copy of the best found extent taken before preallocation efforts */ struct ext4_free_extent ac_f_ex; + __u32 ac_groups_considered; __u16 
ac_groups_scanned; __u16 ac_found; __u16 ac_tail;diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 4e27fe6ed3ae..752d1c261e2a 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c@@ -527,6 +527,8 @@ int ext4_register_sysfs(struct super_block *sb) ext4_fc_info_show, sb); proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_ops, sb); + proc_create_single_data("mb_stats", 0444, sbi->s_proc, + ext4_seq_mb_stats_show, sb); } return 0;} -- 2.30.0.478.g8a0d178c01-goog