Re: [PATCH 3/3] raid5: allow r5l_io_unit allocations to fail
From: NeilBrown <hidden>
Date: 2015-12-22 22:29:09
On Wed, Dec 23 2015, Christoph Hellwig wrote:
quoted hunk ↗ jump to hunk
quoted
I wonder if we should have a mempool for these io units too. We would allocate with GFP_ATOMIC (or similar) so the allocation woult fail instead of blocking, but we would then know that an allocation could only fail if there was another request in flight. So the place where we free an io_unit would be the obviously correct place to trigger a retry of the delayed-due-to-mem-allocation-failure stripes. So I think I would prefer two lists, another mempool, and very well defined places to retry the two lists. Is that over-engineering?How about the variant below (relative to md/for-next)? This implements the above and passes testing fine:diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 18de1fc..4fa9457 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c@@ -75,7 +75,10 @@ struct r5l_log { struct list_head finished_ios; /* io_units which settle down in log disk */ struct bio flush_bio; + struct list_head no_mem_stripes; /* pending stripes, -ENOMEM */ + struct kmem_cache *io_kc; + mempool_t *io_pool; struct bio_set *bs; mempool_t *meta_pool;@@ -287,9 +290,10 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log) struct r5l_io_unit *io; struct r5l_meta_block *block; - io = kmem_cache_zalloc(log->io_kc, GFP_ATOMIC); + io = mempool_alloc(log->io_pool, GFP_ATOMIC); if (!io) return NULL; + memset(io, 0, sizeof(*io)); io->log = log; INIT_LIST_HEAD(&io->log_sibling);@@ -490,24 +494,25 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh) mutex_lock(&log->io_mutex); /* meta + data */ reserve = (1 + write_disks) << (PAGE_SHIFT - 9); - if (!r5l_has_free_space(log, reserve)) - goto err_retry; + if (!r5l_has_free_space(log, reserve)) { + spin_lock(&log->no_space_stripes_lock); + list_add_tail(&sh->log_list, &log->no_space_stripes); + spin_unlock(&log->no_space_stripes_lock); + + r5l_wake_reclaim(log, reserve); + goto out_unlock; + } ret = r5l_log_stripe(log, sh, data_pages, parity_pages); - if (ret) - goto err_retry; + if (ret) { + spin_lock_irq(&log->io_list_lock); + list_add_tail(&sh->log_list, &log->no_mem_stripes); + spin_unlock_irq(&log->io_list_lock); + } out_unlock: mutex_unlock(&log->io_mutex); return 0; - -err_retry: - spin_lock(&log->no_space_stripes_lock); - list_add_tail(&sh->log_list, &log->no_space_stripes); - spin_unlock(&log->no_space_stripes_lock); - - r5l_wake_reclaim(log, reserve); - goto out_unlock; } void r5l_write_stripe_run(struct r5l_log *log)@@ -559,6 +564,21 @@ static sector_t r5l_reclaimable_space(struct r5l_log *log) log->next_checkpoint); } +static void r5l_run_no_mem_stripe(struct r5l_log *log) +{ + struct stripe_head *sh; + + assert_spin_locked(&log->io_list_lock); + + if (!list_empty(&log->no_mem_stripes)) { + sh = list_first_entry(&log->no_mem_stripes, + struct stripe_head, log_list); + list_del_init(&sh->log_list); + set_bit(STRIPE_HANDLE, &sh->state); + raid5_release_stripe(sh); + } +} + static bool r5l_complete_finished_ios(struct r5l_log *log) { struct r5l_io_unit *io, *next;@@ -575,7 +595,8 @@ static bool r5l_complete_finished_ios(struct r5l_log *log) log->next_cp_seq = io->seq; list_del(&io->log_sibling); - kmem_cache_free(log->io_kc, io); + mempool_free(io, log->io_pool); + r5l_run_no_mem_stripe(log); found = true; }@@ -1189,6 +1210,10 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log->io_kc) goto io_kc; + log->io_pool = mempool_create_slab_pool(R5L_POOL_SIZE, log->io_kc); + if (!log->io_pool) + goto io_pool; + log->bs = bioset_create(R5L_POOL_SIZE, 0); if (!log->bs) goto io_bs;@@ -1203,6 +1228,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) goto reclaim_thread; init_waitqueue_head(&log->iounit_wait); + INIT_LIST_HEAD(&log->no_mem_stripes); + INIT_LIST_HEAD(&log->no_space_stripes); spin_lock_init(&log->no_space_stripes_lock);@@ -1219,6 +1246,8 @@ reclaim_thread: out_mempool: bioset_free(log->bs); io_bs: + mempool_destroy(log->io_pool); +io_pool: kmem_cache_destroy(log->io_kc); io_kc: kfree(log);
Yes, that looks just right - thanks. I feel a lot more confident that this code won't deadlock. Thanks, NeilBrown
Attachments
- signature.asc [application/pgp-signature] 818 bytes