Thread (12 messages) 12 messages, 2 authors, 2020-12-12

Re: [PATCH V5 4/6] nvmet: add ZBD over ZNS backend support

From: Damien Le Moal <hidden>
Date: 2020-12-12 10:06:42
Also in: linux-nvme

On 2020/12/12 15:54, Chaitanya Kulkarni wrote:
[...]
quoted
+bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+{
+	if (ns->bdev->bd_disk->queue->conv_zones_bitmap) {
Hmm... BIO based DM devices do not have this bitmap set since they do not have a
scheduler. So if one setup a dm-linear device on top of an SMR disk and export
the DM device through fabric, then this check will fail to verify if
conventional zones are present. There may be no other option than to do a full
report zones here if queue->seq_zones_wlock is NULL (meaning the queue is for a
stacked device).
If I'm not wrong each LLD does call the report zones at disk revalidation,
as we should be able to reuse it instead of repeating for each zbd ns
especially for static property:-
I did say BIO based DM... If the backend is a dm-linear device, the bdev and
request queue that this driver sees is the DM device, not the bdev and request
queue of the DM backend. And DM code does *not* call
blk_revalidate_disk_zones(). In that function, you can see:

	if (WARN_ON_ONCE(!queue_is_mq(q)))
		return -EIO;

to check that.

So the zone bitmaps are *not* set for a DM device. Which means that this driver
needs to do a report zones to determine if there are conventional zones.
1. drivers/block/null_blk_zoned.c:-
    null_register_zoned_dev int
        ret = blk_revalidate_disk_zones(nullb->disk, NULL);
2. drivers/nvme/host/zns.c:-
    nvme_revalidate_zones
        ret = blk_revalidate_disk_zones(ns->disk, NULL);
3. drivers/scsi/sd_zbc.c:-
    sd_zbc_revalidate_zones
        ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);

Calling report again is a duplication of the work consuming cpu cycles for
each zbd ns.

Unless something wrong we can get away with following prep patch with one
call in zns.c :-
No. That will not work if the backend is a DM device. You will hit the warning
mentioned above. DM sets the number of zones manually. See dm-table.c, function
dm_table_set_restrictions().

We could get to have blk_revalidate_disk_zones() working on a DM device, but
that is not very useful since the backend was validated already, and the bitmaps
are useless since there is no scheduling of BIO/req done at DM level.
quoted hunk ↗ jump to hunk
From abceef7bfdf9b278c492c755bf5f242159ef51e5 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <redacted>
Date: Fri, 11 Dec 2020 21:21:44 -0800
Subject: [PATCH V6 2/7] block: add nr_conv_zones and nr_seq_zones helpers

Add two request members that are needed to implement the NVMeOF ZBD
backend which exports a number of conventional zones and a number of
sequential zones so we don't have to repeat the work what
blk_revalidate_disk_zones() already does.

Signed-off-by: Chaitanya Kulkarni <redacted>
---
 block/blk-sysfs.c      | 14 ++++++++++++++
 block/blk-zoned.c      |  9 +++++++++
 include/linux/blkdev.h | 13 +++++++++++++
 3 files changed, 36 insertions(+)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b513f1683af0..f10cf45ae177 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -307,6 +307,16 @@ static ssize_t queue_nr_zones_show(struct
request_queue *q, char *page)
     return queue_var_show(blk_queue_nr_zones(q), page);
 }
 
+static ssize_t queue_nr_conv_zones_show(struct request_queue *q, char
*page)
+{
+    return queue_var_show(blk_queue_nr_conv_zones(q), page);
+}
+
+static ssize_t queue_nr_seq_zones_show(struct request_queue *q, char *page)
+{
+    return queue_var_show(blk_queue_nr_seq_zones(q), page);
+}
+
 static ssize_t queue_max_open_zones_show(struct request_queue *q, char
*page)
 {
     return queue_var_show(queue_max_open_zones(q), page);
@@ -588,6 +598,8 @@ QUEUE_RO_ENTRY(queue_zone_append_max,
"zone_append_max_bytes");
 
 QUEUE_RO_ENTRY(queue_zoned, "zoned");
 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
+QUEUE_RO_ENTRY(queue_nr_conv_zones, "nr_conv_zones");
+QUEUE_RO_ENTRY(queue_nr_seq_zones, "nr_seq_zones");
 QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
 
@@ -642,6 +654,8 @@ static struct attribute *queue_attrs[] = {
     &queue_nonrot_entry.attr,
     &queue_zoned_entry.attr,
     &queue_nr_zones_entry.attr,
+    &queue_nr_conv_zones_entry.attr,
+    &queue_nr_seq_zones_entry.attr,
     &queue_max_open_zones_entry.attr,
     &queue_max_active_zones_entry.attr,
     &queue_nomerges_entry.attr,
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 6817a673e5ce..ea38c7928e41 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -390,6 +390,8 @@ struct blk_revalidate_zone_args {
     unsigned long    *conv_zones_bitmap;
     unsigned long    *seq_zones_wlock;
     unsigned int    nr_zones;
+    unsigned int    nr_conv_zones;
+    unsigned int    nr_seq_zones;
     sector_t    zone_sectors;
     sector_t    sector;
 };
@@ -449,6 +451,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
*zone, unsigned int idx,
                 return -ENOMEM;
         }
         set_bit(idx, args->conv_zones_bitmap);
+        args->nr_conv_zones++;
         break;
     case BLK_ZONE_TYPE_SEQWRITE_REQ:
     case BLK_ZONE_TYPE_SEQWRITE_PREF:
@@ -458,6 +461,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
*zone, unsigned int idx,
             if (!args->seq_zones_wlock)
                 return -ENOMEM;
         }
+        args->nr_seq_zones++;
         break;
     default:
         pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
@@ -489,6 +493,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
     struct request_queue *q = disk->queue;
     struct blk_revalidate_zone_args args = {
         .disk        = disk,
+        /* just for redability */
+        .nr_conv_zones    = 0,
+        .nr_seq_zones    = 0,
     };
     unsigned int noio_flag;
     int ret;
@@ -519,6 +526,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
     if (ret >= 0) {
         blk_queue_chunk_sectors(q, args.zone_sectors);
         q->nr_zones = args.nr_zones;
+        q->nr_conv_zones = args.nr_conv_zones;
+        q->nr_seq_zones = args.nr_seq_zones;
         swap(q->seq_zones_wlock, args.seq_zones_wlock);
         swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
         if (update_driver_data)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2bdaa7cacfa3..697ded01e049 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -526,6 +526,9 @@ struct request_queue {
     unsigned long        *seq_zones_wlock;
     unsigned int        max_open_zones;
     unsigned int        max_active_zones;
+    unsigned int        nr_conv_zones;
+    unsigned int        nr_seq_zones;
+
 #endif /* CONFIG_BLK_DEV_ZONED */
 
     /*
@@ -726,6 +729,16 @@ static inline unsigned int
blk_queue_nr_zones(struct request_queue *q)
     return blk_queue_is_zoned(q) ? q->nr_zones : 0;
 }
 
+static inline unsigned int blk_queue_nr_conv_zones(struct request_queue *q)
+{
+    return blk_queue_is_zoned(q) ? q->nr_conv_zones : 0;
+}
+
+static inline unsigned int blk_queue_nr_seq_zones(struct request_queue *q)
+{
+    return blk_queue_is_zoned(q) ? q->nr_seq_zones : 0;
+}
+
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
                          sector_t sector)
 {

-- 
Damien Le Moal
Western Digital Research
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help