Re: [PATCH V6 4/6] nvmet: add ZBD over ZNS backend support
From: Damien Le Moal <hidden>
Date: 2020-12-13 10:33:14
Also in:
linux-nvme
On Sat, 2020-12-12 at 21:50 -0800, Chaitanya Kulkarni wrote: [...]
+static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
+ unsigned int idx, void *data)
+{
+ struct blk_zone *zone = data;
+
+ memcpy(zone, z, sizeof(struct blk_zone));See below. This is not necessary.
+
+ return 0;
+}
+
+static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
+{
+ struct blk_zone zone;
+ int reported_zones;
+ unsigned int zno;
+
+ if (bdev->bd_disk->queue->conv_zones_bitmap)
+ return false;Bug.
+
+ for (zno = 0; zno < blk_queue_nr_zones(bdev->bd_disk->queue); zno++) {Large capacity SMR drives have over 75,000 zones these days. Doing a report zones one zone at a time will take forever. This needs to be optimized: see below.
+ reported_zones = blkdev_report_zones(bdev, + zno * bdev_zone_sectors(bdev), 1, + nvmet_bdev_validate_zns_zones_cb, + &zone); + + if (reported_zones != 1) + return true; + + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) + return true;
This test should be in the nvmet_bdev_validate_zns_zones_cb() callback. That callback can return an error if it finds a conventional zone. That will stop blkdev_report_zones().
+ } + + return false; +}
What about this:
static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
unsigned int idx, void *data)
{
if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
return -ENOTSUPP;
return 0;
}
static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
{
int ret;
if (bdev->bd_disk->queue->conv_zones_bitmap)
return true;
ret = blkdev_report_zones(bdev,
get_capacity(bdev->bd_disk), bdev_nr_zones(bdev),
nvmet_bdev_validate_zns_zones_cb, NULL);
if (ret < 1)
return true;
return false;
}
All zones are checked using the callback with the loop in
blkdev_report_zones().
[...]+void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
+{
+ sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
+ struct request_queue *q = req->ns->bdev->bd_disk->queue;
+ unsigned int max_sects = queue_max_zone_append_sectors(q);
+ u16 status = NVME_SC_SUCCESS;
+ unsigned int total_len = 0;
+ struct scatterlist *sg;
+ int ret = 0, sg_cnt;
+ struct bio *bio;
+
+ if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
+ return;
+
+ if (!req->sg_cnt) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+
+ if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ bio = &req->b.inline_bio;
+ bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ } else {
+ bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
+ }
+
+ bio_set_dev(bio, req->ns->bdev);
+ bio->bi_iter.bi_sector = sect;
+ bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+ if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+ bio->bi_opf |= REQ_FUA;
+
+ for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
+ struct page *p = sg_page(sg);
+ unsigned int l = sg->length;
+ unsigned int o = sg->offset;
+ bool same_page = false;
+
+ ret = bio_add_hw_page(q, bio, p, l, o, max_sects, &same_page);
+ if (ret != sg->length) {
+ status = NVME_SC_INTERNAL;
+ goto out_bio_put;
+ }
+ if (same_page)
+ put_page(p);
+
+ total_len += sg->length;
+ }
+
+ if (total_len != nvmet_rw_data_len(req)) {
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ goto out_bio_put;
+ }
+
+ ret = submit_bio_wait(bio);
+ status = ret < 0 ? NVME_SC_INTERNAL : status;
+
+ req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
+ bio->bi_iter.bi_sector);Why set this if the BIO failed ? There may be no problems doing so, but I do not see the point either.
+ +out_bio_put: + if (bio != &req->b.inline_bio) + bio_put(bio); + nvmet_req_complete(req, status); +}
-- Damien Le Moal Western Digital