Thread (40 messages) 40 messages, 6 authors, 2021-10-21

Re: [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost

From: Ding, Xuan <hidden>
Date: 2021-09-27 04:56:14

Hi Jiayu,
-----Original Message-----
From: Hu, Jiayu <redacted>
Sent: Monday, September 27, 2021 12:18 PM
To: Ding, Xuan <redacted>; dev@dpdk.org; Burakov, Anatoly
[off-list ref]; maxime.coquelin@redhat.com; Xia, Chenbo
[off-list ref]
Cc: Jiang, Cheng1 <redacted>; Richardson, Bruce
[off-list ref]; Pai G, Sunil [off-list ref]; Wang,
Yinan [off-list ref]; Yang, YvonneX [off-list ref]
Subject: RE: [PATCH v3 2/2] vhost: enable IOMMU for async vhost

Hi Xuan,
quoted
-----Original Message-----
From: Ding, Xuan <redacted>
Sent: Saturday, September 25, 2021 6:04 PM
To: dev@dpdk.org; Burakov, Anatoly <redacted>;
maxime.coquelin@redhat.com; Xia, Chenbo [off-list ref]
Cc: Hu, Jiayu <redacted>; Jiang, Cheng1 <redacted>;
Richardson, Bruce [off-list ref]; Pai G, Sunil
[off-list ref]; Wang, Yinan [off-list ref]; Yang,
YvonneX [off-list ref]; Ding, Xuan [off-list ref]
Subject: [PATCH v3 2/2] vhost: enable IOMMU for async vhost

The use of IOMMU has many advantages, such as isolation and address
translation. This patch extends the capability of the DMA engine to use IOMMU
if the DMA engine is bound to vfio.

When the memory table is set, the guest memory will be mapped into the
default container of DPDK.

Signed-off-by: Xuan Ding <redacted>
---
 lib/vhost/vhost.h      |   4 ++
 lib/vhost/vhost_user.c | 112
++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
89a31e4ca8..bc5695e899 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -370,6 +370,10 @@ struct virtio_net {
 int16_tbroadcast_rarp;
 uint32_tnr_vring;
 intasync_copy;
+
+/* Record the dma map status for each region. */
+bool*async_map_status;
+
 intextbuf;
 intlinearbuf;
 struct vhost_virtqueue*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
29a4c9af60..3990e9b057 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@
 #include <rte_common.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>

 #include "iotlb.h"
 #include "vhost.h"
@@ -141,6 +143,63 @@ get_blk_size(int fd)
 return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }

+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool
+*dma_map_success, bool do_map) {
+uint64_t host_iova;
+int ret = 0;
+
+host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
quoted
host_user_addr);
+if (do_map) {
+/* Add mapped region into the default container of DPDK. */
+ret =
rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+*dma_map_success = ret == 0;
+
+if (ret) {
+/*
+ * DMA device may bind with kernel driver, in this
case,
+ * we don't need to program IOMMU manually.
However, if no
+ * device is bound with vfio/uio in DPDK, and vfio
kernel
+ * module is loaded, the API will still be called and
return
+ * with ENODEV/ENOSUP.
+ *
+ * DPDK VFIO only returns ENODEV/ENOSUP in very
similar
+ * situations(VFIO either unsupported, or supported
+ * but no devices found). Either way, no mappings
could be
+ * performed. We treat it as normal case in async
path.
+ */
+if (rte_errno == ENODEV && rte_errno == ENOTSUP) {
+return 0;
+} else {
+VHOST_LOG_CONFIG(ERR, "DMA engine map
failed\n");
+return ret;
+}
+}
+
+} else {
+/* No need to do vfio unmap if the map failed. */
+if (!*dma_map_success)
+return 0;
+
+/* Remove mapped region from the default container of
DPDK. */
+ret =
rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+   region->host_user_addr,
+   host_iova,
+   region->size);
+if (ret) {
+VHOST_LOG_CONFIG(ERR, "DMA engine unmap
failed\n");
+return ret;
+}
+/* Clear the flag once the unmap succeeds. */
+*dma_map_success = 0;
+}
+
+return ret;
+}
+
 static void
 free_mem_region(struct virtio_net *dev)  { @@ -153,6 +212,9 @@
free_mem_region(struct virtio_net *dev)
 for (i = 0; i < dev->mem->nregions; i++) {
 reg = &dev->mem->regions[i];
 if (reg->host_user_addr) {
+if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+async_dma_map(reg, &dev-
quoted
async_map_status[i], false);
+
 munmap(reg->mmap_addr, reg->mmap_size);
 close(reg->fd);
 }
@@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
 }

 dev->postcopy_listening = 0;
+
+if (dev->async_map_status) {
+rte_free(dev->async_map_status);
+dev->async_map_status = NULL;
+}
 }

 static void
@@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index)
 }
 dev->mem = mem;

+if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
+dev->async_map_status = rte_zmalloc_socket("async-dma-
map-status",
+sizeof(bool) * dev->mem->nregions,
0, node);
+if (!dev->async_map_status) {
+VHOST_LOG_CONFIG(ERR,
+"(%d) failed to realloc dma mapping status on
node\n",
+dev->vid);
+return dev;
+}
+}
+
 gp = rte_realloc_socket(dev->guest_pages, dev->max_guest_pages *
sizeof(*gp),
 RTE_CACHE_LINE_SIZE, node);
 if (!gp) {
@@ -1151,12 +1229,14 @@ vhost_user_postcopy_register(struct virtio_net
*dev, int main_fd,  static int  vhost_user_mmap_region(struct virtio_net *dev,
 struct rte_vhost_mem_region *region,
+uint32_t region_index,
 uint64_t mmap_offset)
 {
 void *mmap_addr;
 uint64_t mmap_size;
 uint64_t alignment;
 int populate;
+int ret;

 /* Check for memory_size + mmap_offset overflow */
 if (mmap_offset >= -region->size) {
@@ -1210,13 +1290,25 @@ vhost_user_mmap_region(struct virtio_net *dev,
 region->mmap_size = mmap_size;
 region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
mmap_offset;

-if (dev->async_copy)
+if (dev->async_copy) {
 if (add_guest_pages(dev, region, alignment) < 0) {
 VHOST_LOG_CONFIG(ERR,
 "adding guest pages to region
failed.\n");
 return -1;
 }

+if (rte_vfio_is_enabled("vfio")) {
+ret = async_dma_map(region, &dev-
quoted
async_map_status[region_index], true);
+if (ret) {
+VHOST_LOG_CONFIG(ERR, "Configure
IOMMU for DMA "
+"engine failed\n");
+rte_free(dev->async_map_status);
+dev->async_map_status = NULL;
The freed dev->async_map_status is accessed in free_mem_region() later.
You need to free it after calling free_mem_region().
Thanks for the catch! Will fix it in next version.
quoted
+return -1;
+}
+}
+}
+
 VHOST_LOG_CONFIG(INFO,
 "guest memory region size: 0x%" PRIx64 "\n"
 "\t guest physical addr: 0x%" PRIx64 "\n"
@@ -1291,6 +1383,11 @@ vhost_user_set_mem_table(struct virtio_net
**pdev, struct VhostUserMsg *msg,
 dev->mem = NULL;
 }

+if (dev->async_map_status) {
+rte_free(dev->async_map_status);
+dev->async_map_status = NULL;
+}
To handle the guest memory hot-plug case, you need to unmap the
IOMMU tables before programming the IOMMU for the new memory. But you
seem to only free the old dev->async_map_status.
Yes, you are right. I will unmap the region from the IOMMU table in the hot-plug scenario.

Regards,
Xuan
Thanks,
Jiayu
quoted
+
 /* Flush IOTLB cache as previous HVAs are now invalid */
 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 for (i = 0; i < dev->nr_vring; i++)
@@ -1329,6 +1426,17 @@ vhost_user_set_mem_table(struct virtio_net
**pdev, struct VhostUserMsg *msg,
 goto free_guest_pages;
 }

+if (dev->async_copy) {
+dev->async_map_status = rte_zmalloc_socket("async-dma-
map-status",
+sizeof(bool) * memory->nregions, 0,
numa_node);
+if (!dev->async_map_status) {
+VHOST_LOG_CONFIG(ERR,
+"(%d) failed to allocate memory for dma
mapping status\n",
+dev->vid);
+goto free_guest_pages;
+}
+}
+
 for (i = 0; i < memory->nregions; i++) {
 reg = &dev->mem->regions[i];
@@ -1345,7 +1453,7 @@ vhost_user_set_mem_table(struct virtio_net
**pdev, struct VhostUserMsg *msg,

 mmap_offset = memory->regions[i].mmap_offset;

-if (vhost_user_mmap_region(dev, reg, mmap_offset) < 0) {
+if (vhost_user_mmap_region(dev, reg, i, mmap_offset) < 0) {
 VHOST_LOG_CONFIG(ERR, "Failed to mmap
region %u\n", i);
 goto free_mem_table;
 }
--
2.17.1
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help