Thread (50 messages) 50 messages, 7 authors, 2021-07-21

Re: [dpdk-dev] [PATCH 1/1] lib/vhost: support async dequeue for split ring

From: Maxime Coquelin <hidden>
Date: 2021-06-07 16:17:24

Hi Yuan,

This is a first review, I will certainly have more comments later.

On 6/2/21 10:31 AM, Yuan Wang wrote:
This patch implements asynchronous dequeue data path for split ring.
A new asynchronous dequeue function is introduced. With this function,
the application can try to receive packets from the guest with
offloading large copies to the DMA engine, thus saving precious CPU
cycles.
Do you have any number to share?
Signed-off-by: Wenwu Ma <redacted>
Signed-off-by: Yuan Wang <redacted>
Signed-off-by: Jiayu Hu <redacted>
---
 doc/guides/prog_guide/vhost_lib.rst |  10 +
 examples/vhost/ioat.c               |  30 +-
 examples/vhost/ioat.h               |   3 +
 examples/vhost/main.c               |  60 +--
 lib/vhost/rte_vhost_async.h         |  44 ++-
 lib/vhost/version.map               |   3 +
 lib/vhost/virtio_net.c              | 549 ++++++++++++++++++++++++++++
 7 files changed, 664 insertions(+), 35 deletions(-)
Please split the patch in multple parts.
At least don't mix example and lib changes in the same patch.
quoted hunk ↗ jump to hunk
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index 6b7206bc1d..785ab0fb34 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -281,6 +281,16 @@ The following is an overview of some key Vhost API functions:
   Poll enqueue completion status from async data path. Completed packets
   are returned to applications through ``pkts``.
 
+* ``rte_vhost_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts, count, nr_inflight)``
The function should contain async in its name.

BTW, I think we should also rename below APIs while they are
experimental to highlight it is async related:

rte_vhost_submit_enqueue_burst
rte_vhost_poll_enqueue_completed
quoted hunk ↗ jump to hunk
+
+  Try to receive packets from the guest with offloading large packets
+  to the DMA engine. Successfully dequeued packets are transfer
+  completed and returned in ``pkts``. But there may be other packets
+  that are sent from the guest but being transferred by the DMA engine,
+  called in-flight packets. This function will return in-flight packets
+  only after the DMA engine finishes transferring. The amount of
+  in-flight packets by now is returned in ``nr_inflight``.
+
 Vhost-user Implementations
 --------------------------
 
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..236306c9c7 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,7 +17,6 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
-	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -61,18 +60,30 @@ open_ioat(const char *value)
 		goto out;
 	}
 	while (i < args_nr) {
+		char *txd, *rxd;
+		bool is_txd;
 		char *arg_temp = dma_arg[i];
 		uint8_t sub_nr;
+
 		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
 		if (sub_nr != 2) {
 			ret = -1;
 			goto out;
 		}
 
-		start = strstr(ptrs[0], "txd");
-		if (start == NULL) {
+		txd = strstr(ptrs[0], "txd");
+		rxd = strstr(ptrs[0], "rxd");
+		if (txd == NULL && rxd == NULL) {
 			ret = -1;
 			goto out;
+		} else if (txd) {
+			is_txd = true;
+			start = txd;
+			ret |= ASYNC_RX_VHOST;
+		} else {
+			is_txd = false;
+			start = rxd;
+			ret |= ASYNC_TX_VHOST;
 		}
 
 		start += 3;
@@ -82,7 +93,8 @@ open_ioat(const char *value)
 			goto out;
 		}
 
-		vring_id = 0 + VIRTIO_RXQ;
+		vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
+
 		if (rte_pci_addr_parse(ptrs[1],
 				&(dma_info + vid)->dmas[vring_id].addr) < 0) {
 			ret = -1;
@@ -113,7 +125,6 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE - 1;
 		dma_info->nr++;
 		i++;
 	}
@@ -128,7 +139,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 		struct rte_vhost_async_status *opaque_data, uint16_t count)
 {
 	uint32_t i_desc;
-	uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+	uint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;
It looks broken with regards to multiqueue (it was before this patch).

In open_ioat(), only dma_bind[vid].dmas[VIRTIO_RXQ] and
dma_bind[vid].dmas[VIRTIO_TXQ] are set.

As it seems that the application does not support multiqueue, it may be
a good idea to check queue_id value before using it.
quoted hunk ↗ jump to hunk
 	struct rte_vhost_iov_iter *src = NULL;
 	struct rte_vhost_iov_iter *dst = NULL;
 	unsigned long i_seg;
@@ -140,7 +151,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
-			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+			if (rte_ioat_burst_capacity(dev_id) < src->nr_segs)
This change should be in a dedicated patch, it is not related to dequeue
support.
quoted hunk ↗ jump to hunk
 				break;
 			while (i_seg < src->nr_segs) {
 				rte_ioat_enqueue_copy(dev_id,
@@ -155,7 +166,6 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			}
 			write &= mask;
 			cb_tracker[dev_id].size_track[write] = src->nr_segs;
-			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -181,8 +191,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
 
-		uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
-				+ VIRTIO_RXQ].dev_id;
+		uint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump);
 		if (n_seg < 0) {
 			RTE_LOG(ERR,
@@ -194,7 +203,6 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		if (n_seg == 0)
 			return 0;
 
-		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 
 		read = cb_tracker[dev_id].next_read;
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..db7acefc02 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -13,6 +13,9 @@
 #define IOAT_RING_SIZE 4096
 #define MAX_ENQUEUED_SIZE 4096
 
+#define ASYNC_RX_VHOST	1
+#define ASYNC_TX_VHOST	2
+
 struct dma_info {
 	struct rte_pci_addr addr;
 	uint16_t dev_id;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..a5662a1a91 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -93,7 +93,8 @@ static int client_mode;
 
 static int builtin_net_driver;
 
-static int async_vhost_driver;
+static int async_rx_vhost_driver;
+static int async_tx_vhost_driver;
 
 static char *dma_type;
 
@@ -671,13 +672,17 @@ us_vhost_parse_args(int argc, char **argv)
 			break;
 
 		case OPT_DMAS_NUM:
-			if (open_dma(optarg) == -1) {
+			ret = open_dma(optarg);
+			if (ret == -1) {
 				RTE_LOG(INFO, VHOST_CONFIG,
 					"Wrong DMA args\n");
 				us_vhost_usage(prgname);
 				return -1;
 			}
-			async_vhost_driver = 1;
+			if (ret & ASYNC_RX_VHOST)
+				async_rx_vhost_driver = 1;
+			if (ret & ASYNC_TX_VHOST)
+				async_tx_vhost_driver = 1;
 			break;
 
 		case OPT_CLIENT_NUM:
@@ -887,7 +892,7 @@ drain_vhost(struct vhost_dev *vdev)
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
-	} else if (async_vhost_driver) {
+	} else if (async_rx_vhost_driver) {
I think we should consider having ops for async and sync instead of all
these if/else. It could be refactored as preliminary patch for this
series.
quoted hunk ↗ jump to hunk
 		uint32_t cpu_cpl_nr = 0;
 		uint16_t enqueue_fail = 0;
 		struct rte_mbuf *m_cpu_cpl[nr_xmit];
@@ -914,7 +919,7 @@ drain_vhost(struct vhost_dev *vdev)
 				__ATOMIC_SEQ_CST);
 	}
 
-	if (!async_vhost_driver)
+	if (!async_rx_vhost_driver)
 		free_pkts(m, nr_xmit);
 }
 
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help