Thread: 11 messages, 3 authors, 2024-06-18

Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency

From: Heng Qi <hidden>
Date: 2024-06-18 14:27:04
Also in: virtualization

On Tue, 18 Jun 2024 09:29:48 +0800, Jason Wang [off-list ref] wrote:
On Mon, Jun 17, 2024 at 4:08 PM Heng Qi [off-list ref] wrote:
quoted
On Mon, 17 Jun 2024 12:05:30 +0800, Jason Wang [off-list ref] wrote:
quoted
On Thu, Jun 6, 2024 at 2:15 PM Heng Qi [off-list ref] wrote:
quoted
Currently, control vq handles commands synchronously,
leading to increased delays for dim commands during multi-queue
VM configuration and directly impacting dim performance.

To address this, we are shifting to asynchronous processing of
ctrlq's dim commands.

Signed-off-by: Heng Qi <redacted>
---
 drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 208 insertions(+), 25 deletions(-)
Hi Jason,

I will incorporate your feedback and update the next version.

Thanks
quoted
quoted
quoted
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e59e12bb7601..0338528993ab 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -376,6 +376,13 @@ struct control_buf {
        struct completion completion;
 };

+struct virtnet_coal_node {
+       struct control_buf ctrl;
+       struct virtio_net_ctrl_coal_vq coal_vqs;
+       bool is_coal_wait;
+       struct list_head list;
+};
+
 struct virtnet_info {
        struct virtio_device *vdev;
        struct virtqueue *cvq;
@@ -420,6 +427,9 @@ struct virtnet_info {
        /* Lock to protect the control VQ */
        struct mutex cvq_lock;

+       /* Work struct for acquisition of cvq processing results. */
+       struct work_struct get_cvq;
+
        /* Host can handle any s/g split between our header and packet data */
        bool any_header_sg;
@@ -464,6 +474,14 @@ struct virtnet_info {
        struct virtnet_interrupt_coalesce intr_coal_tx;
        struct virtnet_interrupt_coalesce intr_coal_rx;

+       /* Free nodes used for concurrent delivery */
+       struct mutex coal_free_lock;
+       struct list_head coal_free_list;
+
+       /* Filled when there are no free nodes or cvq buffers */
+       struct mutex coal_wait_lock;
+       struct list_head coal_wait_list;
+
        unsigned long guest_offloads;
        unsigned long guest_offloads_capable;
@@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;

-       complete(&vi->ctrl->completion);
+       schedule_work(&vi->get_cvq);
 }

 static void skb_xmit_done(struct virtqueue *vq)
@@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
                                       struct scatterlist *in)
 {
        struct scatterlist *sgs[5], hdr, stat;
-       u32 out_num = 0, tmp, in_num = 0;
+       u32 out_num = 0, in_num = 0;
        int ret;

        /* Caller should know better */
@@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
                return false;
        }

-       if (unlikely(!virtqueue_kick(vi->cvq)))
-               goto unlock;
+       if (unlikely(!virtqueue_kick(vi->cvq))) {
+               mutex_unlock(&vi->cvq_lock);
+               return false;
+       }
+       mutex_unlock(&vi->cvq_lock);

-       wait_for_completion(&vi->ctrl->completion);
-       virtqueue_get_buf(vi->cvq, &tmp);
+       wait_for_completion(&ctrl->completion);

-unlock:
-       mutex_unlock(&vi->cvq_lock);
        return ctrl->status == VIRTIO_NET_OK;
 }
@@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
        return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
 }

+static void virtnet_process_dim_cmd(struct virtnet_info *vi,
+                                   struct virtnet_coal_node *node)
+{
+       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
+
+       mutex_lock(&vi->rq[qnum].dim_lock);
+       vi->rq[qnum].intr_coal.max_usecs =
+               le32_to_cpu(node->coal_vqs.coal.max_usecs);
+       vi->rq[qnum].intr_coal.max_packets =
+               le32_to_cpu(node->coal_vqs.coal.max_packets);
+       vi->rq[qnum].dim.state = DIM_START_MEASURE;
+       mutex_unlock(&vi->rq[qnum].dim_lock);
+
+       if (node->is_coal_wait) {
+               mutex_lock(&vi->coal_wait_lock);
+               list_del(&node->list);
+               mutex_unlock(&vi->coal_wait_lock);
+               kfree(node);
+       } else {
+               mutex_lock(&vi->coal_free_lock);
+               list_add(&node->list, &vi->coal_free_list);
+               mutex_unlock(&vi->coal_free_lock);
+       }
+}
+
+static int virtnet_add_dim_command(struct virtnet_info *vi,
+                                  struct virtnet_coal_node *coal_node)
+{
+       struct scatterlist sg;
+       int ret;
+
+       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
+       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
+                                        &coal_node->ctrl, &sg, NULL);
+       if (!ret) {
+               dev_warn(&vi->dev->dev,
+                        "Failed to change coalescing params.\n");
+               return ret;
+       }
+
+       virtnet_process_dim_cmd(vi, coal_node);
+
+       return 0;
+}
+
+static void virtnet_get_cvq_work(struct work_struct *work)
+{
+       struct virtnet_info *vi =
+               container_of(work, struct virtnet_info, get_cvq);
+       struct virtnet_coal_node *wait_coal;
+       bool valid = false;
+       unsigned int tmp;
+       void *res;
+
+       mutex_lock(&vi->cvq_lock);
+       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
+               complete((struct completion *)res);
+               valid = true;
+       }
+       mutex_unlock(&vi->cvq_lock);
How could we synchronize with the device in this case?

E.g., what happens if the device finishes another buf here?
That's a good question. I think we can solve it using the following snippet?
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e59e12bb7601..5dc3e1244016 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -420,6 +427,12 @@ struct virtnet_info {
        /* Lock to protect the control VQ */
        struct mutex cvq_lock;

+       /* Atomic to confirm whether the cvq work is scheduled. */
+       atomic_t scheduled;
+
+       /* Work struct for acquisition of cvq processing results. */
+       struct work_struct get_cvq;
+

@@ -670,7 +691,9 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;

-       complete(&vi->ctrl->completion);
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
I think the workqueue subsystem should already handle things like this.
quoted
 }


+static void virtnet_get_cvq_work(struct work_struct *work)
+{
+       struct virtnet_info *vi =
+               container_of(work, struct virtnet_info, get_cvq);
+       struct virtnet_coal_node *wait_coal;
+       bool valid = false;
+       unsigned int tmp;
+       void *res;
+
+       mutex_lock(&vi->cvq_lock);
+       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
+               complete((struct completion *)res);
+               valid = true;
+       }
+       mutex_unlock(&vi->cvq_lock);
+
+       atomic_set(&vi->scheduled, 0);
+       virtqueue_enable_cb_prepare(vi->cvq);
We have a bunch of examples in the current code. Generally it should
be something like:

again:
    disable_cb()
    while(get_buf());
    if (enable_cb())
        disable_cb()
        goto again;
quoted
+}
quoted
quoted
+
+       if (!valid)
+               return;
+
+       while (true) {
+               wait_coal = NULL;
+               mutex_lock(&vi->coal_wait_lock);
+               if (!list_empty(&vi->coal_wait_list))
+                       wait_coal = list_first_entry(&vi->coal_wait_list,
+                                                    struct virtnet_coal_node,
+                                                    list);
+               mutex_unlock(&vi->coal_wait_lock);
+               if (wait_coal)
+                       if (virtnet_add_dim_command(vi, wait_coal))
+                               break;
+               else
+                       break;
+       }
This is still an ad-hoc optimization for dim in the general path here.

Could we have a fn callback so for non dim it's just a completion and
for dim it would be a schedule_work()?
OK, I will try this.

And how about this:

+static void virtnet_cvq_work_sched(struct virtqueue *cvq)
+{
+       struct virtnet_info *vi = cvq->vdev->priv;
+
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
+}
+
 static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;
+       unsigned int tmp;

+       virtqueue_get_buf(vi->cvq, &tmp);
        complete(&vi->ctrl->completion);
 }
@@ -5318,7 +5472,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi)

        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
-               callbacks[total_vqs - 1] = virtnet_cvq_done;
+               if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+                       callbacks[total_vqs - 1] = virtnet_cvq_work_sched;
+               else
+                       callbacks[total_vqs - 1] = virtnet_cvq_done;
+
                names[total_vqs - 1] = "control";
        }
Just to clarify, I meant a callback function per control_buf. (I'd
avoid touching the virtqueue callback layers.)

quoted
quoted
quoted
+}
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
 {
        struct virtnet_info *vi = netdev_priv(dev);
@@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
        return 0;
 }

+static void virtnet_put_wait_coal(struct virtnet_info *vi,
+                                 struct receive_queue *rq,
+                                 struct dim_cq_moder moder)
+{
+       struct virtnet_coal_node *wait_node;
+
+       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
+       if (!wait_node) {
+               rq->dim.state = DIM_START_MEASURE;
+               return;
+       }
+
+       wait_node->is_coal_wait = true;
+       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
+       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
+       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
+       mutex_lock(&vi->coal_wait_lock);
+       list_add_tail(&wait_node->list, &vi->coal_wait_list);
+       mutex_unlock(&vi->coal_wait_lock);
+}
+
 static void virtnet_rx_dim_work(struct work_struct *work)
 {
        struct dim *dim = container_of(work, struct dim, work);
        struct receive_queue *rq = container_of(dim,
                        struct receive_queue, dim);
        struct virtnet_info *vi = rq->vq->vdev->priv;
-       struct net_device *dev = vi->dev;
+       struct virtnet_coal_node *avail_coal;
        struct dim_cq_moder update_moder;
-       int qnum, err;

-       qnum = rq - vi->rq;
+       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);

        mutex_lock(&rq->dim_lock);
-       if (!rq->dim_enabled)
-               goto out;
-
-       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
-       if (update_moder.usec != rq->intr_coal.max_usecs ||
-           update_moder.pkts != rq->intr_coal.max_packets) {
-               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
-                                                      update_moder.usec,
-                                                      update_moder.pkts);
-               if (err)
-                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
-                                dev->name, qnum);
-               dim->state = DIM_START_MEASURE;
+       if (!rq->dim_enabled ||
+           (update_moder.usec == rq->intr_coal.max_usecs &&
+            update_moder.pkts == rq->intr_coal.max_packets)) {
+               rq->dim.state = DIM_START_MEASURE;
+               mutex_unlock(&rq->dim_lock);
+               return;
        }
-out:
        mutex_unlock(&rq->dim_lock);
+
+       mutex_lock(&vi->cvq_lock);
+       if (vi->cvq->num_free < 3) {
+               virtnet_put_wait_coal(vi, rq, update_moder);
+               mutex_unlock(&vi->cvq_lock);
+               return;
+       }
Could we simply sleep instead of using a list here?
Do you mean using a semaphore, or a waitqueue?
I meant sleep and wait for more space.

Thanks
Keyboard shortcuts:
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help