[PATCH 5/6] virtio/vsock: add support for dgram
From: Bobby Eshleman <hidden>
Date: 2022-08-15 17:57:08
Also in:
kvm, lkml
Subsystem:
networking [general], the rest, virtio and vhost vsock driver, virtio core, virtio host (vhost), vm sockets (af_vsock) · Maintainers:
"David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, Stefan Hajnoczi, Stefano Garzarella, "Michael S. Tsirkin", Jason Wang
This patch supports dgram in virtio and on the vhost side. Signed-off-by: Jiang Wang <redacted> Signed-off-by: Bobby Eshleman <redacted> --- drivers/vhost/vsock.c | 2 +- include/net/af_vsock.h | 2 + include/uapi/linux/virtio_vsock.h | 1 + net/vmw_vsock/af_vsock.c | 26 +++- net/vmw_vsock/virtio_transport.c | 2 +- net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++-- 6 files changed, 186 insertions(+), 20 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a5d1bdb786fe..3dc72a5647ca 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c@@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) int ret; ret = vsock_core_register(&vhost_transport.transport, - VSOCK_TRANSPORT_F_H2G); + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM); if (ret < 0) return ret;
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..37e55c81e4df 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h@@ -78,6 +78,8 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); +int vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr); /**** TRANSPORT ****/
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 857df3a3a70d..0975b9c88292 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h@@ -70,6 +70,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, VIRTIO_VSOCK_TYPE_SEQPACKET = 2, + VIRTIO_VSOCK_TYPE_DGRAM = 3, }; enum virtio_vsock_op {
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1893f8aafa48..87e4ae1866d3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c@@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, return 0; } +int vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + int retval; + + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_connectible(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + + return retval; +} +EXPORT_SYMBOL(vsock_bind_stream); + static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) {
@@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features) } if (features & VSOCK_TRANSPORT_F_DGRAM) { - if (t_dgram) { - err = -EBUSY; - goto err_busy; + /* TODO: always chose the G2H variant over others, support nesting later */ + if (features & VSOCK_TRANSPORT_F_G2H) { + if (t_dgram) + pr_warn("virtio_vsock: t_dgram already set\n"); + t_dgram = t; + } + + if (!t_dgram) { + t_dgram = t; } - t_dgram = t; } if (features & VSOCK_TRANSPORT_F_LOCAL) {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 073314312683..d4526ca462d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c@@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) return -ENOMEM; ret = vsock_core_register(&virtio_transport.transport, - VSOCK_TRANSPORT_F_G2H); + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM); if (ret) goto out_wq;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index bdf16fff054f..aedb48728677 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c@@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); static u16 virtio_transport_get_type(struct sock *sk) { - if (sk->sk_type == SOCK_STREAM) + if (sk->sk_type == SOCK_DGRAM) + return VIRTIO_VSOCK_TYPE_DGRAM; + else if (sk->sk_type == SOCK_STREAM) return VIRTIO_VSOCK_TYPE_STREAM; else return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, vvs = vsk->trans; /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + else + return 0; + } - /* virtio_transport_get_credit might return less than pkt_len credit */ - pkt_len = virtio_transport_get_credit(vvs, pkt_len); + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { + /* virtio_transport_get_credit might return less than pkt_len credit */ + pkt_len = virtio_transport_get_credit(vvs, pkt_len); - /* Do not send zero length OP_RW pkt */ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; + /* Do not send zero length OP_RW pkt */ + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + } skb = virtio_transport_alloc_skb(info, pkt_len, src_cid, src_port, dst_cid, dst_port, &err); if (!skb) { - virtio_transport_put_credit(vvs, pkt_len); + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) + virtio_transport_put_credit(vvs, pkt_len); return err; }
@@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, } EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); +static ssize_t +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, size_t len) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct sk_buff *skb; + size_t total = 0; + u32 free_space; + int err = -EFAULT; + + spin_lock_bh(&vvs->rx_lock); + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) { + skb = __skb_dequeue(&vvs->rx_queue); + + total = len; + if (total > skb->len - vsock_metadata(skb)->off) + total = skb->len - vsock_metadata(skb)->off; + else if (total < skb->len - vsock_metadata(skb)->off) + msg->msg_flags |= MSG_TRUNC; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total); + if (err) + return err; + + spin_lock_bh(&vvs->rx_lock); + + virtio_transport_dec_rx_pkt(vvs, skb); + consume_skb(skb); + } + + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + + spin_unlock_bh(&vvs->rx_lock); + + if (total > 0 && msg->msg_name) { + /* Provide the address of the sender. */ + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); + + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid), + le32_to_cpu(vsock_hdr(skb)->src_port)); + msg->msg_namelen = sizeof(*vm_addr); + } + return total; +} + +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk) +{ + return virtio_transport_stream_has_data(vsk); +} + int virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
@@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags) { - return -EOPNOTSUPP; + struct sock *sk; + size_t err = 0; + long timeout; + + DEFINE_WAIT(wait); + + sk = &vsk->sk; + err = 0; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (!len) + goto out; + + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + while (1) { + s64 ready; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + ready = virtio_transport_dgram_has_data(vsk); + + if (ready == 0) { + if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + finish_wait(sk_sleep(sk), &wait); + break; + } else if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + } else { + finish_wait(sk_sleep(sk), &wait); + + if (ready < 0) { + err = -ENOMEM; + goto out; + } + + err = virtio_transport_dgram_do_dequeue(vsk, msg, len); + break; + } + } +out: + release_sock(sk); + return err; } EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
@@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); int virtio_transport_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - return -EOPNOTSUPP; + return vsock_bind_stream(vsk, addr); } EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); bool virtio_transport_dgram_allow(u32 cid, u32 port) { - return false; + return true; } EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
@@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, struct msghdr *msg, size_t dgram_len) { - return -EOPNOTSUPP; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .msg = msg, + .pkt_len = dgram_len, + .vsk = vsk, + .remote_cid = remote_addr->svm_cid, + .remote_port = remote_addr->svm_port, + }; + + return virtio_transport_send_pkt_info(vsk, &info); } EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
@@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk, struct virtio_vsock_hdr *hdr = vsock_hdr(skb); int err = 0; + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) { + virtio_transport_recv_enqueue(vsk, skb); + sk->sk_data_ready(sk); + return err; + } + switch (le16_to_cpu(hdr->op)) { case VIRTIO_VSOCK_OP_RW: virtio_transport_recv_enqueue(vsk, skb);
@@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, static bool virtio_transport_valid_type(u16 type) { return (type == VIRTIO_VSOCK_TYPE_STREAM) || - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || + (type == VIRTIO_VSOCK_TYPE_DGRAM); } /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + if (sk->sk_type == SOCK_DGRAM) { + virtio_transport_recv_connected(sk, skb); + goto out; + } + space_available = virtio_transport_space_update(sk, skb); /* Update CID in case it has changed after a transport reset event */
@@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, break; } +out: release_sock(sk); /* Release refcnt obtained when we fetched this socket out of the
--
2.35.1