[RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun
From: Jason Wang <jasowang@redhat.com>
Date: 2018-05-21 09:05:37
Also in:
kvm, lkml
Subsystem:
networking drivers, the rest, tun/tap driver, virtio host (vhost) · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, Willem de Bruijn, Jason Wang, "Michael S. Tsirkin"
This patch implements a TUN-specific msg_control:
#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR 2
struct tun_msg_ctl {
int type;
void *ptr;
};
The first supported type is ubuf, which is already used by the vhost_net
zerocopy code. The second is an XDP buff, which allows vhost_net to pass
an XDP buff to TUN. The following patches use this to let TUN accept an
array of XDP buffs from vhost_net.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-
drivers/vhost/net.c | 21 ++++++++++--
include/linux/if_tun.h | 7 ++++
3 files changed, 116 insertions(+), 3 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2560378..b586b3f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2387,18 +2387,107 @@ static void tun_sock_write_space(struct sock *sk)
 	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
+/* Feed one XDP buff received through TUN_MSG_PTR into the receive path.
+ * Returns 0 unless a step fails; an XDP verdict is never returned as error.
+ */
+static int tun_xdp_one(struct tun_struct *tun,
+		       struct tun_file *tfile,
+		       struct xdp_buff *xdp)
+{
+	struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
+	struct tun_pcpu_stats *stats;
+	struct bpf_prog *xdp_prog;
+	struct sk_buff *skb = NULL;
+	u32 rxhash = 0, act, len;
+	int buflen = *(int *)xdp->data_hard_start;
+	int err = 0;
+	bool skb_xdp = false;
+
+	preempt_disable();
+	rcu_read_lock();
+
+	xdp_prog = rcu_dereference(tun->xdp_prog);
+	if (xdp_prog) {
+		if (gso->gso_type) {
+			skb_xdp = true;
+			goto build;
+		}
+		xdp_set_data_meta_invalid(xdp);
+		xdp->rxq = &tfile->xdp_rxq;
+		act = tun_do_xdp(tun, tfile, xdp_prog, xdp, &err);
+		if (err || act != XDP_PASS)
+			goto out;
+	}
+
+build:
+	skb = build_skb(xdp->data_hard_start, buflen);
+	if (!skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+
+	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+		kfree_skb(skb);
+		err = -EINVAL;
+		goto out;
+	}
+
+	skb->protocol = eth_type_trans(skb, tun->dev);
+	skb_reset_network_header(skb);
+	skb_probe_transport_header(skb, 0);
+
+	/* Generic XDP wants the frame in place and the MAC header pulled. */
+	if (skb_xdp && do_xdp_generic(xdp_prog, skb) != XDP_PASS)
+		goto out;
+
+	if (!rcu_dereference(tun->steering_prog))
+		rxhash = __skb_get_hash_symmetric(skb);
+
+	/* The skb must not be touched after hand-off; sample its length. */
+	len = skb->len;
+	netif_receive_skb(skb);
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += len;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(stats);
+
+	if (rxhash)
+		tun_flow_update(tun, rxhash, tfile);
+
+out:
+	rcu_read_unlock();
+	preempt_enable();
+
+	return err;
+}
+
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
 	int ret;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
+	struct tun_msg_ctl *ctl = m->msg_control;
 
 	if (!tun)
 		return -EBADFD;
 
-	ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+	if (ctl && ctl->type == TUN_MSG_PTR) {
+		ret = tun_xdp_one(tun, tfile, ctl->ptr);
+		if (!ret)
+			ret = total_len;
+		goto out;
+	}
+
+	ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
 			   m->msg_flags & MSG_DONTWAIT,
 			   m->msg_flags & MSG_MORE);
+out:
 	tun_put(tun);
 	return ret;
 }
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1209e84..0d84de6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c@@ -117,6 +117,7 @@ struct vhost_net_virtqueue { struct vhost_net_ubuf_ref *ubufs; struct ptr_ring *rx_ring; struct vhost_net_buf rxq; + struct xdp_buff xdp[VHOST_RX_BATCH]; }; struct vhost_net {
@@ -570,6 +571,7 @@ static void handle_tx_copy(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; + struct xdp_buff xdp; unsigned out, in; int head; struct msghdr msg = {
@@ -584,6 +586,7 @@ static void handle_tx_copy(struct vhost_net *net) size_t hdr_size; struct socket *sock; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); + struct tun_msg_ctl ctl; int sent_pkts = 0; s16 nheads = 0;
@@ -628,6 +631,14 @@ static void handle_tx_copy(struct vhost_net *net) vq->heads[nheads].id = cpu_to_vhost32(vq, head); vq->heads[nheads].len = 0; + err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp); + if (!err) { + ctl.type = TUN_MSG_PTR; + ctl.ptr = &xdp; + msg.msg_control = &ctl; + } else + msg.msg_control = NULL; + total_len += len; if (total_len < VHOST_NET_WEIGHT && vhost_has_more_pkts(net, vq)) {
@@ -734,16 +745,21 @@ static void handle_tx_zerocopy(struct vhost_net *net) /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { struct ubuf_info *ubuf; + struct tun_msg_ctl ctl; + ubuf = nvq->ubuf_info + nvq->upend_idx; + ctl.type = TUN_MSG_UBUF; + ctl.ptr = ubuf; + vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; ubuf->ctx = nvq->ubufs; ubuf->desc = nvq->upend_idx; refcount_set(&ubuf->refcnt, 1); - msg.msg_control = ubuf; - msg.msg_controllen = sizeof(ubuf); + msg.msg_control = &ctl; + msg.msg_controllen = sizeof(ctl); ubufs = nvq->ubufs; atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
@@ -751,6 +767,7 @@ static void handle_tx_zerocopy(struct vhost_net *net) msg.msg_control = NULL; ubufs = NULL; } + total_len += len; if (total_len < VHOST_NET_WEIGHT && vhost_has_more_pkts(net, vq)) {
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996d..ba46dce 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h@@ -19,6 +19,13 @@ #define TUN_XDP_FLAG 0x1UL +#define TUN_MSG_UBUF 1 +#define TUN_MSG_PTR 2 +struct tun_msg_ctl { + int type; + void *ptr; +}; + #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file);
--
2.7.4