Thread (6 messages) 6 messages, 3 authors, 2019-02-21

Re: [PATCH bpf-next v5 1/3] libbpf: add support for using AF_XDP sockets

From: Ye Xiaolong <hidden>
Date: 2019-02-21 06:03:29

Hi Magnus

On 02/19, Magnus Karlsson wrote:
[snip]
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+{
+	bool prog_attached = false;
+	__u32 prog_id = 0;
+	int err;
+
+	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+				  xsk->config.xdp_flags);
+	if (err)
+		return err;
+
+	if (!prog_id) {
+		prog_attached = true;
+		err = xsk_create_bpf_maps(xsk);
+		if (err)
+			return err;
+
+		err = xsk_load_xdp_prog(xsk);
+		if (err)
+			goto out_maps;
+	} else {
+		xsk->fd = bpf_prog_get_fd_by_id(prog_id);
I suppose it should be 

		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
quoted hunk ↗ jump to hunk
+	}
+
+	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
+	if (err)
+		goto out_load;
+
+	return 0;
+
+out_load:
+	if (prog_attached)
+		close(xsk->prog_fd);
+out_maps:
+	if (prog_attached)
+		xsk_delete_bpf_maps(xsk);
+	return err;
+}
+
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+		       __u32 queue_id, struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *usr_config)
+{
+	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
+	struct xsk_socket *xsk;
+	socklen_t optlen;
+	void *map;
+	int err;
+
+	if (!umem || !xsk_ptr || !rx || !tx)
+		return -EFAULT;
+
+	if (umem->refcount) {
+		pr_warning("Error: shared umems not supported by libbpf.\n");
+		return -EBUSY;
+	}
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	if (umem->refcount++ > 0) {
+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		if (xsk->fd < 0) {
+			err = -errno;
+			goto out_xsk_alloc;
+		}
+	} else {
+		xsk->fd = umem->fd;
+	}
+
+	xsk->outstanding_tx = 0;
+	xsk->queue_id = queue_id;
+	xsk->umem = umem;
+	xsk->ifindex = if_nametoindex(ifname);
+	if (!xsk->ifindex) {
+		err = -errno;
+		goto out_socket;
+	}
+	strncpy(xsk->ifname, ifname, IFNAMSIZ);
+
+	xsk_set_xdp_socket_config(&xsk->config, usr_config);
+
+	if (rx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+				 &xsk->config.rx_size,
+				 sizeof(xsk->config.rx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+	if (tx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+				 &xsk->config.tx_size,
+				 sizeof(xsk->config.tx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+
+	optlen = sizeof(off);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	if (rx) {
+		map = xsk_mmap(NULL, off.rx.desc +
+			       xsk->config.rx_size * sizeof(struct xdp_desc),
+			       PROT_READ | PROT_WRITE,
+			       MAP_SHARED | MAP_POPULATE,
+			       xsk->fd, XDP_PGOFF_RX_RING);
+		if (map == MAP_FAILED) {
+			err = -errno;
+			goto out_socket;
+		}
+
+		rx->mask = xsk->config.rx_size - 1;
+		rx->size = xsk->config.rx_size;
+		rx->producer = map + off.rx.producer;
+		rx->consumer = map + off.rx.consumer;
+		rx->ring = map + off.rx.desc;
+	}
+	xsk->rx = rx;
+
+	if (tx) {
+		map = xsk_mmap(NULL, off.tx.desc +
+			       xsk->config.tx_size * sizeof(struct xdp_desc),
+			       PROT_READ | PROT_WRITE,
+			       MAP_SHARED | MAP_POPULATE,
+			       xsk->fd, XDP_PGOFF_TX_RING);
+		if (map == MAP_FAILED) {
+			err = -errno;
+			goto out_mmap_rx;
+		}
+
+		tx->mask = xsk->config.tx_size - 1;
+		tx->size = xsk->config.tx_size;
+		tx->producer = map + off.tx.producer;
+		tx->consumer = map + off.tx.consumer;
+		tx->ring = map + off.tx.desc;
+		tx->cached_cons = xsk->config.tx_size;
+	}
+	xsk->tx = tx;
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = xsk->ifindex;
+	sxdp.sxdp_queue_id = xsk->queue_id;
+	sxdp.sxdp_flags = xsk->config.bind_flags;
+
+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		err = xsk_setup_xdp_prog(xsk);
+		if (err)
+			goto out_mmap_tx;
+	}
+
+	*xsk_ptr = xsk;
+	return 0;
+
+out_mmap_tx:
+	if (tx)
+		munmap(xsk->tx,
+		       off.tx.desc +
+		       xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+	if (rx)
+		munmap(xsk->rx,
+		       off.rx.desc +
+		       xsk->config.rx_size * sizeof(struct xdp_desc));
+out_socket:
+	if (--umem->refcount)
+		close(xsk->fd);
+out_xsk_alloc:
+	free(xsk);
+	return err;
+}
+
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+	struct xdp_mmap_offsets off;
+	socklen_t optlen;
+	int err;
+
+	if (!umem)
+		return 0;
+
+	if (umem->refcount)
+		return -EBUSY;
+
+	optlen = sizeof(off);
+	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (!err) {
+		munmap(umem->fill->ring,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp->ring,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+	}
+
+	close(umem->fd);
+	free(umem);
+
+	return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+	struct xdp_mmap_offsets off;
+	socklen_t optlen;
+	int err;
+
+	if (!xsk)
+		return;
+
+	(void)xsk_update_bpf_maps(xsk, 0, 0);
+
+	optlen = sizeof(off);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (!err) {
+		if (xsk->rx)
+			munmap(xsk->rx->ring,
+			       off.rx.desc +
+			       xsk->config.rx_size * sizeof(struct xdp_desc));
+		if (xsk->tx)
+			munmap(xsk->tx->ring,
+			       off.tx.desc +
+			       xsk->config.tx_size * sizeof(struct xdp_desc));
+	}
+
+	xsk->umem->refcount--;
+	/* Do not close an fd that also has an associated umem connected
+	 * to it.
+	 */
+	if (xsk->fd != xsk->umem->fd)
+		close(xsk->fd);
+	free(xsk);
+}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
new file mode 100644
index 0000000..a497f00
--- /dev/null
+++ b/tools/lib/bpf/xsk.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __LIBBPF_XSK_H
+#define __LIBBPF_XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/if_xdp.h>
+
+#include "libbpf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+	__u32 cached_prod; \
+	__u32 cached_cons; \
+	__u32 mask; \
+	__u32 size; \
+	__u32 *producer; \
+	__u32 *consumer; \
+	void *ring; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+					      __u32 idx)
+{
+	__u64 *addrs = (__u64 *)fill->ring;
+
+	return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+	const __u64 *addrs = (const __u64 *)comp->ring;
+
+	return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+						      __u32 idx)
+{
+	struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+	return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+	const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+	return &descs[idx & rx->mask];
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+	__u32 free_entries = r->cached_cons - r->cached_prod;
+
+	if (free_entries >= nb)
+		return free_entries;
+
+	/* Refresh the local tail pointer.
+	 * cached_cons is r->size bigger than the real consumer pointer so
+	 * that this addition can be avoided in the more frequently
+	 * executed code that computs free_entries in the beginning of
+	 * this function. Without this optimization it whould have been
+	 * free_entries = r->cached_prod - r->cached_cons + r->size.
+	 */
+	r->cached_cons = *r->consumer + r->size;
+
+	return r->cached_cons - r->cached_prod;
+}
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+	__u32 entries = r->cached_prod - r->cached_cons;
+
+	if (entries == 0) {
+		r->cached_prod = *r->producer;
+		entries = r->cached_prod - r->cached_cons;
+	}
+
+	return (entries > nb) ? nb : entries;
+}
+
+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
+					    size_t nb, __u32 *idx)
+{
+	if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
+		return 0;
+
+	*idx = prod->cached_prod;
+	prod->cached_prod += nb;
+
+	return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+{
+	/* Make sure everything has been written to the ring before signalling
+	 * this to the kernel.
+	 */
+	smp_wmb();
+
+	*prod->producer += nb;
+}
+
+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
+					 size_t nb, __u32 *idx)
+{
+	size_t entries = xsk_cons_nb_avail(cons, nb);
+
+	if (likely(entries > 0)) {
+		/* Make sure we do not speculatively read the data before
+		 * we have received the packet buffers from the ring.
+		 */
+		smp_rmb();
+
+		*idx = cons->cached_cons;
+		cons->cached_cons += entries;
+	}
+
+	return entries;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+{
+	*cons->consumer += nb;
+}
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+	return &((char *)umem_area)[addr];
+}
+
+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+
+struct xsk_umem_config {
+	__u32 fill_size;
+	__u32 comp_size;
+	__u32 frame_size;
+	__u32 frame_headroom;
+};
+
+/* Flags for the libbpf_flags field. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+struct xsk_socket_config {
+	__u32 rx_size;
+	__u32 tx_size;
+	__u32 libbpf_flags;
+	__u32 xdp_flags;
+	__u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
+				void *umem_area, __u64 size,
+				struct xsk_ring_prod *fill,
+				struct xsk_ring_cons *comp,
+				const struct xsk_umem_config *config);
+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
+				  const char *ifname, __u32 queue_id,
+				  struct xsk_umem *umem,
+				  struct xsk_ring_cons *rx,
+				  struct xsk_ring_prod *tx,
+				  const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_XSK_H */
-- 
2.7.4
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help