Re: [PATCH bpf-next v5 1/3] libbpf: add support for using AF_XDP sockets
From: Magnus Karlsson <hidden>
Date: 2019-02-21 07:09:57
On Thu, Feb 21, 2019 at 7:06 AM Ye Xiaolong [off-list ref] wrote:
Hi Magnus,

On 02/19, Magnus Karlsson wrote:
[snip]
quoted
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +{ + bool prog_attached = false; + __u32 prog_id = 0; + int err; + + err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + xsk->config.xdp_flags); + if (err) + return err; + + if (!prog_id) { + prog_attached = true; + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto out_maps; + } else { + xsk->fd = bpf_prog_get_fd_by_id(prog_id);
I suppose it should be xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
My bad, that is correct. Will spin a v6. Thanks: Magnus
quoted
+ } + + err = xsk_update_bpf_maps(xsk, true, xsk->fd); + if (err) + goto out_load; + + return 0; + +out_load: + if (prog_attached) + close(xsk->prog_fd); +out_maps: + if (prog_attached) + xsk_delete_bpf_maps(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + socklen_t optlen; + void *map; + int err; + + if (!umem || !xsk_ptr || !rx || !tx) + return -EFAULT; + + if (umem->refcount) { + pr_warning("Error: shared umems not supported by libbpf.\n"); + return -EBUSY; + } + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + } + + xsk->outstanding_tx = 0; + xsk->queue_id = queue_id; + xsk->umem = umem; + xsk->ifindex = if_nametoindex(ifname); + if (!xsk->ifindex) { + err = -errno; + goto out_socket; + } + strncpy(xsk->ifname, ifname, IFNAMSIZ); + + xsk_set_xdp_socket_config(&xsk->config, usr_config); + + if (rx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + if (tx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + if (rx) { + map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (map == MAP_FAILED) { + err = -errno; 
+ goto out_socket; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = map + off.rx.producer; + rx->consumer = map + off.rx.consumer; + rx->ring = map + off.rx.desc; + } + xsk->rx = rx; + + if (tx) { + map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = map + off.tx.producer; + tx->consumer = map + off.tx.consumer; + tx->ring = map + off.tx.desc; + tx->cached_cons = xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = xsk->ifindex; + sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_flags = xsk->config.bind_flags; + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = xsk_setup_xdp_prog(xsk); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + return 0; + +out_mmap_tx: + if (tx) + munmap(xsk->tx, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(xsk->rx, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + munmap(umem->fill->ring, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + 
return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + (void)xsk_update_bpf_maps(xsk, 0, 0); + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) + munmap(xsk->rx->ring, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); + if (xsk->tx) + munmap(xsk->tx->ring, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); + } + + xsk->umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != xsk->umem->fd) + close(xsk->fd); + free(xsk); +}diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h new file mode 100644 index 0000000..a497f00 --- /dev/null +++ b/tools/lib/bpf/xsk.h@@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#ifndef __LIBBPF_XSK_H +#define __LIBBPF_XSK_H + +#include <stdio.h> +#include <stdint.h> +#include <linux/if_xdp.h> + +#include "libbpf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Do not access these members directly. Use the functions below. 
*/ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computs free_entries in the beginning of + * this function. Without this optimization it whould have been + * free_entries = r->cached_prod - r->cached_cons + r->size. + */ + r->cached_cons = *r->consumer + r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = *r->producer; + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? 
nb : entries; +} + +static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, + size_t nb, __u32 *idx) +{ + if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) +{ + /* Make sure everything has been written to the ring before signalling + * this to the kernel. + */ + smp_wmb(); + + *prod->producer += nb; +} + +static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, + size_t nb, __u32 *idx) +{ + size_t entries = xsk_cons_nb_avail(cons, nb); + + if (likely(entries > 0)) { + /* Make sure we do not speculatively read the data before + * we have received the packet buffers from the ring. + */ + smp_rmb(); + + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) +{ + *cons->consumer += nb; +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. 
*/ +LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_XSK_H */ --2.7.4