Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
From: Denis Salopek <hidden>
Date: 2021-03-13 08:55:02
Hello, Thank you for your feedback and comments. On Mon, Mar 01, 2021 at 06:02:37PM -0800, Yonghong Song wrote:
On 2/28/21 1:40 AM, Denis Salopek wrote:quoted
Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab maps, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h. Add the appropriate test cases to 'maps' and 'lru_map' selftests accompanied with new test_progs. Cc: Juraj Vijtiuk <redacted> Cc: Luka Oreskovic <redacted> Cc: Luka Perkov <luka.perkov@sartura.hr> Signed-off-by: Denis Salopek <redacted> --- v2: Add functionality for LRU/per-CPU, add test_progs tests. --- include/linux/bpf.h | 2 + kernel/bpf/hashtab.c | 80 +++++ kernel/bpf/syscall.c | 14 +- .../bpf/prog_tests/lookup_and_delete.c | 283 ++++++++++++++++++ .../bpf/progs/test_lookup_and_delete.c | 26 ++ tools/testing/selftests/bpf/test_lru_map.c | 8 + tools/testing/selftests/bpf/test_maps.c | 19 +- 7 files changed, 430 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.cdiff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4c730863fa77..0bcc4f89af40 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h@@ -67,6 +67,8 @@ struct bpf_map_ops { void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); + int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, + void *value); int (*map_lookup_and_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr);diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 330d721dd2af..8c3334d1b6b3 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c@@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } +static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, + void *value, bool is_lru_map, + bool is_percpu) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + u32 hash, key_size, value_size; + struct hlist_nulls_head *head; + int cpu, off = 0, ret; + struct htab_elem *l; + unsigned long flags; + void __percpu *pptr; + struct bucket *b; + + key_size = map->key_size; + value_size = round_up(map->value_size, 8); + + hash = htab_map_hash(key, key_size, htab->hashrnd); + b = __select_bucket(htab, hash); + head = &b->head; + + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; + + l = lookup_elem_raw(head, hash, key, key_size); + if (l) { + if (is_percpu) { + pptr = htab_elem_get_ptr(l, key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, + per_cpu_ptr(pptr, cpu), + value_size); + off += value_size; + } + } else { + copy_map_value(map, value, l->key + round_up(key_size, 8));For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think hashtab lookup_and_delete_elem should also support this flag, so user can ensure they always get a lock protected sane value. We have the following libbpf APIs. LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); Previously, bpf_map_lookup_and_delete_elem only supports queue/stack, which does not need flags as it does not support BPF_F_LOCK so we are fine. Maybe similar to bpf_map_lookup_elem_flags() we add a bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0 can consolidate into a better uniform api.
If I understood correctly, there shouldn't be much changes for this addition: - add LIBBPF_API prototype and function in bpf.[hc] - those are practically the same as bpf_map_lookup_elem_flags() but we call BPF_LOOKUP_AND_DELETE_ELEM syscall, - add global declaration for bpf_map_lookup_elem_flags() in libbpf.map, - make the necessary checks for flags and the lock in the functions, - call copy_map_value_locked() if BPF_F_LOCK is set, - mask lock with check_and_init_map_lock(). Is this right or is there anything else I've missed?
quoted
+ } + + hlist_nulls_del_rcu(&l->hash_node); + if (!is_lru_map) + free_htab_elem(htab, l); + } else + ret = -ENOENT; + + htab_unlock_bucket(htab, b, hash, flags); + + if (is_lru_map && l) + bpf_lru_push_free(&htab->lru, &l->lru_node); + + return ret; +} + +static int htab_map_lookup_and_delete_elem(struct bpf_map *map, + void *key, void *value) +{ + return __htab_map_lookup_and_delete_elem(map, key, value, false, false); +} + +static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map, + void *key, void *value) +{ + return __htab_map_lookup_and_delete_elem(map, key, value, false, true); +} + +static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key, + void *value) +{ + return __htab_map_lookup_and_delete_elem(map, key, value, true, false); +} + +static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, + void *key, void *value) +{ + return __htab_map_lookup_and_delete_elem(map, key, value, true, true); +} + static int __htab_map_lookup_and_delete_batch(struct bpf_map *map, const union bpf_attr *attr,@@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_map_lookup_elem, + .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, .map_delete_elem = htab_map_delete_elem, .map_gen_lookup = htab_map_gen_lookup,@@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_lru_map_lookup_elem, + .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, .map_update_elem = htab_lru_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem,@@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_percpu_map_lookup_elem, + .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem,@@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_lru_percpu_map_lookup_elem, + .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_lru_percpu_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem,diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c859bc46d06c..2634aa4a2f37 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c@@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) goto err_put; } - value_size = map->value_size; + value_size = bpf_map_value_size(map); err = -ENOMEM; value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);@@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) { err = map->ops->map_pop_elem(map, value); + } else if (map->map_type == BPF_MAP_TYPE_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + if (!bpf_map_is_dev_bound(map)) { + bpf_disable_instrumentation(); + rcu_read_lock(); + err = map->ops->map_lookup_and_delete_elem(map, key, value); + rcu_read_unlock(); + bpf_enable_instrumentation(); + maybe_wait_bpf_programs(map);maybe_wait_bpf_programs(map) is mostly for map-in-map. but I think it is okay to put it here in case in the future we will support map-in-map here. If maybe_wait_bpf_programs() get inlined which mostly likely is the case, the compiler should be able to optimize it away.
I didn't realise at first it's only for map-in-map and forgot to remove it later, so I can remove this if you think it's better?
quoted
+ } } else { err = -ENOTSUPP; }diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c new file mode 100644 index 000000000000..05123bbcdc1c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c@@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "test_lookup_and_delete.skel.h" + +#define START_VALUE 1234 +#define NEW_VALUE 4321 +#define MAX_ENTRIES 2 + +static int duration; +static int nr_cpus; + +static int fill_values(int map_fd) +{ + __u64 key, value = START_VALUE; + int err; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem", "failed\n"))You can use if (!ASSERT_OK(err, "bpf_map_update_elem")) to save you from explicit "failed" string. The same for some later other CHECK usages.
Ok.
quoted
+ return -1; + } + + return 0; +} + +static int fill_values_percpu(int map_fd) +{ + BPF_DECLARE_PERCPU(__u64, value); + int i, err; + u64 key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = START_VALUE; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem", "failed\n")) + return -1; + } + + return 0; +} +[...]quoted
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c new file mode 100644 index 000000000000..eb19de8bb415 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c@@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u32 set_pid; +__u64 set_key; +__u64 set_value;Please add "= 0" to the above declaration to make it llvm10 friendly.
Ok, I'll add this. Sorry, checkpatch.pl gave me an error with it, that's why I removed it.
quoted
+ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hash_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_getpgid") +int bpf_lookup_and_delete_test(const void *ctx) +{ + if (set_pid == bpf_get_current_pid_tgid() >> 32) + bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL";[...]