[RFC PATCH bpf-next 08/15] bpf: Use scope-based charge for bpf_map_area_alloc
From: Yafang Shao <hidden>
Date: 2022-07-29 15:23:40
Also in:
bpf, linux-mm
Subsystem:
bpf [core], bpf [general] (safe dynamic programs and tools), bpf [l7 framework] (sockmap), bpf [networking] (struct_ops, reuseport), bpf [ringbuf], bpf [tracing], networking [general], networking [sockets], the rest, xdp (express data path) · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, John Fastabend, Jakub Sitnicki, Martin KaFai Lau, Song Liu, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima, Willem de Bruijn, Linus Torvalds, David S. Miller, Jesper Dangaard Brouer
Let's also get the memcg from the bpf map in bpf_map_area_alloc() instead of
using the current memcg. To do so, bpf_map_area_alloc() now takes the map as
its first argument, and all of its callers are updated accordingly.

Signed-off-by: Yafang Shao <redacted>
---
 include/linux/bpf.h         |  2 +-
 kernel/bpf/bpf_struct_ops.c |  4 ++--
 kernel/bpf/cpumap.c         |  2 +-
 kernel/bpf/devmap.c         | 12 ++++++++----
 kernel/bpf/hashtab.c        |  5 +++--
 kernel/bpf/ringbuf.c        | 14 +++++++++-----
 kernel/bpf/stackmap.c       |  3 ++-
 kernel/bpf/syscall.c        | 28 ++++++++++++++++++----------
 net/core/sock_map.c         |  7 ++++---
 9 files changed, 48 insertions(+), 29 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f9893e14124..711d9b1829d4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h@@ -1637,7 +1637,7 @@ void bpf_map_put(struct bpf_map *map); void *bpf_map_container_alloc(u64 size, int numa_node); void *bpf_map_container_mmapable_alloc(u64 size, int numa_node, u32 align, u32 offset); -void *bpf_map_area_alloc(u64 size, int numa_node); +void *bpf_map_area_alloc(struct bpf_map *map, u64 size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_container_free(void *base); bool bpf_map_write_active(const struct bpf_map *map);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 66df3059a3fe..874fda7e2b8b 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c@@ -625,9 +625,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) st_map->st_ops = st_ops; map = &st_map->map; - st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); + st_map->uvalue = bpf_map_area_alloc(map, vt->size, NUMA_NO_NODE); st_map->links = - bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), + bpf_map_area_alloc(map, btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); if (!st_map->uvalue || !st_map->links || !st_map->image) {
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 23e941826eec..95c1642deaf6 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c@@ -110,7 +110,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) } /* Alloc array for possible remote "destination" CPUs */ - cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * + cmap->cpu_map = bpf_map_area_alloc(&cmap->map, cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *), cmap->map.numa_node); if (!cmap->cpu_map)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3e99c10f1729..b625d578bc93 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c@@ -88,13 +88,15 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); -static struct hlist_head *dev_map_create_hash(unsigned int entries, +static struct hlist_head *dev_map_create_hash(struct bpf_map *map, + unsigned int entries, int numa_node) { int i; struct hlist_head *hash; - hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node); + hash = bpf_map_area_alloc(map, (u64) entries * sizeof(*hash), + numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]);
@@ -138,14 +140,16 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) } if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, + dtab->dev_index_head = dev_map_create_hash(&dtab->map, + dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) return -ENOMEM; spin_lock_init(&dtab->index_lock); } else { - dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries * + dtab->netdev_map = bpf_map_area_alloc(&dtab->map, + (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d2fb144276ab..2a34a115e14f 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c@@ -331,7 +331,8 @@ static int prealloc_init(struct bpf_htab *htab) if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); - htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, + htab->elems = bpf_map_area_alloc(&htab->map, + (u64)htab->elem_size * num_entries, htab->map.numa_node); if (!htab->elems) return -ENOMEM;
@@ -532,7 +533,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; err = -ENOMEM; - htab->buckets = bpf_map_area_alloc(htab->n_buckets * + htab->buckets = bpf_map_area_alloc(&htab->map, htab->n_buckets * sizeof(struct bucket), htab->map.numa_node); if (!htab->buckets)
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 35258aa45236..7c875d4d5b2f 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c@@ -59,7 +59,9 @@ struct bpf_ringbuf_hdr { u32 pg_off; }; -static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) +static struct bpf_ringbuf *bpf_ringbuf_area_alloc(struct bpf_map *map, + size_t data_sz, + int numa_node) { const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | __GFP_ZERO;
@@ -89,7 +91,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) * user-space implementations significantly. */ array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages); - pages = bpf_map_area_alloc(array_size, numa_node); + pages = bpf_map_area_alloc(map, array_size, numa_node); if (!pages) return NULL;
@@ -127,11 +129,12 @@ static void bpf_ringbuf_notify(struct irq_work *work) wake_up_all(&rb->waitq); } -static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) +static struct bpf_ringbuf *bpf_ringbuf_alloc(struct bpf_map *map, + size_t data_sz, int numa_node) { struct bpf_ringbuf *rb; - rb = bpf_ringbuf_area_alloc(data_sz, numa_node); + rb = bpf_ringbuf_area_alloc(map, data_sz, numa_node); if (!rb) return NULL;
@@ -170,7 +173,8 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&rb_map->map, attr); - rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node); + rb_map->rb = bpf_ringbuf_alloc(&rb_map->map, attr->max_entries, + rb_map->map.numa_node); if (!rb_map->rb) { bpf_map_container_free(rb_map); return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 0c3185406f56..c9a91ca05a03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c@@ -48,7 +48,8 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) (u64)smap->map.value_size; int err; - smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, + smap->elems = bpf_map_area_alloc(&smap->map, + elem_size * smap->map.max_entries, smap->map.numa_node); if (!smap->elems) return -ENOMEM;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7289ee1a300a..4f893d2ac4fd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c@@ -334,16 +334,6 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) flags, numa_node, __builtin_return_address(0)); } -void *bpf_map_area_alloc(u64 size, int numa_node) -{ - return __bpf_map_area_alloc(size, numa_node, false); -} - -void bpf_map_area_free(void *area) -{ - kvfree(area); -} - static u32 bpf_map_flags_retain_permanent(u32 flags) { /* Some map creation flags are not tied to the map object but
@@ -495,6 +485,24 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) } #endif +void *bpf_map_area_alloc(struct bpf_map *map, u64 size, int numa_node) +{ + struct mem_cgroup *memcg, *old_memcg; + void *ptr; + + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); + ptr = __bpf_map_area_alloc(size, numa_node, false); + set_active_memcg(old_memcg); + + return ptr; +} + +void bpf_map_area_free(void *area) +{ + kvfree(area); +} + /* * The return pointer is a bpf_map container, as follow, * struct bpf_map_container {
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4b5876c4a47d..1f49dc89822c 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c@@ -48,8 +48,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&stab->map, attr); raw_spin_lock_init(&stab->lock); - stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * - sizeof(struct sock *), + stab->sks = bpf_map_area_alloc(&stab->map, + (u64)stab->map.max_entries * + sizeof(struct sock *), stab->map.numa_node); if (!stab->sks) { bpf_map_container_free(stab);
@@ -1091,7 +1092,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; } - htab->buckets = bpf_map_area_alloc(htab->buckets_num * + htab->buckets = bpf_map_area_alloc(&htab->map, htab->buckets_num * sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) {
--
2.17.1