Re: [PATCH net-next v10 3/5] net: devmem: implement autorelease token management
From: Jakub Kicinski <kuba@kernel.org>
Date: 2026-01-21 01:00:44
Also in:
linux-arch, linux-doc, linux-kselftest, lkml
On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
quoted hunk ↗ jump to hunk
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 596c306ce52b..a5301b150663 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml@@ -562,6 +562,17 @@ attribute-sets: type: u32 checks: min: 1 + - + name: autorelease + doc: | + Token autorelease mode. If true (1), leaked tokens are automatically + released when the socket closes. If false (0), leaked tokens are only + released when the dmabuf is torn down. Once a binding is created with + a specific mode, all subsequent bindings system-wide must use the + same mode. + + Optional. Defaults to false if not specified. + type: u8
if you plan to have more values - u32, if not - flag. u8 is an 8b value + 24b of padding, it's only useful for proto fields
quoted hunk ↗ jump to hunk
operations: list:@@ -769,6 +780,7 @@ operations: - ifindex - fd - queues + - autorelease reply: attributes: - id
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); +static DEFINE_MUTEX(devmem_ar_lock); +DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key); +EXPORT_SYMBOL(tcp_devmem_ar_key);
I don't think you need the export, perhaps move the helper in here in the first place (while keeping the static inline wrapper when devmem=n)?
+ if (autorelease) + static_branch_enable(&tcp_devmem_ar_key);
This is user-controlled (non-root), right? So I think we need the deferred version of key helpers.
- if (direction == DMA_TO_DEVICE) {
- binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
- sizeof(struct net_iov *),
- GFP_KERNEL);
- if (!binding->vec) {
- err = -ENOMEM;
- goto err_unmap;
- }
+ binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
+ sizeof(struct net_iov *),
+ GFP_KERNEL | __GFP_ZERO);
make it a kvcalloc() while we're touching it, pls
quoted hunk ↗ jump to hunk
+ if (!binding->vec) { + err = -ENOMEM; + goto err_unmap; } /* For simplicity we expect to make PAGE_SIZE allocations, but the@@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev, niov = &owner->area.niovs[i]; niov->type = NET_IOV_DMABUF; niov->owner = &owner->area; + atomic_set(&niov->uref, 0);
Isn't it zero'ed during alloc?
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); - if (direction == DMA_TO_DEVICE) - binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov; + binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov; } virtual += len; }
+ if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE]) + autorelease = + !!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);
nla_get_u8_default()
priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); if (IS_ERR(priv)) return PTR_ERR(priv);
+static noinline_for_stack int
+sock_devmem_dontneed_manual_release(struct sock *sk,
+ struct dmabuf_token *tokens,
+ unsigned int num_tokens)
+{
+ struct net_iov *niov;
+ unsigned int i, j;
+ netmem_ref netmem;
+ unsigned int token;
+ int num_frags = 0;
+ int ret = 0;
+
+ if (!sk->sk_devmem_info.binding)
+ return -EINVAL;
+
+ for (i = 0; i < num_tokens; i++) {
+ for (j = 0; j < tokens[i].token_count; j++) {
+ size_t size = sk->sk_devmem_info.binding->dmabuf->size;
+
+ token = tokens[i].token_start + j;
+ if (token >= size / PAGE_SIZE)
+ break;
+
+ if (++num_frags > MAX_DONTNEED_FRAGS)
+ return ret;
+
+ niov = sk->sk_devmem_info.binding->vec[token];
+ if (atomic_dec_and_test(&niov->uref)) {
Don't you need something like "atomic dec non zero and test"? refcount has refcount_dec_not_one() 🤔️
+ netmem = net_iov_to_netmem(niov); + WARN_ON_ONCE(!napi_pp_put_page(netmem)); + } + ret++; + }
frag_limit_reached: - xa_unlock_bh(&sk->sk_user_frags); + xa_unlock_bh(&sk->sk_devmem_info.frags);
may be worth separating the sk_devmem_info change out for clarity
for (k = 0; k < netmem_num; k++) WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
quoted hunk ↗ jump to hunk
@@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_release_user_frags(sk); - xa_destroy(&sk->sk_user_frags); + if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) { + net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding); + net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding); + sk->sk_devmem_info.binding = NULL; + WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags), + "non-empty xarray discovered in autorelease off mode"); + } + + xa_destroy(&sk->sk_devmem_info.frags);
Let's wrap this up in a helper that'll live in devmem.c