Thread (34 messages) 34 messages, 6 authors, 2026-02-05

Re: [PATCH net-next v10 3/5] net: devmem: implement autorelease token management

From: Jakub Kicinski <kuba@kernel.org>
Date: 2026-01-21 01:00:44
Also in: linux-arch, linux-doc, linux-kselftest, lkml

On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
quoted hunk ↗ jump to hunk
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 596c306ce52b..a5301b150663 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -562,6 +562,17 @@ attribute-sets:
         type: u32
         checks:
           min: 1
+      -
+        name: autorelease
+        doc: |
+          Token autorelease mode. If true (1), leaked tokens are automatically
+          released when the socket closes. If false (0), leaked tokens are only
+          released when the dmabuf is torn down. Once a binding is created with
+          a specific mode, all subsequent bindings system-wide must use the
+          same mode.
+
+          Optional. Defaults to false if not specified.
+        type: u8
If you plan to have more values, use u32; if not, use flag.
A u8 is an 8-bit value plus 24 bits of padding; it's only useful for protocol fields.
quoted hunk ↗ jump to hunk
 operations:
   list:
@@ -769,6 +780,7 @@ operations:
             - ifindex
             - fd
             - queues
+            - autorelease
         reply:
           attributes:
             - id
 static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+static DEFINE_MUTEX(devmem_ar_lock);
+DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);
+EXPORT_SYMBOL(tcp_devmem_ar_key);
I don't think you need the export, perhaps move the helper in here in
the first place (while keeping the static inline wrapper when devmem=n)?
+	if (autorelease)
+		static_branch_enable(&tcp_devmem_ar_key);
This is user-controlled (non-root), right? So I think we need
the deferred version of the static key helpers.
-	if (direction == DMA_TO_DEVICE) {
-		binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
-					      sizeof(struct net_iov *),
-					      GFP_KERNEL);
-		if (!binding->vec) {
-			err = -ENOMEM;
-			goto err_unmap;
-		}
+	binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
+				      sizeof(struct net_iov *),
+				      GFP_KERNEL | __GFP_ZERO);
make it a kvcalloc() while we're touching it, pls
quoted hunk ↗ jump to hunk
+	if (!binding->vec) {
+		err = -ENOMEM;
+		goto err_unmap;
 	}
 
 	/* For simplicity we expect to make PAGE_SIZE allocations, but the
@@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev,
 			niov = &owner->area.niovs[i];
 			niov->type = NET_IOV_DMABUF;
 			niov->owner = &owner->area;
+			atomic_set(&niov->uref, 0);
Isn't it zero'ed during alloc?
 			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
 						      net_devmem_get_dma_addr(niov));
-			if (direction == DMA_TO_DEVICE)
-				binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
+			binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
 		}
 
 		virtual += len;
 	}
 
+	if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE])
+		autorelease =
+			!!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);
nla_get_u8_default() 
 	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
 	if (IS_ERR(priv))
 		return PTR_ERR(priv);
+static noinline_for_stack int
+sock_devmem_dontneed_manual_release(struct sock *sk,
+				    struct dmabuf_token *tokens,
+				    unsigned int num_tokens)
+{
+	struct net_iov *niov;
+	unsigned int i, j;
+	netmem_ref netmem;
+	unsigned int token;
+	int num_frags = 0;
+	int ret = 0;
+
+	if (!sk->sk_devmem_info.binding)
+		return -EINVAL;
+
+	for (i = 0; i < num_tokens; i++) {
+		for (j = 0; j < tokens[i].token_count; j++) {
+			size_t size = sk->sk_devmem_info.binding->dmabuf->size;
+
+			token = tokens[i].token_start + j;
+			if (token >= size / PAGE_SIZE)
+				break;
+
+			if (++num_frags > MAX_DONTNEED_FRAGS)
+				return ret;
+
+			niov = sk->sk_devmem_info.binding->vec[token];
+			if (atomic_dec_and_test(&niov->uref)) {
Don't you need something like "atomic dec non-zero and test"?
refcount has refcount_dec_not_one() 🤔️
+				netmem = net_iov_to_netmem(niov);
+				WARN_ON_ONCE(!napi_pp_put_page(netmem));
+			}
+			ret++;
+		}
 frag_limit_reached:
-	xa_unlock_bh(&sk->sk_user_frags);
+	xa_unlock_bh(&sk->sk_devmem_info.frags);
It may be worth separating the sk_devmem_info change out for clarity.
 	for (k = 0; k < netmem_num; k++)
 		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
quoted hunk ↗ jump to hunk
@@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	tcp_release_user_frags(sk);
 
-	xa_destroy(&sk->sk_user_frags);
+	if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) {
+		net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding);
+		net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding);
+		sk->sk_devmem_info.binding = NULL;
+		WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
+			  "non-empty xarray discovered in autorelease off mode");
+	}
+
+	xa_destroy(&sk->sk_devmem_info.frags);
Let's wrap this up in a helper that'll live in devmem.c
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help