[PATCH 2/6] SUNRPC: Provide a shared workqueue for cache release callbacks
From: Chuck Lever <cel@kernel.org>
Date: 2026-05-01 14:51:33
Also in:
linux-nfs, lkml
Subsystem:
filesystems (vfs and infrastructure), kernel nfsd, sunrpc, and lockd servers, networking [general], nfs, sunrpc, and lockd clients, the rest · Maintainers:
Alexander Viro, Christian Brauner, Chuck Lever, Jeff Layton, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Trond Myklebust, Anna Schumaker, Linus Torvalds
From: Chuck Lever <chuck.lever@oracle.com>
Cache .put callbacks may need to release sub-objects whose
cleanup sleeps (path_put(), auth_domain_put(), put_group_info()),
which precludes running the release from a call_rcu() softirq
callback.
Commit 48db892356d6 ("NFSD: Defer sub-object cleanup in export
put callbacks") introduced nfsd_export_wq for that purpose, with
a dedicated workqueue chosen so that flush_workqueue() in the
per-namespace teardown path drains only NFSD export release work
rather than blocking on unrelated work queued to system_unbound_wq.
Subsequent patches in this series convert the sunrpc ip_map and
unix_gid put callbacks to the same queue_rcu_work() pattern, and
those would otherwise need their own per-cache workqueue for the
same reason. Hoist the workqueue up to the sunrpc layer so that
all four cache_detail put callbacks share a single workqueue,
managed entirely within net/sunrpc/cache.c.
Expose the workqueue through three helpers.
sunrpc_cache_queue_release() schedules a deferred release after
the next RCU grace period. sunrpc_cache_destroy_net()
encapsulates the cache_unregister_net() + drain +
cache_destroy_net() sequence that single-cache teardowns
otherwise have to open-code, putting the ordering rule in one
place. sunrpc_cache_drain() exposes the underlying
rcu_barrier() + flush_workqueue() primitive for the rare caller
that drains multiple cache_details together, such as
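nfsd_export_shutdown(). Allocate the workqueue in
cache_initialize(), which now returns an error code that
init_sunrpc() checks, and destroy it in a new cache_destroy()
called from cleanup_sunrpc() and from the init_sunrpc() error
path. The shared workqueue is allocated with WQ_UNBOUND |
WQ_MEM_RECLAIM, whereas nfsd_export_wq used WQ_UNBOUND alone.
Replace the local nfsd_export_wq
with the shared sunrpc helpers and drop the
nfsd_export_wq_init/shutdown helpers and their callers.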
Assisted-by: Claude:claude-opus-4-7[1m]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/export.c | 41 +++-----------------------
fs/nfsd/export.h | 2 --
fs/nfsd/nfsctl.c | 8 +----
include/linux/sunrpc/cache.h | 3 ++
net/sunrpc/cache.c | 70 +++++++++++++++++++++++++++++++++++++++++++-
net/sunrpc/sunrpc.h | 3 +-
net/sunrpc/sunrpc_syms.c | 23 +++++++++------
7 files changed, 93 insertions(+), 57 deletions(-)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 15972919e1e9..3c4340e743fa 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c@@ -39,8 +39,6 @@ * second map contains a reference to the entry in the first map. */ -static struct workqueue_struct *nfsd_export_wq; - #define EXPKEY_HASHBITS 8 #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
@@ -62,7 +60,7 @@ static void expkey_put(struct kref *ref) struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); INIT_RCU_WORK(&key->ek_rwork, expkey_release); - queue_rcu_work(nfsd_export_wq, &key->ek_rwork); + sunrpc_cache_queue_release(&key->ek_rwork); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -652,7 +650,7 @@ static void svc_export_put(struct kref *ref) struct svc_export *exp = container_of(ref, struct svc_export, h.ref); INIT_RCU_WORK(&exp->ex_rwork, svc_export_release); - queue_rcu_work(nfsd_export_wq, &exp->ex_rwork); + sunrpc_cache_queue_release(&exp->ex_rwork); } /**
@@ -2193,36 +2191,6 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; -/** - * nfsd_export_wq_init - allocate the export release workqueue - * - * Called once at module load. The workqueue runs deferred svc_export and - * svc_expkey release work scheduled by queue_rcu_work() in the cache put - * callbacks. - * - * Return values: - * %0: workqueue allocated - * %-ENOMEM: allocation failed - */ -int nfsd_export_wq_init(void) -{ - nfsd_export_wq = alloc_workqueue("nfsd_export", WQ_UNBOUND, 0); - if (!nfsd_export_wq) - return -ENOMEM; - return 0; -} - -/** - * nfsd_export_wq_shutdown - drain and free the export release workqueue - * - * Called once at module unload. Per-namespace teardown in - * nfsd_export_shutdown() has already drained all deferred work. - */ -void nfsd_export_wq_shutdown(void) -{ - destroy_workqueue(nfsd_export_wq); -} - /* * Initialize the exports module. */
@@ -2284,9 +2252,8 @@ nfsd_export_shutdown(struct net *net) cache_unregister_net(nn->svc_expkey_cache, net); cache_unregister_net(nn->svc_export_cache, net); - /* Drain deferred export and expkey release work. */ - rcu_barrier(); - flush_workqueue(nfsd_export_wq); + /* One drain covers both caches' deferred release work. */ + sunrpc_cache_drain(); cache_destroy_net(nn->svc_expkey_cache, net); cache_destroy_net(nn->svc_export_cache, net); svcauth_unix_purge(net);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index b05399374574..8969e81de448 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h@@ -111,8 +111,6 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, /* * Function declarations */ -int nfsd_export_wq_init(void); -void nfsd_export_wq_shutdown(void); int nfsd_export_init(struct net *); void nfsd_export_shutdown(struct net *); void nfsd_export_flush(struct net *);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 064a2e749bc9..468aad8c3af9 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c@@ -2536,12 +2536,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = nfsd_export_wq_init(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_export_wq; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys;
@@ -2570,8 +2567,6 @@ static int __init init_nfsd(void) unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_export_wq: - nfsd_export_wq_shutdown(); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free();
@@ -2592,7 +2587,6 @@ static void __exit exit_nfsd(void) nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); - nfsd_export_wq_shutdown(); nfsd_drc_slab_free(); nfsd_lockd_shutdown(); nfsd4_free_slabs();
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 83c88dc82e69..84802438a5fc 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h@@ -237,11 +237,14 @@ extern int cache_check(struct cache_detail *detail, extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) +extern void sunrpc_cache_queue_release(struct rcu_work *rwork); +extern void sunrpc_cache_drain(void); extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net); extern void cache_destroy_net(struct cache_detail *cd, struct net *net); +extern void sunrpc_cache_destroy_net(struct cache_detail *cd, struct net *net); extern void sunrpc_init_cache_detail(struct cache_detail *cd); extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 488a14961b19..733bcd3daa46 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c@@ -1705,9 +1705,77 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) return -ENOMEM; } -void __init cache_initialize(void) +static struct workqueue_struct *sunrpc_cache_wq; + +/** + * sunrpc_cache_queue_release - schedule deferred cache release work + * @rwork: caller-initialized rcu_work to queue + * + * Run @rwork in process context after the next RCU grace period. + * Use this for cache .put callbacks whose cleanup may sleep + * (path_put(), auth_domain_put()). + */ +void sunrpc_cache_queue_release(struct rcu_work *rwork) { + queue_rcu_work(sunrpc_cache_wq, rwork); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_queue_release); + +/** + * sunrpc_cache_drain - drain pending cache release work + * + * Wait for outstanding RCU callbacks to enqueue their release + * work, then flush that work to completion. + */ +void sunrpc_cache_drain(void) +{ + rcu_barrier(); + flush_workqueue(sunrpc_cache_wq); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_drain); + +/** + * sunrpc_cache_destroy_net - quiesce and tear down a per-net cache + * @cd: the cache_detail to release + * @net: the network namespace owning @cd + * + * Canonical teardown for caches whose .put callbacks use + * sunrpc_cache_queue_release(). Unregister @cd to stop new + * lookups, drain in-flight RCU callbacks and queued release + * work, then free @cd and its hash table. The drain ensures + * release workers complete while the cache_detail is still + * valid. 
+ */ +void sunrpc_cache_destroy_net(struct cache_detail *cd, struct net *net) +{ + cache_unregister_net(cd, net); + sunrpc_cache_drain(); + cache_destroy_net(cd, net); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_destroy_net); + +/** + * cache_initialize - allocate sunrpc cache subsystem resources + */ +int __init cache_initialize(void) +{ + sunrpc_cache_wq = alloc_workqueue("sunrpc_cache", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + if (!sunrpc_cache_wq) + return -ENOMEM; INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean); + return 0; +} + +/** + * cache_destroy - release sunrpc cache subsystem resources + * + * Caller must ensure no further sunrpc_cache_queue_release() + * calls can be scheduled before invoking this. + */ +void cache_destroy(void) +{ + destroy_workqueue(sunrpc_cache_wq); } int cache_register_net(struct cache_detail *cd, struct net *net)
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 7fa35ee8f9a4..75ee201e4800 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h@@ -41,7 +41,8 @@ struct svc_rqst; int rpc_clients_notifier_register(void); void rpc_clients_notifier_unregister(void); void auth_domain_cleanup(void); -void __init cache_initialize(void); +int __init cache_initialize(void); +void cache_destroy(void); void svc_sock_update_bufs(struct svc_serv *serv); enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); #endif /* _NET_SUNRPC_SUNRPC_H */
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index ab88ce46afb5..d75ff1e592f2 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c@@ -97,24 +97,26 @@ init_sunrpc(void) if (err) goto out2; - cache_initialize(); - - err = register_pernet_subsys(&sunrpc_net_ops); + err = cache_initialize(); if (err) goto out3; - err = register_rpc_pipefs(); + err = register_pernet_subsys(&sunrpc_net_ops); if (err) goto out4; - err = rpc_sysfs_init(); + err = register_rpc_pipefs(); if (err) goto out5; - err = genl_register_family(&sunrpc_nl_family); + err = rpc_sysfs_init(); if (err) goto out6; + err = genl_register_family(&sunrpc_nl_family); + if (err) + goto out7; + sunrpc_debugfs_init(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl();
@@ -123,12 +125,14 @@ init_sunrpc(void) init_socket_xprt(); /* clnt sock transport */ return 0; -out6: +out7: rpc_sysfs_exit(); -out5: +out6: unregister_rpc_pipefs(); -out4: +out5: unregister_pernet_subsys(&sunrpc_net_ops); +out4: + cache_destroy(); out3: rpcauth_remove_module(); out2:
@@ -157,6 +161,7 @@ cleanup_sunrpc(void) rpc_unregister_sysctl(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ + cache_destroy(); } MODULE_DESCRIPTION("Sun RPC core"); MODULE_LICENSE("GPL");
--
2.53.0