Re: [PATCH net-next 1/2] pppoe: remove rwlock usage
From: Eric Dumazet <edumazet@google.com>
Date: 2025-08-26 07:33:06
Also in:
lkml
On Mon, Aug 25, 2025 at 7:34 PM Qingfang Deng [off-list ref] wrote:
quoted hunk ↗ jump to hunk
Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for lookups and a spinlock for updates. This removes rwlock usage and allows lockless readers on the fast path. - Mark hash table and list pointers as __rcu. - Use spin_lock() to protect writers. - Readers use rcu_dereference() under rcu_read_lock(). All known callers of get_item() already hold the RCU read lock, so no additional locking is needed. - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace period. Signed-off-by: Qingfang Deng <dqfext@gmail.com> --- drivers/net/ppp/pppoe.c | 83 ++++++++++++++++++++++------------------ include/linux/if_pppox.h | 2 +- 2 files changed, 46 insertions(+), 39 deletions(-)diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 410effa42ade..f99533c80b66 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c@@ -100,8 +100,8 @@ struct pppoe_net { * as well, moreover in case of SMP less locking * controversy here */ - struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; - rwlock_t hash_lock; + struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; + spinlock_t hash_lock; }; /*@@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, int hash = hash_item(sid, addr); struct pppox_sock *ret; - ret = pn->hash_table[hash]; + ret = rcu_dereference(pn->hash_table[hash]); while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; - ret = ret->next; + ret = rcu_dereference(ret->next); } return NULL;@@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); - struct pppox_sock *ret; + struct pppox_sock *ret, *first; - ret = pn->hash_table[hash]; + first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); + ret = first; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } - po->next = pn->hash_table[hash]; - pn->hash_table[hash] = po; + RCU_INIT_POINTER(po->next, first); + rcu_assign_pointer(pn->hash_table[hash], po); return 0; }@@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); - struct pppox_sock *ret, **src; + struct pppox_sock *ret, __rcu **src; - ret = pn->hash_table[hash]; + ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { - *src = ret->next; + struct pppox_sock *next; + + next = rcu_dereference_protected(ret->next, + lockdep_is_held(&pn->hash_lock)); + rcu_assign_pointer(*src, next); break; } src = &ret->next; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } }@@ -225,11 +230,9 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, { struct pppox_sock *po; - read_lock_bh(&pn->hash_lock); po = __get_item(pn, sid, addr, ifindex); if (po) sock_hold(sk_pppox(po));
Are you sure that RCU rules make sure sk_refcnt can not be zero ?
sock_hold() will crash otherwise.
if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt))
po = NULL;
I will send fixes to drivers/net/pptp.c, net/l2tp/l2tp_ppp.c,
net/phonet/socket.c, net/qrtr/af_qrtr.c, net/tipc/socket.c