Re: [PATCH net-next v3 3/5] net/ipv6: Remove expired routes with a separated list of routes.
From: David Ahern <dsahern@kernel.org>
Date: 2024-02-05 04:45:18
On 2/2/24 1:21 AM, thinker.li@gmail.com wrote:
quoted hunk ↗ jump to hunk
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733ace18806c..36bfa987c314 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c@@ -1255,6 +1255,7 @@ static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt, bool del_peer) { + struct fib6_table *table; struct fib6_info *f6i; f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
addrconf_get_prefix_route walks the table, so you know it is already there ...
quoted hunk ↗ jump to hunk
@@ -1264,8 +1265,18 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { - if (!(f6i->fib6_flags & RTF_EXPIRES)) + if (!(f6i->fib6_flags & RTF_EXPIRES)) { + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); fib6_set_expires(f6i, expires); + /* If fib6_node is null, the f6i is just + * removed from the table. + */ + if (rcu_dereference_protected(f6i->fib6_node,
... meaning this check should not be needed
quoted hunk ↗ jump to hunk
+ lockdep_is_held(&table->tb6_lock))) + fib6_add_gc_list(f6i); + spin_unlock_bh(&table->tb6_lock); + } fib6_info_release(f6i); } }@@ -2706,6 +2717,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; + struct fib6_table *table; __u32 valid_lft; __u32 prefered_lft; int addr_type, err;@@ -2782,11 +2794,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (valid_lft == 0) { ip6_del_rt(net, rt, false); rt = NULL; - } else if (addrconf_finite_timeout(rt_expires)) { - /* not infinity */ - fib6_set_expires(rt, jiffies + rt_expires); } else { - fib6_clean_expires(rt); + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock);
when it comes to locking, I prefer the lock and unlock lines to *pop* - meaning newline on both sides so it is clear and stands out.
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ fib6_set_expires(rt, jiffies + rt_expires);
+ /* If fib6_node is null, the f6i is
+ * just removed from the table.
+ */
+ if (rcu_dereference_protected(rt->fib6_node,similarly here, this code is entered because rt is set based on addrconf_get_prefix_route.
quoted hunk ↗ jump to hunk
+ lockdep_is_held(&table->tb6_lock))) + fib6_add_gc_list(rt); + } else { + fib6_clean_expires(rt); + fib6_remove_gc_list(rt); + } + spin_unlock_bh(&table->tb6_lock); } } else if (valid_lft) { clock_t expires = 0;@@ -4741,6 +4765,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, u32 flags, bool modify_peer) { + struct fib6_table *table; struct fib6_info *f6i; u32 prio;@@ -4761,10 +4786,21 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { - if (!expires) + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); + if (!expires) { fib6_clean_expires(f6i); - else + fib6_remove_gc_list(f6i); + } else { fib6_set_expires(f6i, expires); + /* If fib6_node is null, the f6i is just removed + * from the table. + */ + if (rcu_dereference_protected(f6i->fib6_node,
and here as well. f6i is set based on a table lookup.
+ lockdep_is_held(&table->tb6_lock))) + fib6_add_gc_list(f6i); + } + spin_unlock_bh(&table->tb6_lock); fib6_info_release(f6i); }
...
quoted hunk ↗ jump to hunk
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a68462668158..5ca9fd4f7945 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c@@ -1410,8 +1410,17 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } - if (rt) + if (rt) { + spin_lock_bh(&rt->fib6_table->tb6_lock); fib6_set_expires(rt, jiffies + (HZ * lifetime)); + /* If fib6_node is null, the f6i is just removed from the + * table. + */
How about: /* If fib6_node is NULL, the route was removed between * the rt6_get_dflt_router or rt6_add_dflt_router calls * above and here. */
quoted hunk ↗ jump to hunk
+ if (rcu_dereference_protected(rt->fib6_node,> + lockdep_is_held(&rt->fib6_table->tb6_lock))) + fib6_add_gc_list(rt); + spin_unlock_bh(&rt->fib6_table->tb6_lock); + } if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dd6ff5b20918..cfaf226ecf98 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c@@ -989,10 +989,20 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (!addrconf_finite_timeout(lifetime)) + spin_lock_bh(&rt->fib6_table->tb6_lock); + if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - else + fib6_remove_gc_list(rt); + } else { fib6_set_expires(rt, jiffies + HZ * lifetime); + /* If fib6_node is null, the f6i is just removed + * from the table. + */
Similarly, enhance the comment: /* If fib6_node is NULL, the route was removed * between the get or add calls above and here. */
+ if (rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock))) + fib6_add_gc_list(rt); + } + spin_unlock_bh(&rt->fib6_table->tb6_lock); fib6_info_release(rt); }