Thread (4 messages) 4 messages, 2 authors, 2026-02-22

[PATCH net-next v2] net: ipv4: add lwtunnel hash to fib_info_hash to fix mpls collisions

From: Vitaliy Guschin <hidden>
Date: 2026-02-22 01:40:55
Subsystem: networking [general], networking [ipv4/ipv6], the rest · Maintainers: "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, David Ahern, Ido Schimmel, Linus Torvalds

Currently, fib_info_hash_bucket does not account for MPLS labels
(lwtunnel state) when calculating the hash for fib_info objects. This leads
to massive hash collisions when many routes are configured with the same
gateway but different MPLS labels.

To resolve this, introduce lwtunnel_get_encap_hash() helper which calls a
new .get_encap_hash callback in lwtunnel_encap_ops. Implement this callback
for mpls_iptunnel to provide a hash of the MPLS label set.

This ensures proper distribution in the fib_info_hash table, improving
route installation and deletion performance by avoiding massive hash
collisions. In a test case with 100,000 MPLS routes, each fib_info lookup
changes from an O(N) walk of a single bucket to a lookup in a
well-distributed hash table.

Performance test (Batch installation of 100,000 routes with MPLS labels):
CPU: Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz

- Before patch: 6m 0.258s (sys 5m 56.895s)
- After patch:  0m 0.879s (sys 0m 0.468s)

Signed-off-by: Vitaliy Guschin <redacted>
---

Changes in v2:
  - Removed unnecessary nla_total_size() call in lwtunnel_get_encap_hash logic.

 include/net/lwtunnel.h   |  7 +++++++
 net/core/lwtunnel.c      | 22 ++++++++++++++++++++++
 net/ipv4/fib_semantics.c | 12 +++++++++++-
 net/mpls/mpls_iptunnel.c | 13 +++++++++++++
 4 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 26232f603e33..c91e4d4fa08b 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -47,6 +47,7 @@ struct lwtunnel_encap_ops {
 	int (*fill_encap)(struct sk_buff *skb,
 			  struct lwtunnel_state *lwtstate);
 	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+	unsigned int (*get_encap_hash)(struct lwtunnel_state *lwtstate);
 	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
 	int (*xmit)(struct sk_buff *skb);
 
@@ -127,6 +128,7 @@ int lwtunnel_build_state(struct net *net, u16 encap_type,
 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
 			int encap_attr, int encap_type_attr);
 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate);
 struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -237,6 +239,11 @@ static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 	return 0;
 }
 
+static inline unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	return 0;	/* always 0: this variant contributes no encap hash */
+}
+
 static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
 {
 	return NULL;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index f9d76d85d04f..07b01a0c1895 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -289,6 +289,28 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 }
 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
 
+/* Ask the encap type's ops for a hash of @lwtstate. Returns 0 for a NULL
+ * state, a type of NONE or above LWTUNNEL_ENCAP_MAX, or a missing callback.
+ */
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *encap_ops;
+	unsigned int ret = 0;
+
+	if (!lwtstate || lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return ret;
+
+	rcu_read_lock();
+	encap_ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(encap_ops && encap_ops->get_encap_hash))
+		ret = encap_ops->get_encap_hash(lwtstate);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_hash);
+
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
 	const struct lwtunnel_encap_ops *ops;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0caf38e44c73..775582537561 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -325,6 +325,16 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
 	return val;
 }
 
+/* Fold @nh's oif and lwtunnel encap hash into @val and return the result.
+ * lwtunnel_get_encap_hash() yields 0 for a NULL state, so no check is needed.
+ */
+static unsigned int fib_info_hashfn_nh(unsigned int val, const struct fib_nh *nh)
+{
+	val ^= nh->fib_nh_oif;
+
+	return val ^ lwtunnel_get_encap_hash(nh->fib_nh_lws);
+}
+
 static unsigned int fib_info_hashfn_result(const struct net *net,
 					   unsigned int val)
 {
@@ -344,7 +354,7 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
 		val ^= fi->nh->id;
 	} else {
 		for_nexthops(fi) {
-			val ^= nh->fib_nh_oif;
+			val = fib_info_hashfn_nh(val, nh); /* returns updated val */
 		} endfor_nexthops(fi)
 	}
 
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 1a1a0eb5b787..0960dfb3d633 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -259,6 +259,18 @@ static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
 	return nlsize;
 }
 
+/* Hash the MPLS label stack of @lwtstate: jhash2() over the first
+ * ->labels words of ->label[], with initval 0.
+ */
+static unsigned int mpls_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	const struct mpls_iptunnel_encap *encap;
+
+	encap = mpls_lwtunnel_encap(lwtstate);
+
+	return jhash2(encap->label, encap->labels, 0);
+}
+
 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
 	struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
@@ -281,6 +293,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = {
 	.xmit = mpls_xmit,
 	.fill_encap = mpls_fill_encap_info,
 	.get_encap_size = mpls_encap_nlsize,
+	.get_encap_hash = mpls_encap_hash,
 	.cmp_encap = mpls_encap_cmp,
 	.owner = THIS_MODULE,
 };
-- 
2.53.0
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help