Thread (20 messages) 20 messages, 3 authors, 2026-03-03
STALE91d REVIEWED: 4 (4M)

[PATCH v3 net-next 01/15] selftest: net: Add basic functionality tests for ipmr.

From: Kuniyuki Iwashima <kuniyu@google.com>
Date: 2026-02-28 22:18:05
Subsystem: kernel selftest framework, networking [general], networking [ipv4/ipv6], the rest · Maintainers: Shuah Khan, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, David Ahern, Ido Schimmel, Linus Torvalds

The new test exercise paths, where RTNL is needed, to
catch lockdep splat:

  setsockopt
    MRT_INIT / MRT_DONE
    MRT_ADD_VIF / MRT_DEL_VIF
    MRT_ADD_MFC / MRT_DEL_MFC / MRT_ADD_MFC_PROXY / MRT_DEL_MFC_PROXY
    MRT_TABLE
    MRT_FLUSH

  rtnetlink
    RTM_NEWROUTE
    RTM_DELROUTE

  NETDEV_UNREGISTER

I will extend this to cover IPv6 setsockopt() later.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
---
v3: Init @unused in nl_add_rtattr().
v2: Add ipmr to .gitignore
---
 .../selftests/net/forwarding/.gitignore       |   1 +
 .../testing/selftests/net/forwarding/Makefile |   4 +
 tools/testing/selftests/net/forwarding/ipmr.c | 455 ++++++++++++++++++
 3 files changed, 460 insertions(+)
 create mode 100644 tools/testing/selftests/net/forwarding/ipmr.c
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
index 2dea317f12e7..418ff96c52ef 100644
--- a/tools/testing/selftests/net/forwarding/.gitignore
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 forwarding.config
+ipmr
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index ff4a00d91a26..bbaf4d937dd8 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -133,6 +133,10 @@ TEST_FILES := \
 	tc_common.sh \
 # end of TEST_FILES
 
+TEST_GEN_PROGS := \
+	ipmr
+# end of TEST_GEN_PROGS
+
 TEST_INCLUDES := \
 	$(wildcard ../lib/sh/*.sh) \
 	../lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/ipmr.c b/tools/testing/selftests/net/forwarding/ipmr.c
new file mode 100644
index 000000000000..df870aad9ead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipmr.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2026 Google LLC */
+
+#include <linux/if.h>
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/socket.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "kselftest_harness.h"
+
+FIXTURE(ipmr)
+{
+	int netlink_sk;
+	int raw_sk;
+	int veth_ifindex;
+};
+
+FIXTURE_VARIANT(ipmr)
+{
+	int family;
+	int protocol;
+	int level;
+	int opts[MRT_MAX - MRT_BASE + 1];
+};
+
+FIXTURE_VARIANT_ADD(ipmr, ipv4)
+{
+	.family = AF_INET,
+	.protocol = IPPROTO_IGMP,
+	.level = IPPROTO_IP,
+	.opts = {
+		MRT_INIT,
+		MRT_DONE,
+		MRT_ADD_VIF,
+		MRT_DEL_VIF,
+		MRT_ADD_MFC,
+		MRT_DEL_MFC,
+		MRT_VERSION,
+		MRT_ASSERT,
+		MRT_PIM,
+		MRT_TABLE,
+		MRT_ADD_MFC_PROXY,
+		MRT_DEL_MFC_PROXY,
+		MRT_FLUSH,
+	},
+};
+
+struct mfc_attr {
+	int table;
+	__u32 origin;
+	__u32 group;
+	int ifindex;
+	bool proxy;
+};
+
+static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta,
+				    int type, const void *data, int len)
+{
+	int unused = 0;
+
+	rta->rta_type = type;
+	rta->rta_len = RTA_LENGTH(len);
+	memcpy(RTA_DATA(rta), data, len);
+
+	nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len);
+
+	return RTA_NEXT(rta, unused);
+}
+
+static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self,
+			  __u16 nlmsg_type, struct mfc_attr *mfc_attr)
+{
+	struct {
+		struct nlmsghdr nlmsg;
+		struct rtmsg rtm;
+		char buf[4096];
+	} req = {
+		.nlmsg = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)),
+			/* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_type = nlmsg_type,
+		},
+		.rtm = {
+			/* hard requirements in rtm_to_ipmr_mfcc() */
+			.rtm_family = RTNL_FAMILY_IPMR,
+			.rtm_dst_len = 32,
+			.rtm_type = RTN_MULTICAST,
+			.rtm_scope = RT_SCOPE_UNIVERSE,
+			.rtm_protocol = RTPROT_MROUTED,
+		},
+	};
+	struct nlmsghdr *nlmsg = &req.nlmsg;
+	struct nlmsgerr *errmsg;
+	struct rtattr *rta;
+	int err;
+
+	rta = (struct rtattr *)&req.buf;
+	rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group));
+	if (mfc_attr->ifindex)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex));
+	if (mfc_attr->proxy)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0);
+
+	err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0);
+	ASSERT_EQ(err, req.nlmsg.nlmsg_len);
+
+	memset(&req, 0, sizeof(req));
+
+	err = recv(self->netlink_sk, &req, sizeof(req), 0);
+	ASSERT_TRUE(NLMSG_OK(nlmsg, err));
+	ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type);
+
+	errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg);
+	return errmsg->error;
+}
+
+FIXTURE_SETUP(ipmr)
+{
+	struct ifreq ifr = {
+		.ifr_name = "veth0",
+	};
+	int err;
+
+	err = unshare(CLONE_NEWNET);
+	ASSERT_EQ(0, err);
+
+	self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	ASSERT_LE(0, self->netlink_sk);
+
+	self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol);
+	ASSERT_LT(0, self->raw_sk);
+
+	err = system("ip link add veth0 type veth peer veth1");
+	ASSERT_EQ(0, err);
+
+	err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr);
+	ASSERT_EQ(0, err);
+
+	self->veth_ifindex = ifr.ifr_ifindex;
+}
+
+FIXTURE_TEARDOWN(ipmr)
+{
+	close(self->raw_sk);
+	close(self->netlink_sk);
+}
+
+TEST_F(ipmr, mrt_init)
+{
+	int err, val = 0;  /* any value is ok, but size must be int for MRT_INIT. */
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_INIT - MRT_BASE],
+			 &val,  sizeof(val));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DONE - MRT_BASE],
+			 &val,  sizeof(val));
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_vif_register)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_REGISTER,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q pimreg");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_del_vif_unreg)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q veth0");
+	ASSERT_EQ(0, err);
+
+	/* VIF is removed along with its device. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* mrt->vif_table[veth_ifindex]->dev is NULL. */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(-1, err);
+	ASSERT_EQ(EADDRNOTAVAIL, errno);
+}
+
+TEST_F(ipmr, mrt_del_vif_netns_dismantle)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	/* Let cleanup_net() remove veth0 and VIF. */
+}
+
+TEST_F(ipmr, mrt_add_mfc)
+{
+	struct mfcctl mfc = {};
+	int err;
+
+	/* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	/* (0.0.0.0 -> 0.0.0.0) */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+}
+
+TEST_F(ipmr, mrt_add_mfc_proxy)
+{
+	struct mfcctl mfc = {};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink_proxy)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = true,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink_no_vif)
+{
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.proxy = false,
+	};
+	int err;
+
+	/* netlink always requires RTA_IIF of an existing vif. */
+	mfc_attr.ifindex = 0;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+
+	/* netlink always requires RTA_IIF of an existing vif. */
+	mfc_attr.ifindex = self->veth_ifindex;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+}
+
+TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle)
+{
+	struct vifctl vifs[2] = {
+		{
+			.vifc_vifi = 0,
+			.vifc_flags = VIFF_USE_IFINDEX,
+			.vifc_lcl_ifindex = self->veth_ifindex,
+		},
+		{
+			.vifc_vifi = 1,
+			.vifc_flags = VIFF_REGISTER,
+		}
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int i, err;
+
+	for (i = 0; i < 2; i++) {
+		/* Create 2 VIFs just to avoid -ENFILE later. */
+		err = setsockopt(self->raw_sk,
+				 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+				 &vifs[i],  sizeof(vifs[i]));
+		ASSERT_EQ(0, err);
+	}
+
+	/* Create a MFC for mrt->vif_table[0]. */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* Remove mrt->vif_table[0]. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* MFC entry is NOT removed even if the tied VIF is removed... */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* ... and netlink is not capable of removing such an entry
+	 * because netlink always requires a valid RTA_IIF ... :/
+	 */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(-ENODEV, err);
+
+	/* It can be removed by setsockopt(), but let cleanup_net() remove this time. */
+}
+
+TEST_F(ipmr, mrt_table_flush)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int table_id = 92;
+	int err, flags;
+
+	/* Set a random table id rather than RT_TABLE_DEFAULT.
+	 * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT.
+	 */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_TABLE - MRT_BASE],
+			 &table_id,  sizeof(table_id));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	mfc_attr.table = table_id;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	/* Flush mrt->vif_table[] and all caches. */
+	flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+		MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC;
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_FLUSH - MRT_BASE],
+			 &flags,  sizeof(flags));
+	ASSERT_EQ(0, err);
+}
+
+TEST_HARNESS_MAIN
-- 
2.53.0.473.g4a7958ca14-goog
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help