Thread: 20 messages, 4 authors, 28d ago
COLD 28d · REVIEWED: 2 (2M)
Revisions (2)
  1. v1 current
  2. v2 [diff vs current]

[PATCH net-next 01/13] net/mlx5: LAG, factor out shared FDB code into dedicated file

From: Tariq Toukan <tariqt@nvidia.com>
Date: 2026-05-27 12:55:41
Also in: linux-rdma, lkml
Subsystem: mellanox mlx5 core vpi driver, networking drivers, the rest · Maintainers: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds

From: Shay Drory <redacted>

Refactor shared FDB LAG logic into a new lag/shared_fdb.c file to
improve code organization and enable reuse. Move the shared-FDB-specific
functions out of lag.c and introduce consolidated APIs:
- mlx5_lag_shared_fdb_create() handles LAG activation with shared FDB
- mlx5_lag_shared_fdb_destroy() handles LAG deactivation with shared FDB

Update mlx5_do_bond(), mlx5_disable_lag() and mpesw.c to use the new
APIs, which simplifies the shared FDB code paths.

Signed-off-by: Shay Drory <redacted>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 156 ++++--------------
 .../net/ethernet/mellanox/mlx5/core/lag/lag.h |  26 +++
 .../ethernet/mellanox/mlx5/core/lag/mpesw.c   |  25 +--
 .../mellanox/mlx5/core/lag/shared_fdb.c       | 143 ++++++++++++++++
 5 files changed, 210 insertions(+), 142 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d39fe9c4a87c..19e50f0d55af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -41,7 +41,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
 					en_rep.o en/rep/bond.o en/mod_hdr.o \
-					en/mapping.o lag/mpesw.o
+					en/mapping.o lag/mpesw.o lag/shared_fdb.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
 					lib/fs_chains.o en/tc_tun.o \
 					esw/indir_table.o en/tc_tun_encap.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 22b7efea34b8..5dfdd799828f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -817,43 +817,6 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
 	}
 }
 
-static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
-{
-	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
-	struct mlx5_eswitch *master_esw;
-	struct mlx5_core_dev *dev0;
-	int i, j;
-	int err;
-
-	if (master_idx < 0)
-		return -EINVAL;
-
-	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
-	master_esw = dev0->priv.eswitch;
-	mlx5_ldev_for_each(i, 0, ldev) {
-		struct mlx5_eswitch *slave_esw;
-
-		if (i == master_idx)
-			continue;
-
-		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
-
-		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
-							       slave_esw, ldev->ports);
-		if (err)
-			goto err;
-	}
-	return 0;
-err:
-	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
-		if (j == master_idx)
-			continue;
-		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
-							 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
-	}
-	return err;
-}
-
 static int mlx5_create_lag(struct mlx5_lag *ldev,
 			   struct lag_tracker *tracker,
 			   enum mlx5_lag_mode mode,
@@ -1218,12 +1181,15 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 	if (idx < 0)
 		return;
 
+	if (shared_fdb) {
+		mlx5_lag_shared_fdb_destroy(ldev);
+		return;
+	}
+
 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
 	roce_lag = __mlx5_lag_is_roce(ldev);
 
-	if (shared_fdb) {
-		mlx5_lag_remove_devices(ldev);
-	} else if (roce_lag) {
+	if (roce_lag) {
 		mlx5_lag_rescan_dev_locked(ldev, dev0, false);
 		mlx5_ldev_for_each(i, 0, ldev) {
 			if (i == idx)
@@ -1236,49 +1202,8 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 	if (err)
 		return;
 
-	if (shared_fdb || roce_lag)
+	if (roce_lag)
 		mlx5_lag_add_devices(ldev);
-
-	if (shared_fdb)
-		mlx5_lag_reload_ib_reps_from_locked(ldev,
-						    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV,
-						    true);
-}
-
-bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
-{
-	struct mlx5_core_dev *dev;
-	bool ret = false;
-	int idx;
-	int i;
-
-	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
-	if (idx < 0)
-		return false;
-
-	mlx5_ldev_for_each(i, 0, ldev) {
-		if (i == idx)
-			continue;
-		dev = mlx5_lag_pf(ldev, i)->dev;
-		if (is_mdev_switchdev_mode(dev) &&
-		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
-		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
-		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
-		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
-		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
-			continue;
-		return false;
-	}
-
-	dev = mlx5_lag_pf(ldev, idx)->dev;
-	if (is_mdev_switchdev_mode(dev) &&
-	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
-	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
-	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
-	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
-		ret = true;
-
-	return ret;
 }
 
 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
@@ -1493,47 +1418,37 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 
 		roce_lag = mlx5_lag_is_roce_lag(ldev);
 
-		if (shared_fdb || roce_lag)
-			mlx5_lag_remove_devices(ldev);
-
-		err = mlx5_activate_lag(ldev, &tracker,
-					roce_lag ? MLX5_LAG_MODE_ROCE :
-						   MLX5_LAG_MODE_SRIOV,
-					shared_fdb);
-		if (err) {
-			if (shared_fdb || roce_lag)
-				mlx5_lag_add_devices(ldev);
-			if (shared_fdb)
-				mlx5_lag_reload_ib_reps_from_locked(ldev, 0,
-								    true);
-
-			return;
-		}
+		if (shared_fdb) {
+			err = mlx5_lag_shared_fdb_create(ldev, &tracker,
+							 MLX5_LAG_MODE_SRIOV);
+			if (err)
+				return;
+		} else {
+			if (roce_lag)
+				mlx5_lag_remove_devices(ldev);
 
-		if (roce_lag) {
-			struct mlx5_core_dev *dev;
-
-			mlx5_lag_rescan_dev_locked(ldev, dev0, true);
-			mlx5_ldev_for_each(i, 0, ldev) {
-				if (i == idx)
-					continue;
-				dev = mlx5_lag_pf(ldev, i)->dev;
-				if (mlx5_get_roce_state(dev))
-					mlx5_nic_vport_enable_roce(dev);
-			}
-		} else if (shared_fdb) {
-			mlx5_lag_rescan_dev_locked(ldev, dev0, true);
-			err = mlx5_lag_reload_ib_reps_from_locked(ldev, 0,
-								  false);
+			err = mlx5_activate_lag(ldev, &tracker,
+						roce_lag ? MLX5_LAG_MODE_ROCE :
+							   MLX5_LAG_MODE_SRIOV,
+						false);
 			if (err) {
-				mlx5_lag_rescan_dev_locked(ldev, dev0, false);
-				mlx5_deactivate_lag(ldev);
-				mlx5_lag_add_devices(ldev);
-				mlx5_lag_reload_ib_reps_from_locked(ldev, 0,
-								    true);
-				mlx5_core_err(dev0, "Failed to enable lag\n");
+				if (roce_lag)
+					mlx5_lag_add_devices(ldev);
 				return;
 			}
+
+			if (roce_lag) {
+				struct mlx5_core_dev *dev;
+
+				mlx5_lag_rescan_dev_locked(ldev, dev0, true);
+				mlx5_ldev_for_each(i, 0, ldev) {
+					if (i == idx)
+						continue;
+					dev = mlx5_lag_pf(ldev, i)->dev;
+					if (mlx5_get_roce_state(dev))
+						mlx5_nic_vport_enable_roce(dev);
+				}
+			}
 		}
 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
@@ -1545,7 +1460,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 						     ndev);
 			dev_put(ndev);
 		}
-		mlx5_lag_set_vports_agg_speed(ldev);
+		if (!shared_fdb)
+			mlx5_lag_set_vports_agg_speed(ldev);
 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
 		mlx5_modify_lag(ldev, &tracker);
 		mlx5_lag_set_vports_agg_speed(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 6afe7707d076..23c0457ce799 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -137,7 +137,33 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
 	return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
 }
 
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
+			       struct lag_tracker *tracker,
+			       enum mlx5_lag_mode mode);
+void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev);
+int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev);
 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
+#else
+static inline int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
+					     struct lag_tracker *tracker,
+					     enum mlx5_lag_mode mode)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev) {}
+
+static inline int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+	return false;
+}
+#endif
 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
 			struct mlx5_flow_table_attr *ft_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 8a349f8fd823..64e2d1dd5308 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -92,38 +92,21 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
 	if (err)
 		return err;
 
-	mlx5_lag_remove_devices(ldev);
-
-	err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true);
+	err = mlx5_lag_shared_fdb_create(ldev, NULL, MLX5_LAG_MODE_MPESW);
 	if (err) {
 		mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err);
-		goto err_add_devices;
+		mlx5_mpesw_metadata_cleanup(ldev);
+		return err;
 	}
 
-	mlx5_lag_rescan_dev_locked(ldev, dev0, true);
-	err = mlx5_lag_reload_ib_reps_from_locked(ldev, 0, false);
-	if (err)
-		goto err_rescan_drivers;
-
-	mlx5_lag_set_vports_agg_speed(ldev);
-
 	return 0;
-
-err_rescan_drivers:
-	mlx5_lag_rescan_dev_locked(ldev, dev0, false);
-	mlx5_deactivate_lag(ldev);
-err_add_devices:
-	mlx5_lag_add_devices(ldev);
-	mlx5_lag_reload_ib_reps_from_locked(ldev, 0, true);
-	mlx5_mpesw_metadata_cleanup(ldev);
-	return err;
 }
 
 void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev)
 {
 	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
 		mlx5_mpesw_metadata_cleanup(ldev);
-		mlx5_disable_lag(ldev);
+		mlx5_lag_shared_fdb_destroy(ldev);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
new file mode 100644
index 000000000000..e5b8e9f1e6fd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "lag.h"
+#include "eswitch.h"
+
+bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev;
+	bool ret = false;
+	int idx;
+	int i;
+
+	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	if (idx < 0)
+		return false;
+
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == idx)
+			continue;
+		dev = mlx5_lag_pf(ldev, i)->dev;
+		if (is_mdev_switchdev_mode(dev) &&
+		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
+		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
+		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
+		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
+		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
+			continue;
+		return false;
+	}
+
+	dev = mlx5_lag_pf(ldev, idx)->dev;
+	if (is_mdev_switchdev_mode(dev) &&
+	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
+	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
+	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
+	    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
+	    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
+		ret = true;
+
+	return ret;
+}
+
+int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
+{
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	struct mlx5_eswitch *master_esw;
+	struct mlx5_core_dev *dev0;
+	int i, j;
+	int err;
+
+	if (master_idx < 0)
+		return -EINVAL;
+
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
+	master_esw = dev0->priv.eswitch;
+	mlx5_ldev_for_each(i, 0, ldev) {
+		struct mlx5_eswitch *slave_esw;
+
+		if (i == master_idx)
+			continue;
+
+		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
+
+		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
+							       slave_esw,
+							       ldev->ports);
+		if (err)
+			goto err;
+	}
+	return 0;
+err:
+	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
+		struct mlx5_eswitch *slave_esw;
+
+		if (j == master_idx)
+			continue;
+		slave_esw = mlx5_lag_pf(ldev, j)->dev->priv.eswitch;
+		mlx5_eswitch_offloads_single_fdb_del_one(master_esw, slave_esw);
+	}
+	return err;
+}
+
+int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
+			       struct lag_tracker *tracker,
+			       enum mlx5_lag_mode mode)
+{
+	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	struct mlx5_core_dev *dev0;
+	int err;
+
+	if (idx < 0)
+		return -EINVAL;
+
+	dev0 = mlx5_lag_pf(ldev, idx)->dev;
+
+	mlx5_lag_remove_devices(ldev);
+
+	err = mlx5_activate_lag(ldev, tracker, mode, true);
+	if (err) {
+		mlx5_core_warn(dev0, "Failed to create LAG in shared FDB mode (%d)\n",
+			       err);
+		goto err_add_devices;
+	}
+
+	mlx5_lag_rescan_dev_locked(ldev, dev0, true);
+	err = mlx5_lag_reload_ib_reps_from_locked(ldev, 0, false);
+	if (err) {
+		mlx5_core_err(dev0, "Failed to enable lag\n");
+		goto err_rescan_drivers;
+	}
+
+	mlx5_lag_set_vports_agg_speed(ldev);
+	return 0;
+
+err_rescan_drivers:
+	mlx5_lag_rescan_dev_locked(ldev, dev0, false);
+	mlx5_deactivate_lag(ldev);
+err_add_devices:
+	mlx5_lag_add_devices(ldev);
+	mlx5_lag_reload_ib_reps_from_locked(ldev, 0, true);
+	return err;
+}
+
+void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev)
+{
+	int err;
+
+	mlx5_lag_remove_devices(ldev);
+
+	err = mlx5_deactivate_lag(ldev);
+	if (err)
+		return;
+
+	mlx5_lag_add_devices(ldev);
+	mlx5_lag_reload_ib_reps_from_locked(ldev,
+					    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV,
+					    true);
+}
-- 
2.44.0
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help