Thread (126 messages) 126 messages, 11 authors, 2021-10-03

[dpdk-dev] [PATCH v5 2/9] net/mlx5: support representor of sub function

From: Xueming Li <hidden>
Date: 2021-03-28 13:49:05
Subsystem: networking drivers, the rest · Maintainers: Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds

This patch adds support for SF representor. Similar to VF representor,
switch port name of SF representor in phys_port_name sysfs key is
"pf<x>sf<y>".

Device representor argument is "representors=sf[list]", list member
could be mix of instance and range. Example:
  representors=sf[0,2,4,8-12,-1]

To probe VF representor and SF representor, need to separate into 2
devices:
  -a <BDF>,representor=vf[list] -a <BDF>,representor=sf[list]

Signed-off-by: Xueming Li <redacted>
Acked-by: Viacheslav Ovsiienko <redacted>
---
 doc/guides/nics/mlx5.rst                |  58 +++++++++--
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |   2 +
 drivers/net/mlx5/linux/mlx5_os.c        | 127 +++++++++++++++++++-----
 drivers/net/mlx5/mlx5.c                 |   1 +
 drivers/net/mlx5/mlx5.h                 |   9 ++
 drivers/net/mlx5/mlx5_ethdev.c          | 100 +++++++++++++++++++
 6 files changed, 263 insertions(+), 34 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a2cfc51b2a..2e2909d82d 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -931,14 +931,18 @@ Driver options
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
-  existing port (or VF) representors configured on the device.
+  existing port (PF, VF or SF) representors configured on the device.
 
   It is a standard parameter whose format is described in
   :ref:`ethernet_device_standard_device_arguments`.
 
-  For instance, to probe port representors 0 through 2::
+  For instance, to probe VF port representors 0 through 2::
 
-    representor=[0-2]
+    representor=vf[0-2]
+
+  To probe SF port representors 0 through 2::
+
+    representor=sf[0-2]
 
 - ``max_dump_files_num`` parameter [int]
 
@@ -1351,15 +1355,15 @@ Quick Start Guide on OFED/EN
 Enable switchdev mode
 ---------------------
 
-Switchdev mode is a mode in E-Switch, that binds between representor and VF.
-Representor is a port in DPDK that is connected to a VF in such a way
-that assuming there are no offload flows, each packet that is sent from the VF
-will be received by the corresponding representor. While each packet that is
-sent to a representor will be received by the VF.
+Switchdev mode is a mode in E-Switch, that binds between representor and VF or SF.
+Representor is a port in DPDK that is connected to a VF or SF in such a way
+that assuming there are no offload flows, each packet that is sent from the VF or SF
+will be received by the corresponding representor. While each packet that is or SF
+sent to a representor will be received by the VF or SF.
 This is very useful in case of SRIOV mode, where the first packet that is sent
-by the VF will be received by the DPDK application which will decide if this
+by the VF or SF will be received by the DPDK application which will decide if this
 flow should be offloaded to the E-Switch. After offloading the flow packet
-that the VF that are matching the flow will not be received any more by
+that the VF or SF that are matching the flow will not be received any more by
 the DPDK application.
 
 1. Enable SRIOV mode::
@@ -1386,6 +1390,40 @@ the DPDK application.
 
         echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
 
+SubFunction representor support
+-------------------------------
+SubFunction is a portion of the PCI device, a SF netdev has its own
+dedicated queues(txq, rxq). A SF netdev supports E-Switch representation
+offload similar to existing PF and VF representors. A SF shares PCI
+level resources with other SFs and/or with its parent PCI function.
+
+1. Configure SF feature::
+
+        mlxconfig -d <mst device> set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
+
+        Value of PF_BAR2_SIZE:
+
+            0: 8 SFs
+            1: 16 SFs
+            2: 32 SFs
+            3: 64 SFs
+
+2. Reset the FW::
+
+        mlxfwreset -d <mst device> reset
+
+3. Enable switchdev mode::
+
+        echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
+
+4. Create SF::
+
+        mlnx-sf -d <PCI_BDF> -a create
+
+5. Probe SF representor::
+
+        testpmd> port attach <PCI_BDF>,representor=sf0,dv_flow_en=1
+
 Performance tuning
 ------------------
 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index cb692b22f2..2127fcfbfa 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -1010,6 +1010,8 @@ mlx5_sysfs_check_switch_info(bool device_dir,
 	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
 		/* Fallthrough */
 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* Fallthrough */
+	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
 		/* New representors naming schema. */
 		switch_info->representor = 1;
 		break;
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5740214950..aac923ea39 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -701,6 +701,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
  *   Verbs device parameters (name, port, switch_info) to spawn.
  * @param config
  *   Device configuration parameters.
+ * @param config
+ *   Device arguments.
  *
  * @return
  *   A valid Ethernet device object on success, NULL otherwise and rte_errno
@@ -712,7 +714,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
 static struct rte_eth_dev *
 mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	       struct mlx5_dev_spawn_data *spawn,
-	       struct mlx5_dev_config *config)
+	       struct mlx5_dev_config *config,
+	       struct rte_eth_devargs *eth_da)
 {
 	const struct mlx5_switch_info *switch_info = &spawn->info;
 	struct mlx5_dev_ctx_shared *sh = NULL;
@@ -742,34 +745,82 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 
 	/* Determine if this port representor is supposed to be spawned. */
 	if (switch_info->representor && dpdk_dev->devargs) {
-		struct rte_eth_devargs eth_da;
-
-		err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
-		if (err) {
-			rte_errno = -err;
-			DRV_LOG(ERR, "failed to process device arguments: %s",
-				strerror(rte_errno));
-			return NULL;
-		}
-		if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
-			/* Representor not specified. */
+		switch (eth_da->type) {
+		case RTE_ETH_REPRESENTOR_SF:
+			if (switch_info->name_type !=
+					MLX5_PHYS_PORT_NAME_TYPE_PFSF) {
+				rte_errno = EBUSY;
+				return NULL;
+			}
+			break;
+		case RTE_ETH_REPRESENTOR_VF:
+			/* Allows HPF representor index -1 as exception. */
+			if (!(spawn->info.port_name == -1 &&
+			      switch_info->name_type ==
+					MLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&
+			    switch_info->name_type !=
+					MLX5_PHYS_PORT_NAME_TYPE_PFVF) {
+				rte_errno = EBUSY;
+				return NULL;
+			}
+			break;
+		case RTE_ETH_REPRESENTOR_NONE:
 			rte_errno = EBUSY;
 			return NULL;
-		}
-		if (eth_da.type != RTE_ETH_REPRESENTOR_VF) {
+			break;
+		default:
 			rte_errno = ENOTSUP;
 			DRV_LOG(ERR, "unsupported representor type: %s",
 				dpdk_dev->devargs->args);
 			return NULL;
 		}
-		for (i = 0; i < eth_da.nb_representor_ports; ++i)
-			if (eth_da.representor_ports[i] ==
+		/* Check controller ID: */
+		for (i = 0; i < eth_da->nb_mh_controllers; ++i)
+			if (eth_da->mh_controllers[i] ==
+			    (uint16_t)switch_info->ctrl_num)
+				break;
+		if (eth_da->nb_mh_controllers &&
+		    i == eth_da->nb_mh_controllers) {
+			rte_errno = EBUSY;
+			return NULL;
+		}
+		/* Check SF/VF ID: */
+		for (i = 0; i < eth_da->nb_representor_ports; ++i)
+			if (eth_da->representor_ports[i] ==
 			    (uint16_t)switch_info->port_name)
 				break;
-		if (i == eth_da.nb_representor_ports) {
+		if (eth_da->type != RTE_ETH_REPRESENTOR_PF &&
+		    i == eth_da->nb_representor_ports) {
 			rte_errno = EBUSY;
 			return NULL;
 		}
+		/* Check PF ID. Check after repr port to avoid warning flood. */
+		if (spawn->pf_bond >= 0) {
+			for (i = 0; i < eth_da->nb_ports; ++i)
+				if (eth_da->ports[i] ==
+				    (uint16_t)switch_info->pf_num)
+					break;
+			if (eth_da->nb_ports && i == eth_da->nb_ports) {
+				/* For backward compatibility, bonding
+				 * representor syntax supported with limitation,
+				 * device iterator won't find it:
+				 *    <PF1_BDF>,representor=#
+				 */
+				if (switch_info->pf_num > 0 &&
+				    eth_da->ports[0] == 0) {
+					DRV_LOG(WARNING, "Representor on Bonding PF should use pf#vf# format: %s",
+						dpdk_dev->devargs->args);
+				} else {
+					rte_errno = EBUSY;
+					return NULL;
+				}
+			}
+		} else if (eth_da->nb_ports > 1 || eth_da->ports[0]) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "PF id not supported by non-bond device: %s",
+				dpdk_dev->devargs->args);
+			return NULL;
+		}
 	}
 	/* Build device name. */
 	if (spawn->pf_bond <  0) {
@@ -777,8 +828,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (!switch_info->representor)
 			strlcpy(name, dpdk_dev->name, sizeof(name));
 		else
-			snprintf(name, sizeof(name), "%s_representor_%u",
-				 dpdk_dev->name, switch_info->port_name);
+			snprintf(name, sizeof(name), "%s_representor_%s%u",
+				 dpdk_dev->name,
+				 switch_info->name_type ==
+				 MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
+				 switch_info->port_name);
 	} else {
 		/* Bonding device. */
 		if (!switch_info->representor)
@@ -786,9 +840,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 				 dpdk_dev->name,
 				 mlx5_os_get_dev_device_name(spawn->phys_dev));
 		else
-			snprintf(name, sizeof(name), "%s_%s_representor_%u",
+			snprintf(name, sizeof(name), "%s_%s_representor_%s%u",
 				 dpdk_dev->name,
 				 mlx5_os_get_dev_device_name(spawn->phys_dev),
+				 switch_info->name_type ==
+				 MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
 				 switch_info->port_name);
 	}
 	/* check if the device is already spawned */
@@ -1063,9 +1119,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	priv->vport_id = switch_info->representor ?
 			 switch_info->port_name + 1 : -1;
 #endif
-	/* representor_id field keeps the unmodified VF index. */
-	priv->representor_id = switch_info->representor ?
-			       switch_info->port_name : -1;
+	priv->representor_id = mlx5_representor_id_encode(switch_info);
 	/*
 	 * Look for sibling devices in order to reuse their switch domain
 	 * if any, otherwise allocate one.
@@ -1839,6 +1893,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	struct mlx5_dev_spawn_data *list = NULL;
 	struct mlx5_dev_config dev_config;
 	unsigned int dev_config_vf;
+	struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
 	int ret;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -1849,6 +1904,27 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			strerror(rte_errno));
 		return -rte_errno;
 	}
+	if (pci_dev->device.devargs) {
+		/* Parse representor information from device argument. */
+		if (pci_dev->device.devargs->cls_str)
+			ret = rte_eth_devargs_parse
+				(pci_dev->device.devargs->cls_str, &eth_da);
+		if (ret) {
+			DRV_LOG(ERR, "failed to parse device arguments: %s",
+				pci_dev->device.devargs->cls_str);
+			return -rte_errno;
+		}
+		if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
+			/* Support legacy device argument */
+			ret = rte_eth_devargs_parse
+				(pci_dev->device.devargs->args, &eth_da);
+			if (ret) {
+				DRV_LOG(ERR, "failed to parse device arguments: %s",
+					pci_dev->device.devargs->args);
+				return -rte_errno;
+			}
+		}
+	}
 	errno = 0;
 	ibv_list = mlx5_glue->get_device_list(&ret);
 	if (!ibv_list) {
@@ -2021,6 +2097,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 				case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
 					/* Fallthrough */
 				case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+					/* Fallthrough */
+				case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
 					if (list[ns].info.pf_num == bd)
 						ns++;
 					break;
@@ -2198,7 +2276,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		dev_config.log_hp_size = MLX5_ARG_UNSET;
 		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
 						 &list[i],
-						 &dev_config);
+						 &dev_config,
+						 &eth_da);
 		if (!list[i].eth_dev) {
 			if (rte_errno != EBUSY && rte_errno != EEXIST)
 				break;
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index fb586317ca..22058a0ad5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1454,6 +1454,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
 	.xstats_get_names = mlx5_xstats_get_names,
 	.fw_version_get = mlx5_fw_version_get,
 	.dev_infos_get = mlx5_dev_infos_get,
+	.representor_info_get = mlx5_representor_info_get,
 	.read_clock = mlx5_txpp_read_clock,
 	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
 	.vlan_filter_set = mlx5_vlan_filter_set,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8e8727a6c5..33c6b39a1e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1032,6 +1032,15 @@ int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 /* mlx5_ethdev.c */
 
 int mlx5_dev_configure(struct rte_eth_dev *dev);
+int mlx5_representor_info_get(struct rte_eth_dev *dev,
+			      struct rte_eth_representor_info *info);
+#define MLX5_REPRESENTOR_ID(pf, type, repr) \
+		(((pf) << 14) + ((type) << 12) + ((repr) & 0xfff))
+#define MLX5_REPRESENTOR_REPR(repr_id) \
+		((repr_id) & 0xfff)
+#define MLX5_REPRESENTOR_TYPE(repr_id) \
+		(((repr_id) >> 12) & 3)
+uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info);
 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver,
 			size_t fw_size);
 int mlx5_dev_infos_get(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 51b39ddde5..1ffb13cf2e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -377,6 +377,106 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	return 0;
 }
 
+/**
+ * Calculate representor ID from port switch info.
+ *
+ * Uint16 representor ID bits definition:
+ *   pf: 2
+ *   type: 2
+ *   vf/sf: 12
+ *
+ * @param info
+ *   Port switch info.
+ *
+ * @return
+ *   Encoded representor ID.
+ */
+uint16_t
+mlx5_representor_id_encode(const struct mlx5_switch_info *info)
+{
+	enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF;
+	uint16_t repr = info->port_name;
+
+	if (info->representor == 0)
+		return UINT16_MAX;
+	if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF)
+		type = RTE_ETH_REPRESENTOR_SF;
+	if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF)
+		repr = UINT16_MAX;
+	return MLX5_REPRESENTOR_ID(info->pf_num, type, repr);
+}
+
+/**
+ * DPDK callback to get information about representor.
+ *
+ * Representor ID bits definition:
+ *   vf/sf: 12
+ *   type: 2
+ *   pf: 2
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] info
+ *   Nullable info structure output buffer.
+ *
+ * @return
+ *   negative on error, or the number of representor ranges.
+ */
+int
+mlx5_representor_info_get(struct rte_eth_dev *dev,
+			  struct rte_eth_representor_info *info)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int n_type = 3; /* Number of representor types, VF, HPF and SF. */
+	int n_pf = 2; /* Number of PFs. */
+	int i = 0, pf;
+
+	if (info == NULL)
+		goto out;
+	info->controller = 0;
+	info->pf = priv->pf_bond >= 0 ? priv->pf_bond : 0;
+	for (pf = 0; pf < n_pf; ++pf) {
+		/* VF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = 0;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dvf", pf);
+		i++;
+		/* HPF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = UINT16_MAX;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dvf", pf);
+		i++;
+		/* SF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = 0;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dsf", pf);
+		i++;
+	}
+out:
+	return n_type * n_pf;
+}
+
 /**
  * Get firmware version of a device.
  *
-- 
2.25.1
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help