[dpdk-dev] [PATCH v3 6/9] net/mlx5: save bonding member ports information
From: Xueming Li <hidden>
Date: 2021-01-18 11:30:29
Subsystem:
networking drivers, the rest · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
Since kernel bonding netdev doesn't provide statistics counter that reflects all member ports, PMD has to manually summarize counters from each member ports. As a preparation, this patch collects bonding member port information and saves to shared context data. Signed-off-by: Xueming Li <redacted> Acked-by: Viacheslav Ovsiienko <redacted> --- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 4 +- drivers/net/mlx5/linux/mlx5_os.c | 91 ++++++++++++++++--------- drivers/net/mlx5/mlx5.c | 2 + drivers/net/mlx5/mlx5.h | 19 +++++- drivers/net/mlx5/mlx5_ethdev.c | 5 +- 5 files changed, 84 insertions(+), 37 deletions(-)
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index ac311de46d..84610a7bc0 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c@@ -150,8 +150,8 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) MLX5_ASSERT(priv); MLX5_ASSERT(priv->sh); - if (priv->bond_ifindex > 0) { - memcpy(ifname, priv->bond_name, MLX5_NAMESIZE); + if (priv->master && priv->sh->bond.ifindex > 0) { + memcpy(ifname, priv->sh->bond.ifname, MLX5_NAMESIZE); return 0; } ifindex = mlx5_ifindex(dev);
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 521a0a5789..47a7c3dff0 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c@@ -1417,19 +1417,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, */ MLX5_ASSERT(spawn->ifindex); priv->if_index = spawn->ifindex; - if (priv->pf_bond >= 0 && priv->master) { - /* Get bond interface info */ - err = mlx5_sysfs_bond_info(priv->if_index, - &priv->bond_ifindex, - priv->bond_name); - if (err) - DRV_LOG(ERR, "unable to get bond info: %s", - strerror(rte_errno)); - else - DRV_LOG(INFO, "PF device %u, bond device %u(%s)", - priv->if_index, priv->bond_ifindex, - priv->bond_name); - } eth_dev->data->dev_private = priv; priv->dev_data = eth_dev->data; eth_dev->data->mac_addrs = priv->mac;
@@ -1697,6 +1684,8 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) * Netlink RDMA group socket handle. * @param[in] owner * Rerepsentor owner PF index. + * @param[out] bond_info + * Pointer to bonding information. * * @return * negative value if no bonding device found, otherwise
@@ -1705,19 +1694,22 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) static int mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, const struct rte_pci_addr *pci_dev, - int nl_rdma, uint16_t owner) + int nl_rdma, uint16_t owner, + struct mlx5_bond_info *bond_info) { char ifname[IF_NAMESIZE + 1]; unsigned int ifindex; unsigned int np, i; - FILE *file = NULL; + FILE *bond_file = NULL, *file; int pf = -1; + int ret; /* * Try to get master device name. If something goes * wrong suppose the lack of kernel support and no * bonding devices. */ + memset(bond_info, 0, sizeof(*bond_info)); if (nl_rdma < 0) return -1; if (!strstr(ibv_dev->name, "bond"))
@@ -1741,15 +1733,15 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, /* Try to read bonding slave names from sysfs. */ MKSTR(slaves, "/sys/class/net/%s/master/bonding/slaves", ifname); - file = fopen(slaves, "r"); - if (file) + bond_file = fopen(slaves, "r"); + if (bond_file) break; } - if (!file) + if (!bond_file) return -1; /* Use safe format to check maximal buffer length. */ MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE); - while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { + while (fscanf(bond_file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { char tmp_str[IF_NAMESIZE + 32]; struct rte_pci_addr pci_addr; struct mlx5_switch_info info;
@@ -1762,13 +1754,7 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, " for netdev \"%s\"", ifname); continue; } - if (pci_dev->domain != pci_addr.domain || - pci_dev->bus != pci_addr.bus || - pci_dev->devid != pci_addr.devid || - pci_dev->function + owner != pci_addr.function) - continue; /* Slave interface PCI address match found. */ - fclose(file); snprintf(tmp_str, sizeof(tmp_str), "/sys/class/net/%s/phys_port_name", ifname); file = fopen(tmp_str, "rb");
@@ -1777,13 +1763,52 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET; if (fscanf(file, "%32s", tmp_str) == 1) mlx5_translate_port_name(tmp_str, &info); - if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY || - info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + fclose(file); + /* Only process PF ports. */ + if (info.name_type != MLX5_PHYS_PORT_NAME_TYPE_LEGACY && + info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + continue; + /* Check max bonding member. */ + if (info.port_name >= MLX5_BOND_MAX_PORTS) { + DRV_LOG(WARNING, "bonding index out of range, " + "please increase MLX5_BOND_MAX_PORTS: %s", + tmp_str); + break; + } + /* Match PCI address. */ + if (pci_dev->domain == pci_addr.domain && + pci_dev->bus == pci_addr.bus && + pci_dev->devid == pci_addr.devid && + pci_dev->function + owner == pci_addr.function) pf = info.port_name; - break; - } - if (file) + /* Get ifindex. */ + snprintf(tmp_str, sizeof(tmp_str), + "/sys/class/net/%s/ifindex", ifname); + file = fopen(tmp_str, "rb"); + if (!file) + break; + ret = fscanf(file, "%u", &ifindex); fclose(file); + if (ret != 1) + break; + /* Save bonding info. */ + strncpy(bond_info->ports[info.port_name].ifname, ifname, + sizeof(bond_info->ports[0].ifname)); + bond_info->ports[info.port_name].pci_addr = pci_addr; + bond_info->ports[info.port_name].ifindex = ifindex; + bond_info->n_port++; + } + if (pf >= 0) { + /* Get bond interface info */ + ret = mlx5_sysfs_bond_info(ifindex, &bond_info->ifindex, + bond_info->ifname); + if (ret) + DRV_LOG(ERR, "unable to get bond info: %s", + strerror(rte_errno)); + else + DRV_LOG(INFO, "PF device %u, bond device %u(%s)", + ifindex, bond_info->ifindex, bond_info->ifname); + } return pf; }
@@ -1838,6 +1863,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, unsigned int dev_config_vf; struct rte_eth_devargs eth_da = *req_eth_da; struct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */ + struct mlx5_bond_info bond_info; int ret = -1; if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -1869,7 +1895,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); bd = mlx5_device_bond_pci_match - (ibv_list[ret], &owner_pci, nl_rdma, owner_id); + (ibv_list[ret], &owner_pci, nl_rdma, owner_id, + &bond_info); if (bd >= 0) { /* * Bonding device detected. Only one match is allowed,
@@ -1978,6 +2005,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, MLX5_ASSERT(nd == 1); MLX5_ASSERT(np); for (i = 1; i <= np; ++i) { + list[ns].bond_info = &bond_info; list[ns].max_port = np; list[ns].phys_port = i; list[ns].phys_dev = ibv_match[0];
@@ -2068,6 +2096,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, */ for (i = 0; i != nd; ++i) { memset(&list[ns].info, 0, sizeof(list[ns].info)); + list[ns].bond_info = NULL; list[ns].max_port = 1; list[ns].phys_port = 1; list[ns].phys_dev = ibv_match[i];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d613ffd655..e170db948d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c@@ -927,6 +927,8 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, rte_errno = ENOMEM; goto exit; } + if (spawn->bond_info) + sh->bond = *spawn->bond_info; err = mlx5_os_open_device(spawn, config, sh); if (!sh->ctx) goto error;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e7afa438ce..508f98f8cd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h@@ -115,6 +115,7 @@ struct mlx5_dev_spawn_data { void *phys_dev; /**< Associated physical device. */ struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ struct rte_pci_device *pci_dev; /**< Backend PCI device. */ + struct mlx5_bond_info *bond_info; }; /** Key string for IPC. */
@@ -661,6 +662,19 @@ struct mlx5_flex_parser_profiles { void *obj; /* Flex parser node object. */ }; +/* Bonding device information. */ +struct mlx5_bond_info { + int n_port; /* Number of bond member ports. */ + uint32_t ifindex; + char ifname[MLX5_NAMESIZE + 1]; +#define MLX5_BOND_MAX_PORTS 2 + struct { + char ifname[MLX5_NAMESIZE + 1]; + uint32_t ifindex; + struct rte_pci_addr pci_addr; + } ports[MLX5_BOND_MAX_PORTS]; +}; + /* * Shared Infiniband device context for Master/Representors * which belong to same IB device with multiple IB ports.
@@ -671,6 +685,7 @@ struct mlx5_dev_ctx_shared { uint32_t devx:1; /* Opened with DV. */ uint32_t flow_hit_aso_en:1; /* Flow Hit ASO is supported. */ uint32_t max_port; /* Maximal IB device port index. */ + struct mlx5_bond_info bond; /* Bonding information. */ void *ctx; /* Verbs/DV/DevX context. */ void *pd; /* Protection Domain. */ uint32_t pdn; /* Protection Domain number. */
@@ -916,10 +931,8 @@ struct mlx5_priv { uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ uint32_t vport_meta_mask; /* Used for vport index field match mask. */ int32_t representor_id; /* RTE_ETH_REPR(), -1 if not a representor. */ - int32_t pf_bond; /* >=0 means PF index in bonding configuration. */ + int32_t pf_bond; /* >=0, representor owner PF index in bonding. */ unsigned int if_index; /* Associated kernel network device index. */ - uint32_t bond_ifindex; /**< Bond interface index. */ - char bond_name[MLX5_NAMESIZE]; /**< Bond interface name. */ /* RX/TX queues. */ unsigned int rxqs_n; /* RX queues array size. */ unsigned int txqs_n; /* TX queues array size. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 5341eb16c9..29389fc98f 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c@@ -42,7 +42,10 @@ mlx5_ifindex(const struct rte_eth_dev *dev) MLX5_ASSERT(priv); MLX5_ASSERT(priv->if_index); - ifindex = priv->bond_ifindex > 0 ? priv->bond_ifindex : priv->if_index; + if (priv->master && priv->sh->bond.ifindex > 0) + ifindex = priv->sh->bond.ifindex; + else + ifindex = priv->if_index; if (!ifindex) rte_errno = ENXIO; return ifindex;
--
2.25.1