Re: [PATCH v1] net/mlx5: support upstream rdma-core
From: Nélio Laranjeiro <hidden>
Date: 2017-08-24 14:59:58
On Thu, Aug 24, 2017 at 12:23:10PM +0000, Shachar Beiser wrote:
This removes the dependency on specific Mellanox OFED libraries by using the upstream rdma-core and linux upstream community code. Minimal requirements: rdma-core v16 and Kernel Linux 4.14.
Is not it also suppose to keep working with previous kernel if the user installs Mellanox OFED?
quoted hunk ↗ jump to hunk
Signed-off-by: Shachar Beiser <redacted> [...]diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
Is not it better to split this documentation in two subparts, one with for people with new kernel and rdma-core and the others with old kernel versions and Mellanox OFED?
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 14b739a..2de1c78 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile@@ -104,41 +104,20 @@ mlx5_autoconf.h.new: FORCE mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh $Q $(RM) -f -- '$@'[...] $Q sh -- '$<' '$@' \ - HAVE_VERBS_MLX5_OPCODE_TSO \ - infiniband/mlx5_hw.h \ - enum MLX5_OPCODE_TSO \ + HAVE_IBV_MLX5_MOD_MPW \ + infiniband/mlx5dv.h \ + enum MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED \ $(AUTOCONF_OUTPUT) - $Q sh -- '$<' '$@' \ - HAVE_ETHTOOL_LINK_MODE_25G \ - /usr/include/linux/ethtool.h \ - enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ - $(AUTOCONF_OUTPUT) - $Q sh -- '$<' '$@' \ - HAVE_ETHTOOL_LINK_MODE_50G \ - /usr/include/linux/ethtool.h \ - enum ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT \ - $(AUTOCONF_OUTPUT) - $Q sh -- '$<' '$@' \ - HAVE_ETHTOOL_LINK_MODE_100G \ - /usr/include/linux/ethtool.h \ - enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \ - $(AUTOCONF_OUTPUT) - $Q sh -- '$<' '$@' \ - HAVE_UPDATE_CQ_CI \ - infiniband/mlx5_hw.h \ - func ibv_mlx5_exp_update_cq_ci \ - $(AUTOCONF_OUTPUT) - # Create mlx5_autoconf.h or update it in case it differs from the new one.
Keep the ETHTOOL_LINK_MODE_* macros, it is still necessary for previous kernel versions.
quoted hunk ↗ jump to hunk
mlx5_autoconf.h: mlx5_autoconf.h.newdiff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index bd66a7c..c2e37a3 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c@@ -247,10 +247,8 @@ struct mlx5_args { .filter_ctrl = mlx5_dev_filter_ctrl, .rx_descriptor_status = mlx5_rx_descriptor_status, .tx_descriptor_status = mlx5_tx_descriptor_status, -#ifdef HAVE_UPDATE_CQ_CI .rx_queue_intr_enable = mlx5_rx_intr_enable, .rx_queue_intr_disable = mlx5_rx_intr_disable, -#endif }; static struct {@@ -442,7 +440,7 @@ struct mlx5_args { struct ibv_device *ibv_dev; int err = 0; struct ibv_context *attr_ctx = NULL; - struct ibv_device_attr device_attr; + struct ibv_device_attr_ex device_attr; unsigned int sriov; unsigned int mps; unsigned int tunnel_en;@@ -493,34 +491,24 @@ struct mlx5_args { PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) || (pci_dev->id.device_id == PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)); - /* - * Multi-packet send is supported by ConnectX-4 Lx PF as well - * as all ConnectX-5 devices. - */
This comment should be kept bellow.
quoted hunk ↗ jump to hunk
[...]@@ -539,13 +527,29 @@ struct mlx5_args { return -err; } ibv_dev = list[i]; - DEBUG("device opened"); - if (ibv_query_device(attr_ctx, &device_attr)) +#ifdef HAVE_IBV_MLX5_MOD_MPW + struct mlx5dv_context attrs_out; + mlx5dv_query_device(attr_ctx, &attrs_out); + if (attrs_out.flags & (MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW | + MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED)) { + INFO("Enhanced MPW is detected\n"); + mps = MLX5_MPW_ENHANCED; + } else if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { + INFO("MPW is detected\n"); + mps = MLX5_MPW; + } else { + INFO("MPW is disabled\n"); + mps = MLX5_MPW_DISABLED; + } +#else + mps = MLX5_MPW_DISABLED; +#endif
This does not guarantee you won't have fall in the faulty kernel. Take in consideration the following point, I have a kernel 4.13 and a rdma-core v20, this rdma-core library version embed the enum defined for the autoconf i.e. enum MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED in mlx5dv.h. This code will be available and executed on a faulty kernel version. Won't I face the issue?
quoted hunk ↗ jump to hunk
@@ -664,29 +660,32 @@ struct mlx5_args { priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; DEBUG("maximum RX indirection table size is %u", priv->ind_table_max_size); - priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & - IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); + priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps & + IBV_RAW_PACKET_CAP_CVLAN_STRIPPING); DEBUG("VLAN stripping is %ssupported", (priv->hw_vlan_strip ? "" : "not ")); - priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_SCATTER_FCS); + priv->hw_fcs_strip = + !!(device_attr_ex.orig_attr.device_cap_flags & + IBV_WQ_FLAGS_SCATTER_FCS);
Wrong indentation above.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5.rst b/drivers/net/mlx5/mlx5.rst
Seems this is an error (copy/paste from doc/guides/nics/mlx5.rst).
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 57f6237..f2acb61 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c@@ -97,21 +97,15 @@ struct ethtool_link_settings { #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 #endif -#ifndef HAVE_ETHTOOL_LINK_MODE_25G #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 -#endif -#ifndef HAVE_ETHTOOL_LINK_MODE_50G #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 -#endif -#ifndef HAVE_ETHTOOL_LINK_MODE_100G #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 -#endif #define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
This hunk should remain present.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 7dd3ebb..5b20fdd 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c@@ -1005,17 +1005,17 @@ struct mlx5_flow_action { } rte_flow->drop = 1; drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset); - *drop = (struct ibv_exp_flow_spec_action_drop){ - .type = IBV_EXP_FLOW_SPEC_ACTION_DROP, + *drop = (struct ibv_flow_spec_action_drop){ + .type = IBV_FLOW_SPEC_ACTION_DROP, .size = size, }; ++flow->ibv_attr->num_of_specs; - flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop); + flow->offset += sizeof(struct ibv_flow_spec_action_drop); rte_flow->ibv_attr = flow->ibv_attr; if (!priv->started) return rte_flow; rte_flow->qp = priv->flow_drop_queue->qp; - rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp, + rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp, rte_flow->ibv_attr);
Wrong Indentation.
quoted hunk ↗ jump to hunk
if (!rte_flow->ibv_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,@@ -1124,7 +1122,7 @@ struct mlx5_flow_action { } if (!priv->started) return rte_flow; - rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp, + rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp, rte_flow->ibv_attr);
Wrong Indentation.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h index 608072f..c1c4935 100644 --- a/drivers/net/mlx5/mlx5_prm.h +++ b/drivers/net/mlx5/mlx5_prm.h@@ -35,13 +35,14 @@ #define RTE_PMD_MLX5_PRM_H_ #include <assert.h> +#include <rte_byteorder.h>
Where is it necessary?
quoted hunk ↗ jump to hunk
/* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif -#include <infiniband/mlx5_hw.h> +#include <infiniband/mlx5dv.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif@@ -89,10 +90,6 @@ /* Default max packet length to be inlined. */ #define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE) -#ifndef HAVE_VERBS_MLX5_OPCODE_TSO -#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ -#endif -
Should be in another commit fixing:
3cf87e68d97b ("net/mlx5: remove old MLNX OFED 3.3 verification")
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 35c5cb4..dc54714 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c@@ -37,15 +37,13 @@ #include <string.h> #include <stdint.h> #include <fcntl.h> -
This empty should remain.
quoted hunk ↗ jump to hunk
/* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> -#include <infiniband/arch.h> -#include <infiniband/mlx5_hw.h> +#include <infiniband/mlx5dv.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif@@ -55,7 +53,9 @@ #include <rte_ethdev.h> #include <rte_common.h> #include <rte_interrupts.h> +#include <rte_hexdump.h> #include <rte_debug.h> +#include <rte_io.h>
Where are those includes necessary?
quoted hunk ↗ jump to hunk
@@ -1329,13 +1352,12 @@ struct priv *priv = mlx5_get_priv(dev); struct rxq *rxq = (*priv->rxqs)[rx_queue_id]; struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq); - int ret; + int ret = 0;
This should be in its own patch fixing the issue.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index fe9e7ea..991ea94 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c@@ -42,8 +42,7 @@ #pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> -#include <infiniband/mlx5_hw.h> -#include <infiniband/arch.h> +#include <infiniband/mlx5dv.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif@@ -603,7 +602,7 @@ ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - naddr = htonll(addr); + naddr = rte_cpu_to_be_64(addr);
All those replace from hton* ntoh* should be done in their own patch.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index b3b161d..72d0330 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h@@ -292,8 +295,8 @@ struct txq_ctrl { extern uint8_t rss_hash_default_key[]; extern const size_t rss_hash_default_key_len; -size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *, - size_t, enum hash_rxq_type); +size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *, size_t, + enum hash_rxq_type);
Indentation issue.
quoted hunk ↗ jump to hunk
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c index 37854a7..5bef200 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c@@ -43,8 +43,7 @@ #pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> -#include <infiniband/mlx5_hw.h> -#include <infiniband/arch.h> +#include <infiniband/mlx5dv.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif@@ -561,7 +560,7 @@ return; } for (i = 0; i < n; ++i) - wq[i].addr = htonll((uintptr_t)elts[i]->buf_addr + + wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr + RTE_PKTMBUF_HEADROOM);
Same here this should be in another commit.
quoted hunk ↗ jump to hunk
rxq->rq_ci += n; rte_wmb();diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 4b0b532..3156ad2 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c@@ -241,16 +247,16 @@ if (priv->mps == MLX5_MPW_ENHANCED) tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg; /* MRs will be registered in mp2mr[] later. */ - attr.cq = (struct ibv_exp_cq_init_attr){ + attr.cq = (struct ibv_cq_init_attr_ex){ .comp_mask = 0, }; cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ? ((desc / MLX5_TX_COMP_THRESH) - 1) : 1; if (priv->mps == MLX5_MPW_ENHANCED) cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV; - tmpl.cq = ibv_exp_create_cq(priv->ctx, + tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, - NULL, NULL, 0, &attr.cq); + NULL, NULL, 0);
Indentation issue. Please split this in the three commits: - first fixing the return value, - one changing the hton/ntoh by rte equivalents, - and this one. Thanks, -- Nélio Laranjeiro 6WIND