[PATCH 02/31] net/bnxt: add Tx batching support
From: Ajit Khaparde <ajit.khaparde@broadcom.com>
Date: 2018-06-19 21:31:07
Subsystem:
networking drivers, the rest · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
Batch more than one Tx requests such that only one completion is generarted by the HW. We request a Tx completion for first and last Tx request in the batch. Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com> --- drivers/net/bnxt/bnxt_cpr.h | 12 ++++++ drivers/net/bnxt/bnxt_txq.h | 1 + drivers/net/bnxt/bnxt_txr.c | 97 +++++++++++++++++++++++++++++---------------- 3 files changed, 75 insertions(+), 35 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 6c1e6d2b0..5b36bf7d7 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h@@ -22,12 +22,20 @@ #define ADV_RAW_CMP(idx, n) ((idx) + (n)) #define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1) #define RING_CMP(ring, idx) ((idx) & (ring)->ring_mask) +#define RING_CMPL(ring_mask, idx) ((idx) & (ring_mask)) #define NEXT_CMP(idx) RING_CMP(ADV_RAW_CMP(idx, 1)) #define FLIP_VALID(cons, mask, val) ((cons) >= (mask) ? !(val) : (val)) #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) #define DB_CP_FLAGS (DB_KEY_CP | DB_IDX_VALID | DB_IRQ_DIS) +#define NEXT_CMPL(cpr, idx, v, inc) do { \ + (idx) += (inc); \ + if (unlikely((idx) == (cpr)->cp_ring_struct->ring_size)) { \ + (v) = !(v); \ + idx = 0; \ + } \ +} while (0) #define B_CP_DB_REARM(cpr, raw_cons) \ rte_write32((DB_CP_REARM_FLAGS | \ RING_CMP(((cpr)->cp_ring_struct), raw_cons)), \
@@ -50,6 +58,10 @@ rte_write32((DB_CP_FLAGS | \ RING_CMP(((cpr)->cp_ring_struct), raw_cons)), \ ((cpr)->cp_doorbell)) +#define B_CP_DB(cpr, raw_cons, ring_mask) \ + rte_write32((DB_CP_FLAGS | \ + RING_CMPL((ring_mask), raw_cons)), \ + ((cpr)->cp_doorbell)) struct bnxt_ring; struct bnxt_cp_ring_info {
diff --git a/drivers/net/bnxt/bnxt_txq.h b/drivers/net/bnxt/bnxt_txq.h
index 720ca90cf..f2c712a75 100644
--- a/drivers/net/bnxt/bnxt_txq.h
+++ b/drivers/net/bnxt/bnxt_txq.h@@ -24,6 +24,7 @@ struct bnxt_tx_queue { uint8_t wthresh; /* Write-back threshold reg */ uint32_t ctx_curr; /* Hardware context states */ uint8_t tx_deferred_start; /* not in global dev start */ + uint8_t cmpl_next; /* Next BD to trigger a compl */ struct bnxt *bp; int index;
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 470fddd56..0fdf0fd08 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c@@ -114,7 +114,9 @@ static inline uint32_t bnxt_tx_avail(struct bnxt_tx_ring_info *txr) } static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt, - struct bnxt_tx_queue *txq) + struct bnxt_tx_queue *txq, + uint16_t *coal_pkts, + uint16_t *cmpl_next) { struct bnxt_tx_ring_info *txr = txq->tx_ring; struct tx_bd_long *txbd;
@@ -146,8 +148,15 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt, return -ENOMEM; txbd = &txr->tx_desc_ring[txr->tx_prod]; - txbd->opaque = txr->tx_prod; + txbd->opaque = *coal_pkts; txbd->flags_type = tx_buf->nr_bds << TX_BD_LONG_FLAGS_BD_CNT_SFT; + txbd->flags_type |= TX_BD_SHORT_FLAGS_COAL_NOW; + if (!*cmpl_next) { + txbd->flags_type |= TX_BD_LONG_FLAGS_NO_CMPL; + } else { + *coal_pkts = 0; + *cmpl_next = false; + } txbd->len = tx_pkt->data_len; if (txbd->len >= 2014) txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
@@ -235,7 +244,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt, txbd = &txr->tx_desc_ring[txr->tx_prod]; txbd->address = rte_cpu_to_le_32(rte_mbuf_data_iova(m_seg)); - txbd->flags_type = TX_BD_SHORT_TYPE_TX_BD_SHORT; + txbd->flags_type |= TX_BD_SHORT_TYPE_TX_BD_SHORT; txbd->len = m_seg->data_len; m_seg = m_seg->next;
@@ -278,35 +287,44 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq) struct bnxt_cp_ring_info *cpr = txq->cp_ring; uint32_t raw_cons = cpr->cp_raw_cons; uint32_t cons; - int nb_tx_pkts = 0; + uint32_t nb_tx_pkts = 0; struct tx_cmpl *txcmp; + struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; + struct bnxt_ring *cp_ring_struct = cpr->cp_ring_struct; + uint32_t ring_mask = cp_ring_struct->ring_mask; + uint32_t opaque = 0; - if ((txq->tx_ring->tx_ring_struct->ring_size - - (bnxt_tx_avail(txq->tx_ring))) > - txq->tx_free_thresh) { - while (1) { - cons = RING_CMP(cpr->cp_ring_struct, raw_cons); - txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons]; - - if (!CMP_VALID(txcmp, raw_cons, cpr->cp_ring_struct)) - break; - cpr->valid = FLIP_VALID(cons, - cpr->cp_ring_struct->ring_mask, - cpr->valid); - - if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2) - nb_tx_pkts++; - else - RTE_LOG_DP(DEBUG, PMD, - "Unhandled CMP type %02x\n", - CMP_TYPE(txcmp)); - raw_cons = NEXT_RAW_CMP(raw_cons); - } - if (nb_tx_pkts) - bnxt_tx_cmp(txq, nb_tx_pkts); + if (((txq->tx_ring->tx_prod - txq->tx_ring->tx_cons) & + txq->tx_ring->tx_ring_struct->ring_mask) < txq->tx_free_thresh) + return 0; + + do { + cons = RING_CMPL(ring_mask, raw_cons); + txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons]; + rte_prefetch_non_temporal(&cp_desc_ring[(cons + 2) & + ring_mask]); + + if (!CMPL_VALID(txcmp, cpr->valid)) + break; + opaque = rte_cpu_to_le_32(txcmp->opaque); + NEXT_CMPL(cpr, cons, cpr->valid, 1); + rte_prefetch0(&cp_desc_ring[cons]); + + if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2) + nb_tx_pkts += opaque; + else + RTE_LOG_DP(ERR, PMD, + "Unhandled CMP type %02x\n", + CMP_TYPE(txcmp)); + raw_cons = cons; + } while (nb_tx_pkts < ring_mask); + + if (nb_tx_pkts) { + bnxt_tx_cmp(txq, nb_tx_pkts); cpr->cp_raw_cons = raw_cons; - B_CP_DIS_DB(cpr, cpr->cp_raw_cons); + B_CP_DB(cpr, cpr->cp_raw_cons, ring_mask); } + return nb_tx_pkts; }
@@ -315,8 +333,8 @@ uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, { struct bnxt_tx_queue *txq = tx_queue; uint16_t nb_tx_pkts = 0; - uint16_t db_mask = txq->tx_ring->tx_ring_struct->ring_size >> 2; - uint16_t last_db_mask = 0; + uint16_t coal_pkts = 0; + uint16_t cmpl_next = txq->cmpl_next; /* Handle TX completions */ bnxt_handle_tx_cp(txq);
@@ -326,16 +344,25 @@ uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, PMD_DRV_LOG(DEBUG, "Tx q stopped;return\n"); return 0; } + + txq->cmpl_next = 0; /* Handle TX burst request */ for (nb_tx_pkts = 0; nb_tx_pkts < nb_pkts; nb_tx_pkts++) { - if (bnxt_start_xmit(tx_pkts[nb_tx_pkts], txq)) { + int rc; + + /* Request a completion on first and last packet */ + cmpl_next |= (nb_pkts == nb_tx_pkts + 1); + coal_pkts++; + rc = bnxt_start_xmit(tx_pkts[nb_tx_pkts], txq, + &coal_pkts, &cmpl_next); + + if (unlikely(rc)) { + /* Request a completion in next cycle */ + txq->cmpl_next = 1; break; - } else if ((nb_tx_pkts & db_mask) != last_db_mask) { - B_TX_DB(txq->tx_ring->tx_doorbell, - txq->tx_ring->tx_prod); - last_db_mask = nb_tx_pkts & db_mask; } } + if (nb_tx_pkts) B_TX_DB(txq->tx_ring->tx_doorbell, txq->tx_ring->tx_prod);
--
2.15.1 (Apple Git-101)