[PATCH net-next 2/2] net: macb: distribute evenly Tx SRAM segments
From: Théo Lebrun <theo.lebrun@bootlin.com>
Date: 2026-03-05 17:20:26
Also in:
lkml
Subsystem:
atmel macb ethernet driver, networking drivers, the rest · Maintainers:
Théo Lebrun, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
GEM has registers to configure the Tx SRAM segments distribution across
queues. The reset value is appropriate (even spread) but we need to
take care if/when the number of active queues is modified (or if we inherited
unevenly initialised hardware from bootloader).
To distribute segments, we take as input the number of queues
(bp->num_queues) and the number of segments (found inside DCFG6).
The output is a number of segments for each queue, encoded as a
power-of-two exponent (eg 2 for queue 0 means it has 2^2=4 segments).
As the distribution logic is quite complex (at least its initial
versions had bugs), it is kunit-tested and those tests live at the end
of macb_main.c. To test:
⟩ env --unset=CROSS_COMPILE make ARCH=um mrproper
⟩ env --unset=CROSS_COMPILE ./tools/testing/kunit/kunit.py run \
--kconfig_add CONFIG_NET=y \
--kconfig_add CONFIG_COMMON_CLK=y \
--kconfig_add CONFIG_MACB=y 'macb*'
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
drivers/net/ethernet/cadence/Kconfig | 6 ++
drivers/net/ethernet/cadence/macb.h | 5 ++
drivers/net/ethernet/cadence/macb_main.c | 135 +++++++++++++++++++++++++++++++
3 files changed, 146 insertions(+)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 5b2a461dfd28..3ae7123352f5 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig@@ -51,4 +51,10 @@ config MACB_PCI To compile this driver as a module, choose M here: the module will be called macb_pci. +config MACB_KUNIT_TEST + bool "KUnit test for MACB" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on MACB + default KUNIT_ALL_TESTS + endif # NET_VENDOR_CADENCE
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 30fa65e2bdf2..81fdd17b34db 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h@@ -193,6 +193,9 @@ #define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ #define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ +#define GEM_TXQSEGALLOC_LOWER 0x05A0 /* Tx queue segment allocation (low) */ +#define GEM_TXQSEGALLOC_UPPER 0x05A4 /* Tx queue segment allocation (high) */ + /* Screener Type 2 match registers */ #define GEM_SCRT2 0x540
@@ -543,6 +546,8 @@ #define GEM_PBUF_CUTTHRU_SIZE 1 #define GEM_DAW64_OFFSET 23 #define GEM_DAW64_SIZE 1 +#define GEM_SEGMENTS_BIT_SIZE_OFFSET 16 +#define GEM_SEGMENTS_BIT_SIZE_SIZE 3 /* Bitfields in DCFG8. */ #define GEM_T1SCR_OFFSET 24
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index bac83a2b4c4d..022577756eab 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c@@ -2751,6 +2751,59 @@ static u32 macb_dbw(struct macb *bp) } } +/* + * Distribute evenly available segments across queues. The computation is + * complex because (1) segments are counted in powers of two and (2) a queue + * can only use up to 8 segments. There are four types of cases: + * - Sharing all segments equally is doable. Take num_queues=4 and + * num_segments=16. Each queue will get 2^2=4 segments. + * - Sharing all segments is doable. Take num_queues=5 and num_segments=16. + * Three queues will get 2^2=4 segments and two will get 2^1=2 segments. + * - Sharing all segments is not doable because not enough queues are + * available. Take num_queues=1 and num_segments=16; queue 0 can only have 8 + * segments. + * - Sharing all segments is not doable because not enough segments are + * available. Take num_queues=4 and num_segments=2. + * + * We start by computing the power each queue will have. For num_queues=5 and + * num_segments=16, each queue will have at least 2^1 segments. That leaves us + * with remaining_segments=6. If we increase the power for a queue, we get a + * delta of 2 (2^2-2^1). The first three queues will therefore be advantaged + * and each have 2^2 segments. The remaining 2 queues will only have 2^1 + * segments. + */ +static u64 gem_sram_distribute_segments(unsigned int num_queues, + unsigned int num_segments) +{ + unsigned int pow, remaining_segments, i; + unsigned int num_advantaged_queues = 0; + u64 val = 0; + + /* pow=0 for all queues. ilog2(0) is dangerous. */ + if (num_queues >= num_segments) + return 0; + + pow = min(ilog2(num_segments / num_queues), 3); + remaining_segments = num_segments - num_queues * (1U << pow); + + /* + * We can only distribute remaining segments if (1) there are remaining + * segments and (2) we did not reach the max segments per queue (2^3). 
+ */ + if (remaining_segments != 0 && pow != 3) { + unsigned int delta = (1U << (pow + 1)) - (1U << pow); + + num_advantaged_queues = remaining_segments / delta; + } + + for (i = 0; i < num_advantaged_queues; i++) + val |= ((pow + 1) & 0b11) << (i * 4); + for (i = num_advantaged_queues; i < num_queues; i++) + val |= (pow & 0b11) << (i * 4); + + return val; +} + /* Configure the receive DMA engine * - use the correct receive buffer size * - set best burst length for DMA operations
@@ -2832,6 +2885,19 @@ static void macb_init_hw(struct macb *bp) if (bp->caps & MACB_CAPS_JUMBO) bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK; + /* + * Distribute Tx SRAM segments evenly based on active number of queues. + */ + if (macb_is_gem(bp)) { + unsigned int num_segments; + u64 val; + + num_segments = 1U << GEM_BFEXT(SEGMENTS_BIT_SIZE, gem_readl(bp, DCFG6)); + val = gem_sram_distribute_segments(bp->num_queues, num_segments); + gem_writel(bp, TXQSEGALLOC_LOWER, val); + gem_writel(bp, TXQSEGALLOC_UPPER, val >> 32); + } + macb_configure_dma(bp); /* Enable RX partial store and forward and set watermark */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver");
MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
MODULE_ALIAS("platform:macb");

#ifdef CONFIG_MACB_KUNIT_TEST
#include <kunit/test.h>

/* One parameter set for the Tx SRAM segment distribution test. */
struct macb_sram_segments_case {
	unsigned int num_queues, num_segments;
};

/*
 * Check the value returned by gem_sram_distribute_segments() for one
 * parameter set:
 *  - every queue that can get a segment holds a valid exponent (0..3),
 *  - every 4-bit field past those queues is zero,
 *  - the total number of assigned segments is the most that can be handed
 *    out: all of them, or 8 per queue when there are too few queues.
 */
static void macb_sram_segments_test(struct kunit *test)
{
	const struct macb_sram_segments_case *p = test->param_value;
	u64 val = gem_sram_distribute_segments(p->num_queues, p->num_segments);
	unsigned int i, sum_segments = 0, max_assigned_segments;
	unsigned int num_queues = min(p->num_queues, p->num_segments);

	for (i = 0; i < num_queues; i++) {
		/* Look at the whole 4-bit field so stray high bits are caught. */
		unsigned int q_pow = (val >> (i * 4)) & 0xf;

		KUNIT_ASSERT_LE_MSG(test, q_pow, 3U, "queue %u, val %#llx", i, val);
		sum_segments += 1U << q_pow;
	}

	/* A u64 holds sixteen 4-bit fields; the unused ones must stay clear. */
	for (i = num_queues; i < 16; i++) {
		unsigned int q_pow = (val >> (i * 4)) & 0xf;

		KUNIT_ASSERT_EQ_MSG(test, q_pow, 0U, "queue %u, val %#llx", i, val);
	}

	max_assigned_segments = min(p->num_segments, 8 * p->num_queues);
	KUNIT_ASSERT_EQ_MSG(test, sum_segments, max_assigned_segments, "val %#llx", val);
}

static const struct macb_sram_segments_case macb_sram_segments_cases[] = {
	/* num_segments can only be powers of two. */
	{ .num_queues = 4, .num_segments = 2 },
	{ .num_queues = 1, .num_segments = 16 },
	{ .num_queues = 4, .num_segments = 16 },
	{ .num_queues = 5, .num_segments = 16 },
	{ .num_queues = 15, .num_segments = 16 },
	{ .num_queues = 16, .num_segments = 16 },
};

/* Build the human-readable name KUnit prints for one parameter set. */
static void macb_sram_segments_case_desc(const struct macb_sram_segments_case *t,
					 char *desc)
{
	u64 val = gem_sram_distribute_segments(t->num_queues, t->num_segments);

	snprintf(desc, KUNIT_PARAM_DESC_SIZE,
		 "num_queues=%u num_segments=%u TXQSEGALLOC=%#llx",
		 t->num_queues, t->num_segments, val);
}

KUNIT_ARRAY_PARAM(macb_sram_segments,
		  macb_sram_segments_cases,
		  macb_sram_segments_case_desc);

static struct kunit_case macb_test_cases[] = {
	KUNIT_CASE_PARAM(macb_sram_segments_test, macb_sram_segments_gen_params),
	{}
};

static struct kunit_suite macb_test_suite = {
	.name = "macb",
	.test_cases = macb_test_cases,
};

kunit_test_suite(macb_test_suite);

#endif
--
2.53.0