Thread (9 messages) 9 messages, 2 authors, 2026-03-16
STALE97d

[PATCH net-next v2 2/2] net: macb: distribute evenly Tx SRAM segments

From: Théo Lebrun <theo.lebrun@bootlin.com>
Date: 2026-03-11 16:42:07
Also in: lkml
Subsystem: atmel macb ethernet driver, networking drivers, the rest · Maintainers: Théo Lebrun, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds

GEM has registers to configure the Tx SRAM segments distribution across
queues. The reset value is appropriate (even spread) but we need to
take care if/when the number of active queues is modified (or if we
inherited unevenly initialised hardware from the bootloader).

To distribute segments, we take as input the number of queues
(bp->num_queues) and the number of segments (found inside DCFG6).
The output is the number of segments for each queue, encoded as
power-of-two exponents (e.g. 2 for queue 0 means it has 2^2=4 segments).

As the distribution logic is quite complex (at least its initial
versions had bugs), it is kunit-tested in macb_kunit.c and the
implementation lives in macb_utils.c. To test:

⟩ env --unset=CROSS_COMPILE make ARCH=um mrproper
⟩ env --unset=CROSS_COMPILE ./tools/testing/kunit/kunit.py run \
        --kconfig_add CONFIG_NET=y \
        --kconfig_add CONFIG_COMMON_CLK=y \
        --kconfig_add CONFIG_MACB=y macb

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/Kconfig      | 13 ++++++
 drivers/net/ethernet/cadence/Makefile     |  5 ++-
 drivers/net/ethernet/cadence/macb.h       |  8 ++++
 drivers/net/ethernet/cadence/macb_kunit.c | 73 +++++++++++++++++++++++++++++++
 drivers/net/ethernet/cadence/macb_main.c  | 15 +++++++
 drivers/net/ethernet/cadence/macb_utils.c | 56 ++++++++++++++++++++++++
 6 files changed, 169 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 5b2a461dfd28..65c8d6ef519b 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -51,4 +51,17 @@ config MACB_PCI
 	  To compile this driver as a module, choose M here: the module
 	  will be called macb_pci.
 
+config MACB_KUNIT_TEST
+	bool "KUnit test for MACB" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	depends on MACB
+	default KUNIT_ALL_TESTS
+	help
+	  Build KUnit tests for the MACB driver.
+
+	  For more information on KUnit and unit tests in general,
+	  please refer to the KUnit documentation.
+
+	  If unsure, say N.
+
 endif # NET_VENDOR_CADENCE
diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile
index 1f33cdca9a3c..f5a0abb9fdaf 100644
--- a/drivers/net/ethernet/cadence/Makefile
+++ b/drivers/net/ethernet/cadence/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for the Atmel network device drivers.
 #
-macb-y	:= macb_main.o
+macb-y	:= macb_main.o macb_utils.o
 
 ifeq ($(CONFIG_MACB_USE_HWSTAMP),y)
 macb-y	+= macb_ptp.o
@@ -10,3 +10,6 @@ endif
 
 obj-$(CONFIG_MACB) += macb.o
 obj-$(CONFIG_MACB_PCI) += macb_pci.o
+
+obj-$(CONFIG_MACB_KUNIT_TEST) += macb_kunit.o
+macb-test-y := macb_kunit.o macb_utils.o
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index b08afe340996..0464e774273a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -197,6 +197,9 @@
 #define GEM_TXBDCTRL	0x04cc /* TX Buffer Descriptor control register */
 #define GEM_RXBDCTRL	0x04d0 /* RX Buffer Descriptor control register */
 
+#define GEM_TXQSEGALLOC_LOWER	0x05A0 /* Tx queue segment allocation (low) */
+#define GEM_TXQSEGALLOC_UPPER	0x05A4 /* Tx queue segment allocation (high) */
+
 /* Screener Type 2 match registers */
 #define GEM_SCRT2		0x540
 
@@ -549,6 +552,8 @@
 #define GEM_PBUF_CUTTHRU_SIZE			1
 #define GEM_DAW64_OFFSET			23
 #define GEM_DAW64_SIZE				1
+#define GEM_SEGMENTS_BIT_SIZE_OFFSET		16
+#define GEM_SEGMENTS_BIT_SIZE_SIZE		3
 
 /* Bitfields in DCFG8. */
 #define GEM_T1SCR_OFFSET			24
@@ -1494,4 +1499,7 @@ struct macb_queue_enst_config {
 	u8 queue_id;
 };
 
+u64 gem_sram_distribute_segments(unsigned int num_queues,
+				 unsigned int num_segments);
+
 #endif /* _MACB_H */
diff --git a/drivers/net/ethernet/cadence/macb_kunit.c b/drivers/net/ethernet/cadence/macb_kunit.c
new file mode 100644
index 000000000000..74b37669c00c
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_kunit.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+#include <linux/netdevice.h>
+#include "macb.h"
+
+struct macb_sram_segments_case {
+	unsigned int num_queues, num_segments;
+};
+
+/* Decode the value returned for one case and check the segment budget. */
+static void macb_sram_segments_test(struct kunit *test)
+{
+	const struct macb_sram_segments_case *p = test->param_value;
+	u64 val = gem_sram_distribute_segments(p->num_queues, p->num_segments);
+	unsigned int i, sum_segments = 0, max_assigned_segments;
+	unsigned int num_queues = min(p->num_queues, p->num_segments);
+
+	/*
+	 * A queue owns 2^field segments, which can never be zero: sum them.
+	 */
+	for (i = 0; i < num_queues; i++)
+		sum_segments += 1U << ((val >> (i * 4)) & 0b11);
+
+	/* Fields past the counted queues must be left at zero. */
+	for (i = num_queues; i < 16; i++) {
+		unsigned int pow = (val >> (i * 4)) & 0b11;
+
+		KUNIT_ASSERT_EQ_MSG(test, pow, 0, "queue %u, val %#llx",
+				    i, val);
+	}
+
+	/* A queue holds at most 8 segments, which caps the expected total. */
+	max_assigned_segments = min(p->num_segments, 8 * p->num_queues);
+	KUNIT_ASSERT_EQ_MSG(test, sum_segments, max_assigned_segments,
+			    "val %#llx", val);
+}
+
+static struct macb_sram_segments_case macb_sram_segments_cases[] = {
+	/* File-local table; num_segments can only be powers of two. */
+	{ .num_queues = 4,  .num_segments = 2 },
+	{ .num_queues = 1,  .num_segments = 16 },
+	{ .num_queues = 4,  .num_segments = 16 },
+	{ .num_queues = 5,  .num_segments = 16 },
+	{ .num_queues = 15, .num_segments = 16 },
+	{ .num_queues = 16, .num_segments = 16 },
+};
+
+static void macb_sram_segments_case_desc(struct macb_sram_segments_case *t,
+					 char *desc)
+{
+	/* Name the case after its inputs and the register value they yield. */
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+		 "num_queues=%u num_segments=%u TXQSEGALLOC=%#llx",
+		 t->num_queues, t->num_segments,
+		 gem_sram_distribute_segments(t->num_queues, t->num_segments));
+}
+
+KUNIT_ARRAY_PARAM(macb_sram_segments,
+		  macb_sram_segments_cases,
+		  macb_sram_segments_case_desc);
+
+static struct kunit_case macb_test_cases[] = {
+	KUNIT_CASE_PARAM(macb_sram_segments_test,
+			 macb_sram_segments_gen_params),
+	{}
+};
+
+static struct kunit_suite macb_test_suite = {
+	.name = "macb",
+	.test_cases = macb_test_cases,
+};
+
+kunit_test_suite(macb_test_suite);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 8b2c77446dbd..16d71468fca7 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2954,6 +2954,21 @@ static void macb_init_hw(struct macb *bp)
 	if (bp->caps & MACB_CAPS_JUMBO)
 		bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK;
 
+	/*
+	 * Distribute Tx SRAM segments evenly based on active number of queues.
+	 */
+	if (macb_is_gem(bp)) {
+		unsigned int num_segments;
+		u64 val;
+
+		num_segments = 1U << GEM_BFEXT(SEGMENTS_BIT_SIZE,
+					       gem_readl(bp, DCFG6));
+		val = gem_sram_distribute_segments(bp->num_queues,
+						   num_segments);
+		gem_writel(bp, TXQSEGALLOC_LOWER, val);
+		gem_writel(bp, TXQSEGALLOC_UPPER, val >> 32);
+	}
+
 	macb_configure_dma(bp);
 
 	/* Enable RX partial store and forward and set watermark */
diff --git a/drivers/net/ethernet/cadence/macb_utils.c b/drivers/net/ethernet/cadence/macb_utils.c
new file mode 100644
index 000000000000..77e0b5c1df86
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_utils.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/netdevice.h>
+#include "macb.h"
+
+/*
+ * Distribute evenly available segments across queues. The computation is
+ * complex because (1) segments are counted in powers of two and (2) a queue
+ * can only use up to 8 segments. There are four types of cases:
+ *  - Sharing all segments equally is doable. Take num_queues=4 and
+ *    num_segments=16. Each queue will get 2^2=4 segments.
+ *  - Sharing all segments is doable. Take num_queues=5 and num_segments=16.
+ *    Three queues will get 2^2=4 segments and two will get 2^1=2 segments.
+ *  - Not enough queues to use all segments. Take num_queues=1 and
+ *    num_segments=16; queue 0 can only have 8 segments.
+ *  - Not enough segments for all queues. Take num_queues=4, num_segments=2.
+ *
+ * We start by computing the power each queue will have. For num_queues=5 and
+ * num_segments=16, each queue will have at least 2^1 segments. That leaves us
+ * with remaining_segments=6. If we increase the power for a queue, we get a
+ * delta of 2 (2^2-2^1). The first three queues are therefore advantaged with
+ * 2^2 segments each; the remaining two queues only have 2^1 segments.
+ *
+ * Return: queue i's exponent at bits [4i+1:4i], or 0 if num_queues is 0.
+ * Fields are shifted as u64; a 32-bit shift by 32+ (queues 8 and up) is UB.
+ */
+u64 gem_sram_distribute_segments(unsigned int num_queues,
+				 unsigned int num_segments)
+{
+	unsigned int pow, remaining_segments, i;
+	unsigned int num_advantaged_queues = 0;
+	u64 val = 0;
+
+	/* pow=0 for all queues. Also avoids dividing by zero and ilog2(0). */
+	if (!num_queues || num_queues >= num_segments)
+		return 0;
+
+	pow = min(ilog2(num_segments / num_queues), 3);
+	remaining_segments = num_segments - num_queues * (1U << pow);
+
+	/*
+	 * We can only distribute remaining segments if (1) there are remaining
+	 * segments and (2) we did not reach the max segments per queue (2^3).
+	 */
+	if (remaining_segments != 0 && pow != 3) {
+		unsigned int delta = (1U << (pow + 1)) - (1U << pow);
+
+		num_advantaged_queues = remaining_segments / delta;
+	}
+
+	for (i = 0; i < num_advantaged_queues; i++)
+		val |= (u64)((pow + 1) & 0b11) << (i * 4);
+	for (i = num_advantaged_queues; i < num_queues; i++)
+		val |= (u64)(pow & 0b11) << (i * 4);
+
+	return val;
+}
-- 
2.53.0
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help