Re: [PATCH v5 15/18] dmaengine: ti: k3-udma-v2: New driver for K3 BCDMA_V2
From: Péter Ujfalusi <peter.ujfalusi@gmail.com>
Date: 2026-02-26 19:38:21
Also in:
dmaengine, linux-arm-kernel, lkml
On 18/02/2026 11:52, Sai Sree Kartheek Adivi wrote:
quoted hunk ↗ jump to hunk
Add support for BCDMA_V2. The BCDMA_V2 is different than the existing BCDMA supported by the k3-udma driver. The changes in BCDMA_V2 are: - Autopair: There is no longer a need for PSIL pair and AUTOPAIR bit needs to set in the RT_CTL register. - Static channel mapping: Each channel is mapped to a single peripheral. - Direct IRQs: There is no INT-A and interrupt lines from DMA are directly connected to GIC. - Remote side configuration handled by DMA. So no need to write to PEER registers to START / STOP / PAUSE / TEARDOWN. - Unified Channel Space: Tx and Rx channels share a single register space. Each channel index is specifically fixed in hardware as either Tx or Rx in an interleaved manner. Also, since a version member is introduced in the match_data, Add version v1 in match_data of SoCs using v1 DMA. Signed-off-by: Sai Sree Kartheek Adivi <redacted> --- drivers/dma/ti/Kconfig | 14 +- drivers/dma/ti/Makefile | 1 + drivers/dma/ti/k3-udma-common.c | 86 +- drivers/dma/ti/k3-udma-v2.c | 1283 +++++++++++++++++++++++++++++ drivers/dma/ti/k3-udma.c | 9 + drivers/dma/ti/k3-udma.h | 121 +-- include/linux/soc/ti/k3-ringacc.h | 3 + 7 files changed, 1446 insertions(+), 71 deletions(-) create mode 100644 drivers/dma/ti/k3-udma-v2.cdiff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index 712e456015459..40713bd1e8e9b 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig@@ -49,6 +49,18 @@ config TI_K3_UDMA Enable support for the TI UDMA (Unified DMA) controller. This DMA engine is used in AM65x and j721e. +config TI_K3_UDMA_V2 + tristate "Texas Instruments K3 UDMA v2 support" + depends on ARCH_K3 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_K3_UDMA_COMMON + select TI_K3_RINGACC + select TI_K3_PSIL + help + Enable support for the TI UDMA (Unified DMA) v2 controller. This + DMA engine is used in AM62L. 
+ config TI_K3_UDMA_COMMON tristate default n@@ -63,7 +75,7 @@ config TI_K3_UDMA_GLUE_LAYER config TI_K3_PSIL tristate - default TI_K3_UDMA + default TI_K3_UDMA || TI_K3_UDMA_V2 config TI_DMA_CROSSBAR booldiff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index 41bfba944dc6c..296aa3421e71b 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile@@ -3,6 +3,7 @@ obj-$(CONFIG_TI_CPPI41) += cppi41.o obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o +obj-$(CONFIG_TI_K3_UDMA_V2) += k3-udma-v2.o obj-$(CONFIG_TI_K3_UDMA_COMMON) += k3-udma-common.o obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o k3-psil-lib-objs := k3-psil.o \diff --git a/drivers/dma/ti/k3-udma-common.c b/drivers/dma/ti/k3-udma-common.c index 0ffc6becc402e..ff2b0353515ee 100644 --- a/drivers/dma/ti/k3-udma-common.c +++ b/drivers/dma/ti/k3-udma-common.c@@ -171,8 +171,13 @@ bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d) uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT)) return true; - peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); - bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + if (uc->ud->match_data->version == K3_UDMA_V2) {
Consider checking for V1 first? V3 might be similar to V2, and that would save you churn in the future. Same comment for the other version checks.
quoted hunk ↗ jump to hunk
+ peer_bcnt = udma_chanrt_read(uc, UDMA_CHAN_RT_PERIPH_BCNT_REG); + bcnt = udma_chanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + } else { + peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + } /* Transfer is incomplete, store current residue and time stamp */ if (peer_bcnt < bcnt) {@@ -319,6 +324,7 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, size_t tr_size; int num_tr = 0; int tr_idx = 0; + u32 extra_flags = 0; u64 asel; /* estimate the number of TRs we will need */@@ -342,6 +348,11 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, else asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; + if (uc->ud->match_data->type == DMA_TYPE_BCDMA && + uc->ud->match_data->version == K3_UDMA_V2 && + dir == DMA_MEM_TO_DEV) + extra_flags = CPPI5_TR_CSF_EOP; + tr_req = d->hwdesc[0].tr_req_base; for_each_sg(sgl, sgent, sglen, i) { dma_addr_t sg_addr = sg_dma_address(sgent);@@ -358,7 +369,7 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false, false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT | extra_flags); sg_addr |= asel; tr_req[tr_idx].addr = sg_addr;@@ -372,7 +383,7 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, false, false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); cppi5_tr_csf_set(&tr_req[tr_idx].flags, - CPPI5_TR_CSF_SUPR_EVT); + CPPI5_TR_CSF_SUPR_EVT | extra_flags); tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0; tr_req[tr_idx].icnt0 = tr1_cnt0;@@ -2052,6 +2063,8 @@ int udma_get_tchan(struct udma_chan *uc) uc->tchan = NULL; return ret; } + if (ud->match_data->version == K3_UDMA_V2) + uc->chan = uc->tchan; if (ud->tflow_cnt) { int tflow_id;@@ -2102,6 +2115,8 @@ int udma_get_rchan(struct udma_chan *uc) uc->rchan = NULL; return ret; } + if 
(ud->match_data->version == K3_UDMA_V2) + uc->chan = uc->rchan; return 0; }@@ -2379,16 +2394,26 @@ int bcdma_setup_resources(struct udma_dev *ud) ud->bchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->bchan_cnt), sizeof(unsigned long), GFP_KERNEL); + bitmap_zero(ud->bchan_map, ud->bchan_cnt); ud->bchans = devm_kcalloc(dev, ud->bchan_cnt, sizeof(*ud->bchans), GFP_KERNEL); ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), sizeof(unsigned long), GFP_KERNEL); + bitmap_zero(ud->tchan_map, ud->tchan_cnt); ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans), GFP_KERNEL); - ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt), - sizeof(unsigned long), GFP_KERNEL); - ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans), - GFP_KERNEL); + if (ud->match_data->version == K3_UDMA_V2) { + ud->rchan_map = ud->tchan_map; + ud->rchans = ud->tchans; + ud->chan_map = ud->tchan_map; + ud->chans = ud->tchans; + } else { + ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + bitmap_zero(ud->rchan_map, ud->rchan_cnt); + ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans), + GFP_KERNEL); + } /* BCDMA do not really have flows, but the driver expect it */ ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rchan_cnt), sizeof(unsigned long),@@ -2484,11 +2509,18 @@ int setup_resources(struct udma_dev *ud) if (ret) return ret; - ch_count = ud->bchan_cnt + ud->tchan_cnt + ud->rchan_cnt; - if (ud->bchan_cnt) - ch_count -= bitmap_weight(ud->bchan_map, ud->bchan_cnt); - ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt); - ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt); + if (ud->match_data->version == K3_UDMA_V2) {
I would probably check for V1 and leave V2 as a simple else - trusting that V3 will likely be closer to V2 than to V1?
quoted hunk ↗ jump to hunk
+ ch_count = ud->bchan_cnt + ud->tchan_cnt; + if (ud->bchan_cnt) + ch_count -= bitmap_weight(ud->bchan_map, ud->bchan_cnt); + ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt); + } else { + ch_count = ud->bchan_cnt + ud->tchan_cnt + ud->rchan_cnt; + if (ud->bchan_cnt) + ch_count -= bitmap_weight(ud->bchan_map, ud->bchan_cnt); + ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt); + ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt); + } if (!ch_count) return -ENODEV;@@ -2510,15 +2542,25 @@ int setup_resources(struct udma_dev *ud) ud->rflow_cnt)); break; case DMA_TYPE_BCDMA: - dev_info(dev, - "Channels: %d (bchan: %u, tchan: %u, rchan: %u)\n", - ch_count, - ud->bchan_cnt - bitmap_weight(ud->bchan_map, - ud->bchan_cnt), - ud->tchan_cnt - bitmap_weight(ud->tchan_map, - ud->tchan_cnt), - ud->rchan_cnt - bitmap_weight(ud->rchan_map, - ud->rchan_cnt)); + if (ud->match_data->version == K3_UDMA_V1) { + dev_info(dev, + "Channels: %d (bchan: %u, tchan: %u, rchan: %u)\n", + ch_count, + ud->bchan_cnt - bitmap_weight(ud->bchan_map, + ud->bchan_cnt), + ud->tchan_cnt - bitmap_weight(ud->tchan_map, + ud->tchan_cnt), + ud->rchan_cnt - bitmap_weight(ud->rchan_map, + ud->rchan_cnt)); + } else if (ud->match_data->version == K3_UDMA_V2) { + dev_info(dev, + "Channels: %d (bchan: %u, chan: %u)\n", + ch_count, + ud->bchan_cnt - bitmap_weight(ud->bchan_map, + ud->bchan_cnt), + ud->chan_cnt - bitmap_weight(ud->chan_map, + ud->chan_cnt)); + }
If you have an else if {}, you also want a plain else {} to handle the case
when neither condition matches.
Check for V1 and leave V2 for a plain else branch?
Optionally, if indentation is getting tight, just create a helper function
to print this info.
break; case DMA_TYPE_PKTDMA: dev_info(dev,
I think this patch and the series look good; the only thing I would consider is reversing the V1/V2 checks where it makes sense - future incarnations of UDMA might be closer to V2 than to V1, and you would save on maintenance headaches. -- Péter