Thread (37 messages) 37 messages, 7 authors, 2021-11-17

Re: Re: [PATCH v7 11/11] media: hantro: Support NV12 on the G2 core

From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Date: 2021-10-20 11:08:33
Also in: linux-arm-kernel, linux-media, linux-rockchip, lkml

Hi Jernej,

On Tue, 19 Oct 2021 at 13:38, Jernej Škrabec [off-list ref] wrote:
Hi Andrzej!

Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz
napisal(a):
quoted
Hi Jernej,

W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
quoted
Hi Andrzej!

Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej Pietrasiewicz
napisal(a):
quoted
The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
Enable the G2 post-processor block, in order to produce regular NV12.

The logic in hantro_postproc.c is leveraged to take care of allocating
the extra buffers and configure the post-processor, which is
significantly simpler than the one on the G1.
Quick summary of discussion on LibreELEC Slack:
When using NV12 format on Allwinner H6 variant of G2 (needs some driver
changes), I get frames out of order. If I use native NV12 tiled format,
frames
quoted
quoted
are ordered correctly.

Currently I'm not sure if this is issue with my changes or is this general
issue.

I would be grateful if anyone can test frame order with and without
postprocessing enabled on imx8. Take some dynamic video with a lot of
short
quoted
quoted
scenes. It's pretty obvious when frames are out of order.
I checked on imx8 and cannot observe any such artifacts.
I finally found the issue. As you mentioned on Slack, register write order once
already affected decoding. Well, it's the case again. I made hacky test and
moved postproc enable call after output buffers are set and it worked. So, this
is actually core quirk which is obviously fixed in newer variants.
Ugh, good catch.

What happens if you move all the calls to HANTRO_PP_REG_WRITE_S
(HANTRO_PP_REG_WRITE does a relaxed write)?

Or what happens if the HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma)
is moved to be done after all the other registers?
This makes this series with minor adaptations completely working on H6. I see
no reason not to merge whole series.
Do you have plans to submit your H6 work on top of this?

Thanks,
Ezequiel

Thanks for testing.

Best regards,
Jernej
quoted
Andrzej
quoted
However, given that frames themself are correctly decoded and without
postprocessing in right order, that shouldn't block merging previous
patches.
quoted
quoted
I tried few different videos and frames were all decoded correctly.

Best regards,
Jernej
quoted
Signed-off-by: Ezequiel Garcia <redacted>
Signed-off-by: Andrzej Pietrasiewicz <redacted>
---
  .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
  drivers/staging/media/hantro/hantro_hw.h      |  1 +
  .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
  drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
  4 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/
staging/media/hantro/hantro_g2_vp9_dec.c
quoted
index 7f827b9f0133..1a26be72c878 100644
--- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
+++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
@@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
   hantro_reg_write(ctx->dev, &g2_out_dis, 0);
   hantro_reg_write(ctx->dev, &g2_output_format, 0);

-  luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf,
0);
quoted
+  luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
   hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);

   chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
@@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
   hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) /
dst->vp9.width);
quoted
   hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) /
dst->vp9.height);
quoted
-  luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf,
0);
quoted
+  luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
   hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);

   chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
@@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx
*ctx,
quoted
quoted
quoted
   config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
golden_frame_ts);
   config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
alt_frame_ts);

-  mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf,
0) +
quoted
+  mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
             mv_offset(ctx, dec_params);
   hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/
media/hantro/hantro_hw.h
quoted
index 2961d399fd60..3d4a5dc1e6d5 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -274,6 +274,7 @@ extern const struct hantro_variant
rk3399_vpu_variant;
quoted
quoted
quoted
  extern const struct hantro_variant sama5d4_vdec_variant;

  extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
+extern const struct hantro_postproc_ops hantro_g2_postproc_ops;

  extern const u32 hantro_vp8_dec_mc_filter[8][6];
diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
staging/media/hantro/hantro_postproc.c
quoted
index 4549aec08feb..79a66d001738 100644
--- a/drivers/staging/media/hantro/hantro_postproc.c
+++ b/drivers/staging/media/hantro/hantro_postproc.c
@@ -11,6 +11,7 @@
  #include "hantro.h"
  #include "hantro_hw.h"
  #include "hantro_g1_regs.h"
+#include "hantro_g2_regs.h"

  #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
  { \
@@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct
hantro_ctx
quoted
quoted
*ctx)
quoted
   HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
  }

+static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
+{
+  struct hantro_dev *vpu = ctx->dev;
+  struct vb2_v4l2_buffer *dst_buf;
+  size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
+  dma_addr_t dst_dma;
+
+  dst_buf = hantro_get_dst_buf(ctx);
+  dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+
+  hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
+  hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma +
chroma_offset);
quoted
+  hantro_reg_write(vpu, &g2_out_rs_e, 1);
+}
+
  void hantro_postproc_free(struct hantro_ctx *ctx)
  {
   struct hantro_dev *vpu = ctx->dev;
@@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
   if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
           buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
                                           ctx-
dst_fmt.height);
+  else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
+          buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
+                                         ctx-
dst_fmt.height);

   for (i = 0; i < num_buffers; ++i) {
           struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
@@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct
hantro_ctx *ctx)
quoted
   HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
  }

+static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
+{
+  struct hantro_dev *vpu = ctx->dev;
+
+  hantro_reg_write(vpu, &g2_out_rs_e, 0);
+}
+
  void hantro_postproc_disable(struct hantro_ctx *ctx)
  {
   struct hantro_dev *vpu = ctx->dev;
@@ -172,3 +198,8 @@ const struct hantro_postproc_ops
hantro_g1_postproc_ops
quoted
quoted
= {
quoted
   .enable = hantro_postproc_g1_enable,
   .disable = hantro_postproc_g1_disable,
  };
+
+const struct hantro_postproc_ops hantro_g2_postproc_ops = {
+  .enable = hantro_postproc_g2_enable,
+  .disable = hantro_postproc_g2_disable,
+};
diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/
staging/
quoted
quoted
media/hantro/imx8m_vpu_hw.c
quoted
index 455a107ffb02..1a43f6fceef9 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -132,6 +132,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[]
= {
quoted
quoted
quoted
   },
  };

+static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
+  {
+          .fourcc = V4L2_PIX_FMT_NV12,
+          .codec_mode = HANTRO_MODE_NONE,
+          .postprocessed = true,
+  },
+};
+
  static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
   {
           .fourcc = V4L2_PIX_FMT_NV12_4L4,
@@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant = {
   .dec_offset = 0x0,
   .dec_fmts = imx8m_vpu_g2_dec_fmts,
   .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
+  .postproc_fmts = imx8m_vpu_g2_postproc_fmts,
+  .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
+  .postproc_ops = &hantro_g2_postproc_ops,
   .codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
   .codec_ops = imx8mq_vpu_g2_codec_ops,
   .init = imx8mq_vpu_hw_init,
--
2.17.1
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help