Thread: 32 messages, 5 authors, 2021-05-12

Re: [PATCH v1 01/13] arm64: Do not enable uaccess for flush_icache_range

From: Mark Rutland <mark.rutland@arm.com>
Date: 2021-05-12 10:01:40

On Wed, May 12, 2021 at 09:52:28AM +0100, Fuad Tabba wrote:
Hi Mark,
quoted
quoted
No functional change intended.
There is a performance change here, since the existing
`__flush_cache_user_range` takes IDC and DIC into account, whereas
`invalidate_icache_by_line` does not.
You're right. There is a performance change in this patch and a couple
of the others, which I will note in v2. However, I don't think that
this patch changes the behavior when it comes to IDC and DIC, does it?
It shouldn't be a functional problem, but it means that the new
`__flush_icache_range` will always perform redundant I-cache maintenance
rather than skipping this when the cpu has DIC=1.

It would be nice if we could structure this to take DIC into account
either in the new `__flush_icache_range`, or in the
`invalidate_icache_by_line` helper.
quoted
There's also an existing oversight where `__flush_cache_user_range`
takes ARM64_WORKAROUND_CLEAN_CACHE into account, but
`invalidate_icache_by_line` does not.
Sorry about this. I was evidently confused, as this does not make any
sense. This doesn't matter to `invalidate_icache_by_line`, and
`invalidate_dcache_by_line` already does the right thing via
`__dcache_op_workaround_clean_cache`.
I'd be happy to address that in v2, but let me make sure I understand
the issue properly.

Errata 819472 and friends (ARM64_WORKAROUND_CLEAN_CACHE) are related
to cache maintenance operations on data caches happening concurrently
with other accesses to the same address. The two places
invalidate_icache_by_line is used in conjunction with data caches are
__flush_icache_range and __flush_cache_user_range (which share the
same code before and after my patch series). In both cases,
invalidate_icache_by_line is called after the workaround is applied.
The third and only other user of invalidate_icache_by_line is
invalidate_icache_range, which only performs cache maintenance on the
icache.

The concern is that invalidate_icache_range might be performing a
cache maintenance operation on an address concurrently with another
processor performing a dc operation on the same address. Therefore,
invalidate_icache_range should perform DC CIVAC on the line before
invalidate_icache_by_line if ARM64_WORKAROUND_CLEAN_CACHE applies. Is
that right?

https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
Sorry, I had misread the code, and I don't think there's a bug to fix
here after all. Regardless, thanks for digging into that and trying to
make sense of my bogus suggestion.
quoted
Arguably the same is true of `swsusp_arch_suspend_exit`, but for that
we could add a comment and always use `DC CIVAC`.
I can do that in v2 as well.
A separate patch for `swsusp_arch_suspend_exit` would be great, since
that is something we should backport to stable as a fix.

Thanks,
Mark.
quoted
quoted
Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Signed-off-by: Fuad Tabba <redacted>
---
 arch/arm64/include/asm/assembler.h | 13 ++++--
 arch/arm64/mm/cache.S              | 64 +++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..6ff7a3a3b238 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -426,16 +426,21 @@ alternative_endif
  * Macro to perform an instruction cache maintenance for the interval
  * [start, end)
  *
- *   start, end:     virtual addresses describing the region
- *   label:          A label to branch to on user fault.
- *   Corrupts:       tmp1, tmp2
+ *   start, end:     virtual addresses describing the region
+ *   needs_uaccess:  might access user space memory
+ *   label:          label to branch to on user fault (if needs_uaccess)
+ *   Corrupts:       tmp1, tmp2
  */
-     .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+     .macro invalidate_icache_by_line start, end, tmp1, tmp2, needs_uaccess, label
      icache_line_size \tmp1, \tmp2
      sub     \tmp2, \tmp1, #1
      bic     \tmp2, \start, \tmp2
 9997:
+     .if     \needs_uaccess
 USER(\label, ic      ivau, \tmp2)                    // invalidate I line PoU
+     .else
+     ic      ivau, \tmp2
+     .endif
      add     \tmp2, \tmp2, \tmp1
      cmp     \tmp2, \end
      b.lo    9997b
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d..092f73acdf9a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,30 +15,20 @@
 #include <asm/asm-uaccess.h>

 /*
- *   flush_icache_range(start,end)
+ *   __flush_cache_range(start,end) [needs_uaccess]
  *
  *   Ensure that the I and D caches are coherent within specified region.
  *   This is typically used when code has been written to a memory region,
  *   and will be executed.
  *
- *   - start   - virtual start address of region
- *   - end     - virtual end address of region
+ *   - start         - virtual start address of region
+ *   - end           - virtual end address of region
+ *   - needs_uaccess - (macro parameter) might access user space memory
  */
-SYM_FUNC_START(__flush_icache_range)
-     /* FALLTHROUGH */
-
-/*
- *   __flush_cache_user_range(start,end)
- *
- *   Ensure that the I and D caches are coherent within specified region.
- *   This is typically used when code has been written to a memory region,
- *   and will be executed.
- *
- *   - start   - virtual start address of region
- *   - end     - virtual end address of region
- */
-SYM_FUNC_START(__flush_cache_user_range)
+.macro       __flush_cache_range, needs_uaccess
+     .if     \needs_uaccess
      uaccess_ttbr0_enable x2, x3, x4
+     .endif
 alternative_if ARM64_HAS_CACHE_IDC
      dsb     ishst
      b       7f
@@ -47,7 +37,11 @@ alternative_else_nop_endif
      sub     x3, x2, #1
      bic     x4, x0, x3
 1:
+     .if     \needs_uaccess
 user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+     .else
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
+     .endif
      add     x4, x4, x2
      cmp     x4, x1
      b.lo    1b
@@ -58,15 +52,47 @@ alternative_if ARM64_HAS_CACHE_DIC
      isb
      b       8f
 alternative_else_nop_endif
-     invalidate_icache_by_line x0, x1, x2, x3, 9f
+     invalidate_icache_by_line x0, x1, x2, x3, \needs_uaccess, 9f
 8:   mov     x0, #0
 1:
+     .if     \needs_uaccess
      uaccess_ttbr0_disable x1, x2
+     .endif
      ret
+
+     .if     \needs_uaccess
 9:
      mov     x0, #-EFAULT
      b       1b
+     .endif
+.endm
+
+/*
+ *   flush_icache_range(start,end)
+ *
+ *   Ensure that the I and D caches are coherent within specified region.
+ *   This is typically used when code has been written to a memory region,
+ *   and will be executed.
+ *
+ *   - start   - virtual start address of region
+ *   - end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_icache_range)
+     __flush_cache_range needs_uaccess=0
 SYM_FUNC_END(__flush_icache_range)
+
+/*
+ *   __flush_cache_user_range(start,end)
+ *
+ *   Ensure that the I and D caches are coherent within specified region.
+ *   This is typically used when code has been written to a memory region,
+ *   and will be executed.
+ *
+ *   - start   - virtual start address of region
+ *   - end     - virtual end address of region
+ */
+SYM_FUNC_START(__flush_cache_user_range)
+     __flush_cache_range needs_uaccess=1
 SYM_FUNC_END(__flush_cache_user_range)

 /*
@@ -86,7 +112,7 @@ alternative_else_nop_endif

      uaccess_ttbr0_enable x2, x3, x4

-     invalidate_icache_by_line x0, x1, x2, x3, 2f
+     invalidate_icache_by_line x0, x1, x2, x3, 1, 2f
      mov     x0, xzr
 1:
      uaccess_ttbr0_disable x1, x2
--
2.31.1.607.g51e8a6a459-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help