Thread: 29 messages, 3 authors, 2018-03-19
STALE (3016d)

[PATCH v5 22/23] crypto: arm64/sm3-ce - yield NEON after every block of input

From: Ard Biesheuvel <hidden>
Date: 2018-03-10 15:22:07
Also in: linux-crypto, linux-rt-users
Subsystem: arm64 port (aarch64 architecture), the rest · Maintainers: Catalin Marinas, Will Deacon, Linus Torvalds

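Avoid excessive scheduling delays under a preemptible kernel by
conditionally yielding the NEON unit after every block of input. When a
yield is taken, the intermediate hash state is stored to the state
buffer beforehand and reloaded from it afterwards.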

Signed-off-by: Ard Biesheuvel <redacted>
---
 arch/arm64/crypto/sm3-ce-core.S | 30 +++++++++++++++-----
 1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S
index 27169fe07a68..5a116c8d0cee 100644
--- a/arch/arm64/crypto/sm3-ce-core.S
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -77,19 +77,25 @@
 	 */
 	.text
 ENTRY(sm3_ce_transform)
+	frame_push	3
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+
 	/* load state */
-	ld1		{v8.4s-v9.4s}, [x0]
+	ld1		{v8.4s-v9.4s}, [x19]
 	rev64		v8.4s, v8.4s
 	rev64		v9.4s, v9.4s
 	ext		v8.16b, v8.16b, v8.16b, #8
 	ext		v9.16b, v9.16b, v9.16b, #8
 
-	adr_l		x8, .Lt
+0:	adr_l		x8, .Lt
 	ldp		s13, s14, [x8]
 
 	/* load input */
-0:	ld1		{v0.16b-v3.16b}, [x1], #64
-	sub		w2, w2, #1
+1:	ld1		{v0.16b-v3.16b}, [x20], #64
+	sub		w21, w21, #1
 
 	mov		v15.16b, v8.16b
 	mov		v16.16b, v9.16b
@@ -125,14 +131,24 @@ CPU_LE(	rev32		v3.16b, v3.16b		)
 	eor		v9.16b, v9.16b, v16.16b
 
 	/* handled all input blocks? */
-	cbnz		w2, 0b
+	cbz		w21, 2f
+
+	if_will_cond_yield_neon
+	st1		{v8.4s-v9.4s}, [x19]
+	do_cond_yield_neon
+	ld1		{v8.4s-v9.4s}, [x19]
+	b		0b
+	endif_yield_neon
+
+	b		1b
 
 	/* save state */
-	rev64		v8.4s, v8.4s
+2:	rev64		v8.4s, v8.4s
 	rev64		v9.4s, v9.4s
 	ext		v8.16b, v8.16b, v8.16b, #8
 	ext		v9.16b, v9.16b, v9.16b, #8
-	st1		{v8.4s-v9.4s}, [x0]
+	st1		{v8.4s-v9.4s}, [x19]
+	frame_pop
 	ret
 ENDPROC(sm3_ce_transform)
 
-- 
2.15.1
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help