[PATCH net-next v7 02/28] asm: simd context helper API
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: 2018-10-06 02:57:37
Also in:
linux-arch, lkml
Subsystem:
alpha port, arm port, arm64 port (aarch64 architecture), generic include/asm header files, linux for powerpc (32-bit and 64-bit), m68k architecture, microblaze architecture, mips, nios2 architecture, openrisc architecture, parisc architecture, qualcomm hexagon architecture, risc-v architecture, s390 architecture, sparc + ultrasparc (sparc/sparc64), superh, synopsys arc architecture, tensilica xtensa port (xtensa), the rest, user-mode linux (uml), x86 architecture (32-bit and 64-bit) · Maintainers:
Richard Henderson, Matt Turner, Magnus Lindholm, Russell King, Catalin Marinas, Will Deacon, Arnd Bergmann, Madhavan Srinivasan, Michael Ellerman, Geert Uytterhoeven, Michal Simek, Thomas Bogendoerfer, Dinh Nguyen, Simon Schuster, Jonas Bonn, Stefan Kristiansson, Stafford Horne, "James E.J. Bottomley", Helge Deller, Brian Cain, Paul Walmsley, Palmer Dabbelt, Albert Ou, Heiko Carstens, Vasily Gorbik, Alexander Gordeev, "David S. Miller", Andreas Larsson, Yoshinori Sato, Rich Felker, John Paul Adrian Glaubitz, Vineet Gupta, Chris Zankel, Max Filippov, Linus Torvalds, Richard Weinberger, Anton Ivanov, Johannes Berg, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen
Sometimes it's useful to amortize calls to XSAVE/XRSTOR and the related
FPU/SIMD functions over a number of calls, because FPU restoration is
quite expensive. This adds a simple header for carrying out this pattern:
simd_context_t simd_context;
simd_get(&simd_context);
while ((item = get_item_from_queue()) != NULL) {
encrypt_item(item, &simd_context);
simd_relax(&simd_context);
}
simd_put(&simd_context);
The relaxation step ensures that we don't trample over preemption, and
the get/put API should be a familiar paradigm in the kernel.
On the other end, code that actually wants to use SIMD instructions can
accept this as a parameter and check it via:
void encrypt_item(struct item *item, simd_context_t *simd_context)
{
if (item->len > LARGE_FOR_SIMD && simd_use(simd_context))
wild_simd_code(item);
else
boring_scalar_code(item);
}
The actual XSAVE happens during simd_use (and only on the first time),
so that if the context is never actually used, no performance penalty is
hit.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <redacted>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <redacted>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: linux-arch@vger.kernel.org
---
arch/alpha/include/asm/Kbuild | 1 +
arch/arc/include/asm/Kbuild | 1 +
arch/arm/include/asm/Kbuild | 1 -
arch/arm/include/asm/simd.h | 63 ++++++++++++++++++++++++++++++
arch/arm64/include/asm/simd.h | 51 +++++++++++++++++++++---
arch/c6x/include/asm/Kbuild | 1 +
arch/h8300/include/asm/Kbuild | 1 +
arch/hexagon/include/asm/Kbuild | 1 +
arch/ia64/include/asm/Kbuild | 1 +
arch/m68k/include/asm/Kbuild | 1 +
arch/microblaze/include/asm/Kbuild | 1 +
arch/mips/include/asm/Kbuild | 1 +
arch/nds32/include/asm/Kbuild | 1 +
arch/nios2/include/asm/Kbuild | 1 +
arch/openrisc/include/asm/Kbuild | 1 +
arch/parisc/include/asm/Kbuild | 1 +
arch/powerpc/include/asm/Kbuild | 1 +
arch/riscv/include/asm/Kbuild | 1 +
arch/s390/include/asm/Kbuild | 1 +
arch/sh/include/asm/Kbuild | 1 +
arch/sparc/include/asm/Kbuild | 1 +
arch/um/include/asm/Kbuild | 1 +
arch/unicore32/include/asm/Kbuild | 1 +
arch/x86/include/asm/simd.h | 44 ++++++++++++++++++++-
arch/xtensa/include/asm/Kbuild | 1 +
include/asm-generic/simd.h | 20 ++++++++++
include/linux/simd.h | 32 +++++++++++++++
27 files changed, 224 insertions(+), 8 deletions(-)
create mode 100644 arch/arm/include/asm/simd.h
create mode 100644 include/linux/simd.h
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 0580cb8c84b2..220dfd170d45 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild@@ -13,3 +13,4 @@ generic-y += sections.h generic-y += trace_clock.h generic-y += current.h generic-y += kprobes.h +generic-y += simd.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index feed50ce89fa..a7f4255f1649 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild@@ -22,6 +22,7 @@ generic-y += parport.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h +generic-y += simd.h generic-y += topology.h generic-y += trace_clock.h generic-y += user.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 1d66db9c9db5..ebdc9eeb8d39 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild@@ -16,7 +16,6 @@ generic-y += rwsem.h generic-y += seccomp.h generic-y += segment.h generic-y += serial.h -generic-y += simd.h generic-y += sizes.h generic-y += timex.h generic-y += trace_clock.h
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
new file mode 100644
index 000000000000..264ed84b41d8
--- /dev/null
+++ b/arch/arm/include/asm/simd.h@@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <linux/simd.h> +#ifndef _ASM_SIMD_H +#define _ASM_SIMD_H + +#ifdef CONFIG_KERNEL_MODE_NEON +#include <asm/neon.h> + +static __must_check inline bool may_use_simd(void) +{ + return !in_nmi() && !in_irq() && !in_serving_softirq(); +} + +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ + if (*ctx & HAVE_SIMD_IN_USE) + kernel_neon_end(); + *ctx = HAVE_NO_SIMD; +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ + if (!(*ctx & HAVE_FULL_SIMD)) + return false; + if (*ctx & HAVE_SIMD_IN_USE) + return true; + kernel_neon_begin(); + *ctx |= HAVE_SIMD_IN_USE; + return true; +} + +#else + +static __must_check inline bool may_use_simd(void) +{ + return false; +} + +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ + return false; +} +#endif + +#endif /* _ASM_SIMD_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6495cc51246f..a45ff1600040 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h@@ -1,11 +1,10 @@ -/* - * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> +/* SPDX-License-Identifier: GPL-2.0 * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ +#include <linux/simd.h> #ifndef __ASM_SIMD_H #define __ASM_SIMD_H
@@ -16,6 +15,8 @@ #include <linux/types.h> #ifdef CONFIG_KERNEL_MODE_NEON +#include <asm/neon.h> +#include <asm/simd.h> DECLARE_PER_CPU(bool, kernel_neon_busy);
@@ -40,9 +41,47 @@ static __must_check inline bool may_use_simd(void) !this_cpu_read(kernel_neon_busy); } +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ + if (*ctx & HAVE_SIMD_IN_USE) + kernel_neon_end(); + *ctx = HAVE_NO_SIMD; +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ + if (!(*ctx & HAVE_FULL_SIMD)) + return false; + if (*ctx & HAVE_SIMD_IN_USE) + return true; + kernel_neon_begin(); + *ctx |= HAVE_SIMD_IN_USE; + return true; +} + #else /* ! CONFIG_KERNEL_MODE_NEON */ -static __must_check inline bool may_use_simd(void) { +static __must_check inline bool may_use_simd(void) +{ + return false; +} + +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ return false; }
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 33a2c94fed0d..7543c38f7ade 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild@@ -30,6 +30,7 @@ generic-y += pgalloc.h generic-y += preempt.h generic-y += segment.h generic-y += serial.h +generic-y += simd.h generic-y += tlbflush.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index a5d0b2991f47..1fcef25ee19d 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild@@ -39,6 +39,7 @@ generic-y += preempt.h generic-y += scatterlist.h generic-y += sections.h generic-y += serial.h +generic-y += simd.h generic-y += sizes.h generic-y += spinlock.h generic-y += timex.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index dd2fd9c0d292..217d4695fd8a 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild@@ -29,6 +29,7 @@ generic-y += rwsem.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += simd.h generic-y += sizes.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 557bbc8ba9f5..41c5ebdf79e5 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild@@ -4,6 +4,7 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h +generic-y += simd.h generic-y += trace_clock.h generic-y += vtime.h generic-y += word-at-a-time.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index a4b8d3331a9e..73898dd1a4d0 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild@@ -19,6 +19,7 @@ generic-y += mm-arch-hooks.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h +generic-y += simd.h generic-y += spinlock.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 569ba9e670c1..7a877eea99d3 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild@@ -25,6 +25,7 @@ generic-y += parport.h generic-y += percpu.h generic-y += preempt.h generic-y += serial.h +generic-y += simd.h generic-y += syscalls.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 58351e48421e..e8868e0fb2c3 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild@@ -16,6 +16,7 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += sections.h generic-y += segment.h +generic-y += simd.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..fb2f113716ce 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild@@ -46,6 +46,7 @@ generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += shmbuf.h +generic-y += simd.h generic-y += sizes.h generic-y += stat.h generic-y += switch_to.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 8fde4fa2c34f..571a9d9ad107 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild@@ -33,6 +33,7 @@ generic-y += preempt.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += simd.h generic-y += spinlock.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index eb87cd8327c8..b6231211bbad 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild@@ -34,6 +34,7 @@ generic-y += qrwlock_types.h generic-y += qrwlock.h generic-y += sections.h generic-y += segment.h +generic-y += simd.h generic-y += string.h generic-y += switch_to.h generic-y += topology.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 2013d639e735..97970b4d05ab 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild@@ -17,6 +17,7 @@ generic-y += percpu.h generic-y += preempt.h generic-y += seccomp.h generic-y += segment.h +generic-y += simd.h generic-y += topology.h generic-y += trace_clock.h generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3196d227e351..2337190aaf69 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild@@ -8,3 +8,4 @@ generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h generic-y += msi.h +generic-y += simd.h
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index efdbe311e936..438a11d9c47a 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild@@ -46,6 +46,7 @@ generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h generic-y += signal.h +generic-y += simd.h generic-y += socket.h generic-y += sockios.h generic-y += stat.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index e3239772887a..3744c4c61fb5 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild@@ -22,6 +22,7 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += rwsem.h +generic-y += simd.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += word-at-a-time.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 6a5609a55965..8e64ff35a933 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild@@ -16,6 +16,7 @@ generic-y += percpu.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h +generic-y += simd.h generic-y += sizes.h generic-y += trace_clock.h generic-y += xor.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 410b263ef5c8..72b9e08fb350 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild@@ -17,5 +17,6 @@ generic-y += msi.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h +generic-y += simd.h generic-y += trace_clock.h generic-y += word-at-a-time.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b10dde6cb793..8c2bfa6e0494 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild@@ -22,6 +22,7 @@ generic-y += param.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h +generic-y += simd.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index bfc7abe77905..98a908720bbd 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild@@ -27,6 +27,7 @@ generic-y += preempt.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += simd.h generic-y += sizes.h generic-y += syscalls.h generic-y += topology.h
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index a341c878e977..4aad7f158dcb 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h@@ -1,4 +1,11 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <linux/simd.h> +#ifndef _ASM_SIMD_H +#define _ASM_SIMD_H #include <asm/fpu/api.h>
@@ -10,3 +17,38 @@ static __must_check inline bool may_use_simd(void) { return irq_fpu_usable(); } + +static inline void simd_get(simd_context_t *ctx) +{ +#if !defined(CONFIG_UML) + *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD; +#else + *ctx = HAVE_NO_SIMD; +#endif +} + +static inline void simd_put(simd_context_t *ctx) +{ +#if !defined(CONFIG_UML) + if (*ctx & HAVE_SIMD_IN_USE) + kernel_fpu_end(); +#endif + *ctx = HAVE_NO_SIMD; +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ +#if !defined(CONFIG_UML) + if (!(*ctx & HAVE_FULL_SIMD)) + return false; + if (*ctx & HAVE_SIMD_IN_USE) + return true; + kernel_fpu_begin(); + *ctx |= HAVE_SIMD_IN_USE; + return true; +#else + return false; +#endif +} + +#endif /* _ASM_SIMD_H */
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 82c756431b49..7950f359649d 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild@@ -24,6 +24,7 @@ generic-y += percpu.h generic-y += preempt.h generic-y += rwsem.h generic-y += sections.h +generic-y += simd.h generic-y += topology.h generic-y += trace_clock.h generic-y += word-at-a-time.h
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
index d0343d58a74a..b3dd61ac010e 100644
--- a/include/asm-generic/simd.h
+++ b/include/asm-generic/simd.h@@ -1,5 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/simd.h> +#ifndef _ASM_SIMD_H +#define _ASM_SIMD_H + #include <linux/hardirq.h> /*
@@ -13,3 +17,19 @@ static __must_check inline bool may_use_simd(void) { return !in_interrupt(); } + +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ + return false; +} + +#endif /* _ASM_SIMD_H */
diff --git a/include/linux/simd.h b/include/linux/simd.h
new file mode 100644
index 000000000000..4e0b8a9bdc14
--- /dev/null
+++ b/include/linux/simd.h@@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _SIMD_H +#define _SIMD_H + +typedef enum { + HAVE_NO_SIMD = 1 << 0, + HAVE_FULL_SIMD = 1 << 1, + HAVE_SIMD_IN_USE = 1 << 31 +} simd_context_t; + +#define DONT_USE_SIMD ((simd_context_t []){ HAVE_NO_SIMD }) + +#include <linux/sched.h> +#include <asm/simd.h> + +static inline bool simd_relax(simd_context_t *ctx) +{ +#ifdef CONFIG_PREEMPT + if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) { + simd_put(ctx); + simd_get(ctx); + return true; + } +#endif + return false; +} + +#endif /* _SIMD_H */
--
2.19.0