Thread (55 messages) 55 messages, 10 authors, 2025-04-17

Re: [RFC PATCH v1 10/15] KVM: VMX: Use WRMSRNS or its immediate form when available

From: "H. Peter Anvin" <hpa@zytor.com>
Date: 2025-03-31 20:45:55
Also in: bpf, kvm, linux-edac, linux-hyperv, linux-ide, linux-perf-users, linux-pm, lkml, llvm, xen-devel

On March 31, 2025 1:27:23 PM PDT, Konrad Rzeszutek Wilk [off-list ref] wrote:
On Mon, Mar 31, 2025 at 01:22:46AM -0700, Xin Li (Intel) wrote:
quoted
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kvm/vmx/vmenter.S       | 28 ++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e6134ef2263d..04244c3ba374 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1226,4 +1226,10 @@
 						* a #GP
 						*/
 
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define ASM_WRMSRNS		_ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Instruction opcode for the immediate form RDMSR/WRMSRNS */
+#define ASM_WRMSRNS_RAX		_ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0)
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f6986dee6f8c..9fae43723c44 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -64,6 +64,29 @@
 	RET
 .endm
 
+/*
+ * Write EAX to MSR_IA32_SPEC_CTRL.
+ *
+ * Choose the best WRMSR instruction based on availability.
+ *
+ * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them.
+ */
+.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
+	ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;		\
+				  xor %edx, %edx;				\
+				  mov %edi, %eax;				\
+				  ds wrmsr),					\
+		      __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;		\
+				  xor %edx, %edx;				\
+				  mov %edi, %eax;				\
+				  ASM_WRMSRNS),					\
+		      X86_FEATURE_WRMSRNS,					\
+		      __stringify(xor %_ASM_AX, %_ASM_AX;			\
+				  mov %edi, %eax;				\
+				  ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL),	\
+		      X86_FEATURE_MSR_IMM
+.endm
+
 .section .noinstr.text, "ax"
 
 /**
@@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
 	cmp %edi, %esi
 	je .Lspec_ctrl_done
-	mov $MSR_IA32_SPEC_CTRL, %ecx
-	xor %edx, %edx
-	mov %edi, %eax
-	wrmsr
Is that the right path forward?

That is replace the MSR write to disable speculative execution with a
non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative?

quoted
+	WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
 
 .Lspec_ctrl_done:
 
-- 
2.49.0
So to clarify the semantics of WRMSRNS: it is an opt-in capability for the OS to allow the hardware to choose the amount of serialization needed on an MSR- or implementation-specific basis.

It also allows the OS to set multiple MSRs followed by a SERIALIZE instruction if full hard serialization is still desired, rather than having each individual MSR write do a full hard serialization (killing the full pipe and starting over from instruction fetch.)

This will replace the – architecturally questionable, in my opinion – practice of introducing non-serializing MSRs which after all are retroactive changes to the semantics WRMSR instruction with no opt-out (although the existence of SERIALIZE improves the situation somewhat.)

I agree that we need better documentation as to the semantics of WRMSRNS on critical MSRs like SPEC_CTRL, and especially in that specific case, when post-batch SERIALIZE would be called for.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help