Thread (53 messages) 53 messages, 4 authors, 2021-04-07

Re: [RFC PATCH 16/18] virt/mshv: mmap vp register page

From: Nuno Das Neves <hidden>
Date: 2021-03-25 17:37:06
Also in: lkml

On 2/8/2021 11:49 AM, Michael Kelley wrote:
From: Nuno Das Neves <redacted> Sent: Friday, November 20, 2020 4:31 PM
quoted
Introduce mmap interface for a virtual processor, exposing a page for
setting and getting common registers while the VP is suspended.

This provides a more performant and convenient way to get and set these
registers in the context of a vmm's run-loop.

Co-developed-by: Lillian Grassin-Drake <redacted>
Signed-off-by: Lillian Grassin-Drake <redacted>
Signed-off-by: Nuno Das Neves <redacted>
---
 Documentation/virt/mshv/api.rst         | 11 ++++
 arch/x86/include/uapi/asm/hyperv-tlfs.h | 74 ++++++++++++++++++++++
 include/asm-generic/hyperv-tlfs.h       | 10 +++
 include/linux/mshv.h                    |  1 +
 include/uapi/asm-generic/hyperv-tlfs.h  |  5 ++
 include/uapi/linux/mshv.h               | 12 ++++
 virt/mshv/mshv_main.c                   | 82 +++++++++++++++++++++++++
 7 files changed, 195 insertions(+)
diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
index 7fd75f248eff..89c276a8778f 100644
--- a/Documentation/virt/mshv/api.rst
+++ b/Documentation/virt/mshv/api.rst
@@ -149,3 +149,14 @@ HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED
 Get/set various vp state. Currently these can be used to get and set
 emulated LAPIC state, and xsave data.

+3.10 mmap(vp)
+-------------
+:Type: vp mmap
+:Parameters: offset should be HV_VP_MMAP_REGISTERS_OFFSET
+:Returns: 0 on success
+
+Maps a page into userspace that can be used to get and set common registers
+while the vp is suspended.
+The page is laid out in struct hv_vp_register_page in asm/hyperv-tlfs.h.
+
I'm assuming there's no support for the corresponding munmap().
What happens if munmap is called?  Does it just fail and the page remains
mapped?
munmap() will successfully unmap the page from userspace.
The physical state page remains mapped in the hypervisor, tracked in mshv in vp->register_page.
This is re-used on subsequent mmap()s.
quoted
+
diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h b/arch/x86/include/uapi/asm/hyperv-
tlfs.h
index 78758aedf23e..a241178567ff 100644
--- a/arch/x86/include/uapi/asm/hyperv-tlfs.h
+++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h
@@ -1110,4 +1110,78 @@ struct hv_vp_state_data_xsave {
 	union hv_x64_xsave_xfem_register states;
 };

+/* Bits for dirty mask of hv_vp_register_page */
+#define HV_X64_REGISTER_CLASS_GENERAL	0
+#define HV_X64_REGISTER_CLASS_IP	1
+#define HV_X64_REGISTER_CLASS_XMM	2
+#define HV_X64_REGISTER_CLASS_SEGMENT	3
+#define HV_X64_REGISTER_CLASS_FLAGS	4
+
+#define HV_VP_REGISTER_PAGE_VERSION_1	1u
+
+struct hv_vp_register_page {
+	__u16 version;
+	bool isvalid;
Like enum, avoid type "bool" in data structures shared with
Hyper-V.
Indeed - this should be u8. I will change it.
quoted
+	__u8 rsvdz;
+	__u32 dirty;
+	union {
+		struct {
+			__u64 rax;
+			__u64 rcx;
+			__u64 rdx;
+			__u64 rbx;
+			__u64 rsp;
+			__u64 rbp;
+			__u64 rsi;
+			__u64 rdi;
+			__u64 r8;
+			__u64 r9;
+			__u64 r10;
+			__u64 r11;
+			__u64 r12;
+			__u64 r13;
+			__u64 r14;
+			__u64 r15;
+		};
+
+		__u64 gp_registers[16];
+	};
+	__u64 rip;
+	__u64 rflags;
+	union {
+		struct {
+			struct hv_u128 xmm0;
+			struct hv_u128 xmm1;
+			struct hv_u128 xmm2;
+			struct hv_u128 xmm3;
+			struct hv_u128 xmm4;
+			struct hv_u128 xmm5;
+		};
+
+		struct hv_u128 xmm_registers[6];
+	};
+	union {
+		struct {
+			struct hv_x64_segment_register es;
+			struct hv_x64_segment_register cs;
+			struct hv_x64_segment_register ss;
+			struct hv_x64_segment_register ds;
+			struct hv_x64_segment_register fs;
+			struct hv_x64_segment_register gs;
+		};
+
+		struct hv_x64_segment_register segment_registers[6];
+	};
+	/* read only */
+	__u64 cr0;
+	__u64 cr3;
+	__u64 cr4;
+	__u64 cr8;
+	__u64 efer;
+	__u64 dr7;
+	union hv_x64_pending_interruption_register pending_interruption;
+	union hv_x64_interrupt_state_register interrupt_state;
+	__u64 instruction_emulation_hints;
+};
+
 #endif
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 4bc59a0344ce..9eed4b869110 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -837,4 +837,14 @@ struct hv_set_vp_state_in {
 	union hv_input_set_vp_state_data data[];
 };

+struct hv_map_vp_state_page_in {
+	u64 partition_id;
+	u32 vp_index;
+	enum hv_vp_state_page_type type;
+};
+
+struct hv_map_vp_state_page_out {
+	u64 map_location; /* page number */
+};
+
 #endif
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 3933d80294f1..33f4d0cfee11 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -20,6 +20,7 @@ struct mshv_vp {
 	u32 index;
 	struct mshv_partition *partition;
 	struct mutex mutex;
+	struct page *register_page;
 	struct {
 		struct semaphore sem;
 		struct task_struct *task;
diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-
tlfs.h
index b3c84c69b73f..a747f39b132a 100644
--- a/include/uapi/asm-generic/hyperv-tlfs.h
+++ b/include/uapi/asm-generic/hyperv-tlfs.h
@@ -92,4 +92,9 @@ enum hv_get_set_vp_state_type {
 	HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS	= 4,
 };

+enum hv_vp_state_page_type {
+	HV_VP_STATE_PAGE_REGISTERS = 0,
+	HV_VP_STATE_PAGE_COUNT
+};
+
 #endif
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index ae0bb64bbec3..8537ff29aee5 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -13,6 +13,8 @@

 #define MSHV_VERSION	0x0

+#define MSHV_VP_MMAP_REGISTERS_OFFSET (HV_VP_STATE_PAGE_REGISTERS * 0x1000)
+
 struct mshv_create_partition {
 	__u64 flags;
 	struct hv_partition_creation_properties partition_creation_properties;
@@ -84,4 +86,14 @@ struct mshv_vp_state {
 #define MSHV_GET_VP_STATE	_IOWR(MSHV_IOCTL, 0x0A, struct mshv_vp_state)
 #define MSHV_SET_VP_STATE	_IOWR(MSHV_IOCTL, 0x0B, struct mshv_vp_state)

+/* register page mapping example:
+ * struct hv_vp_register_page *regs = mmap(NULL,
+ *					   4096,
+ *					   PROT_READ | PROT_WRITE,
+ *					   MAP_SHARED,
+ *					   vp_fd,
+ *					   HV_VP_MMAP_REGISTERS_OFFSET);
+ * munmap(regs, 4096);
+ */
+
 #endif
diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
index 70172d9488de..a597254fa4f4 100644
--- a/virt/mshv/mshv_main.c
+++ b/virt/mshv/mshv_main.c
@@ -43,11 +43,18 @@ static long mshv_partition_ioctl(struct file *filp, unsigned int ioctl,
unsigned
 static int mshv_dev_open(struct inode *inode, struct file *filp);
 static int mshv_dev_release(struct inode *inode, struct file *filp);
 static long mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
+static int mshv_vp_mmap(struct file *file, struct vm_area_struct *vma);
+static vm_fault_t mshv_vp_fault(struct vm_fault *vmf);
+
+static const struct vm_operations_struct mshv_vp_vm_ops = {
+	.fault = mshv_vp_fault,
+};

 static const struct file_operations mshv_vp_fops = {
 	.release = mshv_vp_release,
 	.unlocked_ioctl = mshv_vp_ioctl,
 	.llseek = noop_llseek,
+	.mmap = mshv_vp_mmap,
 };

 static const struct file_operations mshv_partition_fops = {
@@ -499,6 +506,47 @@ hv_call_set_vp_registers(u32 vp_index,
 	return -hv_status_to_errno(status);
 }

+static int
+hv_call_map_vp_state_page(u32 vp_index, u64 partition_id,
+			  struct page **state_page)
+{
+	struct hv_map_vp_state_page_in *input;
+	struct hv_map_vp_state_page_out *output;
+	int status;
+	int ret;
+	unsigned long flags;
+
+	do {
+		local_irq_save(flags);
+		input = (struct hv_map_vp_state_page_in *)(*this_cpu_ptr(
+			hyperv_pcpu_input_arg));
+		output = (struct hv_map_vp_state_page_out *)(*this_cpu_ptr(
+			hyperv_pcpu_output_arg));
+
+		input->partition_id = partition_id;
+		input->vp_index = vp_index;
+		input->type = HV_VP_STATE_PAGE_REGISTERS;
+		status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE,
+						   input, output);
+
+		if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
+			if (status == HV_STATUS_SUCCESS)
+				*state_page = pfn_to_page(output->map_location);
+			else
+				pr_err("%s: %s\n", __func__,
+				       hv_status_to_string(status));
+			local_irq_restore(flags);
+			ret = -hv_status_to_errno(status);
+			break;
+		}
+		local_irq_restore(flags);
+
+		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
+	} while (!ret);
+
+	return ret;
+}
+
 static void
 mshv_isr(void)
 {
@@ -1155,6 +1203,40 @@ mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long
arg)
 	return r;
 }

+static vm_fault_t mshv_vp_fault(struct vm_fault *vmf)
+{
+	struct mshv_vp *vp = vmf->vma->vm_file->private_data;
+
+	vmf->page = vp->register_page;
+
+	return 0;
+}
+
+static int mshv_vp_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int ret;
+	struct mshv_vp *vp = file->private_data;
+
+	if (vma->vm_pgoff != MSHV_VP_MMAP_REGISTERS_OFFSET)
+		return -EINVAL;
+
+	if (mutex_lock_killable(&vp->mutex))
+		return -EINTR;
+
+	if (!vp->register_page) {
+		ret = hv_call_map_vp_state_page(vp->index,
+						vp->partition->id,
+						&vp->register_page);
+		if (ret)
+			return ret;
+	}
+
+	mutex_unlock(&vp->mutex);
+
+	vma->vm_ops = &mshv_vp_vm_ops;
+	return 0;
+}
+
 static int
 mshv_vp_release(struct inode *inode, struct file *filp)
 {
--
2.25.1
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help