Thread (53 messages) 53 messages, 4 authors, 2021-04-07

Re: [RFC PATCH 10/18] virt/mshv: get and set vcpu registers ioctls

From: Nuno Das Neves <hidden>
Date: 2021-03-09 01:40:13
Also in: lkml

On 2/8/2021 11:47 AM, Michael Kelley wrote:
From: Nuno Das Neves <redacted> Sent: Friday, November 20, 2020 4:30 PM
quoted
Add ioctls for getting and setting virtual processor registers.

Co-developed-by: Lillian Grassin-Drake <redacted>
Signed-off-by: Lillian Grassin-Drake <redacted>
Signed-off-by: Nuno Das Neves <redacted>
---
 Documentation/virt/mshv/api.rst         |  11 +
 arch/x86/include/uapi/asm/hyperv-tlfs.h | 601 ++++++++++++++++++++++++
 include/asm-generic/hyperv-tlfs.h       |  65 +--
 include/linux/mshv.h                    |   1 +
 include/uapi/linux/mshv.h               |  12 +
 virt/mshv/mshv_main.c                   | 258 +++++++++-
 6 files changed, 903 insertions(+), 45 deletions(-)
[snip]
quoted
+
+union hv_register_value {
+	struct hv_u128 reg128;
+	__u64 reg64;
+	__u32 reg32;
+	__u16 reg16;
+	__u8 reg8;
+	union hv_x64_fp_register fp;
+	union hv_x64_fp_control_status_register fp_control_status;
+	union hv_x64_xmm_control_status_register xmm_control_status;
+	struct hv_x64_segment_register segment;
+	struct hv_x64_table_register table;
+	union hv_explicit_suspend_register explicit_suspend;
+	union hv_intercept_suspend_register intercept_suspend;
+	union hv_dispatch_suspend_register dispatch_suspend;
+	union hv_x64_interrupt_state_register interrupt_state;
+	union hv_x64_pending_interruption_register pending_interruption;
+	union hv_x64_msr_npiep_config_contents npiep_config;
+	union hv_x64_pending_exception_event pending_exception_event;
+	union hv_x64_pending_virtualization_fault_event
+		pending_virtualization_fault_event;
+};
+
 #endif
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 6e5072e29897..b9295400c20b 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -622,53 +622,30 @@ struct hv_retarget_device_interrupt {
 } __packed __aligned(8);


-/* HvGetVpRegisters hypercall input with variable size reg name list*/
-struct hv_get_vp_registers_input {
-	struct {
-		u64 partitionid;
-		u32 vpindex;
-		u8  inputvtl;
-		u8  padding[3];
-	} header;
-	struct input {
-		u32 name0;
-		u32 name1;
-	} element[];
-} __packed;
-
+/* HvGetVpRegisters hypercall with variable size reg name list*/
+struct hv_get_vp_registers {
+	u64 partition_id;
+	u32 vp_index;
+	u8  input_vtl;
+	u8  rsvd_z8;
+	u16 rsvd_z16;
+	__aligned(8) enum hv_register_name names[];
+} __aligned(8);

-/* HvGetVpRegisters returns an array of these output elements */
-struct hv_get_vp_registers_output {
-	union {
-		struct {
-			u32 a;
-			u32 b;
-			u32 c;
-			u32 d;
-		} as32 __packed;
-		struct {
-			u64 low;
-			u64 high;
-		} as64 __packed;
-	};
+/* HvSetVpRegisters hypercall with variable size reg name/value list*/
+struct hv_register_assoc {
+	enum hv_register_name name;
+	__aligned(16) union hv_register_value value;
 };

-/* HvSetVpRegisters hypercall with variable size reg name/value list*/
-struct hv_set_vp_registers_input {
-	struct {
-		u64 partitionid;
-		u32 vpindex;
-		u8  inputvtl;
-		u8  padding[3];
-	} header;
-	struct {
-		u32 name;
-		u32 padding1;
-		u64 padding2;
-		u64 valuelow;
-		u64 valuehigh;
-	} element[];
-} __packed;
+struct hv_set_vp_registers {
+	u64 partition_id;
+	u32 vp_index;
+	u8  input_vtl;
+	u8  rsvd_z8;
+	u16 rsvd_z16;
+	struct hv_register_assoc elements[];
+} __aligned(16);
Throughout these structures, I think the approach needs to be more
explicit about the memory layout.  The current definitions assume that
the compiler is inserting padding in the expected places, and not in
any unexpected places.  My previous concerns about use of enum
also apply.

The code also removes some layouts that are used in the
not-yet-accepted patches for ARM64.   Let sync on how to get
those back in.
I'll add __packed to all these structures.
The hv_register_name enum can be replaced by #defines, and the type can be
u32 or u64 (it only needs 32 bits).

I'll sync with you on the ARM64 structs.
quoted
 enum hv_device_type {
 	HV_DEVICE_TYPE_LOGICAL = 0,
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 50521c5f7948..dfe469f573f9 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -17,6 +17,7 @@
 struct mshv_vp {
 	u32 index;
 	struct mshv_partition *partition;
+	struct mutex mutex;
 };

 struct mshv_mem_region {
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 1f053eae68a6..5d53ed655429 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -33,6 +33,14 @@ struct mshv_create_vp {
 	__u32 vp_index;
 };

+#define MSHV_VP_MAX_REGISTERS	128
+
+struct mshv_vp_registers {
+	int count; /* at most MSHV_VP_MAX_REGISTERS */
+	enum hv_register_name *names;
+	union hv_register_value *values;
+};
Having separate arrays for the names and values results in an extra
copy of the data down in the ioctl code.  Any reason the caller couldn't
supply the data as an array, where each entry is already a name/value
pair?
I initially thought it would not make a difference to the number of copies,
but it turns out it does. I will change it to use hv_register_assoc everywhere.
quoted
+
 #define MSHV_IOCTL 0xB8

 /* mshv device */
@@ -44,4 +52,8 @@ struct mshv_create_vp {
 #define MSHV_UNMAP_GUEST_MEMORY	_IOW(MSHV_IOCTL, 0x03, struct
mshv_user_mem_region)
 #define MSHV_CREATE_VP		_IOW(MSHV_IOCTL, 0x04, struct mshv_create_vp)

+/* vp device */
+#define MSHV_GET_VP_REGISTERS   _IOWR(MSHV_IOCTL, 0x05, struct
mshv_vp_registers)
+#define MSHV_SET_VP_REGISTERS   _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers)
+
 #endif
diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
index 3be9d9a468c1..2a10137a1e84 100644
--- a/virt/mshv/mshv_main.c
+++ b/virt/mshv/mshv_main.c
@@ -74,6 +74,12 @@ static struct miscdevice mshv_dev = {
 #define HV_MAP_GPA_BATCH_SIZE	\
 		(PAGE_SIZE / sizeof(struct hv_map_gpa_pages) / sizeof(u64))
 #define PIN_PAGES_BATCH_SIZE	(0x10000000 / PAGE_SIZE)
+#define HV_GET_REGISTER_BATCH_SIZE	\
+	(PAGE_SIZE / \
+	 sizeof(struct hv_get_vp_registers) / sizeof(enum hv_register_name))
+#define HV_SET_REGISTER_BATCH_SIZE	\
+	(PAGE_SIZE / \
+	 sizeof(struct hv_set_vp_registers) / sizeof(struct hv_register_assoc))
These new size calculations have the same bug as HV_MAP_GPA_BATCH_SIZE.
The first divide operations should be subtraction.
Yep, I'll fix it.
With the correct calculation, HV_GET_REGISTER_BATCH_SIZE  will be
too large.  The input page will accommodate more 32 bit register names
than the output page will accommodate 128 bit register values.  The limit
should be based on the latter, not the former.  Or calculate both the
input and output limit and use the minimum.
I didn't think about this previously! Will fix.
quoted
 static int
 hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
@@ -380,10 +386,258 @@ hv_call_unmap_gpa_pages(u64 partition_id,
 	return ret;
 }

+static int
+hv_call_get_vp_registers(u32 vp_index,
+			 u64 partition_id,
+			 u16 count,
+			 const enum hv_register_name *names,
+			 union hv_register_value *values)
+{
+	struct hv_get_vp_registers *input_page;
+	union hv_register_value *output_page;
+	u16 completed = 0;
+	u64 hypercall_status;
+	unsigned long remaining = count;
+	int rep_count;
+	int status;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	input_page = (struct hv_get_vp_registers *)(*this_cpu_ptr(
+		hyperv_pcpu_input_arg));
+	output_page = (union hv_register_value *)(*this_cpu_ptr(
+		hyperv_pcpu_output_arg));
+
+	input_page->partition_id = partition_id;
+	input_page->vp_index = vp_index;
+	input_page->input_vtl = 0;
+	input_page->rsvd_z8 = 0;
+	input_page->rsvd_z16 = 0;
+
+	while (remaining) {
+		rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);
+		memcpy(input_page->names, names,
+			sizeof(enum hv_register_name) * rep_count);
+
+		hypercall_status =
+			hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, rep_count,
+					    0, input_page, output_page);
+		status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
+		if (status != HV_STATUS_SUCCESS) {
+			pr_err("%s: completed %li out of %u, %s\n",
+			       __func__,
+			       count - remaining, count,
+			       hv_status_to_string(status));
+			break;
+		}
+		completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
+			    HV_HYPERCALL_REP_COMP_OFFSET;
+		memcpy(values, output_page,
+			sizeof(union hv_register_value) * completed);
+
+		names += completed;
+		values += completed;
+		remaining -= completed;
+	}
+	local_irq_restore(flags);
+
+	return -hv_status_to_errno(status);
+}
+
+static int
+hv_call_set_vp_registers(u32 vp_index,
+			 u64 partition_id,
+			 u16 count,
+			 struct hv_register_assoc *registers)
+{
+	struct hv_set_vp_registers *input_page;
+	u16 completed = 0;
+	u64 hypercall_status;
+	unsigned long remaining = count;
+	int rep_count;
+	int status;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	input_page = (struct hv_set_vp_registers *)(*this_cpu_ptr(
+		hyperv_pcpu_input_arg));
+
+	input_page->partition_id = partition_id;
+	input_page->vp_index = vp_index;
+	input_page->input_vtl = 0;
+	input_page->rsvd_z8 = 0;
+	input_page->rsvd_z16 = 0;
+
+	while (remaining) {
+		rep_count = min(remaining, HV_SET_REGISTER_BATCH_SIZE);
+		memcpy(input_page->elements, registers,
+			sizeof(struct hv_register_assoc) * rep_count);
+
+		hypercall_status =
+			hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, rep_count,
+					    0, input_page, NULL);
+		status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
+		if (status != HV_STATUS_SUCCESS) {
+			pr_err("%s: completed %li out of %u, %s\n",
+			       __func__,
+			       count - remaining, count,
+			       hv_status_to_string(status));
+			break;
+		}
+		completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
+			    HV_HYPERCALL_REP_COMP_OFFSET;
+		registers += completed;
+		remaining -= completed;
+	}
+
+	local_irq_restore(flags);
+
+	return -hv_status_to_errno(status);
+}
+
+static long
+mshv_vp_ioctl_get_regs(struct mshv_vp *vp, void __user *user_args)
+{
+	struct mshv_vp_registers args;
+	enum hv_register_name *names;
+	union hv_register_value *values;
+	long ret;
+
+	if (copy_from_user(&args, user_args, sizeof(args)))
+		return -EFAULT;
+
+	if (args.count > MSHV_VP_MAX_REGISTERS)
+		return -EINVAL;
+
+	names = kmalloc_array(args.count,
+			      sizeof(enum hv_register_name),
+			      GFP_KERNEL);
+	if (!names)
+		return -ENOMEM;
+
+	values = kmalloc_array(args.count,
+			       sizeof(union hv_register_value),
+			       GFP_KERNEL);
+	if (!values) {
+		kfree(names);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(names, args.names,
+			   sizeof(enum hv_register_name) * args.count)) {
+		ret = -EFAULT;
+		goto free_return;
+	}
+
+	ret = hv_call_get_vp_registers(vp->index, vp->partition->id,
+				       args.count, names, values);
+	if (ret)
+		goto free_return;
+
+	if (copy_to_user(args.values, values,
+			 sizeof(union hv_register_value) * args.count)) {
+		ret = -EFAULT;
+	}
+
+free_return:
+	kfree(names);
+	kfree(values);
+	return ret;
+}
+
+static long
+mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user *user_args)
+{
+	int i;
+	struct mshv_vp_registers args;
+	struct hv_register_assoc *registers;
+	enum hv_register_name *names;
+	union hv_register_value *values;
+	long ret;
+
+	if (copy_from_user(&args, user_args, sizeof(args)))
+		return -EFAULT;
+
+	if (args.count > MSHV_VP_MAX_REGISTERS)
+		return -EINVAL;
+
+	names = kmalloc_array(args.count,
+			      sizeof(enum hv_register_name),
+			      GFP_KERNEL);
+	if (!names)
+		return -ENOMEM;
+
+	values = kmalloc_array(args.count,
+			       sizeof(union hv_register_value),
+			       GFP_KERNEL);
+	if (!values) {
+		kfree(names);
+		return -ENOMEM;
+	}
+
+	registers = kmalloc_array(args.count,
+				  sizeof(struct hv_register_assoc),
+				  GFP_KERNEL);
+	if (!registers) {
+		kfree(values);
+		kfree(names);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(names, args.names,
+			   sizeof(enum hv_register_name) * args.count)) {
+		ret = -EFAULT;
+		goto free_return;
+	}
+
+	if (copy_from_user(values, args.values,
+			   sizeof(union hv_register_value) * args.count)) {
+		ret = -EFAULT;
+		goto free_return;
+	}
+
+	for (i = 0; i < args.count; i++) {
+		memcpy(&registers[i].name, &names[i],
+		       sizeof(enum hv_register_name));
+		memcpy(&registers[i].value, &values[i],
+		       sizeof(union hv_register_value));
+	}
The above will result in uninitialized memory being sent to
Hyper-V, since there is implicit padding associated with the
32 bit name field.
This shouldn't be an issue after I change this to use hv_register_assoc,
instead of separate names and values buffers.
quoted
+
+	ret = hv_call_set_vp_registers(vp->index, vp->partition->id,
+				       args.count, registers);
+
+free_return:
+	kfree(names);
+	kfree(values);
+	kfree(registers);
+	return ret;
+}
+
+
 static long
 mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
-	return -ENOTTY;
+	struct mshv_vp *vp = filp->private_data;
+	long r = 0;
+
+	if (mutex_lock_killable(&vp->mutex))
+		return -EINTR;
+
+	switch (ioctl) {
+	case MSHV_GET_VP_REGISTERS:
+		r = mshv_vp_ioctl_get_regs(vp, (void __user *)arg);
+		break;
+	case MSHV_SET_VP_REGISTERS:
+		r = mshv_vp_ioctl_set_regs(vp, (void __user *)arg);
+		break;
+	default:
+		r = -ENOTTY;
+		break;
+	}
+	mutex_unlock(&vp->mutex);
+
+	return r;
 }

 static int
@@ -420,6 +674,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
 	if (!vp)
 		return -ENOMEM;

+	mutex_init(&vp->mutex);
+
 	vp->index = args.vp_index;
 	vp->partition = mshv_partition_get(partition);
 	if (!vp->partition) {
--
2.25.1
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help