[PATCH v3 15/19] drivers/hv: get and set vp state ioctls
From: Nuno Das Neves <hidden>
Date: 2021-09-28 18:32:14
Also in:
lkml
Subsystem:
documentation, generic include/asm header files, hyper-v/azure core and drivers, the rest, x86 architecture (32-bit and 64-bit) · Maintainers:
Jonathan Corbet, Arnd Bergmann, "K. Y. Srinivasan", Haiyang Zhang, Wei Liu, Dexuan Cui, Long Li, Linus Torvalds, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen
Introduce ioctls for getting and setting guest vcpu emulated LAPIC state, and xsave data. Signed-off-by: Nuno Das Neves <redacted> --- Documentation/virt/mshv/api.rst | 8 ++ arch/x86/include/uapi/asm/hyperv-tlfs.h | 59 ++++++++++ drivers/hv/hv_call.c | 138 +++++++++++++++++++++++- drivers/hv/mshv.h | 25 +++++ drivers/hv/mshv_main.c | 122 +++++++++++++++++++++ include/asm-generic/hyperv-tlfs.h | 40 +++++++ include/uapi/asm-generic/hyperv-tlfs.h | 28 +++++ include/uapi/linux/mshv.h | 13 +++ 8 files changed, 432 insertions(+), 1 deletion(-)
diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
index 76f98485cd93..1613ac6e9428 100644
--- a/Documentation/virt/mshv/api.rst
+++ b/Documentation/virt/mshv/api.rst@@ -140,4 +140,12 @@ Assert interrupts in partitions that use Microsoft Hypervisor's internal emulated LAPIC. This must be enabled on partition creation with the flag: HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED +3.9 MSHV_GET_VP_STATE and MSHV_SET_VP_STATE +-------------------------- +:Type: vp ioctl +:Parameters: struct mshv_vp_state +:Returns: 0 on success + +Get/set various vp state. Currently these can be used to get and set +emulated LAPIC state, and xsave data.
diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h b/arch/x86/include/uapi/asm/hyperv-tlfs.h
index e234297521a3..46806227e869 100644
--- a/arch/x86/include/uapi/asm/hyperv-tlfs.h
+++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h@@ -1013,4 +1013,63 @@ union hv_interrupt_control { __u64 as_uint64; }; +struct hv_local_interrupt_controller_state { + __u32 apic_id; + __u32 apic_version; + __u32 apic_ldr; + __u32 apic_dfr; + __u32 apic_spurious; + __u32 apic_isr[8]; + __u32 apic_tmr[8]; + __u32 apic_irr[8]; + __u32 apic_esr; + __u32 apic_icr_high; + __u32 apic_icr_low; + __u32 apic_lvt_timer; + __u32 apic_lvt_thermal; + __u32 apic_lvt_perfmon; + __u32 apic_lvt_lint0; + __u32 apic_lvt_lint1; + __u32 apic_lvt_error; + __u32 apic_lvt_cmci; + __u32 apic_error_status; + __u32 apic_initial_count; + __u32 apic_counter_value; + __u32 apic_divide_configuration; + __u32 apic_remote_read; +} __packed; + +#define HV_XSAVE_DATA_NO_XMM_REGISTERS 1 + +union hv_x64_xsave_xfem_register { + __u64 as_uint64; + struct { + __u32 low_uint32; + __u32 high_uint32; + } __packed; + struct { + __u64 legacy_x87: 1; + __u64 legacy_sse: 1; + __u64 avx: 1; + __u64 mpx_bndreg: 1; + __u64 mpx_bndcsr: 1; + __u64 avx_512_op_mask: 1; + __u64 avx_512_zmmhi: 1; + __u64 avx_512_zmm16_31: 1; + __u64 rsvd8_9: 2; + __u64 pasid: 1; + __u64 cet_u: 1; + __u64 cet_s: 1; + __u64 rsvd13_16: 4; + __u64 xtile_cfg: 1; + __u64 xtile_data: 1; + __u64 rsvd19_63: 45; + } __packed; +}; + +struct hv_vp_state_data_xsave { + __u64 flags; + union hv_x64_xsave_xfem_register states; +} __packed; + #endif
diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
index 72e93d13d8ee..c358a2b51ba1 100644
--- a/drivers/hv/hv_call.c
+++ b/drivers/hv/hv_call.c@@ -426,7 +426,6 @@ int hv_call_install_intercept( } ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); - } while (!ret); return ret;
@@ -461,3 +460,140 @@ int hv_call_assert_virtual_interrupt( return 0; } +int hv_call_get_vp_state( + u32 vp_index, + u64 partition_id, + enum hv_get_set_vp_state_type type, + struct hv_vp_state_data_xsave xsave, + /* Choose between pages and ret_output */ + u64 page_count, + struct page **pages, + union hv_get_vp_state_out *ret_output) +{ + struct hv_get_vp_state_in *input; + union hv_get_vp_state_out *output; + u64 status; + int i; + u64 control; + unsigned long flags; + int ret = 0; + + if (page_count > HV_GET_VP_STATE_BATCH_SIZE) + return -EINVAL; + + if (!page_count && !ret_output) + return -EINVAL; + + do { + local_irq_save(flags); + input = (struct hv_get_vp_state_in *) + (*this_cpu_ptr(hyperv_pcpu_input_arg)); + output = (union hv_get_vp_state_out *) + (*this_cpu_ptr(hyperv_pcpu_output_arg)); + memset(input, 0, sizeof(*input)); + memset(output, 0, sizeof(*output)); + + input->partition_id = partition_id; + input->vp_index = vp_index; + input->state_data.type = type; + memcpy(&input->state_data.xsave, &xsave, sizeof(xsave)); + for (i = 0; i < page_count; i++) + input->output_data_pfns[i] = page_to_pfn(pages[i]); + + control = (HVCALL_GET_VP_STATE) | + (page_count << HV_HYPERCALL_VARHEAD_OFFSET); + + status = hv_do_hypercall(control, input, output); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) + pr_err("%s: %s\n", __func__, + hv_status_to_string(status)); + else if (ret_output) + memcpy(ret_output, output, sizeof(*output)); + + local_irq_restore(flags); + ret = hv_status_to_errno(status); + break; + } + local_irq_restore(flags); + + ret = hv_call_deposit_pages(NUMA_NO_NODE, + partition_id, 1); + } while (!ret); + + return ret; +} + +int hv_call_set_vp_state( + u32 vp_index, + u64 partition_id, + enum hv_get_set_vp_state_type type, + struct hv_vp_state_data_xsave xsave, + /* Choose between pages and bytes */ + u64 page_count, + struct page **pages, + u32 num_bytes, + u8 *bytes) +{ + struct hv_set_vp_state_in *input; + u64 status; + int i; + u64 control; + unsigned long flags; + int ret = 0; + u16 varhead_sz; + + if (page_count > HV_SET_VP_STATE_BATCH_SIZE) + return -EINVAL; + if (sizeof(*input) + num_bytes > HV_HYP_PAGE_SIZE) + return -EINVAL; + + if (num_bytes) + /* round up to 8 and divide by 8 */ + varhead_sz = (num_bytes + 7) >> 3; + else if (page_count) + varhead_sz = page_count; + else + return -EINVAL; + + do { + local_irq_save(flags); + input = (struct hv_set_vp_state_in *) + (*this_cpu_ptr(hyperv_pcpu_input_arg)); + memset(input, 0, sizeof(*input)); + + input->partition_id = partition_id; + input->vp_index = vp_index; + input->state_data.type = type; + memcpy(&input->state_data.xsave, &xsave, sizeof(xsave)); + if (num_bytes) { + memcpy((u8 *)input->data, bytes, num_bytes); + } else { + for (i = 0; i < page_count; i++) + input->data[i].pfns = page_to_pfn(pages[i]); + } + + control = (HVCALL_SET_VP_STATE) | + (varhead_sz << HV_HYPERCALL_VARHEAD_OFFSET); + + status = hv_do_hypercall(control, input, NULL); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) + pr_err("%s: %s\n", __func__, + hv_status_to_string(status)); + + local_irq_restore(flags); + ret = hv_status_to_errno(status); + break; + } + local_irq_restore(flags); + + ret = hv_call_deposit_pages(NUMA_NO_NODE, + partition_id, 1); + } while (!ret); + + return ret; +} +
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index c0a0ccb3626a..c8f3919a5cdc 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h@@ -26,6 +26,12 @@ #define HV_SET_REGISTER_BATCH_SIZE \ ((HV_HYP_PAGE_SIZE - sizeof(struct hv_set_vp_registers)) \ / sizeof(struct hv_register_assoc)) +#define HV_GET_VP_STATE_BATCH_SIZE \ + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_get_vp_state_in)) \ + / sizeof(u64)) +#define HV_SET_VP_STATE_BATCH_SIZE \ + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_set_vp_state_in)) \ + / sizeof(u64)) extern struct mshv mshv;
@@ -72,5 +78,24 @@ int hv_call_assert_virtual_interrupt( u32 vector, u64 dest_addr, union hv_interrupt_control control); +int hv_call_get_vp_state( + u32 vp_index, + u64 partition_id, + enum hv_get_set_vp_state_type type, + struct hv_vp_state_data_xsave xsave, + /* Choose between pages and ret_output */ + u64 page_count, + struct page **pages, + union hv_get_vp_state_out *ret_output); +int hv_call_set_vp_state( + u32 vp_index, + u64 partition_id, + enum hv_get_set_vp_state_type type, + struct hv_vp_state_data_xsave xsave, + /* Choose between pages and bytes */ + u64 page_count, + struct page **pages, + u32 num_bytes, + u8 *bytes); #endif /* _MSHV_H */
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index ee41b59cc922..cef77f53d7c7 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c@@ -288,6 +288,122 @@ mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user *user_args) return ret; } +static long +mshv_vp_ioctl_get_set_state_pfn(struct mshv_vp *vp, + struct mshv_vp_state *args, + bool is_set) +{ + u64 page_count, remaining; + int completed; + struct page **pages; + long ret; + unsigned long u_buf; + + /* Buffer must be page aligned */ + if (!PAGE_ALIGNED(args->buf_size) || + !PAGE_ALIGNED(args->buf.bytes)) + return -EINVAL; + + if (!access_ok(args->buf.bytes, args->buf_size)) + return -EFAULT; + + /* Pin user pages so hypervisor can copy directly to them */ + page_count = args->buf_size >> HV_HYP_PAGE_SHIFT; + pages = kcalloc(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + remaining = page_count; + u_buf = (unsigned long)args->buf.bytes; + while (remaining) { + completed = pin_user_pages_fast( + u_buf, + remaining, + FOLL_WRITE, + &pages[page_count - remaining]); + if (completed < 0) { + pr_err("%s: failed to pin user pages error %i\n", + __func__, completed); + ret = completed; + goto unpin_pages; + } + remaining -= completed; + u_buf += completed * HV_HYP_PAGE_SIZE; + } + + if (is_set) + ret = hv_call_set_vp_state(vp->index, + vp->partition->id, + args->type, args->xsave, + page_count, pages, + 0, NULL); + else + ret = hv_call_get_vp_state(vp->index, + vp->partition->id, + args->type, args->xsave, + page_count, pages, + NULL); + +unpin_pages: + unpin_user_pages(pages, page_count - remaining); + kfree(pages); + return ret; +} + +static long +mshv_vp_ioctl_get_set_state(struct mshv_vp *vp, void __user *user_args, bool is_set) +{ + struct mshv_vp_state args; + long ret = 0; + union hv_get_vp_state_out vp_state; + + if (copy_from_user(&args, user_args, sizeof(args))) + return -EFAULT; + + /* For now just support these */ + if (args.type != HV_GET_SET_VP_STATE_LOCAL_INTERRUPT_CONTROLLER_STATE && + args.type != HV_GET_SET_VP_STATE_XSAVE) + return -EINVAL; + + /* If we need to pin pfns, delegate to helper */ + if (args.type & HV_GET_SET_VP_STATE_TYPE_PFN) + return mshv_vp_ioctl_get_set_state_pfn(vp, &args, is_set); + + if (args.buf_size < sizeof(vp_state)) + return -EINVAL; + + if (is_set) { + if (copy_from_user( + &vp_state, + args.buf.lapic, + sizeof(vp_state))) + return -EFAULT; + + return hv_call_set_vp_state(vp->index, + vp->partition->id, + args.type, args.xsave, + 0, NULL, + sizeof(vp_state), + (u8 *)&vp_state); + } + + ret = hv_call_get_vp_state(vp->index, + vp->partition->id, + args.type, args.xsave, + 0, NULL, + &vp_state); + + if (ret) + return ret; + + if (copy_to_user(args.buf.lapic, + &vp_state.interrupt_controller_state, + sizeof(vp_state.interrupt_controller_state))) + return -EFAULT; + + return 0; +} + static long mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) {
@@ -307,6 +423,12 @@ mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_SET_VP_REGISTERS: r = mshv_vp_ioctl_set_regs(vp, (void __user *)arg); break; + case MSHV_GET_VP_STATE: + r = mshv_vp_ioctl_get_set_state(vp, (void __user *)arg, false); + break; + case MSHV_SET_VP_STATE: + r = mshv_vp_ioctl_get_set_state(vp, (void __user *)arg, true); + break; default: r = -ENOTTY; break;
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index d1cc5dbc78b5..55a957436813 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h@@ -168,6 +168,9 @@ struct ms_hyperv_tsc_page { #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +#define HVCALL_MAP_VP_STATE_PAGE 0x00e1 +#define HVCALL_GET_VP_STATE 0x00e3 +#define HVCALL_SET_VP_STATE 0x00e4 /* Extended hypercalls */ #define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
@@ -832,4 +835,41 @@ struct hv_assert_virtual_interrupt { u16 rsvd_z1; } __packed; +struct hv_vp_state_data { + u32 type; + u32 rsvd; + struct hv_vp_state_data_xsave xsave; +} __packed; + +struct hv_get_vp_state_in { + u64 partition_id; + u32 vp_index; + u8 input_vtl; + u8 rsvd0; + u16 rsvd1; + struct hv_vp_state_data state_data; + u64 output_data_pfns[]; +} __packed; + +union hv_get_vp_state_out { + struct hv_local_interrupt_controller_state interrupt_controller_state; + /* Not supported yet */ + /* struct hv_synthetic_timers_state synthetic_timers_state; */ +} __packed; + +union hv_input_set_vp_state_data { + u64 pfns; + u8 bytes; +} __packed; + +struct hv_set_vp_state_in { + u64 partition_id; + u32 vp_index; + u8 input_vtl; + u8 rsvd0; + u16 rsvd1; + struct hv_vp_state_data state_data; + union hv_input_set_vp_state_data data[]; +} __packed; + #endif
diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
index 4ecb29fe1a0e..f4d8e9d148c3 100644
--- a/include/uapi/asm-generic/hyperv-tlfs.h
+++ b/include/uapi/asm-generic/hyperv-tlfs.h@@ -103,4 +103,32 @@ struct hv_register_assoc { union hv_register_value value; } __packed; +/* + * For getting and setting VP state, there are two options based on the state type: + * + * 1.) Data that is accessed by PFNs in the input hypercall page. This is used + * for state which may not fit into the hypercall pages. + * 2.) Data that is accessed directly in the input\output hypercall pages. + * This is used for state that will always fit into the hypercall pages. + * + * In the future this could be dynamic based on the size if needed. + * + * Note these hypercalls have an 8-byte aligned variable header size as per the tlfs + */ + +#define HV_GET_SET_VP_STATE_TYPE_PFN BIT(31) + +enum hv_get_set_vp_state_type { + HV_GET_SET_VP_STATE_LOCAL_INTERRUPT_CONTROLLER_STATE = 0, + + HV_GET_SET_VP_STATE_XSAVE = 1 | HV_GET_SET_VP_STATE_TYPE_PFN, + /* Synthetic message page */ + HV_GET_SET_VP_STATE_SIM_PAGE = 2 | HV_GET_SET_VP_STATE_TYPE_PFN, + /* Synthetic interrupt event flags page. */ + HV_GET_SET_VP_STATE_SIEF_PAGE = 3 | HV_GET_SET_VP_STATE_TYPE_PFN, + + /* Synthetic timers. */ + HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS = 4, +}; + #endif
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index f65248a1ee89..73c24478e87e 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h@@ -53,6 +53,17 @@ struct mshv_assert_interrupt { __u32 vector; }; +struct mshv_vp_state { + enum hv_get_set_vp_state_type type; + struct hv_vp_state_data_xsave xsave; /* only for xsave request */ + + __u64 buf_size; /* If xsave, must be page-aligned */ + union { + struct hv_local_interrupt_controller_state *lapic; + __u8 *bytes; /* Xsave data. must be page-aligned */ + } buf; +}; + #define MSHV_IOCTL 0xB8 /* mshv device */
@@ -70,5 +81,7 @@ struct mshv_assert_interrupt { #define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers) #define MSHV_SET_VP_REGISTERS _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers) #define MSHV_RUN_VP _IOR(MSHV_IOCTL, 0x07, struct hv_message) +#define MSHV_GET_VP_STATE _IOWR(MSHV_IOCTL, 0x0A, struct mshv_vp_state) +#define MSHV_SET_VP_STATE _IOWR(MSHV_IOCTL, 0x0B, struct mshv_vp_state) #endif
--
2.23.4