--- v5
+++ v4
@@ -1,239 +1,305 @@
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
-hyperv Isolation VM requires bounce buffer support to copy
-data from/to encrypted memory and so enable swiotlb force
-mode to use swiotlb bounce buffer for DMA transaction.
-
-In Isolation VM with AMD SEV, the bounce buffer needs to be
-accessed via extra address space which is above shared_gpa_boundary
-(e.g. 39 bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG.
-The access physical address will be original physical address +
-shared_gpa_boundary. The shared_gpa_boundary in the AMD SEV SNP
-spec is called virtual top of memory(vTOM). Memory addresses below
-vTOM are automatically treated as private while memory above
-vTOM is treated as shared.
-
-Hyper-V initializes the swiotlb bounce buffer, so the default
-swiotlb needs to be disabled. pci_swiotlb_detect_override() and
-pci_swiotlb_detect_4gb() enable the default one. To override
-the setting, hyperv_swiotlb_detect() needs to run before
-these detect functions, which depend on pci_xen_swiotlb_
-init(). Make pci_xen_swiotlb_init() depend on hyperv_swiotlb
-_detect() to keep the order.
-
-Swiotlb bounce buffer code calls set_memory_decrypted()
-to mark bounce buffer visible to host and map it in extra
-address space via memremap. Populate the shared_gpa_boundary
-(vTOM) via swiotlb_unencrypted_base variable.
-
-The map function memremap() does not work as early as
-hyperv_iommu_swiotlb_init() runs, so initialize the swiotlb bounce
-buffer in hyperv_iommu_swiotlb_later_init() instead.
+In Isolation VM, all memory shared with the host needs to be marked
+visible to the host via hvcall. vmbus_establish_gpadl() has already
+done this for the netvsc rx/tx ring buffers. The page buffers used by
+vmbus_sendpacket_pagebuffer() still need to be handled. Use the DMA
+API to map/unmap this memory when sending/receiving packets; the
+Hyper-V DMA ops callback will use swiotlb functions to allocate a
+bounce buffer and copy data from/to it.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
-Change since v4:
- * Use swiotlb_unencrypted_base variable to pass shared_gpa_
- boundary and map bounce buffer inside swiotlb code.
+ drivers/net/hyperv/hyperv_net.h | 6 ++
+ drivers/net/hyperv/netvsc.c | 144 +++++++++++++++++++++++++++++-
+ drivers/net/hyperv/rndis_filter.c | 2 +
+ include/linux/hyperv.h | 5 ++
+ 4 files changed, 154 insertions(+), 3 deletions(-)
-Change since v3:
- * Get hyperv bounce buffer size via default swiotlb
- bounce buffer size function and keep the default size
- the same as the one in the AMD SEV VM.
----
- arch/x86/include/asm/mshyperv.h | 2 ++
- arch/x86/mm/mem_encrypt.c | 3 +-
- arch/x86/xen/pci-swiotlb-xen.c | 3 +-
- drivers/hv/vmbus_drv.c | 3 ++
- drivers/iommu/hyperv-iommu.c | 60 +++++++++++++++++++++++++++++++++
- include/linux/hyperv.h | 1 +
- 6 files changed, 70 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
-index 165423e8b67a..2d22f29f90c9 100644
---- a/arch/x86/include/asm/mshyperv.h
-+++ b/arch/x86/include/asm/mshyperv.h
-@@ -182,6 +182,8 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
- struct hv_interrupt_entry *entry);
- int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
- int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
-+void *hv_map_memory(void *addr, unsigned long size);
-+void hv_unmap_memory(void *addr);
- void hv_ghcb_msr_write(u64 msr, u64 value);
- void hv_ghcb_msr_read(u64 msr, u64 *value);
- #else /* CONFIG_HYPERV */
-diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
-index ff08dc463634..e2db0b8ed938 100644
---- a/arch/x86/mm/mem_encrypt.c
-+++ b/arch/x86/mm/mem_encrypt.c
-@@ -30,6 +30,7 @@
- #include <asm/processor-flags.h>
- #include <asm/msr.h>
- #include <asm/cmdline.h>
-+#include <asm/mshyperv.h>
-
- #include "mm_internal.h"
-
-@@ -202,7 +203,7 @@ void __init sev_setup_arch(void)
- phys_addr_t total_mem = memblock_phys_mem_size();
- unsigned long size;
-
-- if (!sev_active())
-+ if (!sev_active() && !hv_is_isolation_supported())
- return;
-
- /*
-diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
-index 54f9aa7e8457..43bd031aa332 100644
---- a/arch/x86/xen/pci-swiotlb-xen.c
-+++ b/arch/x86/xen/pci-swiotlb-xen.c
-@@ -4,6 +4,7 @@
-
- #include <linux/dma-map-ops.h>
- #include <linux/pci.h>
-+#include <linux/hyperv.h>
- #include <xen/swiotlb-xen.h>
-
- #include <asm/xen/hypervisor.h>
-@@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
- EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
-
- IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-- NULL,
-+ hyperv_swiotlb_detect,
- pci_xen_swiotlb_init,
- NULL);
-diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
-index 392c1ac4f819..b0be287e9a32 100644
---- a/drivers/hv/vmbus_drv.c
-+++ b/drivers/hv/vmbus_drv.c
-@@ -23,6 +23,7 @@
- #include <linux/cpu.h>
- #include <linux/sched/task_stack.h>
-
-+#include <linux/dma-map-ops.h>
- #include <linux/delay.h>
- #include <linux/notifier.h>
- #include <linux/panic_notifier.h>
-@@ -2078,6 +2079,7 @@ struct hv_device *vmbus_device_create(const guid_t *type,
- return child_device_obj;
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index b11aa68b44ec..c2fbb9d4df2c 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
+ u32 total_bytes;
+ u32 send_buf_index;
+ u32 total_data_buflen;
++ struct hv_dma_range *dma_range;
+ };
+
+ #define NETVSC_HASH_KEYLEN 40
+@@ -1074,6 +1075,7 @@ struct netvsc_device {
+
+ /* Receive buffer allocated by us but manages by NetVSP */
+ void *recv_buf;
++ void *recv_original_buf;
+ u32 recv_buf_size; /* allocated bytes */
+ u32 recv_buf_gpadl_handle;
+ u32 recv_section_cnt;
+@@ -1082,6 +1084,8 @@ struct netvsc_device {
+
+ /* Send buffer allocated by us */
+ void *send_buf;
++ void *send_original_buf;
++ u32 send_buf_size;
+ u32 send_buf_gpadl_handle;
+ u32 send_section_cnt;
+ u32 send_section_size;
+@@ -1729,4 +1733,6 @@ struct rndis_message {
+ #define RETRY_US_HI 10000
+ #define RETRY_MAX 2000 /* >10 sec */
+
++void netvsc_dma_unmap(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet);
+ #endif /* _HYPERV_NET_H */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 7bd935412853..fc312e5db4d5 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
+ int i;
+
+ kfree(nvdev->extension);
+- vfree(nvdev->recv_buf);
+- vfree(nvdev->send_buf);
++
++ if (nvdev->recv_original_buf) {
++ vunmap(nvdev->recv_buf);
++ vfree(nvdev->recv_original_buf);
++ } else {
++ vfree(nvdev->recv_buf);
++ }
++
++ if (nvdev->send_original_buf) {
++ vunmap(nvdev->send_buf);
++ vfree(nvdev->send_original_buf);
++ } else {
++ vfree(nvdev->send_buf);
++ }
++
+ kfree(nvdev->send_section_map);
+
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+@@ -330,6 +343,27 @@ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
}
-+static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
- /*
- * vmbus_device_register - Register the child device
- */
-@@ -2118,6 +2120,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
++static void *netvsc_remap_buf(void *buf, unsigned long size)
++{
++ unsigned long *pfns;
++ void *vaddr;
++ int i;
++
++ pfns = kcalloc(size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
++ GFP_KERNEL);
++ if (!pfns)
++ return NULL;
++
++ for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
++ pfns[i] = virt_to_hvpfn(buf + i * HV_HYP_PAGE_SIZE)
++ + (ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
++
++ vaddr = vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
++ kfree(pfns);
++
++ return vaddr;
++}
++
+ static int netvsc_init_buf(struct hv_device *device,
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
+@@ -340,6 +374,7 @@ static int netvsc_init_buf(struct hv_device *device,
+ unsigned int buf_size;
+ size_t map_words;
+ int i, ret = 0;
++ void *vaddr;
+
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * device_info->recv_section_size;
+@@ -375,6 +410,15 @@ static int netvsc_init_buf(struct hv_device *device,
+ goto cleanup;
}
- hv_debug_add_dev_dir(child_device_obj);
-
-+ child_device_obj->device.dma_mask = &vmbus_dma_mask;
- return 0;
-
- err_kset_unregister:
-diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
-index e285a220c913..a8ac2239de0f 100644
---- a/drivers/iommu/hyperv-iommu.c
-+++ b/drivers/iommu/hyperv-iommu.c
-@@ -13,14 +13,22 @@
- #include <linux/irq.h>
- #include <linux/iommu.h>
- #include <linux/module.h>
-+#include <linux/hyperv.h>
-+#include <linux/io.h>
-
- #include <asm/apic.h>
- #include <asm/cpu.h>
- #include <asm/hw_irq.h>
- #include <asm/io_apic.h>
-+#include <asm/iommu.h>
-+#include <asm/iommu_table.h>
- #include <asm/irq_remapping.h>
- #include <asm/hypervisor.h>
- #include <asm/mshyperv.h>
-+#include <asm/swiotlb.h>
-+#include <linux/dma-map-ops.h>
-+#include <linux/dma-direct.h>
-+#include <linux/set_memory.h>
-
- #include "irq_remapping.h"
-
-@@ -36,6 +44,9 @@
- static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE };
- static struct irq_domain *ioapic_ir_domain;
-
-+static unsigned long hyperv_io_tlb_size;
-+static void *hyperv_io_tlb_start;
-+
- static int hyperv_ir_set_affinity(struct irq_data *data,
- const struct cpumask *mask, bool force)
- {
-@@ -337,4 +348,53 @@ static const struct irq_domain_ops hyperv_root_ir_domain_ops = {
- .free = hyperv_root_irq_remapping_free,
- };
-
-+static void __init hyperv_iommu_swiotlb_init(void)
+
++ if (hv_isolation_type_snp()) {
++ vaddr = netvsc_remap_buf(net_device->recv_buf, buf_size);
++ if (!vaddr)
++ goto cleanup;
++
++ net_device->recv_original_buf = net_device->recv_buf;
++ net_device->recv_buf = vaddr;
++ }
++
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+@@ -477,6 +521,15 @@ static int netvsc_init_buf(struct hv_device *device,
+ goto cleanup;
+ }
+
++ if (hv_isolation_type_snp()) {
++ vaddr = netvsc_remap_buf(net_device->send_buf, buf_size);
++ if (!vaddr)
++ goto cleanup;
++
++ net_device->send_original_buf = net_device->send_buf;
++ net_device->send_buf = vaddr;
++ }
++
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+@@ -767,7 +820,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+
+ /* Notify the layer above us */
+ if (likely(skb)) {
+- const struct hv_netvsc_packet *packet
++ struct hv_netvsc_packet *packet
+ = (struct hv_netvsc_packet *)skb->cb;
+ u32 send_index = packet->send_buf_index;
+ struct netvsc_stats *tx_stats;
+@@ -783,6 +836,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+ tx_stats->bytes += packet->total_bytes;
+ u64_stats_update_end(&tx_stats->syncp);
+
++ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
+ napi_consume_skb(skb, budget);
+ }
+
+@@ -947,6 +1001,82 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ memset(dest, 0, padding);
+ }
+
++void netvsc_dma_unmap(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet)
+{
-+ /*
-+ * Allocate Hyper-V swiotlb bounce buffer at early place
-+ * to reserve large contiguous memory.
-+ */
-+ hyperv_io_tlb_size = swiotlb_size_or_default();
-+ hyperv_io_tlb_start = memblock_alloc(
-+ hyperv_io_tlb_size, PAGE_SIZE);
-+
-+ if (!hyperv_io_tlb_start) {
-+ pr_warn("Fail to allocate Hyper-V swiotlb buffer.\n");
++ u32 page_count = packet->cp_partial ?
++ packet->page_buf_cnt - packet->rmsg_pgcnt :
++ packet->page_buf_cnt;
++ int i;
++
++ if (!hv_is_isolation_supported())
+ return;
-+ }
++
++ if (!packet->dma_range)
++ return;
++
++ for (i = 0; i < page_count; i++)
++ dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
++ packet->dma_range[i].mapping_size,
++ DMA_TO_DEVICE);
++
++ kfree(packet->dma_range);
+}
+
-+int __init hyperv_swiotlb_detect(void)
++/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
++ * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
++ * VM.
++ *
++ * In isolation VM, netvsc send buffer has been marked visible to
++ * host and so the data copied to send buffer doesn't need to use
++ * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
++ * may not be copied to send buffer and so these pages need to be
++ * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do
++ * that. The pfns in struct hv_page_buffer need to be converted
++ * to the bounce buffer's pfns. A per-entry loop is needed for that,
++ * so dma_map_sg() is not used here.
++ */
++int netvsc_dma_map(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet,
++ struct hv_page_buffer *pb)
+{
-+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
-+ return 0;
++ u32 page_count = packet->cp_partial ?
++ packet->page_buf_cnt - packet->rmsg_pgcnt :
++ packet->page_buf_cnt;
++ dma_addr_t dma;
++ int i;
+
+ if (!hv_is_isolation_supported())
+ return 0;
+
-+ /*
-+ * Enable swiotlb force mode in Isolation VM to
-+ * use swiotlb bounce buffer for dma transaction.
-+ */
-+ swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
-+ swiotlb_force = SWIOTLB_FORCE;
-+ return 1;
++ packet->dma_range = kcalloc(page_count,
++ sizeof(*packet->dma_range),
++ GFP_KERNEL);
++ if (!packet->dma_range)
++ return -ENOMEM;
++
++ for (i = 0; i < page_count; i++) {
++ char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
++ + pb[i].offset);
++ u32 len = pb[i].len;
++
++ dma = dma_map_single(&hv_dev->device, src, len,
++ DMA_TO_DEVICE);
++ if (dma_mapping_error(&hv_dev->device, dma)) {
++ kfree(packet->dma_range);
++ return -ENOMEM;
++ }
++
++ packet->dma_range[i].dma = dma;
++ packet->dma_range[i].mapping_size = len;
++ pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
++ pb[i].offset = offset_in_hvpage(dma);
++ pb[i].len = len;
++ }
++
++ return 0;
+}
+
-+static void __init hyperv_iommu_swiotlb_later_init(void)
-+{
-+ /*
-+ * Swiotlb bounce buffer needs to be mapped in extra address
-+ * space. Map function doesn't work in the early place and so
-+ * call swiotlb_late_init_with_tbl() here.
-+ */
-+ if (swiotlb_late_init_with_tbl(hyperv_io_tlb_start,
-+ hyperv_io_tlb_size >> IO_TLB_SHIFT))
-+ panic("Fail to initialize hyperv swiotlb.\n");
-+}
-+
-+IOMMU_INIT_FINISH(hyperv_swiotlb_detect,
-+ NULL, hyperv_iommu_swiotlb_init,
-+ hyperv_iommu_swiotlb_later_init);
-+
- #endif
+ static inline int netvsc_send_pkt(
+ struct hv_device *device,
+ struct hv_netvsc_packet *packet,
+@@ -987,14 +1117,22 @@ static inline int netvsc_send_pkt(
+
+ trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
++ packet->dma_range = NULL;
+ if (packet->page_buf_cnt) {
+ if (packet->cp_partial)
+ pb += packet->rmsg_pgcnt;
+
++ ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
++ if (ret)
++ return ret;
++
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
++
++ if (ret)
++ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
+ } else {
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 983bf362466a..9425fee85aa0 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
+ }
+ }
+
++ netvsc_dma_unmap(((struct net_device_context *)
++ netdev_priv(ndev))->device_ctx, &request->pkt);
+ complete(&request->wait_event);
+ } else {
+ netdev_err(ndev,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
-index a9e0bc3b1511..bb1a1519b93a 100644
+index babbe19f57e2..90abff664495 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
-@@ -1739,6 +1739,7 @@ int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
- int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
- void (*block_invalidate)(void *context,
- u64 block_mask));
-+int __init hyperv_swiotlb_detect(void);
-
- struct hyperv_pci_block_ops {
- int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+@@ -1616,6 +1616,11 @@ struct hyperv_service_callback {
+ void (*callback)(void *context);
+ };
+
++struct hv_dma_range {
++ dma_addr_t dma;
++ u32 mapping_size;
++};
++
+ #define MAX_SRV_VER 0x7ffffff
+ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
+ const int *fw_version, int fw_vercnt,
--
2.25.1