--- v2
+++ v3
@@ -1,188 +1,202 @@
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
-VMbus ring buffer are shared with host and it's need to
-be accessed via extra address space of Isolation VM with
-SNP support. This patch is to map the ring buffer
-address in extra address space via ioremap(). HV host
-visibility hvcall smears data in the ring buffer and
-so reset the ring buffer memory to zero after calling
-visibility hvcall.
+Hyper-V Isolation VMs require bounce buffer support to copy
+data from/to encrypted memory, so enable swiotlb force mode
+to use the swiotlb bounce buffer for DMA transactions.
+
+In an Isolation VM with AMD SEV, the bounce buffer needs to be
+accessed via an extra address space above shared_gpa_boundary
+(e.g. a 39-bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG.
+The physical address used for access is the original physical address +
+shared_gpa_boundary. The shared_gpa_boundary is called virtual top of
+memory (vTOM) in the AMD SEV-SNP spec. Memory addresses below
+vTOM are automatically treated as private, while memory above
+vTOM is treated as shared.
+
+ioremap_cache() can't be used in hyperv_iommu_swiotlb_init(), which
+runs too early, so remap the bounce buffer in
+hyperv_iommu_swiotlb_later_init() instead.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
- drivers/hv/Kconfig | 1 +
- drivers/hv/channel.c | 10 +++++
- drivers/hv/hyperv_vmbus.h | 2 +
- drivers/hv/ring_buffer.c | 84 ++++++++++++++++++++++++++++++---------
- 4 files changed, 79 insertions(+), 18 deletions(-)
-
-diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
-index 66c794d92391..a8386998be40 100644
---- a/drivers/hv/Kconfig
-+++ b/drivers/hv/Kconfig
-@@ -7,6 +7,7 @@ config HYPERV
- depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST
- select PARAVIRT
- select X86_HV_CALLBACK_VECTOR
-+ select VMAP_PFN
- help
- Select this option to run Linux as a Hyper-V client operating
- system.
-diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
-index 4c4717c26240..60ef881a700c 100644
---- a/drivers/hv/channel.c
-+++ b/drivers/hv/channel.c
-@@ -712,6 +712,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
- if (err)
- goto error_clean_ring;
-
-+ err = hv_ringbuffer_post_init(&newchannel->outbound,
-+ page, send_pages);
-+ if (err)
-+ goto error_free_gpadl;
-+
-+ err = hv_ringbuffer_post_init(&newchannel->inbound,
-+ &page[send_pages], recv_pages);
-+ if (err)
-+ goto error_free_gpadl;
-+
- /* Create and init the channel open message */
- open_info = kzalloc(sizeof(*open_info) +
- sizeof(struct vmbus_channel_open_channel),
-diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
-index 40bc0eff6665..15cd23a561f3 100644
---- a/drivers/hv/hyperv_vmbus.h
-+++ b/drivers/hv/hyperv_vmbus.h
-@@ -172,6 +172,8 @@ extern int hv_synic_cleanup(unsigned int cpu);
- /* Interface */
-
- void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
-+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
-+ struct page *pages, u32 page_cnt);
-
- int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- struct page *pages, u32 pagecnt, u32 max_pkt_size);
-diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
-index 2aee356840a2..d4f93fca1108 100644
---- a/drivers/hv/ring_buffer.c
-+++ b/drivers/hv/ring_buffer.c
-@@ -17,6 +17,8 @@
- #include <linux/vmalloc.h>
- #include <linux/slab.h>
- #include <linux/prefetch.h>
+ arch/x86/xen/pci-swiotlb-xen.c | 3 +-
+ drivers/hv/vmbus_drv.c | 3 ++
+ drivers/iommu/hyperv-iommu.c | 81 ++++++++++++++++++++++++++++++++++
+ include/linux/hyperv.h | 1 +
+ 4 files changed, 87 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
+index 54f9aa7e8457..43bd031aa332 100644
+--- a/arch/x86/xen/pci-swiotlb-xen.c
++++ b/arch/x86/xen/pci-swiotlb-xen.c
+@@ -4,6 +4,7 @@
+
+ #include <linux/dma-map-ops.h>
+ #include <linux/pci.h>
++#include <linux/hyperv.h>
+ #include <xen/swiotlb-xen.h>
+
+ #include <asm/xen/hypervisor.h>
+@@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
+ EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
+
+ IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
+- NULL,
++ hyperv_swiotlb_detect,
+ pci_xen_swiotlb_init,
+ NULL);
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 92cb3f7d21d9..5e3bb76d4dee 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -23,6 +23,7 @@
+ #include <linux/cpu.h>
+ #include <linux/sched/task_stack.h>
+
++#include <linux/dma-map-ops.h>
+ #include <linux/delay.h>
+ #include <linux/notifier.h>
+ #include <linux/ptrace.h>
+@@ -2080,6 +2081,7 @@ struct hv_device *vmbus_device_create(const guid_t *type,
+ return child_device_obj;
+ }
+
++static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
+ /*
+ * vmbus_device_register - Register the child device
+ */
+@@ -2120,6 +2122,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
+ }
+ hv_debug_add_dev_dir(child_device_obj);
+
++ child_device_obj->device.dma_mask = &vmbus_dma_mask;
+ return 0;
+
+ err_kset_unregister:
+diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
+index e285a220c913..2604619c6fa3 100644
+--- a/drivers/iommu/hyperv-iommu.c
++++ b/drivers/iommu/hyperv-iommu.c
+@@ -13,14 +13,22 @@
+ #include <linux/irq.h>
+ #include <linux/iommu.h>
+ #include <linux/module.h>
++#include <linux/hyperv.h>
+#include <linux/io.h>
-+#include <asm/mshyperv.h>
-
- #include "hyperv_vmbus.h"
-
-@@ -179,43 +181,89 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
- mutex_init(&channel->outbound.ring_buffer_mutex);
- }
-
--/* Initialize the ring buffer. */
--int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-- struct page *pages, u32 page_cnt, u32 max_pkt_size)
-+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
-+ struct page *pages, u32 page_cnt)
+
+ #include <asm/apic.h>
+ #include <asm/cpu.h>
+ #include <asm/hw_irq.h>
+ #include <asm/io_apic.h>
++#include <asm/iommu.h>
++#include <asm/iommu_table.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/hypervisor.h>
+ #include <asm/mshyperv.h>
++#include <asm/swiotlb.h>
++#include <linux/dma-map-ops.h>
++#include <linux/dma-direct.h>
++#include <linux/set_memory.h>
+
+ #include "irq_remapping.h"
+
+@@ -36,6 +44,8 @@
+ static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE };
+ static struct irq_domain *ioapic_ir_domain;
+
++static unsigned long hyperv_io_tlb_start, hyperv_io_tlb_size;
++
+ static int hyperv_ir_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
{
-+ u64 physic_addr = page_to_pfn(pages) << PAGE_SHIFT;
-+ unsigned long *pfns_wraparound;
-+ void *vaddr;
- int i;
-- struct page **pages_wraparound;
-
-- BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
-+ if (!hv_isolation_type_snp())
-+ return 0;
-+
-+ physic_addr += ms_hyperv.shared_gpa_boundary;
-
- /*
- * First page holds struct hv_ring_buffer, do wraparound mapping for
- * the rest.
- */
-- pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
-+ pfns_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(unsigned long),
- GFP_KERNEL);
-- if (!pages_wraparound)
-+ if (!pfns_wraparound)
- return -ENOMEM;
-
-- pages_wraparound[0] = pages;
-+ pfns_wraparound[0] = physic_addr >> PAGE_SHIFT;
- for (i = 0; i < 2 * (page_cnt - 1); i++)
-- pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
--
-- ring_info->ring_buffer = (struct hv_ring_buffer *)
-- vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
--
-- kfree(pages_wraparound);
-+ pfns_wraparound[i + 1] = (physic_addr >> PAGE_SHIFT) +
-+ i % (page_cnt - 1) + 1;
-
--
-- if (!ring_info->ring_buffer)
-+ vaddr = vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, PAGE_KERNEL_IO);
-+ kfree(pfns_wraparound);
-+ if (!vaddr)
- return -ENOMEM;
-
-- ring_info->ring_buffer->read_index =
-- ring_info->ring_buffer->write_index = 0;
-+ /* Clean memory after setting host visibility. */
-+ memset((void *)vaddr, 0x00, page_cnt * PAGE_SIZE);
-+
-+ ring_info->ring_buffer = (struct hv_ring_buffer *)vaddr;
-+ ring_info->ring_buffer->read_index = 0;
-+ ring_info->ring_buffer->write_index = 0;
-
- /* Set the feature bit for enabling flow control. */
- ring_info->ring_buffer->feature_bits.value = 1;
-
+@@ -337,4 +347,75 @@ static const struct irq_domain_ops hyperv_root_ir_domain_ops = {
+ .free = hyperv_root_irq_remapping_free,
+ };
+
++void __init hyperv_iommu_swiotlb_init(void)
++{
++ unsigned long bytes, io_tlb_nslabs;
++ void *vstart;
++
++ /* Allocate Hyper-V swiotlb */
++ bytes = 200 * 1024 * 1024;
++ vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
++ io_tlb_nslabs = bytes >> IO_TLB_SHIFT;
++ hyperv_io_tlb_size = bytes;
++
++ if (!vstart) {
++ pr_warn("Fail to allocate swiotlb.\n");
++ return;
++ }
++
++ hyperv_io_tlb_start = virt_to_phys(vstart);
++ if (!hyperv_io_tlb_start)
++ panic("%s: Failed to allocate %lu bytes align=0x%lx.\n",
++ __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
++
++ if (swiotlb_init_with_tbl(vstart, io_tlb_nslabs, 1))
++ panic("%s: Cannot allocate SWIOTLB buffer.\n", __func__);
++
++ swiotlb_set_max_segment(HV_HYP_PAGE_SIZE);
++}
++
++int __init hyperv_swiotlb_detect(void)
++{
++ if (hypervisor_is_type(X86_HYPER_MS_HYPERV)
++ && hv_is_isolation_supported()) {
++ /*
++ * Enable swiotlb force mode in Isolation VM to
++ * use swiotlb bounce buffer for dma transaction.
++ */
++ swiotlb_force = SWIOTLB_FORCE;
++ return 1;
++ }
++
+ return 0;
+}
+
-+/* Initialize the ring buffer. */
-+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-+ struct page *pages, u32 page_cnt, u32 max_pkt_size)
++void __init hyperv_iommu_swiotlb_later_init(void)
+{
-+ int i;
-+ struct page **pages_wraparound;
-+
-+ BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
-+
-+ if (!hv_isolation_type_snp()) {
-+ /*
-+ * First page holds struct hv_ring_buffer, do wraparound mapping for
-+ * the rest.
-+ */
-+ pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
-+ GFP_KERNEL);
-+ if (!pages_wraparound)
-+ return -ENOMEM;
-+
-+ pages_wraparound[0] = pages;
-+ for (i = 0; i < 2 * (page_cnt - 1); i++)
-+ pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
-+
-+ ring_info->ring_buffer = (struct hv_ring_buffer *)
-+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
-+
-+ kfree(pages_wraparound);
-+
-+ if (!ring_info->ring_buffer)
-+ return -ENOMEM;
-+
-+ ring_info->ring_buffer->read_index =
-+ ring_info->ring_buffer->write_index = 0;
-+
-+ /* Set the feature bit for enabling flow control. */
-+ ring_info->ring_buffer->feature_bits.value = 1;
++ void *hyperv_io_tlb_remap;
++ int ret;
++
++ /* Mask bounce buffer visible to host and remap extra address. */
++ if (hv_isolation_type_snp()) {
++ ret = set_memory_decrypted((unsigned long)
++ phys_to_virt(hyperv_io_tlb_start),
++ HVPFN_UP(hyperv_io_tlb_size));
++ if (ret)
++ panic("%s: Fail to mark Hyper-v swiotlb buffer visible to host. err=%d\n",
++ __func__, ret);
++
++ hyperv_io_tlb_remap = ioremap_cache(hyperv_io_tlb_start
++ + ms_hyperv.shared_gpa_boundary,
++ hyperv_io_tlb_size);
++ if (!hyperv_io_tlb_remap)
++ panic("Fail to remap io tlb.\n");
++
++ memset(hyperv_io_tlb_remap, 0x00, hyperv_io_tlb_size);
++ swiotlb_set_bounce_remap(hyperv_io_tlb_remap);
+ }
-+
- ring_info->ring_size = page_cnt << PAGE_SHIFT;
- ring_info->ring_size_div10_reciprocal =
- reciprocal_value(ring_info->ring_size / 10);
++}
++
++IOMMU_INIT_FINISH(hyperv_swiotlb_detect,
++ NULL, hyperv_iommu_swiotlb_init,
++ hyperv_iommu_swiotlb_later_init);
++
+ #endif
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index 06eccaba10c5..babbe19f57e2 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -1759,6 +1759,7 @@ int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
++int __init hyperv_swiotlb_detect(void);
+
+ struct hyperv_pci_block_ops {
+ int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
--
2.25.1