--- vrfc
+++ v4
@@ -1,722 +1,305 @@
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
-Hyper-V provides two kinds of Isolation VMs. VBS(Virtualization-based
-security) and AMD SEV-SNP base Isolation VMs. The memory of these vms
-are encrypted and host can't access guest memory directly. The
-guest needs to call hv host visibility hvcall to mark memory visible
-to host before sharing memory with host for IO operation. So there
-is bounce buffer request for IO operation to get data from host.
-To receive data, host puts data into the shared memory(bounce buffer)
-and guest copies the data to private memory. Vice versa.
+In an Isolation VM, all memory shared with the host needs to be marked
+visible to the host via hvcall. vmbus_establish_gpadl() has already done
+this for the netvsc rx/tx ring buffers. The page buffers used by
+vmbus_sendpacket_pagebuffer() still need to be handled. Use the DMA API
+to map/unmap this memory when sending/receiving packets; the Hyper-V DMA
+ops callbacks will use swiotlb functions to allocate bounce buffers and
+copy data from/to them.
-For SNP isolation VM, guest needs to access the shared memory via
-extra address space which is specified by Hyper-V CPUID HYPERV_CPUID_
-ISOLATION_CONFIG. The access physical address of the shared memory
-should be bounce buffer memory GPA plus with shared_gpa_boundary.
-
-Vmbus channel ring buffer has been marked as host visible and works
-as bounce buffer for vmbus devices. vmbus_sendpacket_pagebuffer()
-and vmbus_sendpacket_mpb_desc() send package which uses system memory
-out of vmbus channel ring buffer. These memory still needs to allocate
-additional bounce buffer to commnuicate with host. Add vmbus_sendpacket_
-pagebuffer_bounce () and vmbus_sendpacket_mpb_desc_bounce() to handle
-such case.
-
-Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
-Co-Developed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
- drivers/hv/channel.c | 13 +-
- drivers/hv/channel_mgmt.c | 1 +
- drivers/hv/hv_bounce.c | 579 +++++++++++++++++++++++++++++++++++++-
- drivers/hv/hyperv_vmbus.h | 13 +
- include/linux/hyperv.h | 2 +
- 5 files changed, 605 insertions(+), 3 deletions(-)
+ drivers/net/hyperv/hyperv_net.h | 6 ++
+ drivers/net/hyperv/netvsc.c | 144 +++++++++++++++++++++++++++++-
+ drivers/net/hyperv/rndis_filter.c | 2 +
+ include/linux/hyperv.h | 5 ++
+ 4 files changed, 154 insertions(+), 3 deletions(-)
-diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
-index 976ef99dda28..f5391a050bdc 100644
---- a/drivers/hv/channel.c
-+++ b/drivers/hv/channel.c
-@@ -1090,7 +1090,11 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
- bufferlist[2].iov_base = &aligned_data;
- bufferlist[2].iov_len = (packetlen_aligned - packetlen);
-
-- return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
-+ if (hv_is_isolation_supported())
-+ return vmbus_sendpacket_pagebuffer_bounce(channel, &desc,
-+ descsize, bufferlist, io_type, bounce_pkt, requestid);
-+ else
-+ return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index b11aa68b44ec..c2fbb9d4df2c 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
+ u32 total_bytes;
+ u32 send_buf_index;
+ u32 total_data_buflen;
++ struct hv_dma_range *dma_range;
+ };
+
+ #define NETVSC_HASH_KEYLEN 40
+@@ -1074,6 +1075,7 @@ struct netvsc_device {
+
+ /* Receive buffer allocated by us but manages by NetVSP */
+ void *recv_buf;
++ void *recv_original_buf;
+ u32 recv_buf_size; /* allocated bytes */
+ u32 recv_buf_gpadl_handle;
+ u32 recv_section_cnt;
+@@ -1082,6 +1084,8 @@ struct netvsc_device {
+
+ /* Send buffer allocated by us */
+ void *send_buf;
++ void *send_original_buf;
++ u32 send_buf_size;
+ u32 send_buf_gpadl_handle;
+ u32 send_section_cnt;
+ u32 send_section_size;
+@@ -1729,4 +1733,6 @@ struct rndis_message {
+ #define RETRY_US_HI 10000
+ #define RETRY_MAX 2000 /* >10 sec */
+
++void netvsc_dma_unmap(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet);
+ #endif /* _HYPERV_NET_H */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 7bd935412853..fc312e5db4d5 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
+ int i;
+
+ kfree(nvdev->extension);
+- vfree(nvdev->recv_buf);
+- vfree(nvdev->send_buf);
++
++ if (nvdev->recv_original_buf) {
++ vunmap(nvdev->recv_buf);
++ vfree(nvdev->recv_original_buf);
++ } else {
++ vfree(nvdev->recv_buf);
++ }
++
++ if (nvdev->send_original_buf) {
++ vunmap(nvdev->send_buf);
++ vfree(nvdev->send_original_buf);
++ } else {
++ vfree(nvdev->send_buf);
++ }
++
+ kfree(nvdev->send_section_map);
+
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+@@ -330,6 +343,27 @@ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
}
- EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
-
-@@ -1130,7 +1134,12 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
- bufferlist[2].iov_base = &aligned_data;
- bufferlist[2].iov_len = (packetlen_aligned - packetlen);
-
-- return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
-+ if (hv_is_isolation_supported()) {
-+ return vmbus_sendpacket_mpb_desc_bounce(channel, desc,
-+ desc_size, bufferlist, io_type, bounce_pkt, requestid);
-+ } else {
-+ return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
-+ }
+
++static void *netvsc_remap_buf(void *buf, unsigned long size)
++{
++ unsigned long *pfns;
++ void *vaddr;
++ int i;
++
++ pfns = kcalloc(size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
++ GFP_KERNEL);
++ if (!pfns)
++ return NULL;
++
++ for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
++ pfns[i] = virt_to_hvpfn(buf + i * HV_HYP_PAGE_SIZE)
++ + (ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
++
++ vaddr = vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
++ kfree(pfns);
++
++ return vaddr;
++}
++
+ static int netvsc_init_buf(struct hv_device *device,
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
+@@ -340,6 +374,7 @@ static int netvsc_init_buf(struct hv_device *device,
+ unsigned int buf_size;
+ size_t map_words;
+ int i, ret = 0;
++ void *vaddr;
+
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * device_info->recv_section_size;
+@@ -375,6 +410,15 @@ static int netvsc_init_buf(struct hv_device *device,
+ goto cleanup;
+ }
+
++ if (hv_isolation_type_snp()) {
++ vaddr = netvsc_remap_buf(net_device->recv_buf, buf_size);
++ if (!vaddr)
++ goto cleanup;
++
++ net_device->recv_original_buf = net_device->recv_buf;
++ net_device->recv_buf = vaddr;
++ }
++
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+@@ -477,6 +521,15 @@ static int netvsc_init_buf(struct hv_device *device,
+ goto cleanup;
+ }
+
++ if (hv_isolation_type_snp()) {
++ vaddr = netvsc_remap_buf(net_device->send_buf, buf_size);
++ if (!vaddr)
++ goto cleanup;
++
++ net_device->send_original_buf = net_device->send_buf;
++ net_device->send_buf = vaddr;
++ }
++
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+@@ -767,7 +820,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+
+ /* Notify the layer above us */
+ if (likely(skb)) {
+- const struct hv_netvsc_packet *packet
++ struct hv_netvsc_packet *packet
+ = (struct hv_netvsc_packet *)skb->cb;
+ u32 send_index = packet->send_buf_index;
+ struct netvsc_stats *tx_stats;
+@@ -783,6 +836,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+ tx_stats->bytes += packet->total_bytes;
+ u64_stats_update_end(&tx_stats->syncp);
+
++ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
+ napi_consume_skb(skb, budget);
+ }
+
+@@ -947,6 +1001,82 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ memset(dest, 0, padding);
}
- EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
-
-diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
-index e2846cacfd70..d8090b2e2421 100644
---- a/drivers/hv/channel_mgmt.c
-+++ b/drivers/hv/channel_mgmt.c
-@@ -359,6 +359,7 @@ static struct vmbus_channel *alloc_channel(void)
- if (!channel)
- return NULL;
-
-+ spin_lock_init(&channel->bp_lock);
- spin_lock_init(&channel->sched_lock);
- init_completion(&channel->rescind_event);
-
-diff --git a/drivers/hv/hv_bounce.c b/drivers/hv/hv_bounce.c
-index c5898325b238..bed1a361d167 100644
---- a/drivers/hv/hv_bounce.c
-+++ b/drivers/hv/hv_bounce.c
-@@ -9,12 +9,589 @@
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
- #include "hyperv_vmbus.h"
-+#include <linux/uio.h>
-+#include <asm/mshyperv.h>
-+
-+/* BP == Bounce Pages here */
-+#define BP_LIST_MAINTENANCE_FREQ (30 * HZ)
-+#define BP_MIN_TIME_IN_FREE_LIST (30 * HZ)
-+#define IS_BP_MAINTENANCE_TASK_NEEDED(channel) \
-+ (channel->bounce_page_alloc_count > \
-+ channel->min_bounce_resource_count && \
-+ !list_empty(&channel->bounce_page_free_head))
-+#define BP_QUEUE_MAINTENANCE_WORK(channel) \
-+ queue_delayed_work(system_unbound_wq, \
-+ &channel->bounce_page_list_maintain, \
-+ BP_LIST_MAINTENANCE_FREQ)
-+
-+#define hv_copy_to_bounce(bounce_pkt) \
-+ hv_copy_to_from_bounce(bounce_pkt, true)
-+#define hv_copy_from_bounce(bounce_pkt) \
-+ hv_copy_to_from_bounce(bounce_pkt, false)
-+/*
-+ * A list of bounce pages, with original va, bounce va and I/O details such as
-+ * the offset and length.
+
++void netvsc_dma_unmap(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet)
++{
++ u32 page_count = packet->cp_partial ?
++ packet->page_buf_cnt - packet->rmsg_pgcnt :
++ packet->page_buf_cnt;
++ int i;
++
++ if (!hv_is_isolation_supported())
++ return;
++
++ if (!packet->dma_range)
++ return;
++
++ for (i = 0; i < page_count; i++)
++ dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
++ packet->dma_range[i].mapping_size,
++ DMA_TO_DEVICE);
++
++ kfree(packet->dma_range);
++}
++
++/* netvsc_dma_map - Map the data pages of a packet sent by
++ * vmbus_sendpacket_pagebuffer() to swiotlb bounce buffers in an
++ * Isolation VM.
++ *
++ * In an Isolation VM, the netvsc send buffer has been marked visible
++ * to the host, so data copied to the send buffer doesn't need a
++ * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
++ * may not be copied to the send buffer, so these pages need to be
++ * mapped with swiotlb bounce buffers; netvsc_dma_map() does that.
++ * The pfns in struct hv_page_buffer need to be converted to the
++ * bounce buffers' pfns. A per-page loop is therefore necessary, which
++ * is why dma_map_sg() is not used here.
+ */
-+struct hv_bounce_page_list {
-+ struct list_head link;
-+ u32 offset;
-+ u32 len;
-+ unsigned long va;
-+ unsigned long bounce_va;
-+ unsigned long bounce_original_va;
-+ unsigned long bounce_extra_pfn;
-+ unsigned long last_used_jiff;
-+};
-+
-+/*
-+ * This structure can be safely used to iterate over objects of the type
-+ * 'hv_page_buffer', 'hv_mpb_array' or 'hv_multipage_buffer'. The min array
-+ * size of 1 is needed to include the size of 'pfn_array' as part of the struct.
-+ */
-+struct hv_page_range {
-+ u32 len;
-+ u32 offset;
-+ u64 pfn_array[1];
-+};
-+
-+static inline struct hv_bounce_pkt *__hv_bounce_pkt_alloc(
-+ struct vmbus_channel *channel)
++int netvsc_dma_map(struct hv_device *hv_dev,
++ struct hv_netvsc_packet *packet,
++ struct hv_page_buffer *pb)
+{
-+ return kmem_cache_alloc(channel->bounce_pkt_cache,
-+ __GFP_ZERO | GFP_KERNEL);
-+}
-+
-+static inline void __hv_bounce_pkt_free(struct vmbus_channel *channel,
-+ struct hv_bounce_pkt *bounce_pkt)
-+{
-+ kmem_cache_free(channel->bounce_pkt_cache, bounce_pkt);
-+}
-+
-+static inline void hv_bounce_pkt_list_free(struct vmbus_channel *channel,
-+ const struct list_head *head)
-+{
-+ struct hv_bounce_pkt *bounce_pkt;
-+ struct hv_bounce_pkt *tmp;
-+
-+ list_for_each_entry_safe(bounce_pkt, tmp, head, link) {
-+ list_del(&bounce_pkt->link);
-+ __hv_bounce_pkt_free(channel, bounce_pkt);
-+ }
-+}
-+
-+/*
-+ * Assigns a free bounce packet from the channel, if one is available. Else,
-+ * allocates one. Use 'hv_bounce_resources_release' to release the bounce packet
-+ * as it also takes care of releasing the bounce pages within, if any.
-+ */
-+static struct hv_bounce_pkt *hv_bounce_pkt_assign(struct vmbus_channel *channel)
-+{
-+ if (channel->min_bounce_resource_count) {
-+ struct hv_bounce_pkt *bounce_pkt = NULL;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ if (!list_empty(&channel->bounce_pkt_free_list_head)) {
-+ bounce_pkt = list_first_entry(
-+ &channel->bounce_pkt_free_list_head,
-+ struct hv_bounce_pkt, link);
-+ list_del(&bounce_pkt->link);
-+ channel->bounce_pkt_free_count--;
-+ }
-+
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ if (bounce_pkt)
-+ return bounce_pkt;
-+ }
-+
-+ return __hv_bounce_pkt_alloc(channel);
-+}
-+
-+static void hv_bounce_pkt_release(struct vmbus_channel *channel,
-+ struct hv_bounce_pkt *bounce_pkt)
-+{
-+ bool free_pkt = true;
-+
-+ if (channel->min_bounce_resource_count) {
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ if (channel->bounce_pkt_free_count <
-+ channel->min_bounce_resource_count) {
-+ list_add(&bounce_pkt->link,
-+ &channel->bounce_pkt_free_list_head);
-+ channel->bounce_pkt_free_count++;
-+ free_pkt = false;
-+ }
-+
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ }
-+
-+ if (free_pkt)
-+ __hv_bounce_pkt_free(channel, bounce_pkt);
-+}
-+
-+/* Frees the list of bounce pages and all of the resources within */
-+static void hv_bounce_page_list_free(struct vmbus_channel *channel,
-+ const struct list_head *head)
-+{
-+ u16 count = 0;
-+ u64 pfn[HV_MIN_BOUNCE_BUFFER_PAGES];
-+ struct hv_bounce_page_list *bounce_page;
-+ struct hv_bounce_page_list *tmp;
-+
-+ BUILD_BUG_ON(HV_MIN_BOUNCE_BUFFER_PAGES > HV_MAX_MODIFY_GPA_REP_COUNT);
-+ list_for_each_entry(bounce_page, head, link) {
-+ if (hv_isolation_type_snp())
-+ pfn[count++] = virt_to_hvpfn(
-+ (void *)bounce_page->bounce_original_va);
-+ else
-+ pfn[count++] = virt_to_hvpfn(
-+ (void *)bounce_page->bounce_va);
-+
-+ if (count < HV_MIN_BOUNCE_BUFFER_PAGES &&
-+ !list_is_last(&bounce_page->link, head))
-+ continue;
-+ hv_mark_gpa_visibility(count, pfn, VMBUS_PAGE_NOT_VISIBLE);
-+ count = 0;
-+ }
-+
-+ /*
-+ * Need a second iteration because the page should not be freed until
-+ * it is marked not-visible to the host.
-+ */
-+ list_for_each_entry_safe(bounce_page, tmp, head, link) {
-+ list_del(&bounce_page->link);
-+
-+ if (hv_isolation_type_snp()) {
-+ vunmap((void *)bounce_page->bounce_va);
-+ free_page(bounce_page->bounce_original_va);
-+ } else
-+ free_page(bounce_page->bounce_va);
-+
-+ kmem_cache_free(channel->bounce_page_cache, bounce_page);
-+ }
-+}
-+
-+/* Allocate a list of bounce pages and make them host visible. */
-+static int hv_bounce_page_list_alloc(struct vmbus_channel *channel, u32 count)
-+{
-+ unsigned long flags;
-+ struct list_head head;
-+ u32 p;
-+ u64 pfn[HV_MIN_BOUNCE_BUFFER_PAGES];
-+ u32 pfn_count = 0;
-+ bool queue_work = false;
-+ int ret = -ENOSPC;
-+ unsigned long va = 0;
-+
-+ INIT_LIST_HEAD(&head);
-+ for (p = 0; p < count; p++) {
-+ struct hv_bounce_page_list *bounce_page;
-+
-+ va = __get_free_page(__GFP_ZERO | GFP_ATOMIC);
-+ if (unlikely(!va))
-+ goto err_free;
-+ bounce_page = kmem_cache_alloc(channel->bounce_page_cache,
-+ __GFP_ZERO | GFP_ATOMIC);
-+ if (unlikely(!bounce_page))
-+ goto err_free;
-+
-+ if (hv_isolation_type_snp()) {
-+ bounce_page->bounce_extra_pfn =
-+ virt_to_hvpfn((void *)va) +
-+ (ms_hyperv.shared_gpa_boundary
-+ >> HV_HYP_PAGE_SHIFT);
-+ bounce_page->bounce_original_va = va;
-+ bounce_page->bounce_va = (u64)ioremap_cache(
-+ bounce_page->bounce_extra_pfn << HV_HYP_PAGE_SHIFT,
-+ HV_HYP_PAGE_SIZE);
-+ if (!bounce_page->bounce_va)
-+ goto err_free;
-+ } else {
-+ bounce_page->bounce_va = va;
-+ }
-+
-+ pfn[pfn_count++] = virt_to_hvpfn((void *)va);
-+ bounce_page->last_used_jiff = jiffies;
-+
-+ /* Add to the tail to maintain LRU sorting */
-+ list_add_tail(&bounce_page->link, &head);
-+ va = 0;
-+ if (pfn_count == HV_MIN_BOUNCE_BUFFER_PAGES || p == count - 1) {
-+ ret = hv_mark_gpa_visibility(pfn_count, pfn,
-+ VMBUS_PAGE_VISIBLE_READ_WRITE);
-+ if (hv_isolation_type_snp())
-+ list_for_each_entry(bounce_page, &head, link)
-+ memset((u64 *)bounce_page->bounce_va, 0x00,
-+ HV_HYP_PAGE_SIZE);
-+
-+ if (unlikely(ret < 0))
-+ goto err_free;
-+ pfn_count = 0;
-+ }
-+ }
-+
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ list_splice_tail(&head, &channel->bounce_page_free_head);
-+ channel->bounce_page_alloc_count += count;
-+ queue_work = IS_BP_MAINTENANCE_TASK_NEEDED(channel);
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ if (queue_work)
-+ BP_QUEUE_MAINTENANCE_WORK(channel);
-+ return 0;
-+err_free:
-+ if (va)
-+ free_page(va);
-+ hv_bounce_page_list_free(channel, &head);
-+ return ret;
-+}
-+
-+/*
-+ * Puts the bounce pages in the list back into the channel's free bounce page
-+ * list and schedules the bounce page maintenance routine.
-+ */
-+static void hv_bounce_page_list_release(struct vmbus_channel *channel,
-+ struct list_head *head)
-+{
-+ struct hv_bounce_page_list *bounce_page;
-+ unsigned long flags;
-+ bool queue_work;
-+ struct hv_bounce_page_list *tmp;
-+
-+ /*
-+ * Need to iterate, rather than a direct list merge so that the last
-+ * used timestamp can be updated for each page.
-+ * Add the page to the tail of the free list to maintain LRU sorting.
-+ */
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ list_for_each_entry_safe(bounce_page, tmp, head, link) {
-+ list_del(&bounce_page->link);
-+ bounce_page->last_used_jiff = jiffies;
-+
-+ /* Maintain LRU */
-+ list_add_tail(&bounce_page->link,
-+ &channel->bounce_page_free_head);
-+ }
-+
-+ queue_work = IS_BP_MAINTENANCE_TASK_NEEDED(channel);
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ if (queue_work)
-+ BP_QUEUE_MAINTENANCE_WORK(channel);
-+}
-+
-+/*
-+ * Maintenance work to prune the vmbus channel's free bounce page list. It runs
-+ * at every 'BP_LIST_MAINTENANCE_FREQ' and frees the bounce pages that are in
-+ * the free list longer than 'BP_MIN_TIME_IN_FREE_LIST' once the min bounce
-+ * resource reservation requirement is met.
-+ */
-+static void hv_bounce_page_list_maintain(struct work_struct *work)
-+{
-+ struct vmbus_channel *channel;
-+ struct delayed_work *dwork = to_delayed_work(work);
-+ unsigned long flags;
-+ struct list_head head_to_free;
-+ bool queue_work;
-+
-+ channel = container_of(dwork, struct vmbus_channel,
-+ bounce_page_list_maintain);
-+ INIT_LIST_HEAD(&head_to_free);
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ while (IS_BP_MAINTENANCE_TASK_NEEDED(channel)) {
-+ struct hv_bounce_page_list *bounce_page = list_first_entry(
-+ &channel->bounce_page_free_head,
-+ struct hv_bounce_page_list,
-+ link);
-+
-+ /*
-+ * Stop on the first entry that fails the check since the
-+ * list is expected to be sorted on LRU.
-+ */
-+ if (time_before(jiffies, bounce_page->last_used_jiff +
-+ BP_MIN_TIME_IN_FREE_LIST))
-+ break;
-+ list_del(&bounce_page->link);
-+ list_add_tail(&bounce_page->link, &head_to_free);
-+ channel->bounce_page_alloc_count--;
-+ }
-+
-+ queue_work = IS_BP_MAINTENANCE_TASK_NEEDED(channel);
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ if (!list_empty(&head_to_free))
-+ hv_bounce_page_list_free(channel, &head_to_free);
-+ if (queue_work)
-+ BP_QUEUE_MAINTENANCE_WORK(channel);
-+}
-+
-+/*
-+ * Assigns a free bounce page from the channel, if one is available. Else,
-+ * allocates a bunch of bounce pages into the channel and returns one. Use
-+ * 'hv_bounce_page_list_release' to release the page.
-+ */
-+static struct hv_bounce_page_list *hv_bounce_page_assign(
-+ struct vmbus_channel *channel)
-+{
-+ struct hv_bounce_page_list *bounce_page = NULL;
-+ unsigned long flags;
-+ int ret;
-+
-+retry:
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ if (!list_empty(&channel->bounce_page_free_head)) {
-+ bounce_page = list_first_entry(&channel->bounce_page_free_head,
-+ struct hv_bounce_page_list,
-+ link);
-+ list_del(&bounce_page->link);
-+ }
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+
-+ if (likely(bounce_page))
-+ return bounce_page;
-+
-+ if (hv_isolation_type_snp() && in_interrupt()) {
-+ pr_warn_once("Reservse page is not enough.\n");
-+ return NULL;
-+ }
-+
-+ ret = hv_bounce_page_list_alloc(channel, HV_MIN_BOUNCE_BUFFER_PAGES);
-+ if (unlikely(ret < 0))
-+ return NULL;
-+ goto retry;
-+}
-+
-+/*
-+ * Allocate 'count' linked list of bounce packets into the channel. Use
-+ * 'hv_bounce_pkt_list_free' to free the list.
-+ */
-+static int hv_bounce_pkt_list_alloc(struct vmbus_channel *channel, u32 count)
-+{
-+ struct list_head bounce_pkt_head;
-+ unsigned long flags;
-+ u32 i;
-+
-+ INIT_LIST_HEAD(&bounce_pkt_head);
-+ for (i = 0; i < count; i++) {
-+ struct hv_bounce_pkt *bounce_pkt = __hv_bounce_pkt_alloc(
-+ channel);
-+
-+ if (unlikely(!bounce_pkt))
-+ goto err_free;
-+ list_add(&bounce_pkt->link, &bounce_pkt_head);
-+ }
-+
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ list_splice_tail(&bounce_pkt_head, &channel->bounce_pkt_free_list_head);
-+ channel->bounce_pkt_free_count += count;
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ return 0;
-+err_free:
-+ hv_bounce_pkt_list_free(channel, &bounce_pkt_head);
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Allocate and reserve enough bounce resources to be able to handle the min
-+ * specified bytes. This routine should be called prior to starting the I/O on
-+ * the channel, else the channel will end up not reserving any bounce resources.
-+ */
-+int hv_bounce_resources_reserve(struct vmbus_channel *channel,
-+ u32 min_bounce_bytes)
-+{
-+ unsigned long flags;
-+ u32 round_up_count;
-+ int ret;
++ u32 page_count = packet->cp_partial ?
++ packet->page_buf_cnt - packet->rmsg_pgcnt :
++ packet->page_buf_cnt;
++ dma_addr_t dma;
++ int i;
+
+ if (!hv_is_isolation_supported())
+ return 0;
+
-+ /* Resize operation is currently not supported */
-+ if (unlikely((!min_bounce_bytes || channel->min_bounce_resource_count)))
-+ return -EINVAL;
-+
-+ /*
-+ * Get the page count and round it up to the min bounce pages supported
-+ */
-+ round_up_count = round_up(min_bounce_bytes, HV_HYP_PAGE_SIZE)
-+ >> HV_HYP_PAGE_SHIFT;
-+ round_up_count = round_up(round_up_count, HV_MIN_BOUNCE_BUFFER_PAGES);
-+ spin_lock_irqsave(&channel->bp_lock, flags);
-+ channel->min_bounce_resource_count = round_up_count;
-+ spin_unlock_irqrestore(&channel->bp_lock, flags);
-+ ret = hv_bounce_pkt_list_alloc(channel, round_up_count);
-+ if (ret < 0)
-+ return ret;
-+ return hv_bounce_page_list_alloc(channel, round_up_count);
++ packet->dma_range = kcalloc(page_count,
++ sizeof(*packet->dma_range),
++ GFP_KERNEL);
++ if (!packet->dma_range)
++ return -ENOMEM;
++
++ for (i = 0; i < page_count; i++) {
++ char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
++ + pb[i].offset);
++ u32 len = pb[i].len;
++
++ dma = dma_map_single(&hv_dev->device, src, len,
++ DMA_TO_DEVICE);
++ if (dma_mapping_error(&hv_dev->device, dma)) {
++ kfree(packet->dma_range);
++ return -ENOMEM;
++ }
++
++ packet->dma_range[i].dma = dma;
++ packet->dma_range[i].mapping_size = len;
++ pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
++ pb[i].offset = offset_in_hvpage(dma);
++ pb[i].len = len;
++ }
++
++ return 0;
+}
-+EXPORT_SYMBOL_GPL(hv_bounce_resources_reserve);
-+
-+static void hv_bounce_resources_release(struct vmbus_channel *channel,
-+ struct hv_bounce_pkt *bounce_pkt)
-+{
-+ if (unlikely(!bounce_pkt))
-+ return;
-+ hv_bounce_page_list_release(channel, &bounce_pkt->bounce_page_head);
-+ hv_bounce_pkt_release(channel, bounce_pkt);
-+}
-+
-+static void hv_copy_to_from_bounce(const struct hv_bounce_pkt *bounce_pkt,
-+ bool copy_to_bounce)
-+{
-+ struct hv_bounce_page_list *bounce_page;
-+
-+ if ((copy_to_bounce && (bounce_pkt->flags != IO_TYPE_WRITE)) ||
-+ (!copy_to_bounce && (bounce_pkt->flags != IO_TYPE_READ)))
-+ return;
-+
-+ list_for_each_entry(bounce_page, &bounce_pkt->bounce_page_head, link) {
-+ u32 offset = bounce_page->offset;
-+ u32 len = bounce_page->len;
-+ u8 *bounce_buffer = (u8 *)bounce_page->bounce_va;
-+ u8 *buffer = (u8 *)bounce_page->va;
-+
-+ if (copy_to_bounce)
-+ memcpy(bounce_buffer + offset, buffer + offset, len);
-+ else
-+ memcpy(buffer + offset, bounce_buffer + offset, len);
-+ }
-+}
-+
-+/*
-+ * Assigns the bounce resources needed to handle the PFNs within the range and
-+ * updates the range accordingly. Uses resources from the pre-allocated pool if
-+ * previously reserved, else allocates memory. Use 'hv_bounce_resources_release'
-+ * to release.
-+ */
-+static struct hv_bounce_pkt *hv_bounce_resources_assign(
-+ struct vmbus_channel *channel,
-+ u32 rangecount,
-+ struct hv_page_range *range,
-+ u8 io_type)
-+{
-+ struct hv_bounce_pkt *bounce_pkt;
-+ u32 r;
-+
-+ bounce_pkt = hv_bounce_pkt_assign(channel);
-+ if (unlikely(!bounce_pkt))
-+ return NULL;
-+ bounce_pkt->flags = io_type;
-+ INIT_LIST_HEAD(&bounce_pkt->bounce_page_head);
-+ for (r = 0; r < rangecount; r++) {
-+ u32 len = range[r].len;
-+ u32 offset = range[r].offset;
-+ u32 p;
-+ u32 pfn_count;
-+
-+ pfn_count = round_up(offset + len, HV_HYP_PAGE_SIZE)
-+ >> HV_HYP_PAGE_SHIFT;
-+ for (p = 0; p < pfn_count; p++) {
-+ struct hv_bounce_page_list *bounce_page;
-+ u32 copy_len = min(len, ((u32)HV_HYP_PAGE_SIZE - offset));
-+
-+ bounce_page = hv_bounce_page_assign(channel);
-+ if (unlikely(!bounce_page))
-+ goto err_free;
-+ bounce_page->va = (unsigned long)
-+ __va(range[r].pfn_array[p] << PAGE_SHIFT);
-+ bounce_page->offset = offset;
-+ bounce_page->len = copy_len;
-+ list_add_tail(&bounce_page->link,
-+ &bounce_pkt->bounce_page_head);
-+
-+ if (hv_isolation_type_snp()) {
-+ range[r].pfn_array[p] =
-+ bounce_page->bounce_extra_pfn;
-+ } else {
-+ range[r].pfn_array[p] = virt_to_hvpfn(
-+ (void *)bounce_page->bounce_va);
-+ }
-+ offset = 0;
-+ len -= copy_len;
-+ }
-+ }
-+
-+ /* Copy data from original buffer to bounce buffer, if needed */
-+ hv_copy_to_bounce(bounce_pkt);
-+ return bounce_pkt;
-+err_free:
-+ /* This will also reclaim any allocated bounce pages */
-+ hv_bounce_resources_release(channel, bounce_pkt);
-+ return NULL;
-+}
-+
-+int vmbus_sendpacket_pagebuffer_bounce(
-+ struct vmbus_channel *channel,
-+ struct vmbus_channel_packet_page_buffer *desc,
-+ u32 desc_size, struct kvec *bufferlist,
-+ u8 io_type, struct hv_bounce_pkt **pbounce_pkt,
-+ u64 requestid)
-+{
-+ struct hv_bounce_pkt *bounce_pkt;
-+ int ret;
-+
-+ bounce_pkt = hv_bounce_resources_assign(channel, desc->rangecount,
-+ (struct hv_page_range *)desc->range, io_type);
-+ if (unlikely(!bounce_pkt))
-+ return -EAGAIN;
-+ ret = hv_ringbuffer_write(channel, bufferlist, 3, requestid);
-+ if (unlikely(ret < 0))
-+ hv_bounce_resources_release(channel, bounce_pkt);
-+ else
-+ *pbounce_pkt = bounce_pkt;
-+
-+ return ret;
-+}
-+
-+int vmbus_sendpacket_mpb_desc_bounce(
-+ struct vmbus_channel *channel, struct vmbus_packet_mpb_array *desc,
-+ u32 desc_size, struct kvec *bufferlist, u8 io_type,
-+ struct hv_bounce_pkt **pbounce_pkt, u64 requestid)
-+{
-+ struct hv_bounce_pkt *bounce_pkt;
-+ struct vmbus_packet_mpb_array *desc_bounce;
-+ struct hv_mpb_array *range;
-+ int ret = -ENOSPC;
-+
-+ desc_bounce = kzalloc(desc_size, GFP_ATOMIC);
-+ if (unlikely(!desc_bounce))
-+ return ret;
-+
-+ memcpy(desc_bounce, desc, desc_size);
-+ range = &desc_bounce->range;
-+ bounce_pkt = hv_bounce_resources_assign(channel, desc->rangecount,
-+ (struct hv_page_range *)range, io_type);
-+ if (unlikely(!bounce_pkt))
-+ goto free;
-+ bufferlist[0].iov_base = desc_bounce;
-+ ret = hv_ringbuffer_write(channel, bufferlist, 3, requestid);
-+free:
-+ kfree(desc_bounce);
-+ if (unlikely(ret < 0))
-+ hv_bounce_resources_release(channel, bounce_pkt);
-+ else
-+ *pbounce_pkt = bounce_pkt;
-+ return ret;
-+}
-+
-+void hv_pkt_bounce(struct vmbus_channel *channel,
-+ struct hv_bounce_pkt *bounce_pkt)
-+{
-+ if (!bounce_pkt)
-+ return;
-+
-+ hv_copy_from_bounce(bounce_pkt);
-+ hv_bounce_resources_release(channel, bounce_pkt);
-+}
-+EXPORT_SYMBOL_GPL(hv_pkt_bounce);
-
- int hv_init_channel_ivm(struct vmbus_channel *channel)
- {
- if (!hv_is_isolation_supported())
- return 0;
-
-+ INIT_DELAYED_WORK(&channel->bounce_page_list_maintain,
-+ hv_bounce_page_list_maintain);
-+
- INIT_LIST_HEAD(&channel->bounce_page_free_head);
- INIT_LIST_HEAD(&channel->bounce_pkt_free_list_head);
-
-@@ -33,8 +610,8 @@ void hv_free_channel_ivm(struct vmbus_channel *channel)
- if (!hv_is_isolation_supported())
- return;
-
--
- cancel_delayed_work_sync(&channel->bounce_page_list_maintain);
-+
- hv_bounce_pkt_list_free(channel, &channel->bounce_pkt_free_list_head);
- hv_bounce_page_list_free(channel, &channel->bounce_page_free_head);
- kmem_cache_destroy(channel->bounce_pkt_cache);
-diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
-index 7677f083d33a..d985271e5522 100644
---- a/drivers/hv/hyperv_vmbus.h
-+++ b/drivers/hv/hyperv_vmbus.h
-@@ -82,6 +82,19 @@ extern int hv_init_channel_ivm(struct vmbus_channel *channel);
-
- extern void hv_free_channel_ivm(struct vmbus_channel *channel);
-
-+extern int vmbus_sendpacket_pagebuffer_bounce(struct vmbus_channel *channel,
-+ struct vmbus_channel_packet_page_buffer *desc, u32 desc_size,
-+ struct kvec *bufferlist, u8 io_type, struct hv_bounce_pkt **bounce_pkt,
-+ u64 requestid);
-+
-+extern int vmbus_sendpacket_mpb_desc_bounce(struct vmbus_channel *channel,
-+ struct vmbus_packet_mpb_array *desc, u32 desc_size,
-+ struct kvec *bufferlist, u8 io_type, struct hv_bounce_pkt **bounce_pkt,
-+ u64 requestid);
-+
-+extern void hv_pkt_bounce(struct vmbus_channel *channel,
-+ struct hv_bounce_pkt *bounce_pkt);
-+
- /* struct hv_monitor_page Layout */
- /* ------------------------------------------------------ */
- /* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */
++
+ static inline int netvsc_send_pkt(
+ struct hv_device *device,
+ struct hv_netvsc_packet *packet,
+@@ -987,14 +1117,22 @@ static inline int netvsc_send_pkt(
+
+ trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
++ packet->dma_range = NULL;
+ if (packet->page_buf_cnt) {
+ if (packet->cp_partial)
+ pb += packet->rmsg_pgcnt;
+
++ ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
++ if (ret)
++ return ret;
++
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
++
++ if (ret)
++ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
+ } else {
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 983bf362466a..9425fee85aa0 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
+ }
+ }
+
++ netvsc_dma_unmap(((struct net_device_context *)
++ netdev_priv(ndev))->device_ctx, &request->pkt);
+ complete(&request->wait_event);
+ } else {
+ netdev_err(ndev,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
-index d1a936091665..be7621b070f2 100644
+index babbe19f57e2..90abff664495 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
-@@ -1109,6 +1109,8 @@ void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
- void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
- void (*chn_rescind_cb)(struct vmbus_channel *));
-
-+extern int hv_bounce_resources_reserve(struct vmbus_channel *channel,
-+ u32 min_bounce_bytes);
- /*
- * Check if sub-channels have already been offerred. This API will be useful
- * when the driver is unloaded after establishing sub-channels. In this case,
+@@ -1616,6 +1616,11 @@ struct hyperv_service_callback {
+ void (*callback)(void *context);
+ };
+
++struct hv_dma_range {
++ dma_addr_t dma;
++ u32 mapping_size;
++};
++
+ #define MAX_SRV_VER 0x7ffffff
+ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
+ const int *fw_version, int fw_vercnt,
--
2.25.1