--- v1
+++ v4
@@ -2,404 +2,141 @@
In an Isolation VM, all memory shared with the host needs to be marked
visible to the host via hvcall. vmbus_establish_gpadl() has already done it for
-netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
-pagebuffer() still needs to be handled. Use DMA API to map/unmap
-this memory during sending/receiving packets and the Hyper-V swiotlb
-bounce buffer dma address will be returned. The swiotlb bounce buffer
-has been marked to be visible to host during boot up.
+storvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
+mpb_desc() still needs to be handled. Use the DMA API (scsi_dma_map/unmap)
+to map this memory when sending/receiving packets and return the swiotlb
+bounce buffer DMA address. In an Isolation VM, the swiotlb bounce buffer is
+marked visible to the host and swiotlb force mode is enabled.
-Allocate rx/tx ring buffer via dma_alloc_noncontiguous() in Isolation
-VM. After calling vmbus_establish_gpadl() which marks these pages visible
-to host, map these pages into the unencrypted address space via dma_vmap_noncontiguous().
+Set the device's DMA min align mask to HV_HYP_PAGE_SIZE - 1 so that the
+original data offset is preserved in the bounce buffer.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
- drivers/net/hyperv/hyperv_net.h | 5 +
- drivers/net/hyperv/netvsc.c | 192 +++++++++++++++++++++++++++---
- drivers/net/hyperv/rndis_filter.c | 2 +
- include/linux/hyperv.h | 6 +
- 4 files changed, 190 insertions(+), 15 deletions(-)
+ drivers/hv/vmbus_drv.c | 1 +
+ drivers/scsi/storvsc_drv.c | 37 +++++++++++++++++++++----------------
+ include/linux/hyperv.h | 1 +
+ 3 files changed, 23 insertions(+), 16 deletions(-)
-diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
-index 315278a7cf88..31c77a00d01e 100644
---- a/drivers/net/hyperv/hyperv_net.h
-+++ b/drivers/net/hyperv/hyperv_net.h
-@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
- u32 total_bytes;
- u32 send_buf_index;
- u32 total_data_buflen;
-+ struct hv_dma_range *dma_range;
- };
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 0a64ccfafb8b..ae6ec503399a 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -2121,6 +2121,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
+ hv_debug_add_dev_dir(child_device_obj);
- #define NETVSC_HASH_KEYLEN 40
-@@ -1074,6 +1075,7 @@ struct netvsc_device {
+ child_device_obj->device.dma_mask = &vmbus_dma_mask;
++ child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
+ return 0;
- /* Receive buffer allocated by us but manages by NetVSP */
- void *recv_buf;
-+ struct sg_table *recv_sgt;
- u32 recv_buf_size; /* allocated bytes */
- struct vmbus_gpadl recv_buf_gpadl_handle;
- u32 recv_section_cnt;
-@@ -1082,6 +1084,7 @@ struct netvsc_device {
+ err_kset_unregister:
+diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
+index 20595c0ba0ae..ae293600d799 100644
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -21,6 +21,8 @@
+ #include <linux/device.h>
+ #include <linux/hyperv.h>
+ #include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+ #include <scsi/scsi_host.h>
+@@ -1336,6 +1338,7 @@ static void storvsc_on_channel_callback(void *context)
+ continue;
+ }
+ request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
++ scsi_dma_unmap(scmnd);
+ }
- /* Send buffer allocated by us */
- void *send_buf;
-+ struct sg_table *send_sgt;
- u32 send_buf_size;
- struct vmbus_gpadl send_buf_gpadl_handle;
- u32 send_section_cnt;
-@@ -1731,4 +1734,6 @@ struct rndis_message {
- #define RETRY_US_HI 10000
- #define RETRY_MAX 2000 /* >10 sec */
+ storvsc_on_receive(stor_device, packet, request);
+@@ -1749,7 +1752,6 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+ struct hv_host_device *host_dev = shost_priv(host);
+ struct hv_device *dev = host_dev->dev;
+ struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
+- int i;
+ struct scatterlist *sgl;
+ unsigned int sg_count;
+ struct vmscsi_request *vm_srb;
+@@ -1831,10 +1833,11 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+ payload_sz = sizeof(cmd_request->mpb);
-+void netvsc_dma_unmap(struct hv_device *hv_dev,
-+ struct hv_netvsc_packet *packet);
- #endif /* _HYPERV_NET_H */
-diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
-index 396bc1c204e6..9cdc71930830 100644
---- a/drivers/net/hyperv/netvsc.c
-+++ b/drivers/net/hyperv/netvsc.c
-@@ -20,6 +20,7 @@
- #include <linux/vmalloc.h>
- #include <linux/rtnetlink.h>
- #include <linux/prefetch.h>
-+#include <linux/gfp.h>
+ if (sg_count) {
+- unsigned int hvpgoff, hvpfns_to_add;
+ unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
+ unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
+- u64 hvpfn;
++ struct scatterlist *sg;
++ unsigned long hvpfn, hvpfns_to_add;
++ int j, i = 0;
- #include <asm/sync_bitops.h>
- #include <asm/mshyperv.h>
-@@ -146,15 +147,39 @@ static struct netvsc_device *alloc_net_device(void)
- return net_device;
- }
+ if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
-+static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
-+{
-+ struct vmbus_channel *primary = channel->primary_channel;
-+
-+ return primary ? primary->device_obj : channel->device_obj;
-+}
-+
- static void free_netvsc_device(struct rcu_head *head)
- {
- struct netvsc_device *nvdev
- = container_of(head, struct netvsc_device, rcu);
-+ struct hv_device *dev =
-+ netvsc_channel_to_device(nvdev->chan_table[0].channel);
- int i;
+@@ -1848,21 +1851,22 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+ payload->range.len = length;
+ payload->range.offset = offset_in_hvpg;
- kfree(nvdev->extension);
-- vfree(nvdev->recv_buf);
-- vfree(nvdev->send_buf);
-+
-+ if (nvdev->recv_sgt) {
-+ dma_vunmap_noncontiguous(&dev->device, nvdev->recv_buf);
-+ dma_free_noncontiguous(&dev->device, nvdev->recv_buf_size,
-+ nvdev->recv_sgt, DMA_FROM_DEVICE);
-+ } else {
-+ vfree(nvdev->recv_buf);
-+ }
-+
-+ if (nvdev->send_sgt) {
-+ dma_vunmap_noncontiguous(&dev->device, nvdev->send_buf);
-+ dma_free_noncontiguous(&dev->device, nvdev->send_buf_size,
-+ nvdev->send_sgt, DMA_TO_DEVICE);
-+ } else {
-+ vfree(nvdev->send_buf);
-+ }
-+
- kfree(nvdev->send_section_map);
++ sg_count = scsi_dma_map(scmnd);
++ if (sg_count < 0)
++ return SCSI_MLQUEUE_DEVICE_BUSY;
- for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
-@@ -348,7 +373,21 @@ static int netvsc_init_buf(struct hv_device *device,
- buf_size = min_t(unsigned int, buf_size,
- NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
+- for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
++ for_each_sg(sgl, sg, sg_count, j) {
+ /*
+- * Init values for the current sgl entry. hvpgoff
+- * and hvpfns_to_add are in units of Hyper-V size
+- * pages. Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE
+- * case also handles values of sgl->offset that are
+- * larger than PAGE_SIZE. Such offsets are handled
+- * even on other than the first sgl entry, provided
+- * they are a multiple of PAGE_SIZE.
++ * Init values for the current sgl entry. hvpfns_to_add
++ * is in units of Hyper-V size pages. Handling the
++ * PAGE_SIZE != HV_HYP_PAGE_SIZE case also handles
++ * values of sgl->offset that are larger than PAGE_SIZE.
++ * Such offsets are handled even on other than the first
++ * sgl entry, provided they are a multiple of PAGE_SIZE.
+ */
+- hvpgoff = HVPFN_DOWN(sgl->offset);
+- hvpfn = page_to_hvpfn(sg_page(sgl)) + hvpgoff;
+- hvpfns_to_add = HVPFN_UP(sgl->offset + sgl->length) -
+- hvpgoff;
++ hvpfn = HVPFN_DOWN(sg_dma_address(sg));
++ hvpfns_to_add = HVPFN_UP(sg_dma_address(sg) +
++ sg_dma_len(sg)) - hvpfn;
-- net_device->recv_buf = vzalloc(buf_size);
-+ if (hv_isolation_type_snp()) {
-+ net_device->recv_sgt =
-+ dma_alloc_noncontiguous(&device->device, buf_size,
-+ DMA_FROM_DEVICE, GFP_KERNEL, 0);
-+ if (!net_device->recv_sgt) {
-+ pr_err("Fail to allocate recv buffer buf_size %d.\n.", buf_size);
-+ ret = -ENOMEM;
-+ goto cleanup;
-+ }
-+
-+ net_device->recv_buf = (void *)net_device->recv_sgt->sgl->dma_address;
-+ } else {
-+ net_device->recv_buf = vzalloc(buf_size);
-+ }
-+
- if (!net_device->recv_buf) {
- netdev_err(ndev,
- "unable to allocate receive buffer of size %u\n",
-@@ -357,8 +396,6 @@ static int netvsc_init_buf(struct hv_device *device,
- goto cleanup;
+ /*
+ * Fill the next portion of the PFN array with
+@@ -1872,7 +1876,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+ * the PFN array is filled.
+ */
+ while (hvpfns_to_add--)
+- payload->range.pfn_array[i++] = hvpfn++;
++ payload->range.pfn_array[i++] = hvpfn++;
+ }
}
-- net_device->recv_buf_size = buf_size;
--
- /*
- * Establish the gpadl handle for this buffer on this
- * channel. Note: This call uses the vmbus connection rather
-@@ -373,6 +410,19 @@ static int netvsc_init_buf(struct hv_device *device,
- goto cleanup;
- }
+@@ -2016,6 +2020,7 @@ static int storvsc_probe(struct hv_device *device,
+ stor_device->vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
+ spin_lock_init(&stor_device->lock);
+ hv_set_drvdata(device, stor_device);
++ dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);
-+ if (net_device->recv_sgt) {
-+ net_device->recv_buf =
-+ dma_vmap_noncontiguous(&device->device, buf_size,
-+ net_device->recv_sgt);
-+ if (!net_device->recv_buf) {
-+ pr_err("Fail to vmap recv buffer.\n");
-+ ret = -ENOMEM;
-+ goto cleanup;
-+ }
-+ }
-+
-+ net_device->recv_buf_size = buf_size;
-+
- /* Notify the NetVsp of the gpadl handle */
- init_packet = &net_device->channel_init_pkt;
- memset(init_packet, 0, sizeof(struct nvsp_message));
-@@ -454,14 +504,27 @@ static int netvsc_init_buf(struct hv_device *device,
- buf_size = device_info->send_sections * device_info->send_section_size;
- buf_size = round_up(buf_size, PAGE_SIZE);
-
-- net_device->send_buf = vzalloc(buf_size);
-+ if (hv_isolation_type_snp()) {
-+ net_device->send_sgt =
-+ dma_alloc_noncontiguous(&device->device, buf_size,
-+ DMA_TO_DEVICE, GFP_KERNEL, 0);
-+ if (!net_device->send_sgt) {
-+ pr_err("Fail to allocate send buffer buf_size %d.\n.", buf_size);
-+ ret = -ENOMEM;
-+ goto cleanup;
-+ }
-+
-+ net_device->send_buf = (void *)net_device->send_sgt->sgl->dma_address;
-+ } else {
-+ net_device->send_buf = vzalloc(buf_size);
-+ }
-+
- if (!net_device->send_buf) {
- netdev_err(ndev, "unable to allocate send buffer of size %u\n",
- buf_size);
- ret = -ENOMEM;
- goto cleanup;
- }
-- net_device->send_buf_size = buf_size;
-
- /* Establish the gpadl handle for this buffer on this
- * channel. Note: This call uses the vmbus connection rather
-@@ -476,6 +539,19 @@ static int netvsc_init_buf(struct hv_device *device,
- goto cleanup;
- }
-
-+ if (net_device->send_sgt) {
-+ net_device->send_buf =
-+ dma_vmap_noncontiguous(&device->device, buf_size,
-+ net_device->send_sgt);
-+ if (!net_device->send_buf) {
-+ pr_err("Fail to vmap send buffer.\n");
-+ ret = -ENOMEM;
-+ goto cleanup;
-+ }
-+ }
-+
-+ net_device->send_buf_size = buf_size;
-+
- /* Notify the NetVsp of the gpadl handle */
- init_packet = &net_device->channel_init_pkt;
- memset(init_packet, 0, sizeof(struct nvsp_message));
-@@ -766,7 +842,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
-
- /* Notify the layer above us */
- if (likely(skb)) {
-- const struct hv_netvsc_packet *packet
-+ struct hv_netvsc_packet *packet
- = (struct hv_netvsc_packet *)skb->cb;
- u32 send_index = packet->send_buf_index;
- struct netvsc_stats *tx_stats;
-@@ -782,6 +858,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
- tx_stats->bytes += packet->total_bytes;
- u64_stats_update_end(&tx_stats->syncp);
-
-+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
- napi_consume_skb(skb, budget);
- }
-
-@@ -946,6 +1023,87 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
- memset(dest, 0, padding);
- }
-
-+void netvsc_dma_unmap(struct hv_device *hv_dev,
-+ struct hv_netvsc_packet *packet)
-+{
-+ u32 page_count = packet->cp_partial ?
-+ packet->page_buf_cnt - packet->rmsg_pgcnt :
-+ packet->page_buf_cnt;
-+ int i;
-+
-+ if (!hv_is_isolation_supported())
-+ return;
-+
-+ if (!packet->dma_range)
-+ return;
-+
-+ for (i = 0; i < page_count; i++)
-+ dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
-+ packet->dma_range[i].mapping_size,
-+ DMA_TO_DEVICE);
-+
-+ kfree(packet->dma_range);
-+}
-+
-+/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
-+ * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
-+ * VM.
-+ *
-+ * In isolation VM, netvsc send buffer has been marked visible to
-+ * host and so the data copied to send buffer doesn't need to use
-+ * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
-+ * may not be copied to send buffer and so these pages need to be
-+ * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do
-+ * that. The pfns in the struct hv_page_buffer need to be converted
-+ * to bounce buffer's pfn. The loop here is necessary because the
-+ * entries in the page buffer array are not necessarily full
-+ * pages of data. Each entry in the array has a separate offset and
-+ * len that may be non-zero, even for entries in the middle of the
-+ * array. And the entries are not physically contiguous. So each
-+ * entry must be individually mapped rather than as a contiguous unit.
-+ * So not use dma_map_sg() here.
-+ */
-+static int netvsc_dma_map(struct hv_device *hv_dev,
-+ struct hv_netvsc_packet *packet,
-+ struct hv_page_buffer *pb)
-+{
-+ u32 page_count = packet->cp_partial ?
-+ packet->page_buf_cnt - packet->rmsg_pgcnt :
-+ packet->page_buf_cnt;
-+ dma_addr_t dma;
-+ int i;
-+
-+ if (!hv_is_isolation_supported())
-+ return 0;
-+
-+ packet->dma_range = kcalloc(page_count,
-+ sizeof(*packet->dma_range),
-+ GFP_KERNEL);
-+ if (!packet->dma_range)
-+ return -ENOMEM;
-+
-+ for (i = 0; i < page_count; i++) {
-+ char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
-+ + pb[i].offset);
-+ u32 len = pb[i].len;
-+
-+ dma = dma_map_single(&hv_dev->device, src, len,
-+ DMA_TO_DEVICE);
-+ if (dma_mapping_error(&hv_dev->device, dma)) {
-+ kfree(packet->dma_range);
-+ return -ENOMEM;
-+ }
-+
-+ packet->dma_range[i].dma = dma;
-+ packet->dma_range[i].mapping_size = len;
-+ pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
-+ pb[i].offset = offset_in_hvpage(dma);
-+ pb[i].len = len;
-+ }
-+
-+ return 0;
-+}
-+
- static inline int netvsc_send_pkt(
- struct hv_device *device,
- struct hv_netvsc_packet *packet,
-@@ -986,14 +1144,24 @@ static inline int netvsc_send_pkt(
-
- trace_nvsp_send_pkt(ndev, out_channel, rpkt);
-
-+ packet->dma_range = NULL;
- if (packet->page_buf_cnt) {
- if (packet->cp_partial)
- pb += packet->rmsg_pgcnt;
-
-+ ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
-+ if (ret) {
-+ ret = -EAGAIN;
-+ goto exit;
-+ }
-+
- ret = vmbus_sendpacket_pagebuffer(out_channel,
- pb, packet->page_buf_cnt,
- &nvmsg, sizeof(nvmsg),
- req_id);
-+
-+ if (ret)
-+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
- } else {
- ret = vmbus_sendpacket(out_channel,
- &nvmsg, sizeof(nvmsg),
-@@ -1001,6 +1169,7 @@ static inline int netvsc_send_pkt(
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- }
-
-+exit:
- if (ret == 0) {
- atomic_inc_return(&nvchan->queue_sends);
-
-@@ -1515,13 +1684,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
- return 0;
- }
-
--static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
--{
-- struct vmbus_channel *primary = channel->primary_channel;
--
-- return primary ? primary->device_obj : channel->device_obj;
--}
--
- /* Network processing softirq
- * Process data in incoming ring buffer from host
- * Stops when ring is empty or budget is met or exceeded.
-diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
-index f6c9c2a670f9..448fcc325ed7 100644
---- a/drivers/net/hyperv/rndis_filter.c
-+++ b/drivers/net/hyperv/rndis_filter.c
-@@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
- }
- }
-
-+ netvsc_dma_unmap(((struct net_device_context *)
-+ netdev_priv(ndev))->device_ctx, &request->pkt);
- complete(&request->wait_event);
- } else {
- netdev_err(ndev,
+ stor_device->port_number = host->host_no;
+ ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
-index 4d44fb3b3f1c..8882e46d1070 100644
+index 1f037e114dc8..74f5e92f91a0 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
-@@ -25,6 +25,7 @@
- #include <linux/interrupt.h>
- #include <linux/reciprocal_div.h>
- #include <asm/hyperv-tlfs.h>
-+#include <linux/dma-map-ops.h>
+@@ -1261,6 +1261,7 @@ struct hv_device {
- #define MAX_PAGE_BUFFER_COUNT 32
- #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */
-@@ -1583,6 +1584,11 @@ struct hyperv_service_callback {
- void (*callback)(void *context);
- };
+ struct vmbus_channel *channel;
+ struct kset *channels_kset;
++ struct device_dma_parameters dma_parms;
-+struct hv_dma_range {
-+ dma_addr_t dma;
-+ u32 mapping_size;
-+};
-+
- #define MAX_SRV_VER 0x7ffffff
- extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
- const int *fw_version, int fw_vercnt,
+ /* place holder to keep track of the dir for hv device in debugfs */
+ struct dentry *debug_dir;
--
2.25.1