[PATCH v1 10/12] lib: add support for device coherent type in test_hmm
From: Alex Sierra <hidden>
Date: 2021-10-12 17:13:24
Also in:
amd-gfx, dri-devel, linux-mm, linux-xfs
Subsystem:
hmm - heterogeneous memory management, library code, the rest · Maintainers:
Jason Gunthorpe, Leon Romanovsky, Andrew Morton, Linus Torvalds
The device coherent type uses device memory that is coherently accessible by the CPU. Such memory can show up as an SP (special purpose) memory range in the BIOS-e820 memory enumeration. If the system has no SP memory, it can be faked by setting CONFIG_EFI_FAKE_MEMMAP. Currently, test_hmm only supports two different SP ranges of at least 256MB each. These ranges can be specified with the efi_fake_mem kernel parameter. For example, two SP ranges of 1GB starting at physical addresses 0x100000000 and 0x140000000 are specified as: efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000 Signed-off-by: Alex Sierra <redacted> --- lib/test_hmm.c | 195 ++++++++++++++++++++++++++++++++------------ lib/test_hmm_uapi.h | 16 +++- 2 files changed, 157 insertions(+), 54 deletions(-)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 70a9be0efa00..b349dd920f04 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c@@ -469,6 +469,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, unsigned long pfn_first; unsigned long pfn_last; void *ptr; + int ret = -ENOMEM; devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem)
@@ -551,7 +552,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, } spin_unlock(&mdevice->lock); - return true; + return 0; err_release: mutex_unlock(&mdevice->devmem_lock);
@@ -560,7 +561,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, err_devmem: kfree(devmem); - return false; + return ret; } static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
@@ -569,13 +570,14 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) struct page *rpage; /* - * This is a fake device so we alloc real system memory to store - * our device memory. + * For ZONE_DEVICE private type, this is a fake device so we alloc real + * system memory to store our device memory. + * For ZONE_DEVICE coherent type we use the actual dpage to store the data + * and ignore rpage. */ rpage = alloc_page(GFP_HIGHUSER); if (!rpage) return NULL; - spin_lock(&mdevice->lock); if (mdevice->free_pages) {
@@ -603,7 +605,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, struct dmirror *dmirror) { struct dmirror_device *mdevice = dmirror->mdevice; - const unsigned long *src = args->src; + unsigned long *src = args->src; unsigned long *dst = args->dst; unsigned long addr;
@@ -621,12 +623,17 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * unallocated pte_none() or read-only zero page. */ spage = migrate_pfn_to_page(*src); + if (spage && is_zone_device_page(spage)) + pr_err("page already in device spage pfn: 0x%lx\n", + page_to_pfn(spage)); + BUG_ON(spage && is_zone_device_page(spage)); dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue; - rpage = dpage->zone_device_data; + rpage = is_device_private_page(dpage) ? dpage->zone_device_data : + dpage; if (spage) copy_highpage(rpage, spage); else
@@ -638,8 +645,10 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * the simulated device memory and that page holds the pointer * to the mirror. */ + rpage = dpage->zone_device_data; rpage->zone_device_data = dmirror; - + pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n", + page_to_pfn(spage), page_to_pfn(dpage)); *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; if ((*src & MIGRATE_PFN_WRITE) ||
@@ -673,10 +682,13 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, continue; /* - * Store the page that holds the data so the page table - * doesn't have to deal with ZONE_DEVICE private pages. + * For ZONE_DEVICE private pages we store the page that + * holds the data so the page table doesn't have to deal with them. + * For ZONE_DEVICE coherent pages we store the actual page, since + * the CPU has coherent access to the page. */ - entry = dpage->zone_device_data; + entry = is_device_private_page(dpage) ? dpage->zone_device_data : + dpage; if (*dst & MIGRATE_PFN_WRITE) entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -690,7 +702,110 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, return 0; } -static int dmirror_migrate(struct dmirror *dmirror, +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, + struct dmirror *dmirror) +{ + unsigned long *src = args->src; + unsigned long *dst = args->dst; + unsigned long start = args->start; + unsigned long end = args->end; + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE, + src++, dst++) { + struct page *dpage, *spage; + + spage = migrate_pfn_to_page(*src); + if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) + continue; + + BUG_ON(!is_device_page(spage)); + spage = is_device_private_page(spage) ? spage->zone_device_data: + spage; + dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); + if (!dpage) + continue; + pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n", + page_to_pfn(spage), page_to_pfn(dpage)); + + lock_page(dpage); + xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); + copy_highpage(dpage, spage); + *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + if (*src & MIGRATE_PFN_WRITE) + *dst |= MIGRATE_PFN_WRITE; + } + return 0; +} + +static int dmirror_migrate_to_system(struct dmirror *dmirror, + struct hmm_dmirror_cmd *cmd) +{ + unsigned long start, end, addr; + unsigned long size = cmd->npages << PAGE_SHIFT; + struct mm_struct *mm = dmirror->notifier.mm; + struct vm_area_struct *vma; + unsigned long src_pfns[64]; + unsigned long dst_pfns[64]; + struct migrate_vma args; + unsigned long next; + int ret; + + start = cmd->addr; + end = start + size; + if (end < start) + return -EINVAL; + + /* Since the mm is for the mirrored process, get a reference first. 
*/ + if (!mmget_not_zero(mm)) + return -EINVAL; + + mmap_read_lock(mm); + for (addr = start; addr < end; addr = next) { + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start || + !(vma->vm_flags & VM_READ)) { + ret = -EINVAL; + goto out; + } + next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT)); + if (next > vma->vm_end) + next = vma->vm_end; + + args.vma = vma; + args.src = src_pfns; + args.dst = dst_pfns; + args.start = addr; + args.end = next; + args.pgmap_owner = dmirror->mdevice; + args.flags = (dmirror->mdevice->zone_device_type == + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? + MIGRATE_VMA_SELECT_DEVICE_PRIVATE : + MIGRATE_VMA_SELECT_DEVICE_COHERENT; + + ret = migrate_vma_setup(&args); + if (ret) + goto out; + + pr_debug("Migrating from device mem to sys mem\n"); + dmirror_devmem_fault_alloc_and_copy(&args, dmirror); + + migrate_vma_pages(&args); + migrate_vma_finalize(&args); + } + mmap_read_unlock(mm); + mmput(mm); + + return ret; + +out: + mmap_read_unlock(mm); + mmput(mm); + return ret; +} + + +static int dmirror_migrate_to_device(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) { unsigned long start, end, addr;
@@ -736,6 +851,7 @@ static int dmirror_migrate(struct dmirror *dmirror, if (ret) goto out; + pr_debug("Migrating from sys mem to device mem\n"); dmirror_migrate_alloc_and_copy(&args, dmirror); migrate_vma_pages(&args); dmirror_migrate_finalize_and_map(&args, dmirror);
@@ -744,7 +860,7 @@ static int dmirror_migrate(struct dmirror *dmirror, mmap_read_unlock(mm); mmput(mm); - /* Return the migrated data for verification. */ + /* Return the migrated data for verification. only for pages in device zone */ ret = dmirror_bounce_init(&bounce, start, size); if (ret) return ret;
@@ -758,6 +874,7 @@ static int dmirror_migrate(struct dmirror *dmirror, } cmd->cpages = bounce.cpages; dmirror_bounce_fini(&bounce); + return ret; out:
@@ -781,9 +898,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, } page = hmm_pfn_to_page(entry); - if (is_device_private_page(page)) { - /* Is the page migrated to this device or some other? */ - if (dmirror->mdevice == dmirror_page_to_device(page)) + if (is_device_page(page)) { + /* Is page ZONE_DEVICE coherent? */ + if (!is_device_private_page(page)) + *perm = HMM_DMIRROR_PROT_DEV_COHERENT; + /* + * Is page ZONE_DEVICE private migrated to + * this device or some other? + */ + else if (dmirror->mdevice == dmirror_page_to_device(page)) *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL; else *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
@@ -983,8 +1106,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp, ret = dmirror_write(dmirror, &cmd); break; - case HMM_DMIRROR_MIGRATE: - ret = dmirror_migrate(dmirror, &cmd); + case HMM_DMIRROR_MIGRATE_TO_DEV: + ret = dmirror_migrate_to_device(dmirror, &cmd); + break; + + case HMM_DMIRROR_MIGRATE_TO_SYS: + ret = dmirror_migrate_to_system(dmirror, &cmd); break; case HMM_DMIRROR_SNAPSHOT:
@@ -1030,38 +1157,6 @@ static void dmirror_devmem_free(struct page *page) spin_unlock(&mdevice->lock); } -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, - struct dmirror *dmirror) -{ - const unsigned long *src = args->src; - unsigned long *dst = args->dst; - unsigned long start = args->start; - unsigned long end = args->end; - unsigned long addr; - - for (addr = start; addr < end; addr += PAGE_SIZE, - src++, dst++) { - struct page *dpage, *spage; - - spage = migrate_pfn_to_page(*src); - if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) - continue; - spage = spage->zone_device_data; - - dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); - if (!dpage) - continue; - - lock_page(dpage); - xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); - copy_highpage(dpage, spage); - *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; - if (*src & MIGRATE_PFN_WRITE) - *dst |= MIGRATE_PFN_WRITE; - } - return 0; -} - static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { struct migrate_vma args;
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index f86754be64fd..13cec485328d 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h@@ -17,8 +17,12 @@ * @addr: (in) user address the device will read/write * @ptr: (in) user address where device data is copied to/from * @npages: (in) number of pages to read/write + * @alloc_to_devmem: (in) desired allocation destination during migration. + * True if allocation is to device memory. + * False if allocation is to system memory. * @cpages: (out) number of pages copied * @faults: (out) number of device page faults seen + * @zone_device_type: (out) zone device memory type */ struct hmm_dmirror_cmd { __u64 addr;
@@ -26,15 +30,16 @@ struct hmm_dmirror_cmd { __u64 npages; __u64 cpages; __u64 faults; - __u64 zone_device_type; + __u32 zone_device_type; }; /* Expose the address space of the calling process through hmm device file */ #define HMM_DMIRROR_READ _IOWR('H', 0x00, struct hmm_dmirror_cmd) #define HMM_DMIRROR_WRITE _IOWR('H', 0x01, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_MIGRATE _IOWR('H', 0x02, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x03, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_GET_MEM_DEV_TYPE _IOWR('H', 0x04, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_DEV _IOWR('H', 0x02, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_SYS _IOWR('H', 0x03, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x04, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_GET_MEM_DEV_TYPE _IOWR('H', 0x05, struct hmm_dmirror_cmd) /* * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -49,6 +54,8 @@ struct hmm_dmirror_cmd { * device the ioctl() is made * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some * other device + * HMM_DMIRROR_PROT_DEV_COHERENT: Migrated device coherent page on the device + * the ioctl() is made */ enum { HMM_DMIRROR_PROT_ERROR = 0xFF,
@@ -60,6 +67,7 @@ enum { HMM_DMIRROR_PROT_ZERO = 0x10, HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20, HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30, + HMM_DMIRROR_PROT_DEV_COHERENT = 0x40, }; enum {
--
2.32.0