Re: [PATCH 12/13] kvm/powerpc: Accelerate H_PUT_TCE by implementing it in real mode
From: Alexander Graf <hidden>
Date: 2011-05-17 08:01:32
Also in:
kvm
On 11.05.2011, at 12:46, Paul Mackerras wrote:
From: David Gibson <redacted>

This improves I/O performance for guests using the PAPR paravirtualization
interface by making the H_PUT_TCE hcall faster, by implementing it in real mode. H_PUT_TCE is used for updating virtual IOMMU tables, and is
used both for virtual I/O and for real I/O in the PAPR interface. =20 Since this moves the IOMMU tables into the kernel, we define a new KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables. The ioctl returns a file descriptor which can be used to mmap the newly created table. =20 Signed-off-by: Paul Mackerras <redacted> --- arch/powerpc/include/asm/kvm.h | 9 +++ arch/powerpc/include/asm/kvm_book3s_64.h | 2 + arch/powerpc/include/asm/kvm_host.h | 9 +++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/kvm/Makefile | 3 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 73 +++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 116 =
+++++++++++++++++++++++++++++-
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/powerpc.c | 18 +++++ include/linux/kvm.h | 5 ++
This one definitely needs documentation :).
quoted hunk ↗ jump to hunk
10 files changed, 236 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_64_vio_hv.c =20diff --git a/arch/powerpc/include/asm/kvm.h =
b/arch/powerpc/include/asm/kvm.h
quoted hunk ↗ jump to hunk
index 18ea696..a9e641b 100644--- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h@@ -22,6 +22,9 @@=20 #include <linux/types.h> =20 +/* Select powerpc specific features in <linux/kvm.h> */ +#define __KVM_HAVE_SPAPR_TCE + struct kvm_regs { __u64 pc; __u64 cr;@@ -88,4 +91,10 @@ struct kvm_guest_debug_arch {#define KVM_INTERRUPT_UNSET -2U #define KVM_INTERRUPT_SET_LEVEL -3U =20 +/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h =
b/arch/powerpc/include/asm/kvm_book3s_64.h
quoted hunk ↗ jump to hunk
index 4cadd61..e1a096b 100644--- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h@@ -25,4 +25,6 @@ static inline struct kvmppc_book3s_shadow_vcpu =
*to_svcpu(struct kvm_vcpu *vcpu)
quoted hunk ↗ jump to hunk
return &get_paca()->shadow_vcpu; } =20 +#define SPAPR_TCE_SHIFT 12 + #endif /* __ASM_KVM_BOOK3S_64_H__ */diff --git a/arch/powerpc/include/asm/kvm_host.h =
b/arch/powerpc/include/asm/kvm_host.h
quoted hunk ↗ jump to hunk
index af6703e..cda183e 100644--- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h@@ -144,6 +144,14 @@ struct kvmppc_pginfo {atomic_t refcnt; }; =20 +struct kvmppc_spapr_tce_table { + struct list_head list; + struct kvm *kvm; + u64 liobn; + u32 window_size; + struct page *pages[0]; +}; + struct kvm_arch { unsigned long hpt_virt; unsigned long ram_npages;@@ -157,6 +165,7 @@ struct kvm_arch {unsigned long host_sdr1; int tlbie_lock; unsigned short last_vcpu[NR_CPUS]; + struct list_head spapr_tce_tables; }; =20 struct kvmppc_pte {diff --git a/arch/powerpc/include/asm/kvm_ppc.h =
b/arch/powerpc/include/asm/kvm_ppc.h
quoted hunk ↗ jump to hunk
index b4ee11a..de683fa 100644--- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h@@ -117,6 +117,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,extern void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 37c1a60..8ba062f 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile@@ -59,7 +59,8 @@ kvm-book3s_64_hv-objs :=3D \book3s.o \ book3s_hv.o \ book3s_hv_interrupts.o \ - book3s_64_mmu_hv.o + book3s_64_mmu_hv.o \ + book3s_64_vio_hv.o kvm-objs-$(CONFIG_KVM_BOOK3S_64_HV) :=3D $(kvm-book3s_64_hv-objs) =20 kvm-book3s_32-objs :=3D \diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c =
b/arch/powerpc/kvm/book3s_64_vio_hv.c
quoted hunk ↗ jump to hunk
new file mode 100644 index 0000000..ea0f8c5--- /dev/null +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c@@ -0,0 +1,73 @@ +/* + * This program is free software; you can redistribute it and/or =
modify
+ * it under the terms of the GNU General Public License, version 2, =
as
+ * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA =
02110-1301, USA.
+ * + * Copyright 2010 Paul Mackerras, IBM Corp. [off-list ref] + * Copyright 2011 David Gibson, IBM Corporation [off-list ref] + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> +#include <linux/list.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/hvcall.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> +#include <asm/kvm_host.h> +#include <asm/udbg.h> + +#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) +
It would be great to somehow mark code that runs in real mode as such - either by an attribute in the function header or by a simple comment.
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvm *kvm =3D vcpu->kvm;
+ struct kvmppc_spapr_tce_table *stt;
+
+ /* udbg_printf("H_PUT_TCE(): liobn=3D0x%lx ioba=3D0x%lx, =tce=3D0x%lx\n", */
+ /* liobn, ioba, tce); */
+
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn =3D=3D liobn) {
+ unsigned long idx =3D ioba >> SPAPR_TCE_SHIFT;
+ struct page *page;
+ u64 *tbl;
+
+ /* udbg_printf("H_PUT_TCE: liobn 0x%lx =3D> =stt=3D%p window_size=3D0x%x\n", */
+ /* liobn, stt, stt->window_size); */
+ if (ioba >=3D stt->window_size)
+ return H_PARAMETER;
+
+ page =3D stt->pages[idx / TCES_PER_PAGE];
+ tbl =3D (u64 *)page_address(page);
+
+ /* FIXME: Need to validate the TCE itself */
+ /* udbg_printf("tce @ %p\n", &tbl[idx % =TCES_PER_PAGE]); */
quoted hunk ↗ jump to hunk
+ tbl[idx % TCES_PER_PAGE] =3D tce; + return H_SUCCESS; + } + } + + /* Didn't find the liobn, punt it to userspace */ + return H_TOO_HARD; +}diff --git a/arch/powerpc/kvm/book3s_hv.c =
b/arch/powerpc/kvm/book3s_hv.c
quoted hunk ↗ jump to hunk
index 377a35a..eed2c10 100644--- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c@@ -506,6 +506,116 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct =
kvm_vcpu *vcpu)
return r;
}
=20
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+ return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+ * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table =*stt)
+{
+ struct kvm *kvm =3D stt->kvm;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ list_del(&stt->list);
+ for (i =3D 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ __free_page(stt->pages[i]);
+ kfree(stt);
+ mutex_unlock(&kvm->lock);
+
+ kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct =vm_fault *vmf)
+{
+ struct kvmppc_spapr_tce_table *stt =3D =vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >=3D kvmppc_stt_npages(stt->window_size))
+ return VM_FAULT_SIGBUS;
+
+ page =3D stt->pages[vmf->pgoff];
+ get_page(page);
+ vmf->page =3D page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops =3D {
+ .fault =3D kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct =vm_area_struct *vma)
+{
+ vma->vm_ops =3D &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file =*filp)
quoted hunk ↗ jump to hunk
+{ + struct kvmppc_spapr_tce_table *stt =3D filp->private_data; + + release_spapr_tce_table(stt); + return 0; +} + +static struct file_operations kvm_spapr_tce_fops =3D { + .mmap =3D kvm_spapr_tce_mmap, + .release =3D kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args) +{ + struct kvmppc_spapr_tce_table *stt =3D NULL; + long npages; + int ret =3D -ENOMEM; + int i; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn =3D=3D args->liobn) + return -EBUSY; + } + + npages =3D kvmppc_stt_npages(args->window_size); + + stt =3D kzalloc(sizeof(*stt) + npages* sizeof(struct page *), + GFP_KERNEL); + if (!stt) + goto fail; + + stt->liobn =3D args->liobn; + stt->window_size =3D args->window_size; + stt->kvm =3D kvm; + + for (i =3D 0; i < npages; i++) { + stt->pages[i] =3D alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!stt->pages[i]) + goto fail; + } + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + list_add(&stt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDONLY); + +fail: + if (stt) { + for (i =3D 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + } + return ret; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) {@@ -527,13 +637,17 @@ int kvmppc_core_init_vm(struct kvm *kvm)=20 /* Allocate hashed page table */ r =3D kvmppc_alloc_hpt(kvm); + if (r) + return r; =20 - return r; + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + return 0; } =20 void kvmppc_core_destroy_vm(struct kvm *kvm) { kvmppc_free_hpt(kvm); + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } =20 /* These are stubs for now */diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S =
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
quoted hunk ↗ jump to hunk
index e8a8f3c..95f6386 100644--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S@@ -722,7 +722,7 @@ hcall_real_table:.long 0 /* 0x14 - H_CLEAR_REF */ .long .kvmppc_h_protect - hcall_real_table .long 0 /* 0x1c - H_GET_TCE */ - .long 0 /* 0x20 - H_SET_TCE */ + .long .kvmppc_h_put_tce - hcall_real_table .long 0 /* 0x24 - H_SET_SPRG0 */ .long .kvmppc_h_set_dabr - hcall_real_table .long 0 /* 0x2c */diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7bfe413..10f777a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c@@ -196,6 +196,11 @@ int kvm_dev_ioctl_check_extension(long ext)r =3D KVM_COALESCED_MMIO_PAGE_OFFSET; break; #endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_SPAPR_TCE: + r =3D 1; + break; +#endif default: r =3D 0; break;@@ -628,6 +633,19 @@ long kvm_arch_vm_ioctl(struct file *filp,=20 break; } +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CREATE_SPAPR_TCE: { + struct kvm_create_spapr_tce create_tce; + struct kvm *kvm =3D filp->private_data; + + r =3D -EFAULT; + if (copy_from_user(&create_tce, argp, =
sizeof(create_tce)))
+ goto out; + r =3D kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); + goto out; + }
I'm not sure I fully understand how this is supposed to work. If the tables are kept inside the kernel, how does userspace get to know where to DMA to? Alex