Thread (55 messages) 55 messages, 8 authors, 2011-06-01

Re: [PATCH 12/13] kvm/powerpc: Accelerate H_PUT_TCE by implementing it in real mode

From: Alexander Graf <hidden>
Date: 2011-05-17 08:01:32
Also in: kvm

On 11.05.2011, at 12:46, Paul Mackerras wrote:
From: David Gibson <redacted>
=20
This improves I/O performance for guests using the PAPR paravirtualization
interface by making the H_PUT_TCE hcall faster, by implementing it in
real mode.  H_PUT_TCE is used for updating virtual IOMMU tables, and is
used both for virtual I/O and for real I/O in the PAPR interface.
=20
Since this moves the IOMMU tables into the kernel, we define a new
KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables.
The ioctl returns a file descriptor which can be used to mmap the
newly created table.
=20
Signed-off-by: Paul Mackerras <redacted>
---
arch/powerpc/include/asm/kvm.h           |    9 +++
arch/powerpc/include/asm/kvm_book3s_64.h |    2 +
arch/powerpc/include/asm/kvm_host.h      |    9 +++
arch/powerpc/include/asm/kvm_ppc.h       |    2 +
arch/powerpc/kvm/Makefile                |    3 +-
arch/powerpc/kvm/book3s_64_vio_hv.c      |   73 +++++++++++++++++++
arch/powerpc/kvm/book3s_hv.c             |  116 +++++++++++++++++++++++++++++-
arch/powerpc/kvm/book3s_hv_rmhandlers.S  |    2 +-
arch/powerpc/kvm/powerpc.c               |   18 +++++
include/linux/kvm.h                      |    5 ++
This one definitely needs documentation :).
quoted hunk ↗ jump to hunk
10 files changed, 236 insertions(+), 3 deletions(-)
create mode 100644 arch/powerpc/kvm/book3s_64_vio_hv.c
=20
diff --git a/arch/powerpc/include/asm/kvm.h =
b/arch/powerpc/include/asm/kvm.h
quoted hunk ↗ jump to hunk
index 18ea696..a9e641b 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -22,6 +22,9 @@
=20
#include <linux/types.h>
=20
+/* Select powerpc specific features in <linux/kvm.h> */
+#define __KVM_HAVE_SPAPR_TCE
+
struct kvm_regs {
	__u64 pc;
	__u64 cr;
@@ -88,4 +91,10 @@ struct kvm_guest_debug_arch {
#define KVM_INTERRUPT_UNSET	-2U
#define KVM_INTERRUPT_SET_LEVEL	-3U
=20
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+	__u64 liobn;
+	__u32 window_size;
+};
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h =
b/arch/powerpc/include/asm/kvm_book3s_64.h
quoted hunk ↗ jump to hunk
index 4cadd61..e1a096b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -25,4 +25,6 @@ static inline struct kvmppc_book3s_shadow_vcpu =
*to_svcpu(struct kvm_vcpu *vcpu)
quoted hunk ↗ jump to hunk
	return &get_paca()->shadow_vcpu;
}
=20
+#define SPAPR_TCE_SHIFT		12
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h =
b/arch/powerpc/include/asm/kvm_host.h
quoted hunk ↗ jump to hunk
index af6703e..cda183e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -144,6 +144,14 @@ struct kvmppc_pginfo {
	atomic_t refcnt;
};
=20
+struct kvmppc_spapr_tce_table {
+	struct list_head list;
+	struct kvm *kvm;
+	u64 liobn;
+	u32 window_size;
+	struct page *pages[0];
+};
+
struct kvm_arch {
	unsigned long hpt_virt;
	unsigned long ram_npages;
@@ -157,6 +165,7 @@ struct kvm_arch {
	unsigned long host_sdr1;
	int tlbie_lock;
	unsigned short last_vcpu[NR_CPUS];
+	struct list_head spapr_tce_tables;
};
=20
struct kvmppc_pte {
diff --git a/arch/powerpc/include/asm/kvm_ppc.h =
b/arch/powerpc/include/asm/kvm_ppc.h
quoted hunk ↗ jump to hunk
index b4ee11a..de683fa 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -117,6 +117,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+				struct kvm_create_spapr_tce *args);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 37c1a60..8ba062f 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -59,7 +59,8 @@ kvm-book3s_64_hv-objs :=3D \
	book3s.o \
	book3s_hv.o \
	book3s_hv_interrupts.o \
-	book3s_64_mmu_hv.o
+	book3s_64_mmu_hv.o \
+	book3s_64_vio_hv.o
kvm-objs-$(CONFIG_KVM_BOOK3S_64_HV) :=3D $(kvm-book3s_64_hv-objs)
=20
kvm-book3s_32-objs :=3D \
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c =
b/arch/powerpc/kvm/book3s_64_vio_hv.c
quoted hunk ↗ jump to hunk
new file mode 100644
index 0000000..ea0f8c5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,73 @@
+/*
+ * This program is free software; you can redistribute it and/or =
modify
+ * it under the terms of the GNU General Public License, version 2, =
as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  =
02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. [off-list ref]
+ * Copyright 2011 David Gibson, IBM Corporation [off-list ref]
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))
+
It would be great to somehow mark code that runs in real mode as such -
either by an attribute in the function header or by a simple comment.
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba, unsigned long tce)
+{
+	struct kvm *kvm =3D vcpu->kvm;
+	struct kvmppc_spapr_tce_table *stt;
+
+	/* udbg_printf("H_PUT_TCE(): liobn=3D0x%lx ioba=3D0x%lx, =
tce=3D0x%lx\n", */
+	/* 	    liobn, ioba, tce); */
+
+	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+		if (stt->liobn =3D=3D liobn) {
+			unsigned long idx =3D ioba >> SPAPR_TCE_SHIFT;
+			struct page *page;
+			u64 *tbl;
+
+			/* udbg_printf("H_PUT_TCE: liobn 0x%lx =3D> =
stt=3D%p  window_size=3D0x%x\n", */
+			/* 	    liobn, stt, stt->window_size); */
+			if (ioba >=3D stt->window_size)
+				return H_PARAMETER;
+
+			page =3D stt->pages[idx / TCES_PER_PAGE];
+			tbl =3D (u64 *)page_address(page);
+
+			/* FIXME: Need to validate the TCE itself */
+			/* udbg_printf("tce @ %p\n", &tbl[idx % =
TCES_PER_PAGE]); */
quoted hunk ↗ jump to hunk
+			tbl[idx % TCES_PER_PAGE] =3D tce;
+			return H_SUCCESS;
+		}
+	}
+
+	/* Didn't find the liobn, punt it to userspace */
+	return H_TOO_HARD;
+}
diff --git a/arch/powerpc/kvm/book3s_hv.c =
b/arch/powerpc/kvm/book3s_hv.c
quoted hunk ↗ jump to hunk
index 377a35a..eed2c10 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -506,6 +506,116 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct =
kvm_vcpu *vcpu)
	return r;
}
=20
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+		     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table =
*stt)
+{
+	struct kvm *kvm =3D stt->kvm;
+	int i;
+
+	mutex_lock(&kvm->lock);
+	list_del(&stt->list);
+	for (i =3D 0; i < kvmppc_stt_npages(stt->window_size); i++)
+		__free_page(stt->pages[i]);
+	kfree(stt);
+	mutex_unlock(&kvm->lock);
+
+	kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct =
vm_fault *vmf)
+{
+	struct kvmppc_spapr_tce_table *stt =3D =
vma->vm_file->private_data;
+	struct page *page;
+
+	if (vmf->pgoff >=3D kvmppc_stt_npages(stt->window_size))
+		return VM_FAULT_SIGBUS;
+
+	page =3D stt->pages[vmf->pgoff];
+	get_page(page);
+	vmf->page =3D page;
+	return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops =3D {
+	.fault =3D kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct =
vm_area_struct *vma)
+{
+	vma->vm_ops =3D &kvm_spapr_tce_vm_ops;
+	return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file =
*filp)
quoted hunk ↗ jump to hunk
+{
+	struct kvmppc_spapr_tce_table *stt =3D filp->private_data;
+
+	release_spapr_tce_table(stt);
+	return 0;
+}
+
+static struct file_operations kvm_spapr_tce_fops =3D {
+	.mmap           =3D kvm_spapr_tce_mmap,
+	.release	=3D kvm_spapr_tce_release,
+};
+
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+				   struct kvm_create_spapr_tce *args)
+{
+	struct kvmppc_spapr_tce_table *stt =3D NULL;
+	long npages;
+	int ret =3D -ENOMEM;
+	int i;
+
+	/* Check this LIOBN hasn't been previously allocated */
+	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+		if (stt->liobn =3D=3D args->liobn)
+			return -EBUSY;
+	}
+
+	npages =3D kvmppc_stt_npages(args->window_size);
+
+	stt =3D kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
+		      GFP_KERNEL);
+	if (!stt)
+		goto fail;
+
+	stt->liobn =3D args->liobn;
+	stt->window_size =3D args->window_size;
+	stt->kvm =3D kvm;
+
+	for (i =3D 0; i < npages; i++) {
+		stt->pages[i] =3D alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!stt->pages[i])
+			goto fail;
+	}
+
+	kvm_get_kvm(kvm);
+
+	mutex_lock(&kvm->lock);
+	list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+
+	mutex_unlock(&kvm->lock);
+
+	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				stt, O_RDONLY);
+
+fail:
+	if (stt) {
+		for (i =3D 0; i < npages; i++)
+			if (stt->pages[i])
+				__free_page(stt->pages[i]);
+
+		kfree(stt);
+	}
+	return ret;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem)
{
@@ -527,13 +637,17 @@ int kvmppc_core_init_vm(struct kvm *kvm)
=20
	/* Allocate hashed page table */
	r =3D kvmppc_alloc_hpt(kvm);
+	if (r)
+		return r;
=20
-	return r;
+	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	return 0;
}
=20
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	kvmppc_free_hpt(kvm);
+	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}
=20
/* These are stubs for now */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S =
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
quoted hunk ↗ jump to hunk
index e8a8f3c..95f6386 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -722,7 +722,7 @@ hcall_real_table:
	.long	0		/* 0x14 - H_CLEAR_REF */
	.long	.kvmppc_h_protect - hcall_real_table
	.long	0		/* 0x1c - H_GET_TCE */
-	.long	0		/* 0x20 - H_SET_TCE */
+	.long	.kvmppc_h_put_tce - hcall_real_table
	.long	0		/* 0x24 - H_SET_SPRG0 */
	.long	.kvmppc_h_set_dabr - hcall_real_table
	.long	0		/* 0x2c */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7bfe413..10f777a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -196,6 +196,11 @@ int kvm_dev_ioctl_check_extension(long ext)
		r =3D KVM_COALESCED_MMIO_PAGE_OFFSET;
		break;
#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	case KVM_CAP_SPAPR_TCE:
+		r =3D 1;
+		break;
+#endif
	default:
		r =3D 0;
		break;
@@ -628,6 +633,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
=20
		break;
	}
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	case KVM_CREATE_SPAPR_TCE: {
+		struct kvm_create_spapr_tce create_tce;
+		struct kvm *kvm =3D filp->private_data;
+
+		r =3D -EFAULT;
+		if (copy_from_user(&create_tce, argp, =
sizeof(create_tce)))
+			goto out;
+		r =3D kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+		goto out;
+	}
I'm not sure I fully understand how this is supposed to work. If the
tables are kept inside the kernel, how does userspace get to know where
to DMA to?


Alex
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help