Re: [PATCH -V2 2/2] powerpc: Update kernel VSID range
From: Aneesh Kumar K.V <hidden>
Date: 2013-03-12 07:19:38
Subsystem:
linux for powerpc (32-bit and 64-bit), the rest · Maintainers:
Madhavan Srinivasan, Michael Ellerman, Linus Torvalds
David Gibson [off-list ref] writes:
On Fri, Feb 15, 2013 at 09:09:38PM +0530, Aneesh Kumar K.V wrote:quoted
From: "Aneesh Kumar K.V" <redacted>
.... snip....
quoted
- * - We allow for USER_ESID_BITS significant bits of ESID and - * CONTEXT_BITS bits of context for user addresses. - * i.e. 64T (46 bits) of address space for up to half a million contexts. - * - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). */ #define CONTEXT_BITS 19@@ -386,15 +382,25 @@ extern void slb_set_size(u16 size); #define USER_ESID_BITS_1T 6USER_ESID_BITS should probably be renamed just ESID_BITS, since it's now relevant to kernel addresses too.
Done. I added this as a patch on top of the series.
quoted
/* + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). + */ +#define MAX_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 1)Hrm. I think it would be clearer to have MAX_CONTEXT (still) be the maximum usable *user* context (i.e. 0x80000 - 5) and put the kernel ones above that still.
Done as -#define MAX_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 1) +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) Also updated the reference of MAX_CONTEXT in the code to MAX_USER_CONTEXT
quoted
+ +/* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1) +#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS) #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_1T (CONTEXT_BITS + USER_ESID_BITS_1T + 1) +#define VSID_BITS_1T (CONTEXT_BITS + USER_ESID_BITS_1T) #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
.... snip......
quoted
#endif /* _ASM_POWERPC_MMU_HASH64_H_ */diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4665e82..0e9c48c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S@@ -1268,20 +1268,39 @@ do_ste_alloc: _GLOBAL(do_stab_bolted)The stab path certainly hasn't been tested, since we've been broken on stab machines for a long time.quoted
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + mfspr r11,SPRN_DAR /* ea */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + clrldi r9,r11,4 + li r10,-1 + clrldi r10,r10,(64 - 46) + cmpld cr7,r9,r10You can replace the above 4 instructions with just: rldicr. r9,r11,4,(64-46-4)
nice, updated as below - clrldi r9,r11,4 - li r10,-1 - clrldi r10,r10,(64 - 46) - cmpld cr7,r9,r10 + rldicr. r9,r11,4,(64 - 46 - 4) li r9,0 /* VSID = 0 for bad address */ - bgt cr7,0f + bne- 0f
quoted
+ li r9,0 /* VSID = 0 for bad address */ + bgt cr7,0f + + /*
.... snip....
quoted
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3a292be..bfeab83 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c@@ -194,6 +194,11 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + /* + * If we hit a bad address return error. + */ + if (!vsid) + return -1; /* Make kernel text executable */ if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N;@@ -921,11 +926,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) { - DBG_LOW(" out of pgtable range !\n"); - return 1; - } -Hrm. This test is conceptually different, even if the logic is the same as the vsid availablility test you may have performed earlier. Perhaps add BUILD_BUG_ON()s to ensure that they really are the same.
can you elaborate that ? What BUILD_BUG_ON test are you suggesting ?
quoted
/* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID:@@ -956,6 +956,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + /* Bad address. */ + if (!vsid) { + DBG_LOW("Bad address!\n"); + return 1; + } /* Get pgdir */ pgdir = mm->pgd; if (pgdir == NULL)@@ -1125,6 +1130,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + if (!vsid) + return; /* Hash doesn't like irqs */ local_irq_save(flags);@@ -1217,6 +1224,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize);diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 40bc5b0..59cd773 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c@@ -29,15 +29,6 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); -/* - * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments - * available for user mappings. Each segment contains 2^28 bytes. Each - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts - * (19 == 37 + 28 - 46). - */ -#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) - int __init_new_context(void) { int index;@@ -56,7 +47,8 @@ again: else if (err) return err; - if (index > MAX_CONTEXT) { + if (index > (MAX_CONTEXT - 4)) { + /* Top 4 context id values are used for kernel */This change would not be necessary if you changed MAX_CONTEXT as suggested above.
done now have
- if (index > (MAX_CONTEXT - 4)) {
- /* Top 4 context id values are used for kernel */
+ if (index > (MAX_USER_CONTEXT) {
quoted
spin_lock(&mmu_context_lock); ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock);diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1a16ca2..c066d00 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S@@ -31,13 +31,20 @@ * No other registers are examined or changed. */ _GLOBAL(slb_allocate_realmode) - /* r3 = faulting address */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + clrldi r9,r3,4 + li r10,-1 + clrldi r10,r10,(64 - 46) + cmpld cr7,r9,r10 + bgt cr7,8fAs in the stab path, you can accomplish this with a single rldicr.quoted
srdi r9,r3,60 /* get region */ - srdi r10,r3,28 /* get esid */ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ - /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ + /* r3 = address, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? */ /* kernel address: proto-VSID = ESID */@@ -56,18 +63,26 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 - li r9,0x1 + /* + * context = (MAX_CONTEXT - 3) + ((ea >> 60) - 0xc) + */ + srdi r9,r3,60 + subi r9,r9,(0xc + 3 + 1) + lis r10, 8 + add r9,r9,r10Hrm. You can avoid clobbering r10, which I assume is why you removed the computation of esid from the common path by doing this instead: rldicl r9,r3,4,62 addis r9,r9,8 subi r9,r9,4
nice. Updated. I am inlining the final patch below.
quoted
+ srdi r10,r3,SID_SHIFT /* get esid */ /* * for 1T we shift 12 bits more. slb_finish_load_1T will do * the necessary adjustment */ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + rldimi r10,r9,USER_ESID_BITS,0 BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 1: + srdi r10,r3,SID_SHIFT /* get esid */ #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Check virtual memmap region. To be patches at kernel boot */ cmpldi cr0,r9,0xf@@ -91,23 +106,26 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: - li r9,0x1 + /* + * context = (MAX_CONTEXT - 3) + ((ea >> 60) - 0xc) + */ + srdi r9,r3,60 + subi r9,r9,(0xc + 3 + 1) + lis r10,8 + add r9,r9,r10 + srdi r10,r3,SID_SHIFTSame here. Can you put the kernel context calculation into a common path? In fact, now that kernel vsids, like user vsids are made of a context and vsid component, can you put the rldimi which combines them into a common path for both kernel and user addresses?
will do.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ffa0c48..f0351b5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S@@ -1274,12 +1274,9 @@ _GLOBAL(do_stab_bolted) * check for bad kernel/user address * (ea & ~REGION_MASK) >= PGTABLE_RANGE */ - clrldi r9,r11,4 - li r10,-1 - clrldi r10,r10,(64 - 46) - cmpld cr7,r9,r10 + rldicr. r9,r11,4,(64 - 46 - 4) li r9,0 /* VSID = 0 for bad address */ - bgt cr7,0f + bne- 0f /* * Calculate VSID:
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 80fd644..349411e 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S@@ -35,16 +35,14 @@ _GLOBAL(slb_allocate_realmode) * check for bad kernel/user address * (ea & ~REGION_MASK) >= PGTABLE_RANGE */ - clrldi r9,r3,4 - li r10,-1 - clrldi r10,r10,(64 - 46) - cmpld cr7,r9,r10 - bgt cr7,8f + rldicr. r9,r3,4,(64 - 46 - 4) + bne- 8f srdi r9,r3,60 /* get region */ + srdi r10,r3,SID_SHIFT /* get esid */ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ - /* r3 = address, cr7 = <> PAGE_OFFSET */ + /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? */ /* kernel address: proto-VSID = ESID */
@@ -66,11 +64,10 @@ _GLOBAL(slb_miss_kernel_load_linear) /* * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 */ - srdi r9,r3,60 - subi r9,r9,(0xc + 4) - lis r10, 8 - add r9,r9,r10 - srdi r10,r3,SID_SHIFT /* get esid */ + rldicl r9,r3,4,62 + addis r9,r9,8 + subi r9,r9,4 + /* * for 1T we shift 12 bits more. slb_finish_load_1T will do * the necessary adjustment
@@ -82,7 +79,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 1: - srdi r10,r3,SID_SHIFT /* get esid */ #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Check virtual memmap region. To be patches at kernel boot */ cmpldi cr0,r9,0xf
@@ -109,11 +105,9 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) /* * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 */ - srdi r9,r3,60 - subi r9,r9,(0xc + 4) - lis r10,8 - add r9,r9,r10 - srdi r10,r3,SID_SHIFT + rldicl r9,r3,4,62 + addis r9,r9,8 + subi r9,r9,4 /* * for 1T we shift 12 bits more. slb_finish_load_1T will do * the necessary adjustment
@@ -125,8 +119,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 0: - srdi r10,r3,SID_SHIFT /* get esid */ - /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array.