Re: [PATCH v2 4/6] x86: Add clear_page_nocache
From: Borislav Petkov <bp@alien8.de>
Date: 2012-08-13 17:03:53
Also in:
linux-mips, linux-mm, linux-sh, lkml, sparclinux
On Mon, Aug 13, 2012 at 02:43:34PM +0300, Kirill A. Shutemov wrote:
$ cat test.c
#include <stdio.h>
#include <sys/mman.h>
#define SIZE 1024*1024*1024
void clear_page_nocache_sse2(void *page) __attribute__((regparm(1)));
/*
 * Benchmark driver: maps SIZE bytes of anonymous, pre-populated memory and
 * clears it one 4096-byte page at a time with clear_page_nocache_sse2(),
 * repeating the full sweep 100 times.
 *
 * Returns 0 on success, 1 if the mapping could not be created.
 */
int main(int argc, char** argv)
{
	char *p;
	unsigned long i, j;

	p = mmap(NULL, SIZE, PROT_WRITE|PROT_READ,
		 MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, -1, 0);
	/*
	 * mmap() signals failure with MAP_FAILED, not NULL. Without this
	 * check a failed mapping would be handed straight to the clear
	 * routine and the program would fault for an unrelated reason.
	 */
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	for (j = 0; j < 100; j++) {
		/* One call per 4096-byte page across the whole mapping. */
		for (i = 0; i < SIZE; i += 4096) {
			clear_page_nocache_sse2(p + i);
		}
	}
	return 0;
}
$ cat clear_page_nocache_unroll32.S
/*
 * clear_page_nocache_sse2 -- zero one 4096-byte page using non-temporal
 * 32-bit stores (movnti), unrolled to 32 bytes per loop iteration.
 *
 * Syntax: GAS, AT&T operand order; run through the C preprocessor
 *         (uses #define / #undef).
 * In:     %eax = address of the page. The C caller declares this routine
 *         with regparm(1); presumably an i386 build -- TODO confirm.
 * Out:    nothing meaningful; %eax is left as 0.
 * Clobb:  %eax, %ecx, %edx, flags.
 *
 * NOTE(review): every register used here is 32 bits wide, so only the low
 * 32 bits of the incoming pointer are kept if this is assembled for x86-64.
 * NOTE(review): no sfence follows the non-temporal stores -- confirm
 * whether callers need the stores ordered before returning.
 */
.globl clear_page_nocache_sse2
/* Pad up to the alignment boundary with 0x90 (NOP) bytes. */
.align 4,0x90
clear_page_nocache_sse2:
.cfi_startproc
/* edx = destination cursor; frees eax to hold the zero being stored. */
mov %eax,%edx
/* eax = 0, the value written to every dword of the page. */
xorl %eax,%eax
/* ecx = 4096/32 = 128 iterations, each clearing 32 bytes. */
movl $4096/32,%ecx
.p2align 4
.Lloop_sse2:
/*
 * Decrement the counter first: ZF from this decl is what the jnz at the
 * bottom tests. The movnti stores and the lea in between do not modify
 * flags, so the result survives until the branch.
 */
decl %ecx
/* PUT(x): non-temporal store of the zero in eax to dword x of the chunk. */
#define PUT(x) movnti %eax,x*4(%edx)
PUT(0)
PUT(1)
PUT(2)
PUT(3)
PUT(4)
PUT(5)
PUT(6)
PUT(7)
#undef PUT
/* Advance the cursor by 32 bytes; lea is used because it leaves ZF intact. */
lea 32(%edx),%edx
/* Loop while the counter decremented above is non-zero. */
jnz .Lloop_sse2
nop
ret
.cfi_endproc
.type clear_page_nocache_sse2, @function
.size clear_page_nocache_sse2, .-clear_page_nocache_sse2
$ cat clear_page_nocache_unroll64.S
.globl clear_page_nocache_sse2
.align 4,0x90
clear_page_nocache_sse2:
.cfi_startproc
mov %eax,%edx

This must still be the 32-bit version because it segfaults here. Here's why: the mmap above gives a pointer which, on 64-bit, is wider than 32 bits, i.e. it looks like 0x7fffxxxxx000, i.e. starting from the top of userspace. Now, the mov above truncates that pointer and the thing segfaults. Doing s/edx/rdx/g fixes it though.

Thanks.

--
Regards/Gruss,
Boris.