Thread (14 messages) 14 messages, 3 authors, 2023-08-25

Re: [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions()

From: Christophe Leroy <hidden>
Date: 2023-03-11 07:42:16
Also in: bpf


Le 10/03/2023 à 19:26, Christophe Leroy a écrit :

Le 09/03/2023 à 19:02, Hari Bathini a écrit :
quoted
patch_instruction() entails setting up pte, patching the instruction,
clearing the pte and flushing the tlb. If multiple instructions need
to be patched, every instruction would have to go through the above
drill unnecessarily. Instead, introduce function patch_instructions()
that patches multiple instructions at one go while setting up the pte,
clearing the pte and flushing the tlb only once per page range of
instructions. Observed ~5X improvement in speed of execution using
patch_instructions() over patch_instruction(), when more instructions
are to be patched.
I get a 13% degradation on the time needed to activate ftrace on a 
powerpc 8xx.

Before your patch, activating ftrace takes 550k timebase ticks. After
your patch, it takes 620k timebase ticks.
More details about the problem:

Before your patch, patch_instruction() is a simple, stackless function
(note that the first branch is replaced by a nop after startup).

00000254 <patch_instruction>:
  254:	48 00 00 6c 	b       2c0 <patch_instruction+0x6c>
  258:	7c e0 00 a6 	mfmsr   r7
  25c:	7c 51 13 a6 	mtspr   81,r2
  260:	3d 40 00 00 	lis     r10,0
			262: R_PPC_ADDR16_HA	.data
  264:	39 4a 00 00 	addi    r10,r10,0
			266: R_PPC_ADDR16_LO	.data
  268:	7c 69 1b 78 	mr      r9,r3
  26c:	3d 29 40 00 	addis   r9,r9,16384
  270:	81 0a 00 08 	lwz     r8,8(r10)
  274:	55 29 00 26 	rlwinm  r9,r9,0,0,19
  278:	81 4a 00 04 	lwz     r10,4(r10)
  27c:	61 29 01 25 	ori     r9,r9,293
  280:	91 28 00 00 	stw     r9,0(r8)
  284:	55 49 00 26 	rlwinm  r9,r10,0,0,19
  288:	50 6a 05 3e 	rlwimi  r10,r3,0,20,31
  28c:	90 8a 00 00 	stw     r4,0(r10)
  290:	7c 00 50 6c 	dcbst   0,r10
  294:	7c 00 04 ac 	hwsync
  298:	7c 00 1f ac 	icbi    0,r3
  29c:	7c 00 04 ac 	hwsync
  2a0:	4c 00 01 2c 	isync
  2a4:	38 60 00 00 	li      r3,0
  2a8:	39 40 00 00 	li      r10,0
  2ac:	91 48 00 00 	stw     r10,0(r8)
  2b0:	7c 00 4a 64 	tlbie   r9,r0
  2b4:	7c 00 04 ac 	hwsync
  2b8:	7c e0 01 24 	mtmsr   r7
  2bc:	4e 80 00 20 	blr

  2c0:	90 83 00 00 	stw     r4,0(r3)
  2c4:	7c 00 18 6c 	dcbst   0,r3
  2c8:	7c 00 04 ac 	hwsync
  2cc:	7c 00 1f ac 	icbi    0,r3
  2d0:	7c 00 04 ac 	hwsync
  2d4:	4c 00 01 2c 	isync
  2d8:	38 60 00 00 	li      r3,0
  2dc:	4e 80 00 20 	blr
  2e0:	38 60 ff ff 	li      r3,-1
  2e4:	4b ff ff c4 	b       2a8 <patch_instruction+0x54>
  2e8:	38 60 ff ff 	li      r3,-1
  2ec:	4e 80 00 20 	blr


Once your patch is applied, patch_instruction() becomes a function that
has to set up a stack frame in order to call __do_patch_instructions(),
and __do_patch_instructions() is quite a big function.

0000022c <__do_patch_instructions>:
  22c:	3d 20 00 00 	lis     r9,0
			22e: R_PPC_ADDR16_HA	.data
  230:	39 29 00 00 	addi    r9,r9,0
			232: R_PPC_ADDR16_LO	.data
  234:	81 69 00 04 	lwz     r11,4(r9)
  238:	2c 05 00 00 	cmpwi   r5,0
  23c:	81 89 00 08 	lwz     r12,8(r9)
  240:	7c c3 32 14 	add     r6,r3,r6
  244:	55 6b 00 26 	rlwinm  r11,r11,0,0,19
  248:	38 00 00 00 	li      r0,0
  24c:	54 6a 05 3e 	clrlwi  r10,r3,20
  250:	21 0a 10 00 	subfic  r8,r10,4096
  254:	7d 03 42 14 	add     r8,r3,r8
  258:	7c 69 1b 78 	mr      r9,r3
  25c:	7f 88 30 40 	cmplw   cr7,r8,r6
  260:	3d 29 40 00 	addis   r9,r9,16384
  264:	55 29 00 26 	rlwinm  r9,r9,0,0,19
  268:	61 29 01 25 	ori     r9,r9,293
  26c:	91 2c 00 00 	stw     r9,0(r12)
  270:	7d 4a 5b 78 	or      r10,r10,r11
  274:	40 9d 00 08 	ble     cr7,27c <__do_patch_instructions+0x50>
  278:	7c c8 33 78 	mr      r8,r6
  27c:	7f 83 40 40 	cmplw   cr7,r3,r8
  280:	40 9c 01 2c 	bge     cr7,3ac <__do_patch_instructions+0x180>
  284:	7c 69 18 f8 	not     r9,r3
  288:	7d 28 4a 14 	add     r9,r8,r9
  28c:	55 29 f7 fe 	rlwinm  r9,r9,30,31,31
  290:	7c e3 50 50 	subf    r7,r3,r10
  294:	80 a4 00 00 	lwz     r5,0(r4)
  298:	90 aa 00 00 	stw     r5,0(r10)
  29c:	7c 00 50 6c 	dcbst   0,r10
  2a0:	7c 00 04 ac 	hwsync
  2a4:	7c 00 1f ac 	icbi    0,r3
  2a8:	7c 00 04 ac 	hwsync
  2ac:	4c 00 01 2c 	isync
  2b0:	38 63 00 04 	addi    r3,r3,4
  2b4:	40 82 00 08 	bne     2bc <__do_patch_instructions+0x90>
  2b8:	38 84 00 04 	addi    r4,r4,4
  2bc:	7f 83 40 40 	cmplw   cr7,r3,r8
  2c0:	40 9c 00 a4 	bge     cr7,364 <__do_patch_instructions+0x138>
  2c4:	2f 89 00 00 	cmpwi   cr7,r9,0
  2c8:	41 9e 00 38 	beq     cr7,300 <__do_patch_instructions+0xd4>
  2cc:	7d 23 3a 14 	add     r9,r3,r7
  2d0:	81 44 00 00 	lwz     r10,0(r4)
  2d4:	91 49 00 00 	stw     r10,0(r9)
  2d8:	7c 00 48 6c 	dcbst   0,r9
  2dc:	7c 00 04 ac 	hwsync
  2e0:	7c 00 1f ac 	icbi    0,r3
  2e4:	7c 00 04 ac 	hwsync
  2e8:	4c 00 01 2c 	isync
  2ec:	38 63 00 04 	addi    r3,r3,4
  2f0:	40 82 00 08 	bne     2f8 <__do_patch_instructions+0xcc>
  2f4:	38 84 00 04 	addi    r4,r4,4
  2f8:	7f 83 40 40 	cmplw   cr7,r3,r8
  2fc:	40 9c 00 68 	bge     cr7,364 <__do_patch_instructions+0x138>
  300:	7d 23 3a 14 	add     r9,r3,r7
  304:	81 44 00 00 	lwz     r10,0(r4)
  308:	91 49 00 00 	stw     r10,0(r9)
  30c:	7c 00 48 6c 	dcbst   0,r9
  310:	7c 00 04 ac 	hwsync
  314:	7c 00 1f ac 	icbi    0,r3
  318:	7c 00 04 ac 	hwsync
  31c:	4c 00 01 2c 	isync
  320:	38 63 00 04 	addi    r3,r3,4
  324:	7c 69 1b 78 	mr      r9,r3
  328:	40 82 00 08 	bne     330 <__do_patch_instructions+0x104>
  32c:	38 84 00 04 	addi    r4,r4,4
  330:	7d 49 3a 14 	add     r10,r9,r7
  334:	80 a4 00 00 	lwz     r5,0(r4)
  338:	90 aa 00 00 	stw     r5,0(r10)
  33c:	7c 00 50 6c 	dcbst   0,r10
  340:	7c 00 04 ac 	hwsync
  344:	7c 00 4f ac 	icbi    0,r9
  348:	7c 00 04 ac 	hwsync
  34c:	4c 00 01 2c 	isync
  350:	38 69 00 04 	addi    r3,r9,4
  354:	7f 83 40 40 	cmplw   cr7,r3,r8
  358:	40 82 00 08 	bne     360 <__do_patch_instructions+0x134>
  35c:	38 84 00 04 	addi    r4,r4,4
  360:	41 9c ff a0 	blt     cr7,300 <__do_patch_instructions+0xd4>
  364:	90 0c 00 00 	stw     r0,0(r12)
  368:	39 20 00 00 	li      r9,0
  36c:	7c 00 5a 64 	tlbie   r11,r0
  370:	7c 00 04 ac 	hwsync
  374:	2f 89 00 00 	cmpwi   cr7,r9,0
  378:	40 9e 00 2c 	bne     cr7,3a4 <__do_patch_instructions+0x178>
  37c:	7f 86 18 40 	cmplw   cr7,r6,r3
  380:	41 9d fe cc 	bgt     cr7,24c <__do_patch_instructions+0x20>
  384:	38 60 00 00 	li      r3,0
  388:	4e 80 00 20 	blr
  38c:	90 0c 00 00 	stw     r0,0(r12)
  390:	39 20 ff ff 	li      r9,-1
  394:	7c 00 5a 64 	tlbie   r11,r0
  398:	7c 00 04 ac 	hwsync
  39c:	2f 89 00 00 	cmpwi   cr7,r9,0
  3a0:	41 9e ff dc 	beq     cr7,37c <__do_patch_instructions+0x150>
  3a4:	38 60 ff ff 	li      r3,-1
  3a8:	4e 80 00 20 	blr
  3ac:	39 20 00 00 	li      r9,0
  3b0:	91 2c 00 00 	stw     r9,0(r12)
  3b4:	7c 00 5a 64 	tlbie   r11,r0
  3b8:	7c 00 04 ac 	hwsync
  3bc:	4b ff ff c0 	b       37c <__do_patch_instructions+0x150>

000003e8 <patch_instruction>:
  3e8:	94 21 ff e0 	stwu    r1,-32(r1)
  3ec:	90 81 00 08 	stw     r4,8(r1)
  3f0:	48 00 00 40 	b       430 <patch_instruction+0x48>
  3f4:	7c 08 02 a6 	mflr    r0
  3f8:	90 01 00 24 	stw     r0,36(r1)
  3fc:	93 e1 00 1c 	stw     r31,28(r1)
  400:	7f e0 00 a6 	mfmsr   r31
  404:	7c 51 13 a6 	mtspr   81,r2
  408:	38 c0 00 04 	li      r6,4
  40c:	38 81 00 08 	addi    r4,r1,8
  410:	38 a0 00 00 	li      r5,0
  414:	4b ff fe 19 	bl      22c <__do_patch_instructions>
  418:	7f e0 01 24 	mtmsr   r31
  41c:	80 01 00 24 	lwz     r0,36(r1)
  420:	83 e1 00 1c 	lwz     r31,28(r1)
  424:	7c 08 03 a6 	mtlr    r0
  428:	38 21 00 20 	addi    r1,r1,32
  42c:	4e 80 00 20 	blr

  430:	81 21 00 08 	lwz     r9,8(r1)
  434:	91 23 00 00 	stw     r9,0(r3)
  438:	7c 00 18 6c 	dcbst   0,r3
  43c:	7c 00 04 ac 	hwsync
  440:	7c 00 1f ac 	icbi    0,r3
  444:	7c 00 04 ac 	hwsync
  448:	4c 00 01 2c 	isync
  44c:	38 60 00 00 	li      r3,0
  450:	4b ff ff d8 	b       428 <patch_instruction+0x40>
  454:	38 60 ff ff 	li      r3,-1
  458:	4b ff ff d0 	b       428 <patch_instruction+0x40>

Christophe
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help