From d712c64236a3e5d684975ee2a94a77c17141ceb0 Mon Sep 17 00:00:00 2001
From: Jeppe Johansen
Date: Thu, 1 Apr 2021 20:55:21 +0000
Subject: [PATCH] * thumb2: Optimize fillchar a bit more with a wider inner loop chunk size

git-svn-id: trunk@49100 -
---
 rtl/arm/thumb2.inc | 102 ++++++++++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 42 deletions(-)

Review notes (free text in this position is ignored when the patch is applied):
  * Explanatory comments were added only as trailing comments on '+' lines,
    so every hunk keeps the line counts stated in its '@@' header and the
    context / removed lines are untouched.
  * Register roles as far as the visible code shows: r3 is the running write
    pointer, r1 becomes the end address after 'add r1, r0', and r2 (copied to
    r4, r5 and ip for the wide stores) is the fill pattern.
  * NOTE(review): the second hunk removes the explicit 'mov pc,lr'. After this
    patch the routine falls off the last strb into 'end;', so it presumably
    relies on compiler-generated exit code to return -- confirm for
    'assembler;nostackframe' on this target.
  * NOTE(review): the three 'add pc, r0' dispatches assume pc reads as the
    address of that instruction + 4 (hence the nop pad), that the strb forms
    used are 2-byte encodings and that the four-register stmia is a 4-byte
    encoding -- confirm against the assembler output.

diff --git a/rtl/arm/thumb2.inc b/rtl/arm/thumb2.inc
index ac8bd8e4ab..f9914cbae8 100644
--- a/rtl/arm/thumb2.inc
+++ b/rtl/arm/thumb2.inc
@@ -124,42 +124,70 @@ end;
 Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
 asm
         // less than 0?
-        cmp     r1,#0
-        it      lt
-        movlt   pc,lr
-        mov     r3,r0
-        cmp     r1,#8           // at least 8 bytes to do?
-        blt     .LFillchar2
-        orr     r2,r2,r2,lsl #8
-        orr     r2,r2,r2,lsl #16
+        cmp     r1,#0
+        it      le
+        movle   pc,lr           // return at once when r1 <= 0 (zero now exits early as well)
+        mov     r3,r0           // r3 = running write pointer; every store below addresses through r3
+        cmp     r1,#8           // at least 8 bytes to do?
+        add     r1, r0          // r1 = r0 + r1 (end address); the blt below still tests the cmp above, so this add is expected to leave the flags alone
+        blt     .LFillchar3     // fewer than 8: byte stores only
+        orr     r2,r2,r2,lsl #8 // copy bits 0..7 of r2 into bits 8..15 (assumes r2 is 0..255 on entry)
+        orr     r2,r2,r2,lsl #16 // copy the low halfword into the high halfword
 .LFillchar0:
-        tst     r3,#3           // aligned yet?
-        itt     ne
-        strneb  r2,[r3],#1
-        subne   r1,r1,#1
-        bne     .LFillchar0
+        ands    ip, r3, #3      // ip = r3 mod 4
+        beq     .LAligned       // r3 is already a multiple of 4
+
+        subs    r0, ip, #1
+        lsls    r0, r0, #1      // r0 = (ip-1)*2: byte offset into the three strb below
+        add     pc, r0          // computed jump: skips ip-1 of the strb, so 4-ip bytes get written
+        nop
+
+        strb    r2,[r3,#2]
+        strb    r2,[r3,#1]
+        strb    r2,[r3,#0]
+        rsb     r0, ip, #4      // r0 = 4 - ip = number of bytes just stored
+        add     r3, r0          // r3 now sits on the next 4-byte boundary
+
+.LAligned:
         mov     ip,r2
+        push    {r4,r5,lr}      // r4/r5 are overwritten next; restored by the pop at .LFillchar2
+        mov     r4,r2
+        mov     r5,r2           // r2, r4, r5 and ip now all hold the pattern: 16 bytes per stmia
 .LFillchar1:
-        cmp     r1,#8           // 8 bytes still to do?
-        blt     .LFillchar2
-        stmia   r3!,{r2,ip}
-        sub     r1,r1,#8
-        cmp     r1,#8           // 8 bytes still to do?
-        blt     .LFillchar2
-        stmia   r3!,{r2,ip}
-        sub     r1,r1,#8
-        cmp     r1,#8           // 8 bytes still to do?
-        blt     .LFillchar2
-        stmia   r3!,{r2,ip}
-        sub     r1,r1,#8
-        cmp     r1,#8           // 8 bytes still to do?
-        itt     ge
-        stmgeia r3!,{r2,ip}
-        subge   r1,r1,#8
-        bge     .LFillchar1
+        // Use calculated jump to do fills of x*16 bytes
+        subs    r0, r1, r3      // r0 = end - current = bytes still to write
+        cmp     r0, #128
+        bge     .LFillchar1_128 // 128 or more left: run all eight stmia
+        lsrs    r0, #4          // r0 = number of whole 16-byte chunks left
+        beq     .LFillchar2     // none: fewer than 16 bytes remain
+        rsb     r0, #8          // r0 = 8 - chunks = how many stmia to skip
+        lsls    r0, #2          // times 4 bytes per stmia
+        add     pc, r0          // computed jump into the stmia run below
+        nop
+.LFillchar1_128:
+        stmia   r3!,{r2,r4,r5,ip} // four registers = 16 bytes, r3 written back; eight of these = 128 bytes
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        stmia   r3!,{r2,r4,r5,ip}
+        b       .LFillchar1     // recompute what is left and dispatch again
 .LFillchar2:
-        adr     r0, .Ljumptable
-        tbb     [r0, r1]
+        // Mop up any leftover 8 byte chunks. We are still aligned at this point
+        pop     {r4,r5,lr}      // undo the push done after .LAligned
+        sub     r0, r1, r3
+        cmp     r0, #8
+        it      ge
+        stmgeia r3!,{r2,ip}     // one 8-byte store when at least 8 bytes remain
+.LFillchar3:
+        // Write any remaining bytes
+        subs    r0, r3, r1      // r0 = current - end = -(bytes remaining)
+        adds    r0, #7  // 7-(e-s) = 7+(s-e)
+        lsls    r0, #1          // times 2 bytes per strb
+        add     pc, r0          // computed jump into the strb run that follows (partly outside this diff's context)
+        nop
 
         strb    r2,[r3,#6]
         strb    r2,[r3,#5]
@@ -168,16 +196,6 @@ asm
         strb    r2,[r3,#2]
         strb    r2,[r3,#1]
         strb    r2,[r3,#0]
-        mov     pc,lr
-.Ljumptable:
-        .byte   7
-        .byte   6
-        .byte   5
-        .byte   4
-        .byte   3
-        .byte   2
-        .byte   1
-        .byte   0
 end;
 
 {$endif FPC_SYSTEM_HAS_FILLCHAR}