* thumb2: Optimize fillchar a bit more with a wider inner loop chunk size

git-svn-id: trunk@49100 -
This commit is contained in:
Jeppe Johansen 2021-04-01 20:55:21 +00:00
parent 682e29c4ff
commit d712c64236

View File

@ -124,42 +124,70 @@ end;
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe; Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm asm
// less than 0? // less than or equal to 0? (nothing to fill)
cmp r1,#0 cmp r1,#0
it lt it le
movlt pc,lr movle pc,lr
mov r3,r0 mov r3,r0
cmp r1,#8 // at least 8 bytes to do? cmp r1,#8 // at least 8 bytes to do?
blt .LFillchar2 add r1, r0
orr r2,r2,r2,lsl #8 blt .LFillchar3
orr r2,r2,r2,lsl #16 orr r2,r2,r2,lsl #8
orr r2,r2,r2,lsl #16
.LFillchar0: .LFillchar0:
tst r3,#3 // aligned yet? ands ip, r3, #3
itt ne beq .LAligned
strneb r2,[r3],#1
subne r1,r1,#1 subs r0, ip, #1
bne .LFillchar0 lsls r0, r0, #1
add pc, r0
nop
strb r2,[r3,#2]
strb r2,[r3,#1]
strb r2,[r3,#0]
rsb r0, ip, #4
add r3, r0
.LAligned:
mov ip,r2 mov ip,r2
push {r4,r5,lr}
mov r4,r2
mov r5,r2
.LFillchar1: .LFillchar1:
cmp r1,#8 // 8 bytes still to do? // Use calculated jump to do fills of x*16 bytes
blt .LFillchar2 subs r0, r1, r3
stmia r3!,{r2,ip} cmp r0, #128
sub r1,r1,#8 bge .LFillchar1_128
cmp r1,#8 // 8 bytes still to do? lsrs r0, #4
blt .LFillchar2 beq .LFillchar2
stmia r3!,{r2,ip} rsb r0, #8
sub r1,r1,#8 lsls r0, #2
cmp r1,#8 // 8 bytes still to do? add pc, r0
blt .LFillchar2 nop
stmia r3!,{r2,ip} .LFillchar1_128:
sub r1,r1,#8 stmia r3!,{r2,r4,r5,ip}
cmp r1,#8 // 8 bytes still to do? stmia r3!,{r2,r4,r5,ip}
itt ge stmia r3!,{r2,r4,r5,ip}
stmgeia r3!,{r2,ip} stmia r3!,{r2,r4,r5,ip}
subge r1,r1,#8 stmia r3!,{r2,r4,r5,ip}
bge .LFillchar1 stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
b .LFillchar1
.LFillchar2: .LFillchar2:
adr r0, .Ljumptable // Mop up any leftover 8 byte chunks. We are still aligned at this point
tbb [r0, r1] pop {r4,r5,lr}
sub r0, r1, r3
cmp r0, #8
it ge
stmgeia r3!,{r2,ip}
.LFillchar3:
// Write any remaining bytes
subs r0, r3, r1
adds r0, #7 // 7-(e-s) = 7+(s-e)
lsls r0, #1
add pc, r0
nop
strb r2,[r3,#6] strb r2,[r3,#6]
strb r2,[r3,#5] strb r2,[r3,#5]
@ -168,16 +196,6 @@ asm
strb r2,[r3,#2] strb r2,[r3,#2]
strb r2,[r3,#1] strb r2,[r3,#1]
strb r2,[r3,#0] strb r2,[r3,#0]
mov pc,lr
.Ljumptable:
.byte 7
.byte 6
.byte 5
.byte 4
.byte 3
.byte 2
.byte 1
.byte 0
end; end;
{$endif FPC_SYSTEM_HAS_FILLCHAR} {$endif FPC_SYSTEM_HAS_FILLCHAR}