* thumb2: Optimize fillchar a bit more with a wider inner loop chunk size

git-svn-id: trunk@49100 -
This commit is contained in:
Jeppe Johansen 2021-04-01 20:55:21 +00:00
parent 682e29c4ff
commit d712c64236

View File

@ -124,42 +124,70 @@ end;
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
asm
// less than 0?
cmp r1,#0
it lt
movlt pc,lr
mov r3,r0
cmp r1,#8 // at least 8 bytes to do?
blt .LFillchar2
orr r2,r2,r2,lsl #8
orr r2,r2,r2,lsl #16
cmp r1,#0
it le
movle pc,lr
mov r3,r0
cmp r1,#8 // at least 8 bytes to do?
add r1, r0
blt .LFillchar3
orr r2,r2,r2,lsl #8
orr r2,r2,r2,lsl #16
.LFillchar0:
tst r3,#3 // aligned yet?
itt ne
strneb r2,[r3],#1
subne r1,r1,#1
bne .LFillchar0
ands ip, r3, #3
beq .LAligned
subs r0, ip, #1
lsls r0, r0, #1
add pc, r0
nop
strb r2,[r3,#2]
strb r2,[r3,#1]
strb r2,[r3,#0]
rsb r0, ip, #4
add r3, r0
.LAligned:
mov ip,r2
push {r4,r5,lr}
mov r4,r2
mov r5,r2
.LFillchar1:
cmp r1,#8 // 8 bytes still to do?
blt .LFillchar2
stmia r3!,{r2,ip}
sub r1,r1,#8
cmp r1,#8 // 8 bytes still to do?
blt .LFillchar2
stmia r3!,{r2,ip}
sub r1,r1,#8
cmp r1,#8 // 8 bytes still to do?
blt .LFillchar2
stmia r3!,{r2,ip}
sub r1,r1,#8
cmp r1,#8 // 8 bytes still to do?
itt ge
stmgeia r3!,{r2,ip}
subge r1,r1,#8
bge .LFillchar1
// Use calculated jump to do fills of x*16 bytes
subs r0, r1, r3
cmp r0, #128
bge .LFillchar1_128
lsrs r0, #4
beq .LFillchar2
rsb r0, #8
lsls r0, #2
add pc, r0
nop
.LFillchar1_128:
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
stmia r3!,{r2,r4,r5,ip}
b .LFillchar1
.LFillchar2:
adr r0, .Ljumptable
tbb [r0, r1]
// Mop up any leftover 8 byte chunks. We are still aligned at this point
pop {r4,r5,lr}
sub r0, r1, r3
cmp r0, #8
it ge
stmgeia r3!,{r2,ip}
.LFillchar3:
// Write any remaining bytes
subs r0, r3, r1
adds r0, #7 // 7-(e-s) = 7+(s-e)
lsls r0, #1
add pc, r0
nop
strb r2,[r3,#6]
strb r2,[r3,#5]
@ -168,16 +196,6 @@ asm
strb r2,[r3,#2]
strb r2,[r3,#1]
strb r2,[r3,#0]
mov pc,lr
.Ljumptable:
.byte 7
.byte 6
.byte 5
.byte 4
.byte 3
.byte 2
.byte 1
.byte 0
end;
{$endif FPC_SYSTEM_HAS_FILLCHAR}