mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-09 00:46:00 +02:00
* thumb2: Optimize fillchar a bit more with a wider inner loop chunk size
git-svn-id: trunk@49100 -
This commit is contained in:
parent
682e29c4ff
commit
d712c64236
@ -124,42 +124,70 @@ end;
|
|||||||
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
|
Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
|
||||||
asm
|
asm
|
||||||
// less than 0?
|
// less than or equal to 0? (nothing to fill)
|
||||||
cmp r1,#0
|
cmp r1,#0
|
||||||
it lt
|
it le
|
||||||
movlt pc,lr
|
movle pc,lr
|
||||||
mov r3,r0
|
mov r3,r0
|
||||||
cmp r1,#8 // at least 8 bytes to do?
|
cmp r1,#8 // at least 8 bytes to do?
|
||||||
blt .LFillchar2
|
add r1, r0
|
||||||
orr r2,r2,r2,lsl #8
|
blt .LFillchar3
|
||||||
orr r2,r2,r2,lsl #16
|
orr r2,r2,r2,lsl #8
|
||||||
|
orr r2,r2,r2,lsl #16
|
||||||
.LFillchar0:
|
.LFillchar0:
|
||||||
tst r3,#3 // aligned yet?
|
ands ip, r3, #3
|
||||||
itt ne
|
beq .LAligned
|
||||||
strneb r2,[r3],#1
|
|
||||||
subne r1,r1,#1
|
subs r0, ip, #1
|
||||||
bne .LFillchar0
|
lsls r0, r0, #1
|
||||||
|
add pc, r0
|
||||||
|
nop
|
||||||
|
|
||||||
|
strb r2,[r3,#2]
|
||||||
|
strb r2,[r3,#1]
|
||||||
|
strb r2,[r3,#0]
|
||||||
|
rsb r0, ip, #4
|
||||||
|
add r3, r0
|
||||||
|
|
||||||
|
.LAligned:
|
||||||
mov ip,r2
|
mov ip,r2
|
||||||
|
push {r4,r5,lr}
|
||||||
|
mov r4,r2
|
||||||
|
mov r5,r2
|
||||||
.LFillchar1:
|
.LFillchar1:
|
||||||
cmp r1,#8 // 8 bytes still to do?
|
// Use calculated jump to do fills of x*16 bytes
|
||||||
blt .LFillchar2
|
subs r0, r1, r3
|
||||||
stmia r3!,{r2,ip}
|
cmp r0, #128
|
||||||
sub r1,r1,#8
|
bge .LFillchar1_128
|
||||||
cmp r1,#8 // 8 bytes still to do?
|
lsrs r0, #4
|
||||||
blt .LFillchar2
|
beq .LFillchar2
|
||||||
stmia r3!,{r2,ip}
|
rsb r0, #8
|
||||||
sub r1,r1,#8
|
lsls r0, #2
|
||||||
cmp r1,#8 // 8 bytes still to do?
|
add pc, r0
|
||||||
blt .LFillchar2
|
nop
|
||||||
stmia r3!,{r2,ip}
|
.LFillchar1_128:
|
||||||
sub r1,r1,#8
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
cmp r1,#8 // 8 bytes still to do?
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
itt ge
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
stmgeia r3!,{r2,ip}
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
subge r1,r1,#8
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
bge .LFillchar1
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
|
stmia r3!,{r2,r4,r5,ip}
|
||||||
|
b .LFillchar1
|
||||||
.LFillchar2:
|
.LFillchar2:
|
||||||
adr r0, .Ljumptable
|
// Mop up one leftover 8 byte chunk, if present (fewer than 16 bytes remain here). We are still aligned at this point
|
||||||
tbb [r0, r1]
|
pop {r4,r5,lr}
|
||||||
|
sub r0, r1, r3
|
||||||
|
cmp r0, #8
|
||||||
|
it ge
|
||||||
|
stmgeia r3!,{r2,ip}
|
||||||
|
.LFillchar3:
|
||||||
|
// Write any remaining bytes
|
||||||
|
subs r0, r3, r1
|
||||||
|
adds r0, #7 // 7-(e-s) = 7+(s-e)
|
||||||
|
lsls r0, #1
|
||||||
|
add pc, r0
|
||||||
|
nop
|
||||||
|
|
||||||
strb r2,[r3,#6]
|
strb r2,[r3,#6]
|
||||||
strb r2,[r3,#5]
|
strb r2,[r3,#5]
|
||||||
@ -168,16 +196,6 @@ asm
|
|||||||
strb r2,[r3,#2]
|
strb r2,[r3,#2]
|
||||||
strb r2,[r3,#1]
|
strb r2,[r3,#1]
|
||||||
strb r2,[r3,#0]
|
strb r2,[r3,#0]
|
||||||
mov pc,lr
|
|
||||||
.Ljumptable:
|
|
||||||
.byte 7
|
|
||||||
.byte 6
|
|
||||||
.byte 5
|
|
||||||
.byte 4
|
|
||||||
.byte 3
|
|
||||||
.byte 2
|
|
||||||
.byte 1
|
|
||||||
.byte 0
|
|
||||||
end;
|
end;
|
||||||
{$endif FPC_SYSTEM_HAS_FILLCHAR}
|
{$endif FPC_SYSTEM_HAS_FILLCHAR}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user