mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-14 00:59:30 +02:00
* fixed some bugs, simplified/optimized already implemented routines and code some more
This commit is contained in:
parent
55d92375c4
commit
f4ec8b8b12
@ -25,68 +25,16 @@ asm
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq .LStrCopyDone
|
||||
{ clear two lowest bits of source address }
|
||||
rlwminm r28,r4,0,0,31-2
|
||||
{ get # of misaligned bytes }
|
||||
sub. r28,r28,r4
|
||||
{ since we have to return dest intact, use another register for }
|
||||
{ dest in the copy loop }
|
||||
mr r29,r3
|
||||
beq .LStrCopyAligned
|
||||
.LStrCopyAlignLoop:
|
||||
{ decrease misaligned bytes counter (do it here already to improve }
|
||||
{ jump prediction) }
|
||||
subic. r28,1
|
||||
{ load next byte }
|
||||
lbz r27,(r4)
|
||||
{ end of string? }
|
||||
cmpli cr1,r27,0
|
||||
{ point to next source byte }
|
||||
addi r4,r4,1
|
||||
{ store byte }
|
||||
stb r27,(r29)
|
||||
{ point to next dest address }
|
||||
addi r29,r29,1
|
||||
{ stop if end of string }
|
||||
beq cr1,.LStrCopyDone
|
||||
bne .LStrCopyAlignLoop
|
||||
.balign 16
|
||||
.LStrCopyAligned:
|
||||
{ load next 4 bytes }
|
||||
lwz r27,(r4)
|
||||
{ first/highest byte zero? (big endian!) }
|
||||
andis. r28,r27,0x0ff00
|
||||
addi r4,r4,4
|
||||
beq .LStrCopyByte
|
||||
{ second byte zero? }
|
||||
andis. r28,r27,0x00ff
|
||||
beq .LStrCopyWord
|
||||
{ third byte zero? }
|
||||
andi. r28,r27,0xff00
|
||||
beq .LStrCopy3Bytes
|
||||
{ fourth byte zero? }
|
||||
andi. r28,r27,0x00ff
|
||||
{ store next 4 bytes }
|
||||
stw r27,(r29)
|
||||
{ increase dest address }
|
||||
addi r29,r29,4
|
||||
beq .LStrCopyDone
|
||||
b .LStrCopyAligned
|
||||
{ store left-overs }
|
||||
.LStrCopy3Bytes:
|
||||
sth r27,(r29)
|
||||
li r27,0
|
||||
stb r27,2(r29)
|
||||
b .LStrCopyDone
|
||||
.LStrCopyWord:
|
||||
sth r27,(r29)
|
||||
b .LStrCopyDone
|
||||
.LStrCopyByte:
|
||||
stb r27,(r29)
|
||||
.LStrCopyDone:
|
||||
{ r3 still contains dest here }
|
||||
end ['r4','r27','r28','r29','cr0','cr1'];
|
||||
beq LStrCopyDone
|
||||
subi r4,r4,1
|
||||
subi r9,r3,1
|
||||
LStrCopyLoop:
|
||||
lbzu r10,1(r4)
|
||||
cmpli r10,0
|
||||
stbu r10,1(r9)
|
||||
bne LStrCopyLoop
|
||||
LStrCopyDone:
|
||||
end ['r4','r9','r10','cr0'];
|
||||
|
||||
|
||||
function strecopy(dest,source : pchar) : pchar;assembler;
|
||||
@ -96,231 +44,74 @@ asm
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq .LStreCopyDone
|
||||
{ clear two lowest bits of source address }
|
||||
rlwminm r28,r4,0,0,31-2
|
||||
{ get # of misaligned bytes }
|
||||
sub. r28,r28,r4
|
||||
beq .LStreCopyAligned
|
||||
.LStreCopyAlignLoop:
|
||||
{ decrease misaligned bytes counter (do it here already to improve }
|
||||
{ jump prediction) }
|
||||
subic. r28,1
|
||||
{ load next byte }
|
||||
lbz r27,(r4)
|
||||
{ end of string? }
|
||||
cmpli cr1,r27,0
|
||||
{ point to next source byte }
|
||||
addi r4,r4,1
|
||||
{ store byte }
|
||||
stb r27,(r3)
|
||||
{ stop if end of string }
|
||||
beq cr1,.LStreCopyDone
|
||||
{ point to next dest address }
|
||||
addi r3,r3,1
|
||||
{ loop if misaligned bytes left }
|
||||
bne .LStreCopyAlignLoop
|
||||
.balign 16
|
||||
.LStreCopyAligned:
|
||||
{ load next 4 bytes }
|
||||
lwz r27,(r4)
|
||||
{ first/highest byte zero? (big endian!) }
|
||||
andis. r28,r27,0x0ff00
|
||||
addi r4,r4,4
|
||||
beq .LStreCopyByte
|
||||
{ second byte zero? }
|
||||
andis. r28,r27,0x00ff
|
||||
beq .LStreCopyWord
|
||||
{ third byte zero? }
|
||||
andi. r28,r27,0xff00
|
||||
beq .LStreCopy3Bytes
|
||||
{ fourth byte zero? }
|
||||
andi. r28,r27,0x00ff
|
||||
{ store next 4 bytes }
|
||||
stw r27,(r3)
|
||||
{ increase dest address }
|
||||
{ the result must point to the terminating #0, so only add 3 }
|
||||
addi r3,r3,3
|
||||
beq .LStreCopyDone
|
||||
{ add another 1 for next char }
|
||||
addi r3,r3,1
|
||||
b .LStreCopyAligned
|
||||
{ store left-overs }
|
||||
.LStreCopy3Bytes:
|
||||
sth r27,(r3)
|
||||
li r27,0
|
||||
stbu r27,2(r3)
|
||||
b .LStrCopyDone
|
||||
.LStreCopyWord:
|
||||
sth r27,(r3)
|
||||
addi r3,r3,1
|
||||
b .LStrCopyDone
|
||||
.LStreCopyByte:
|
||||
stb r27,(r3)
|
||||
.LStreCopyDone:
|
||||
{ r3 contains end of new string now }
|
||||
end ['r3','r4','r27','r28','cr0','cr1'];
|
||||
beq LStreCopyDone
|
||||
subi r4,r4,1
|
||||
subi r3,r3,1
|
||||
LStreCopyLoop:
|
||||
lbzu r10,1(r4)
|
||||
cmpli r10,0
|
||||
stbu r10,1(r3)
|
||||
bne LStreCopyLoop
|
||||
LStreCopyDone:
|
||||
end ['r3','r4','r10','cr0'];
|
||||
|
||||
|
||||
function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
|
||||
asm
|
||||
{ in: dest in r3, source in r4, maxlen in r5 }
|
||||
{ out: result (dest) in r3 }
|
||||
asm
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq .LStrlCopyDone
|
||||
{ maxlen in counter }
|
||||
beq LStrCopyDone
|
||||
mtctr r5
|
||||
{ clear two lowest bits of source address }
|
||||
rlwminm r28,r4,0,0,31-2
|
||||
{ get # of misaligned bytes }
|
||||
sub. r28,r28,r4
|
||||
{ since we have to return dest intact, use another register for }
|
||||
{ dest in the copy loop }
|
||||
mr r29,r3
|
||||
beq .LStrlCopyAligned
|
||||
.LStrlCopyAlignLoop:
|
||||
{ if decreased maxlen counter = 0 (dz), stop }
|
||||
bdz .LStrlCopyByte
|
||||
{ decrease misaligned bytes counter (do it here already to improve }
|
||||
{ jump prediction) }
|
||||
subic. r28,1
|
||||
{ load next byte }
|
||||
lbz r27,(r4)
|
||||
{ end of string? }
|
||||
cmpli cr1,r27,0
|
||||
{ point to next source byte }
|
||||
addi r4,r4,1
|
||||
{ store byte }
|
||||
stb r27,(r29)
|
||||
{ point to next dest address }
|
||||
addi r29,r29,1
|
||||
{ stop if end of string }
|
||||
beq cr1,.LStrlCopyDone
|
||||
{ loop while unaligned byte counter <> 0 }
|
||||
bne .LStrlCopyAlignLoop
|
||||
.balign 16
|
||||
.LStrlCopyAligned:
|
||||
{ load next 4 bytes }
|
||||
lwz r27,(r4)
|
||||
{ first/highest byte zero? (big endian!) }
|
||||
andis. r28,r27,0x0ff00
|
||||
addi r4,r4,4
|
||||
{ if decremented maxlen counter not zero (dnz) and no #0 (ne), }
|
||||
{ continue (and hint that the most likely case is jump taken) }
|
||||
bdnzne+ .LNoStrlCopyByte
|
||||
b .LStrlCopyByte
|
||||
.LNoStrlCopyByte:
|
||||
{ second byte zero? }
|
||||
andis. r28,r27,0x00ff
|
||||
bdnzne+ .LNoStrlCopyWord
|
||||
b .LStrlCopyWord
|
||||
.LNoStrlCopyWord:
|
||||
{ third byte zero? }
|
||||
andi. r28,r27,0xff00
|
||||
bdnzne+ .LNoStrlCopy3Bytes
|
||||
b .LStrlCopy3Bytes
|
||||
.LNoStrlCopy3Bytes:
|
||||
{ fourth byte zero? }
|
||||
andi. r28,r27,0x00ff
|
||||
{ store next 4 bytes }
|
||||
stw r27,(r29)
|
||||
{ increase dest address }
|
||||
addi r29,r29,4
|
||||
bdnzne .LStrlCopyAligned
|
||||
{ replace last char with a #0 in case we stopped because the maxlen }
|
||||
{ was reached }
|
||||
li r27,0
|
||||
stb r27,-1(r29)
|
||||
b .LStrlCopyDone
|
||||
{ store left-overs }
|
||||
.LStrlCopy3Bytes:
|
||||
{ big endian! So move the upper 16 bits to the lower 16 bits }
|
||||
srwi r27,r27,16
|
||||
sth r27,(r29)
|
||||
li r27,0
|
||||
stb r27,2(r29)
|
||||
b .LStrlCopyDone
|
||||
.LStrlCopyWord:
|
||||
{ clear lower 8 bits of low 16 bits }
|
||||
andi r27,r27,0x0ff00
|
||||
sth r27,(r29)
|
||||
b .LStrlCopyDone
|
||||
.LStrlCopyByte:
|
||||
li r27,0
|
||||
stb r27,(r29)
|
||||
.LStrlCopyDone:
|
||||
{ r3 still contains dest here }
|
||||
end ['r4','r27','r28','r29','cr0','cr1','ctr'];
|
||||
subi r4,r4,1
|
||||
subi r9,r3,1
|
||||
LStrlCopyLoop:
|
||||
lbzu r10,1(r4)
|
||||
cmpli r10,0
|
||||
stbu r10,1(r9)
|
||||
bdnzne LStrlCopyLoop
|
||||
beq LStrlCopyDone
|
||||
li r10,0
|
||||
stb r10,1(r9)
|
||||
LStrlCopyDone:
|
||||
end ['r4','r9','r10','cr0'];
|
||||
|
||||
|
||||
function strlen(p : pchar) : longint;assembler;
|
||||
{ in: p in r3 }
|
||||
{ out: result (length) in r3 }
|
||||
{ WARNING: if the used registers change here, also change strend!! (JM) }
|
||||
{ in: p in r3 }
|
||||
{ out: result (length) in r3 }
|
||||
asm
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq .LStrLenNil
|
||||
{ clear two lowest bits of source address }
|
||||
rlwminm r28,r3,0,0,31-2
|
||||
{ get # of misaligned bytes }
|
||||
sub. r28,r28,r3
|
||||
{ at the end, we subtract r29 from r3 to get the length }
|
||||
mr r29,r3
|
||||
beq .LStrLenAligned
|
||||
.LStrLenAlignLoop:
|
||||
{ decrease misaligned bytes counter (do it here already to improve }
|
||||
{ jump prediction) }
|
||||
subic. r28,1
|
||||
{ load next byte }
|
||||
lbz r27,(r3)
|
||||
{ end of string? }
|
||||
cmpli cr1,r27,0
|
||||
{ stop if end of string }
|
||||
beq cr1,.LStrLenDone
|
||||
{ point to next source byte }
|
||||
addi r3,r3,1
|
||||
bne .LStrLenAlignLoop
|
||||
.balign 16
|
||||
.LStrLenAligned:
|
||||
{ load next 4 bytes }
|
||||
lwz r27,(r3)
|
||||
{ first/highest byte zero? (big endian!) }
|
||||
andis. r28,r27,0x0ff00
|
||||
beq .LStrLenDone
|
||||
{ second byte zero? }
|
||||
andis. r28,r27,0x00ff
|
||||
{ increase length }
|
||||
addi r3,r3,1
|
||||
beq .LStrLenDone
|
||||
{ third byte zero? }
|
||||
andi. r28,r27,0xff00
|
||||
addi r3,r3,1
|
||||
beq .LStrLenDone
|
||||
{ fourth byte zero? }
|
||||
andi. r28,r27,0x00ff
|
||||
addi r3,r3,1
|
||||
beq .LStrLenDone
|
||||
addi r3,r3,1
|
||||
b .LStrLenAligned
|
||||
.LStrLenDone:
|
||||
sub r3,r29,r3
|
||||
.LStrLenNil:
|
||||
end ['r3','r27','r28','r29','cr0','cr1'];
|
||||
beq LStrLenDone
|
||||
subi r9,r3,1
|
||||
LStrLenLoop:
|
||||
lbzu r10,1(r9)
|
||||
cmpli r10,0
|
||||
bne LStrLenLoop
|
||||
sub r3,r9,r3
|
||||
LStrLenDone:
|
||||
end ['r3','r4','r9','r10','cr0'];
|
||||
|
||||
|
||||
function strend(p : pchar) : pchar;assembler;
|
||||
{ in: p in r3 }
|
||||
{ out: result (end of p) in r3 }
|
||||
asm
|
||||
mr r26,r3
|
||||
mflr r25
|
||||
bl strlen
|
||||
mtlr r25
|
||||
add r3,r26,r3
|
||||
end ['r3','r25','r26','r27','r28','r29','cr0','cr1'];
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq LStrEndDone
|
||||
subi r3,r3,1
|
||||
LStrEndLoop:
|
||||
lbzu r10,1(r3)
|
||||
cmpli r10,0
|
||||
bne LStrEndLoop
|
||||
LStrEndDone:
|
||||
end ['r3','r4','r10','cr0'];
|
||||
|
||||
|
||||
function strcomp(str1,str2 : pchar) : longint;assembler;
|
||||
@ -361,155 +152,95 @@ end;
|
||||
|
||||
function strscan(p : pchar;c : char) : pchar;assembler;
|
||||
asm
|
||||
movl p,%eax
|
||||
xorl %ecx,%ecx
|
||||
testl %eax,%eax
|
||||
jz .LSTRSCAN
|
||||
// align
|
||||
movb c,%cl
|
||||
movl %eax,%esi
|
||||
andl $0xfffffff8,%eax
|
||||
movl $0xff,%edx
|
||||
movl p,%edi
|
||||
subl %eax,%esi
|
||||
jz .LSTRSCANLOOP
|
||||
xorl %eax,%eax
|
||||
.LSTRSCANALIGNLOOP:
|
||||
movb (%edi),%al
|
||||
// at .LSTRSCANFOUND, one is subtracted from edi to calculate the position,
|
||||
// so add 1 here already (not after .LSTRSCAN, because then the test/jz and
|
||||
// cmp/je can't be paired)
|
||||
incl %edi
|
||||
testb %al,%al
|
||||
jz .LSTRSCAN
|
||||
cmpb %cl,%al
|
||||
je .LSTRSCANFOUND
|
||||
decl %esi
|
||||
jnz .LSTRSCANALIGNLOOP
|
||||
jmp .LSTRSCANLOOP
|
||||
.balign 16
|
||||
.LSTRSCANLOOP:
|
||||
movl (%edi),%eax
|
||||
movl %eax,%esi
|
||||
// first char
|
||||
andl %edx,%eax
|
||||
// end of string -> stop
|
||||
jz .LSTRSCAN
|
||||
shrl $8,%esi
|
||||
cmpl %ecx,%eax
|
||||
movl %esi,%eax
|
||||
je .LSTRSCANFOUND1
|
||||
// second char
|
||||
andl %edx,%eax
|
||||
jz .LSTRSCAN
|
||||
shrl $8,%esi
|
||||
cmpl %ecx,%eax
|
||||
movl %esi,%eax
|
||||
je .LSTRSCANFOUND2
|
||||
// third char
|
||||
andl %edx,%eax
|
||||
jz .LSTRSCAN
|
||||
shrl $8,%esi
|
||||
cmpl %ecx,%eax
|
||||
movl %esi,%eax
|
||||
je .LSTRSCANFOUND3
|
||||
// fourth char
|
||||
// all upper bits have already been cleared
|
||||
testl %eax,%eax
|
||||
jz .LSTRSCAN
|
||||
addl $4,%edi
|
||||
cmpl %ecx,%eax
|
||||
je .LSTRSCANFOUND
|
||||
jmp .LSTRSCANLOOP
|
||||
.LSTRSCANFOUND3:
|
||||
leal 2(%edi),%eax
|
||||
jmp .LSTRSCAN
|
||||
.LSTRSCANFOUND2:
|
||||
leal 1(%edi),%eax
|
||||
jmp .LSTRSCAN
|
||||
.LSTRSCANFOUND1:
|
||||
movl %edi,%eax
|
||||
jmp .LSTRSCAN
|
||||
.LSTRSCANFOUND:
|
||||
leal -1(%edi),%eax
|
||||
.LSTRSCAN:
|
||||
end ['EAX','ECX','ESI','EDI','EDX'];
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq LStrScanDone
|
||||
subi r3,r3,1
|
||||
LStrScanLoop:
|
||||
lbzu r10,1(r3)
|
||||
cmpl r10,r4
|
||||
bne LStrScanLoop
|
||||
LStrScanDone:
|
||||
end ['r3','r4','r10','cr0'];
|
||||
|
||||
|
||||
function strrscan(p : pchar;c : char) : pchar;assembler;
|
||||
asm
|
||||
xorl %eax,%eax
|
||||
movl p,%edi
|
||||
orl %edi,%edi
|
||||
jz .LSTRRSCAN
|
||||
movl $0xffffffff,%ecx
|
||||
cld
|
||||
xorb %al,%al
|
||||
repne
|
||||
scasb
|
||||
not %ecx
|
||||
movb c,%al
|
||||
movl p,%edi
|
||||
addl %ecx,%edi
|
||||
decl %edi
|
||||
std
|
||||
repne
|
||||
scasb
|
||||
cld
|
||||
movl $0,%eax
|
||||
jnz .LSTRRSCAN
|
||||
movl %edi,%eax
|
||||
incl %eax
|
||||
.LSTRRSCAN:
|
||||
end ['EAX','ECX','EDI'];
|
||||
{ empty/invalid string? }
|
||||
cmpli r3,0
|
||||
{ if yes, do nothing }
|
||||
beq LStrrScanDone
|
||||
{ make r9 $ffffffff, later on we take min(r9,r3) }
|
||||
li r9,0x0ffff
|
||||
subi r3,r3,1
|
||||
LStrrScanLoop:
|
||||
lbzu r10,1(r3)
|
||||
cmpl cr1,r10,r4
|
||||
cmpli cr0,r10,0
|
||||
bne+ cr1,LStrrScanNotFound
|
||||
{ store address of found position }
|
||||
mr r9,r3
|
||||
LStrrScanNotFound:
|
||||
bne LStrrScanLoop
|
||||
{ Select min of r3 and r9 -> end of string or found position }
|
||||
{ From the PPC compiler writer's guide, not sure if I could ever }
|
||||
{ come up with something like this :) }
|
||||
|
||||
subfc r10,r3,r9 { r10 = r9 - r3, CA = (r9 >= r3) ? 1 : 0 }
|
||||
subfe r9,r9,r9 { r9' = (r9 >= r3) ? 0 : -1 }
|
||||
and r10,r10,r9 { r10 = (r9 >= r3) ? 0 : r9 - r3 }
|
||||
add r3,r10,r3 { r3 = (r9 >= r3) ? r3 : r9 }
|
||||
LStrrScanDone:
|
||||
end ['r3','r4','r9','r10','cr0','cr1'];
|
||||
|
||||
|
||||
function strupper(p : pchar) : pchar;assembler;
|
||||
asm
|
||||
movl p,%esi
|
||||
orl %esi,%esi
|
||||
jz .LStrUpperNil
|
||||
movl %esi,%edi
|
||||
.LSTRUPPER1:
|
||||
lodsb
|
||||
cmpb $97,%al
|
||||
jb .LSTRUPPER3
|
||||
cmpb $122,%al
|
||||
ja .LSTRUPPER3
|
||||
subb $0x20,%al
|
||||
.LSTRUPPER3:
|
||||
stosb
|
||||
orb %al,%al
|
||||
jnz .LSTRUPPER1
|
||||
.LStrUpperNil:
|
||||
movl p,%eax
|
||||
end ['EAX','ESI','EDI'];
|
||||
cmpli r3,0
|
||||
beq LStrUpperNil
|
||||
subi r9,r3,1
|
||||
LStrUpperLoop:
|
||||
lbzu r10,1(r9)
|
||||
{ a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
|
||||
subi r8,r10,97
|
||||
cmpli r8,122-97
|
||||
cmpli cr1,r10,0
|
||||
subi r10,r10,0x20
|
||||
bgt LStrUpper1
|
||||
stb r10,0(r9)
|
||||
LStrUpper1:
|
||||
bne cr1,LStrUpperLoop
|
||||
LStrUpperNil:
|
||||
end ['r8','r9','r10','cr0','cr1'];
|
||||
|
||||
|
||||
function strlower(p : pchar) : pchar;assembler;
|
||||
asm
|
||||
movl p,%esi
|
||||
orl %esi,%esi
|
||||
jz .LStrLowerNil
|
||||
movl %esi,%edi
|
||||
.LSTRLOWER1:
|
||||
lodsb
|
||||
cmpb $65,%al
|
||||
jb .LSTRLOWER3
|
||||
cmpb $90,%al
|
||||
ja .LSTRLOWER3
|
||||
addb $0x20,%al
|
||||
.LSTRLOWER3:
|
||||
stosb
|
||||
orb %al,%al
|
||||
jnz .LSTRLOWER1
|
||||
.LStrLowerNil:
|
||||
movl p,%eax
|
||||
end ['EAX','ESI','EDI'];
|
||||
cmpli r3,0
|
||||
beq LStrLowerNil
|
||||
subi r9,r3,1
|
||||
LStrLowerLoop:
|
||||
lbzu r10,1(r9)
|
||||
{ a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
|
||||
subi r8,r10,65
|
||||
cmpli r8,90-65
|
||||
cmpli cr1,r10,0
|
||||
addi r10,r10,0x20
|
||||
bgt LStrLower1
|
||||
stb r10,0(r9)
|
||||
LStrLower1:
|
||||
bne cr1,LStrLowerLoop
|
||||
LStrLowerNil:
|
||||
end ['r8','r9','r10','cr0','cr1'];
|
||||
|
||||
|
||||
{
|
||||
$Log$
|
||||
Revision 1.1 2000-11-05 17:17:08 jonas
|
||||
Revision 1.2 2001-02-10 12:28:22 jonas
|
||||
* fixed some bugs, simplified/optimized already implemented routines and code some more
|
||||
|
||||
Revision 1.1 2000/11/05 17:17:08 jonas
|
||||
+ first implementation, not yet finished
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user