* fixed some bugs, simplified/optimized already implemented routines and coded some more

This commit is contained in:
Jonas Maebe 2001-02-10 12:28:22 +00:00
parent 55d92375c4
commit f4ec8b8b12

View File

@ -25,68 +25,16 @@ asm
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
beq .LStrCopyDone
{ clear two lowest bits of source address }
rlwminm r28,r4,0,0,31-2
{ get # of misaligned bytes }
sub. r28,r28,r4
{ since we have to return dest intact, use another register for }
{ dest in the copy loop }
mr r29,r3
beq .LStrCopyAligned
.LStrCopyAlignLoop:
{ decrease misaligned bytes counter (do it here already to improve }
{ jump prediction) }
subic. r28,1
{ load next byte }
lbz r27,(r4)
{ end of string? }
cmpli cr1,r27,0
{ point to next source byte }
addi r4,r4,1
{ store byte }
stb r27,(r29)
{ point to next dest address }
addi r29,r29,1
{ stop if end of string }
beq cr1,.LStrCopyDone
bne .LStrCopyAlignLoop
.balign 16
.LStrCopyAligned:
{ load next 4 bytes }
lwz r27,(r4)
{ first/highest byte zero? (big endian!) }
andis. r28,r27,0x0ff00
addi r4,r4,4
beq .LStrCopyByte
{ second byte zero? }
andis. r28,r27,0x00ff
beq .LStrCopyWord
{ third byte zero? }
andi. r28,r27,0xff00
beq .LStrCopy3Bytes
{ fourth byte zero? }
andi. r28,r27,0x00ff
{ store next 4 bytes }
stw r27,(r29)
{ increase dest address }
addi r29,r29,4
beq .LStrCopyDone
b .LStrCopyAligned
{ store left-overs }
.LStrCopy3Bytes:
sth r27,(r29)
li r27,0
stb r27,2(r29)
b .LStrCopyDone
.LStrCopyWord:
sth r27,(r29)
b .LStrCopyDone
.LStrCopyByte:
stb r27,(r29)
.LStrCopyDone:
{ r3 still contains dest here }
end ['r4','r27','r28','r29','cr0','cr1'];
beq LStrCopyDone
subi r4,r4,1
subi r9,r3,1
LStrCopyLoop:
lbzu r10,1(r4)
cmpli r10,0
stbu r10,1(r9)
bne LStrCopyLoop
LStrCopyDone:
end ['r4','r9','r10','cr0'];
function strecopy(dest,source : pchar) : pchar;assembler;
@ -96,231 +44,74 @@ asm
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
beq .LStreCopyDone
{ clear two lowest bits of source address }
rlwminm r28,r4,0,0,31-2
{ get # of misaligned bytes }
sub. r28,r28,r4
beq .LStreCopyAligned
.LStreCopyAlignLoop:
{ decrease misaligned bytes counter (do it here already to improve }
{ jump prediction) }
subic. r28,1
{ load next byte }
lbz r27,(r4)
{ end of string? }
cmpli cr1,r27,0
{ point to next source byte }
addi r4,r4,1
{ store byte }
stb r27,(r3)
{ stop if end of string }
beq cr1,.LStreCopyDone
{ point to next dest address }
addi r3,r3,1
{ loop if misaligned bytes left }
bne .LStreCopyAlignLoop
.balign 16
.LStreCopyAligned:
{ load next 4 bytes }
lwz r27,(r4)
{ first/highest byte zero? (big endian!) }
andis. r28,r27,0x0ff00
addi r4,r4,4
beq .LStreCopyByte
{ second byte zero? }
andis. r28,r27,0x00ff
beq .LStreCopyWord
{ third byte zero? }
andi. r28,r27,0xff00
beq .LStreCopy3Bytes
{ fourth byte zero? }
andi. r28,r27,0x00ff
{ store next 4 bytes }
stw r27,(r3)
{ increase dest address }
{ the result must point to the terminating #0, so only add 3 }
addi r3,r3,3
beq .LStreCopyDone
{ add another 1 for next char }
addi r3,r3,1
b .LStreCopyAligned
{ store left-overs }
.LStreCopy3Bytes:
sth r27,(r3)
li r27,0
stbu r27,2(r3)
b .LStrCopyDone
.LStreCopyWord:
sth r27,(r3)
addi r3,r3,1
b .LStrCopyDone
.LStreCopyByte:
stb r27,(r3)
.LStreCopyDone:
{ r3 contains end of new string now }
end ['r3','r4','r27','r28','cr0','cr1'];
beq LStreCopyDone
subi r4,r4,1
subi r3,r3,1
LStreCopyLoop:
lbzu r10,1(r4)
cmpli r10,0
stbu r10,1(r3)
bne LStreCopyLoop
LStreCopyDone:
end ['r3','r4','r10','cr0'];
function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
{ Copies at most maxlen characters from source to dest and always      }
{ terminates dest with #0, so dest must have room for maxlen+1 bytes.  }
{ in:  dest in r3, source in r4, maxlen in r5                          }
{ out: result (dest) in r3; a nil dest is returned untouched           }
{ maxlen is treated as an unsigned count (it is loaded into ctr)       }
asm
  { empty/invalid string? }
  cmpli   r3,0
  { is there room for at least one character? (result kept in cr1) }
  cmpli   cr1,r5,0
  { nil dest: do nothing }
  beq     LStrlCopyDone
  { lbzu/stbu add the displacement before the access, so start both }
  { pointers one byte early; r9 is the running dest, r3 stays intact }
  subi    r4,r4,1
  subi    r9,r3,1
  { maxlen = 0: a ctr of 0 would wrap around in bdnz, so only write }
  { the terminator                                                  }
  beq     cr1,LStrlCopyTerminate
  { maxlen in counter }
  mtctr   r5
LStrlCopyLoop:
  lbzu    r10,1(r4)
  cmpli   r10,0
  stbu    r10,1(r9)
  { continue while ctr-1 <> 0 and the byte just copied was not #0 }
  bdnzne  LStrlCopyLoop
  { the terminating #0 has been copied already }
  beq     LStrlCopyDone
LStrlCopyTerminate:
  { maxlen reached: append the terminator after the last stored byte }
  li      r10,0
  stb     r10,1(r9)
LStrlCopyDone:
  { r3 still contains dest here }
end ['r4','r9','r10','cr0','cr1','ctr'];
{ strlen: counts the characters of p up to, not including, the first #0. }
{ A nil p takes the early-out branch, so the 0 already in r3 is returned. }
{ NOTE(review): this span holds two bodies, each closed by its own        }
{ register list: a word-at-a-time scan (".L" labels) followed by a plain  }
{ byte loop. Presumably only the byte loop is meant to remain - confirm.  }
function strlen(p : pchar) : longint;assembler;
{ in: p in r3 }
{ out: result (length) in r3 }
{ WARNING: if the used registers change here, also change strend!! (JM) }
{ in: p in r3 }
{ out: result (length) in r3 }
asm
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
beq .LStrLenNil
{ clear two lowest bits of source address }
rlwminm r28,r3,0,0,31-2
{ get # of misaligned bytes }
sub. r28,r28,r3
{ at the end, we subtract r29 from r3 to get the length }
mr r29,r3
beq .LStrLenAligned
.LStrLenAlignLoop:
{ decrease misaligned bytes counter (do it here already to improve }
{ jump prediction) }
subic. r28,1
{ load next byte }
lbz r27,(r3)
{ end of string? }
cmpli cr1,r27,0
{ stop if end of string }
beq cr1,.LStrLenDone
{ point to next source byte }
addi r3,r3,1
bne .LStrLenAlignLoop
.balign 16
.LStrLenAligned:
{ load next 4 bytes }
lwz r27,(r3)
{ first/highest byte zero? (big endian!) }
andis. r28,r27,0x0ff00
beq .LStrLenDone
{ second byte zero? }
andis. r28,r27,0x00ff
{ increase length }
{ addi does not touch cr0, so the following beq still tests the mask above }
addi r3,r3,1
beq .LStrLenDone
{ third byte zero? }
andi. r28,r27,0xff00
addi r3,r3,1
beq .LStrLenDone
{ fourth byte zero? }
andi. r28,r27,0x00ff
addi r3,r3,1
beq .LStrLenDone
addi r3,r3,1
b .LStrLenAligned
.LStrLenDone:
{ NOTE(review): sub rD,rA,rB yields rA - rB, so this is r29 - r3, i.e. }
{ start minus current position - confirm the operand order            }
sub r3,r29,r3
.LStrLenNil:
end ['r3','r27','r28','r29','cr0','cr1'];
beq LStrLenDone
{ lbzu adds the displacement before loading, so start one byte before p }
subi r9,r3,1
LStrLenLoop:
lbzu r10,1(r9)
cmpli r10,0
bne LStrLenLoop
{ r9 addresses the #0 here: r9 - p is the length }
sub r3,r9,r3
LStrLenDone:
end ['r3','r4','r9','r10','cr0'];
{ strend: returns the address of the terminating #0 of p.                 }
{ NOTE(review): this span holds two bodies, each closed by its own        }
{ register list: one that calls strlen and adds the length to p, followed }
{ by a self-contained byte loop. Presumably only the loop is meant to     }
{ remain - confirm.                                                       }
function strend(p : pchar) : pchar;assembler;
{ in: p in r3 }
{ out: result (end of p) in r3 }
asm
{ keep p and the return address in r26/r25 across the call }
mr r26,r3
mflr r25
bl strlen
mtlr r25
{ p + length = address of the terminator }
add r3,r26,r3
end ['r3','r25','r26','r27','r28','r29','cr0','cr1'];
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
beq LStrEndDone
{ lbzu adds the displacement before loading, so start one byte before p }
subi r3,r3,1
LStrEndLoop:
lbzu r10,1(r3)
cmpli r10,0
bne LStrEndLoop
{ the loop exits with r3 addressing the byte that compared equal to 0 }
LStrEndDone:
end ['r3','r4','r10','cr0'];
function strcomp(str1,str2 : pchar) : longint;assembler;
@ -361,155 +152,95 @@ end;
function strscan(p : pchar;c : char) : pchar;assembler;
{ Finds the first occurrence of c in the #0-terminated string p.        }
{ in:  p in r3, c in r4 (assumed zero-extended, it is compared against  }
{      a byte loaded with lbzu)                                         }
{ out: r3 = address of the first c, or nil if p is nil or c does not    }
{      occur. The terminator is compared too, so c = #0 yields the      }
{      address of the end of the string.                                }
asm
  { empty/invalid string? }
  cmpli   r3,0
  { if yes, do nothing (nil is returned) }
  beq     LStrScanDone
  { lbzu adds the displacement before loading, so start one byte early }
  subi    r3,r3,1
LStrScanLoop:
  lbzu    r10,1(r3)
  { cr1: is this the character we are looking for? }
  cmpl    cr1,r10,r4
  { cr0: is this the end of the string? }
  cmpli   cr0,r10,0
  { found: r3 already holds its address }
  beq     cr1,LStrScanDone
  bne     LStrScanLoop
  { terminator reached without a match }
  li      r3,0
LStrScanDone:
end ['r3','r10','cr0','cr1'];
function strrscan(p : pchar;c : char) : pchar;assembler;
{ Finds the last occurrence of c in the #0-terminated string p.         }
{ in:  p in r3, c in r4 (assumed zero-extended, it is compared against  }
{      a byte loaded with lbzu)                                         }
{ out: r3 = address of the last c, or nil if p is nil or c does not     }
{      occur. The terminator is compared too, so c = #0 yields the      }
{      address of the end of the string.                                }
asm
  { empty/invalid string? }
  cmpli   r3,0
  { if yes, do nothing (nil is returned) }
  beq     LStrrScanDone
  { r9 = address of the last match seen so far, nil until there is one }
  li      r9,0
  { lbzu adds the displacement before loading, so start one byte early }
  subi    r3,r3,1
LStrrScanLoop:
  lbzu    r10,1(r3)
  { cr1: match?  cr0: end of string? }
  cmpl    cr1,r10,r4
  cmpli   cr0,r10,0
  { most characters do not match, hint the branch as taken }
  bne+    cr1,LStrrScanNotFound
  { store address of found position }
  mr      r9,r3
LStrrScanNotFound:
  bne     LStrrScanLoop
  { whole string scanned: hand back the last match (or nil) }
  mr      r3,r9
LStrrScanDone:
end ['r3','r9','r10','cr0','cr1'];
{ strupper: converts the characters 'a'..'z' (97..122) of p to upper case }
{ in place by subtracting 0x20; all other bytes are left untouched.       }
{ NOTE(review): this span holds two bodies, each closed by its own        }
{ register list: an i386 lodsb/stosb loop followed by a PowerPC byte      }
{ loop. Presumably only the PowerPC loop is meant to remain - confirm.    }
function strupper(p : pchar) : pchar;assembler;
asm
movl p,%esi
orl %esi,%esi
jz .LStrUpperNil
movl %esi,%edi
.LSTRUPPER1:
lodsb
cmpb $97,%al
jb .LSTRUPPER3
cmpb $122,%al
ja .LSTRUPPER3
subb $0x20,%al
.LSTRUPPER3:
stosb
orb %al,%al
jnz .LSTRUPPER1
.LStrUpperNil:
movl p,%eax
end ['EAX','ESI','EDI'];
{ nil p: nothing to convert }
cmpli r3,0
beq LStrUpperNil
{ lbzu adds the displacement before loading, so start one byte before p; }
{ r3 itself is never written below                                       }
subi r9,r3,1
LStrUpperLoop:
lbzu r10,1(r9)
{ a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
subi r8,r10,97
cmpli r8,122-97
{ end-of-string test on the unmodified byte, kept in cr1 for the loop branch }
cmpli cr1,r10,0
{ converted value is computed unconditionally, stored only if in range }
subi r10,r10,0x20
bgt LStrUpper1
stb r10,0(r9)
LStrUpper1:
bne cr1,LStrUpperLoop
LStrUpperNil:
end ['r8','r9','r10','cr0','cr1'];
{ strlower: converts the characters 'A'..'Z' (65..90) of p to lower case }
{ in place by adding 0x20; all other bytes are left untouched.           }
{ NOTE(review): this span holds two bodies, each closed by its own       }
{ register list: an i386 lodsb/stosb loop followed by a PowerPC byte     }
{ loop. Presumably only the PowerPC loop is meant to remain - confirm.   }
function strlower(p : pchar) : pchar;assembler;
asm
movl p,%esi
orl %esi,%esi
jz .LStrLowerNil
movl %esi,%edi
.LSTRLOWER1:
lodsb
cmpb $65,%al
jb .LSTRLOWER3
cmpb $90,%al
ja .LSTRLOWER3
addb $0x20,%al
.LSTRLOWER3:
stosb
orb %al,%al
jnz .LSTRLOWER1
.LStrLowerNil:
movl p,%eax
end ['EAX','ESI','EDI'];
{ nil p: nothing to convert }
cmpli r3,0
beq LStrLowerNil
{ lbzu adds the displacement before loading, so start one byte before p; }
{ r3 itself is never written below                                       }
subi r9,r3,1
LStrLowerLoop:
lbzu r10,1(r9)
{ a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
subi r8,r10,65
cmpli r8,90-65
{ end-of-string test on the unmodified byte, kept in cr1 for the loop branch }
cmpli cr1,r10,0
{ converted value is computed unconditionally, stored only if in range }
addi r10,r10,0x20
bgt LStrLower1
stb r10,0(r9)
LStrLower1:
bne cr1,LStrLowerLoop
LStrLowerNil:
end ['r8','r9','r10','cr0','cr1'];
{
$Log$
Revision 1.1 2000-11-05 17:17:08 jonas
Revision 1.2 2001-02-10 12:28:22 jonas
* fixed some bugs, simplified/optimized already implemented routines and coded some more
Revision 1.1 2000/11/05 17:17:08 jonas
+ first implementation, not yet finished
}