From f4ec8b8b1251628858029d90da95c7e7c32299ce Mon Sep 17 00:00:00 2001
From: Jonas Maebe
Date: Sat, 10 Feb 2001 12:28:22 +0000
Subject: [PATCH] * fixed some bugs, simplified/optimized already implemented routines and code some more

---
 Review notes (not part of the commit message):
 - strlcopy: the nil-dest branch now targets LStrlCopyDone (it pointed at a
   label of strcopy), 'ctr' is declared as modified again, and maxlen <= 0
   only stores the terminating #0 instead of running an unbounded copy.
 - strscan: the loop now stops at the terminating #0 and returns nil when c
   does not occur; c = #0 still yields the address of the terminator.
 - strrscan: returns nil when c does not occur (the i386 code it replaces
   did so too) instead of the address of the terminator.
 - Hunk headers and diffstat are adjusted to the new line counts; the blob
   ids on the index line still describe the unreviewed post-image.

 rtl/powerpc/strings.inc | 536 ++++++++++------------------------------
 1 file changed, 136 insertions(+), 400 deletions(-)

diff --git a/rtl/powerpc/strings.inc b/rtl/powerpc/strings.inc
index f34774e22d..f32de29f9e 100644
--- a/rtl/powerpc/strings.inc
+++ b/rtl/powerpc/strings.inc
@@ -25,68 +25,16 @@ asm
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
-        beq     .LStrCopyDone
-        { clear two lowest bits of source address }
-        rlwminm r28,r4,0,0,31-2
-        { get # of misaligned bytes }
-        sub.    r28,r28,r4
-        { since we have to return dest intact, use another register for }
-        { dest in the copy loop }
-        mr      r29,r3
-        beq     .LStrCopyAligned
-.LStrCopyAlignLoop:
-        { decrease misaligned bytes counter (do it here already to improve }
-        { jump prediction) }
-        subic.  r28,1
-        { load next byte }
-        lbz     r27,(r4)
-        { end of string? }
-        cmpli   cr1,r27,0
-        { point to next source byte }
-        addi    r4,r4,1
-        { store byte }
-        stb     r27,(r29)
-        { point to next dest address }
-        addi    r29,r29,1
-        { stop if end of string }
-        beq     cr1,.LStrCopyDone
-        bne     .LStrCopyAlignLoop
-        .balign  16
-.LStrCopyAligned:
-        { load next 4 bytes }
-        lwz     r27,(r4)
-        { first/highest byte zero? (big endian!) }
-        andis.  r28,r27,0x0ff00
-        addi    r4,r4,4
-        beq     .LStrCopyByte
-        { second byte zero? }
-        andis.  r28,r27,0x00ff
-        beq     .LStrCopyWord
-        { third byte zero? }
-        andi.   r28,r27,0xff00
-        beq     .LStrCopy3Bytes
-        { fourth byte zero? }
-        andi.   r28,r27,0x00ff
-        { store next 4 bytes }
-        stw     r27,(r29)
-        { increase dest address }
-        addi    r29,r29,4
-        beq     .LStrCopyDone
-        b       .LStrCopyAligned
-{ store left-overs }
-.LStrCopy3Bytes:
-        sth     r27,(r29)
-        li      r27,0
-        stb     r27,2(r29)
-        b       .LStrCopyDone
-.LStrCopyWord:
-        sth     r27,(r29)
-        b       .LStrCopyDone
-.LStrCopyByte:
-        stb     r27,(r29)
-.LStrCopyDone:
-        { r3 still contains dest here }
-end ['r4','r27','r28','r29','cr0','cr1'];
+        beq     LStrCopyDone
+        subi    r4,r4,1
+        subi    r9,r3,1
+LStrCopyLoop:
+        lbzu    r10,1(r4)
+        cmpli   r10,0
+        stbu    r10,1(r9)
+        bne     LStrCopyLoop
+LStrCopyDone:
+end ['r4','r9','r10','cr0'];


 function strecopy(dest,source : pchar) : pchar;assembler;
@@ -96,231 +44,80 @@ asm
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
-        beq     .LStreCopyDone
-        { clear two lowest bits of source address }
-        rlwminm r28,r4,0,0,31-2
-        { get # of misaligned bytes }
-        sub.    r28,r28,r4
-        beq     .LStreCopyAligned
-.LStreCopyAlignLoop:
-        { decrease misaligned bytes counter (do it here already to improve }
-        { jump prediction) }
-        subic.  r28,1
-        { load next byte }
-        lbz     r27,(r4)
-        { end of string? }
-        cmpli   cr1,r27,0
-        { point to next source byte }
-        addi    r4,r4,1
-        { store byte }
-        stb     r27,(r3)
-        { stop if end of string }
-        beq     cr1,.LStreCopyDone
-        { point to next dest address }
-        addi    r3,r3,1
-        { loop if misaligned bytes left }
-        bne     .LStreCopyAlignLoop
-        .balign  16
-.LStreCopyAligned:
-        { load next 4 bytes }
-        lwz     r27,(r4)
-        { first/highest byte zero? (big endian!) }
-        andis.  r28,r27,0x0ff00
-        addi    r4,r4,4
-        beq     .LStreCopyByte
-        { second byte zero? }
-        andis.  r28,r27,0x00ff
-        beq     .LStreCopyWord
-        { third byte zero? }
-        andi.   r28,r27,0xff00
-        beq     .LStreCopy3Bytes
-        { fourth byte zero? }
-        andi.   r28,r27,0x00ff
-        { store next 4 bytes }
-        stw     r27,(r3)
-        { increase dest address }
-        { the result must point to the terminating #0, so only add 3 }
-        addi    r3,r3,3
-        beq     .LStreCopyDone
-        { add another 1 for next char }
-        addi    r3,r3,1
-        b       .LStreCopyAligned
-{ store left-overs }
-.LStreCopy3Bytes:
-        sth     r27,(r3)
-        li      r27,0
-        stbu    r27,2(r3)
-        b       .LStrCopyDone
-.LStreCopyWord:
-        sth     r27,(r3)
-        addi    r3,r3,1
-        b       .LStrCopyDone
-.LStreCopyByte:
-        stb     r27,(r3)
-.LStreCopyDone:
-        { r3 contains end of new string now }
-end ['r3','r4','r27','r28','cr0','cr1'];
+        beq     LStreCopyDone
+        subi    r4,r4,1
+        subi    r3,r3,1
+LStreCopyLoop:
+        lbzu    r10,1(r4)
+        cmpli   r10,0
+        stbu    r10,1(r3)
+        bne     LStreCopyLoop
+LStreCopyDone:
+end ['r3','r4','r10','cr0'];


 function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
-asm
 { in: dest in r3, source in r4, maxlen in r5 }
 { out: result (dest) in r3                   }
 asm
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
-        beq     .LStrlCopyDone
-        { maxlen in counter }
+        beq     LStrlCopyDone
+        { dest cursor in r9 (r3 must be returned intact), one before dest }
+        subi    r9,r3,1
+        { maxlen <= 0: copy nothing, only store the terminating #0 }
+        cmpi    r5,0
+        ble     LStrlCopyTerminate
         mtctr   r5
-        { clear two lowest bits of source address }
-        rlwminm r28,r4,0,0,31-2
-        { get # of misaligned bytes }
-        sub.    r28,r28,r4
-        { since we have to return dest intact, use another register for }
-        { dest in the copy loop }
-        mr      r29,r3
-        beq     .LStrlCopyAligned
-.LStrlCopyAlignLoop:
-        { if decreased maxlen counter = 0 (dz), stop }
-        bdz     .LStrlCopyByte
-        { decrease misaligned bytes counter (do it here already to improve }
-        { jump prediction) }
-        subic.  r28,1
-        { load next byte }
-        lbz     r27,(r4)
-        { end of string? }
-        cmpli   cr1,r27,0
-        { point to next source byte }
-        addi    r4,r4,1
-        { store byte }
-        stb     r27,(r29)
-        { point to next dest address }
-        addi    r29,r29,1
-        { stop if end of string }
-        beq     cr1,.LStrlCopyDone
-        { loop while unaligned byte counter <> 0 }
-        bne     .LStrlCopyAlignLoop
-        .balign  16
-.LStrlCopyAligned:
-        { load next 4 bytes }
-        lwz     r27,(r4)
-        { first/highest byte zero? (big endian!) }
-        andis.  r28,r27,0x0ff00
-        addi    r4,r4,4
-        { if decremented maxlen counter not zero (dnz) and no #0 (ne), }
-        { continue (and hint that the most likely case is jump taken)  }
-        bdnzne+ .LNoStrlCopyByte
-        b       .LStrlCopyByte
-.LNoStrlCopyByte:
-        { second byte zero? }
-        andis.  r28,r27,0x00ff
-        bdnzne+ .LNoStrlCopyWord
-        b       .LStrlCopyWord
-.LNoStrlCopyWord:
-        { third byte zero? }
-        andi.   r28,r27,0xff00
-        bdnzne+ .LNoStrlCopy3Bytes
-        b       .LStrlCopy3Bytes
-.LNoStrlCopy3Bytes:
-        { fourth byte zero? }
-        andi.   r28,r27,0x00ff
-        { store next 4 bytes }
-        stw     r27,(r29)
-        { increase dest address }
-        addi    r29,r29,4
-        bdnzne  .LStrlCopyAligned
-        { replace last char with a #0 in case we stopped because the maxlen }
-        { was reached                                                       }
-        li      r27,0
-        stb     r27,-1(r29)
-        b       .LStrlCopyDone
-{ store left-overs }
-.LStrlCopy3Bytes:
-        { big endian! So move upper 16bits to lower 16bits}
-        srwi    r27,r27,16
-        sth     r27,(r29)
-        li      r27,0
-        stb     r27,2(r29)
-        b       .LStrlCopyDone
-.LStrlCopyWord:
-        { clear lower 8 bits of low 16 bits }
-        andi    r27,r27,0x0ff00
-        sth     r27,(r29)
-        b       .LStrlCopyDone
-.LStrlCopyByte:
-        li      r27,0
-        stb     r27,(r29)
-.LStrlCopyDone:
-        { r3 still contains dest here }
-end ['r4','r27','r28','r29','cr0','cr1','ctr'];
+        subi    r4,r4,1
+LStrlCopyLoop:
+        lbzu    r10,1(r4)
+        cmpli   r10,0
+        stbu    r10,1(r9)
+        bdnzne  LStrlCopyLoop
+        { stopped on the #0 of source: dest is already terminated }
+        beq     LStrlCopyDone
+LStrlCopyTerminate:
+        li      r10,0
+        stb     r10,1(r9)
+LStrlCopyDone:
+end ['r4','r9','r10','cr0','ctr'];


 function strlen(p : pchar) : longint;assembler;
-{ in: p in r3                                                           }
-{ out: result (length) in r3                                            }
-{ WARNING: if the used registers change here, also change strend!! (JM) }
+{ in: p in r3                }
+{ out: result (length) in r3 }
 asm
         { empty/invalid string? }
         cmpli   r3,0
         { if yes, do nothing }
-        beq     .LStrLenNil
-        { clear two lowest bits of source address }
-        rlwminm r28,r3,0,0,31-2
-        { get # of misaligned bytes }
-        sub.    r28,r28,r3
-        { at the end, we substract r29 from r3 to get the length }
-        mr      r29,r3
-        beq     .LStrLenAligned
-.LStrLenAlignLoop:
-        { decrease misaligned bytes counter (do it here already to improve }
-        { jump prediction) }
-        subic.  r28,1
-        { load next byte }
-        lbz     r27,(r3)
-        { end of string? }
-        cmpli   cr1,r27,0
-        { stop if end of string }
-        beq     cr1,.LStrLenDone
-        { point to next source byte }
-        addi    r3,r3,1
-        bne     .LStrLenAlignLoop
-        .balign  16
-.LStrLenAligned:
-        { load next 4 bytes }
-        lwz     r27,(r3)
-        { first/highest byte zero? (big endian!) }
-        andis.  r28,r27,0x0ff00
-        beq     .LStrLenDone
-        { second byte zero? }
-        andis.  r28,r27,0x00ff
-        { increase length }
-        addi    r3,r3,1
-        beq     .LStrLenDone
-        { third byte zero? }
-        andi.   r28,r27,0xff00
-        addi    r3,r3,1
-        beq     .LStrLenDone
-        { fourth byte zero? }
-        andi.   r28,r27,0x00ff
-        addi    r3,r3,1
-        beq     .LStrLenDone
-        addi    r3,r3,1
-        b       .LStrLenAligned
-.LStrLenDone:
-        sub     r3,r29,r3
-.LStrLenNil:
-end ['r3','r27','r28','r29','cr0','cr1'];
+        beq     LStrLenDone
+        subi    r9,r3,1
+LStrLenLoop:
+        lbzu    r10,1(r9)
+        cmpli   r10,0
+        bne     LStrLenLoop
+        sub     r3,r9,r3
+LStrLenDone:
+end ['r3','r4','r9','r10','cr0'];


 function strend(p : pchar) : pchar;assembler;
+{ in: p in r3                  }
+{ out: result (end of p) in r3 }
 asm
-        mr      r26,r3
-        mflr    r25
-        bl      strlen
-        mtlr    r25
-        add     r3,r26,r3
-end ['r3','r25','r26','r27','r28','r29','cr0','cr1'];
+        { empty/invalid string? }
+        cmpli   r3,0
+        { if yes, do nothing }
+        beq     LStrEndDone
+        subi    r3,r3,1
+LStrEndLoop:
+        lbzu    r10,1(r3)
+        cmpli   r10,0
+        bne     LStrEndLoop
+LStrEndDone:
+end ['r3','r4','r10','cr0'];


 function strcomp(str1,str2 : pchar) : longint;assembler;
@@ -361,155 +158,94 @@ end;

 function strscan(p : pchar;c : char) : pchar;assembler;
 asm
-        movl    p,%eax
-        xorl    %ecx,%ecx
-        testl   %eax,%eax
-        jz      .LSTRSCAN
-// align
-        movb    c,%cl
-        movl    %eax,%esi
-        andl    $0xfffffff8,%eax
-        movl    $0xff,%edx
-        movl    p,%edi
-        subl    %eax,%esi
-        jz      .LSTRSCANLOOP
-        xorl    %eax,%eax
-.LSTRSCANALIGNLOOP:
-        movb    (%edi),%al
-// at .LSTRSCANFOUND, one is substracted from edi to calculate the position,
-// so add 1 here already (not after .LSTRSCAN, because then the test/jz and
-// cmp/je can't be paired)
-        incl    %edi
-        testb   %al,%al
-        jz      .LSTRSCAN
-        cmpb    %cl,%al
-        je      .LSTRSCANFOUND
-        decl    %esi
-        jnz     .LSTRSCANALIGNLOOP
-        jmp     .LSTRSCANLOOP
-        .balign  16
-.LSTRSCANLOOP:
-        movl    (%edi),%eax
-        movl    %eax,%esi
-// first char
-        andl    %edx,%eax
-// end of string -> stop
-        jz      .LSTRSCAN
-        shrl    $8,%esi
-        cmpl    %ecx,%eax
-        movl    %esi,%eax
-        je      .LSTRSCANFOUND1
-// second char
-        andl    %edx,%eax
-        jz      .LSTRSCAN
-        shrl    $8,%esi
-        cmpl    %ecx,%eax
-        movl    %esi,%eax
-        je      .LSTRSCANFOUND2
-// third char
-        andl    %edx,%eax
-        jz      .LSTRSCAN
-        shrl    $8,%esi
-        cmpl    %ecx,%eax
-        movl    %esi,%eax
-        je      .LSTRSCANFOUND3
-// fourth char
-// all upper bits have already been cleared
-        testl   %eax,%eax
-        jz      .LSTRSCAN
-        addl    $4,%edi
-        cmpl    %ecx,%eax
-        je      .LSTRSCANFOUND
-        jmp     .LSTRSCANLOOP
-.LSTRSCANFOUND3:
-        leal    2(%edi),%eax
-        jmp     .LSTRSCAN
-.LSTRSCANFOUND2:
-        leal    1(%edi),%eax
-        jmp     .LSTRSCAN
-.LSTRSCANFOUND1:
-        movl    %edi,%eax
-        jmp     .LSTRSCAN
-.LSTRSCANFOUND:
-        leal    -1(%edi),%eax
-.LSTRSCAN:
-end ['EAX','ECX','ESI','EDI','EDX'];
+        { empty/invalid string? }
+        cmpli   r3,0
+        { if yes, do nothing }
+        beq     LStrScanDone
+        subi    r3,r3,1
+LStrScanLoop:
+        lbzu    r10,1(r3)
+        { test for c before the end-of-string test, so c = #0 finds the terminator }
+        cmpl    cr1,r10,r4
+        cmpli   cr0,r10,0
+        beq     cr1,LStrScanDone
+        bne     LStrScanLoop
+        { end of string reached without a match -> return nil }
+        li      r3,0
+LStrScanDone:
+end ['r3','r4','r10','cr0','cr1'];


 function strrscan(p : pchar;c : char) : pchar;assembler;
 asm
-        xorl    %eax,%eax
-        movl    p,%edi
-        orl     %edi,%edi
-        jz      .LSTRRSCAN
-        movl    $0xffffffff,%ecx
-        cld
-        xorb    %al,%al
-        repne
-        scasb
-        not     %ecx
-        movb    c,%al
-        movl    p,%edi
-        addl    %ecx,%edi
-        decl    %edi
-        std
-        repne
-        scasb
-        cld
-        movl    $0,%eax
-        jnz     .LSTRRSCAN
-        movl    %edi,%eax
-        incl    %eax
-.LSTRRSCAN:
-end ['EAX','ECX','EDI'];
+        { empty/invalid string? }
+        cmpli   r3,0
+        { if yes, do nothing }
+        beq     LStrrScanDone
+        { r9 = last position where c was seen, nil as long as there is none }
+        li      r9,0
+        subi    r3,r3,1
+LStrrScanLoop:
+        lbzu    r10,1(r3)
+        cmpl    cr1,r10,r4
+        cmpli   cr0,r10,0
+        bne+    cr1,LStrrScanNotFound
+        { store address of found position }
+        mr      r9,r3
+LStrrScanNotFound:
+        bne     LStrrScanLoop
+        { result: last match, or nil if c does not occur in p }
+        mr      r3,r9
+LStrrScanDone:
+end ['r3','r4','r9','r10','cr0','cr1'];


 function strupper(p : pchar) : pchar;assembler;
 asm
-        movl    p,%esi
-        orl     %esi,%esi
-        jz      .LStrUpperNil
-        movl    %esi,%edi
-.LSTRUPPER1:
-        lodsb
-        cmpb    $97,%al
-        jb      .LSTRUPPER3
-        cmpb    $122,%al
-        ja      .LSTRUPPER3
-        subb    $0x20,%al
-.LSTRUPPER3:
-        stosb
-        orb     %al,%al
-        jnz     .LSTRUPPER1
-.LStrUpperNil:
-        movl    p,%eax
-end ['EAX','ESI','EDI'];
+        cmpli   r3,0
+        beq     LStrUpperNil
+        subi    r9,r3,1
+LStrUpperLoop:
+        lbzu    r10,1(r9)
+        { a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
+        subi    r8,r10,97
+        cmpli   r8,122-97
+        cmpli   cr1,r10,0
+        subi    r10,r10,0x20
+        bgt     LStrUpper1
+        stb     r10,0(r9)
+LStrUpper1:
+        bne     cr1,LStrUpperLoop
+LStrUpperNil:
+end ['r8','r9','r10','cr0','cr1'];


 function strlower(p : pchar) : pchar;assembler;
 asm
-        movl    p,%esi
-        orl     %esi,%esi
-        jz      .LStrLowerNil
-        movl    %esi,%edi
-.LSTRLOWER1:
-        lodsb
-        cmpb    $65,%al
-        jb      .LSTRLOWER3
-        cmpb    $90,%al
-        ja      .LSTRLOWER3
-        addb    $0x20,%al
-.LSTRLOWER3:
-        stosb
-        orb     %al,%al
-        jnz     .LSTRLOWER1
-.LStrLowerNil:
-        movl    p,%eax
-end ['EAX','ESI','EDI'];
+        cmpli   r3,0
+        beq     LStrLowerNil
+        subi    r9,r3,1
+LStrLowerLoop:
+        lbzu    r10,1(r9)
+        { a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
+        subi    r8,r10,65
+        cmpli   r8,90-65
+        cmpli   cr1,r10,0
+        addi    r10,r10,0x20
+        bgt     LStrLower1
+        stb     r10,0(r9)
+LStrLower1:
+        bne     cr1,LStrLowerLoop
+LStrLowerNil:
+end ['r8','r9','r10','cr0','cr1'];
+

 {
   $Log$
-  Revision 1.1  2000-11-05 17:17:08  jonas
+  Revision 1.2  2001-02-10 12:28:22  jonas
+    * fixed some bugs, simplified/optimized already implemented routines and code some more
+
+  Revision 1.1  2000/11/05 17:17:08  jonas
     + first implementation, not yet finished

 }