* some small bugfixes and cache optimizations

This commit is contained in:
Jonas Maebe 2001-07-07 12:46:12 +00:00
parent f4ca079850
commit 91a2f0d3f5
4 changed files with 112 additions and 46 deletions

View File

@ -1,7 +1,7 @@
{ {
$Id$ $Id$
This file is part of the Free Pascal run time library. This file is part of the Free Pascal run time library.
Copyright (c) 1999 by the Free Pascal development team. Copyright (c) 2000-2001 by the Free Pascal development team.
Portions Copyright (c) 2000 by Casey Duncan (casey.duncan@state.co.us) Portions Copyright (c) 2000 by Casey Duncan (casey.duncan@state.co.us)
@ -26,6 +26,8 @@
procedure Move(var source;var dest;count:longint);assembler; procedure Move(var source;var dest;count:longint);assembler;
asm asm
{ load the begin of the source in the data cache }
dcbt r0,r3
{ count <= 0 ? } { count <= 0 ? }
cmpwi cr0,r5,0 cmpwi cr0,r5,0
{ check if we have to do the move backwards because of overlap } { check if we have to do the move backwards because of overlap }
@ -93,9 +95,9 @@ LMove4ByteAlignLoop:
{ count >= 39 -> align to 8 byte boundary and then use the FPU } { count >= 39 -> align to 8 byte boundary and then use the FPU }
{ since we're already at 4 byte alignment, use dword store } { since we're already at 4 byte alignment, use dword store }
lwz r29,0(r3) lwz r29,0(r3)
add r3,r3,r30, add r3,r3,r30
stw r29,0(r4) stw r29,0(r4)
add r4,r4,r30, add r4,r4,r30
L8BytesAligned: L8BytesAligned:
{ count div 32 ( >= 1, since count was >= 39 ) } { count div 32 ( >= 1, since count was >= 39 ) }
srwi r29,r5,5 srwi r29,r5,5
@ -260,43 +262,49 @@ function IndexByte(var buf;len:longint;b:byte):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b } { input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) } { output: r3 = position of b in buf (-1 if not found) }
asm asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0 cmpli r4,0
mtctr r4 mtctr r4
subi r30,r3,1 subi r30,r3,1
mr r28,r3
{ assume not found } { assume not found }
li r3,-1 li r3,-1
beq LIndexByteNotFound beq LIndexByteDone
LIndexByteLoop: LIndexByteLoop:
lbzu r29,1(r30) lbzu r29,1(r30)
cmpl r29,r5 cmpl r29,r5
bdnzne LIndexByteLoop bdnzf cr0*4+eq,LIndexByteLoop
{ r3 still contains -1 here } { r3 still contains -1 here }
bne LIndexByteDone bne LIndexByteDone
sub r3,r29,r3 sub r3,r30,r28
LIndexByteDone: LIndexByteDone:
end ['r3','r29','r30','cr0','ctr']; end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_INDEXWORD} {$define FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(var buf;len:longint;b:word):longint; assembler; function IndexWord(var buf;len:longint;b:word):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b } { input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) } { output: r3 = position of b in buf (-1 if not found) }
asm asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0 cmpli r4,0
mtctr r4 mtctr r4
subi r30,r3,2 subi r30,r3,2
mr r28,r3
{ assume not found } { assume not found }
li r3,-1 li r3,-1
beq LIndexWordNotFound beq LIndexWordDone
LIndexWordLoop: LIndexWordLoop:
lhzu r29,2(r30) lhzu r29,2(r30)
cmpl r29,r5 cmpl r29,r5
bdnzne LIndexWordLoop bdnzf cr0*4+eq,LIndexWordLoop
{ r3 still contains -1 here } { r3 still contains -1 here }
bne LIndexWordDone bne LIndexWordDone
sub r3,r29,r3 sub r3,r30,r28
LIndexWordDone: LIndexWordDone:
end ['r3','r29','r30','cr0','ctr']; end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_INDEXDWORD} {$define FPC_SYSTEM_HAS_INDEXDWORD}
@ -304,21 +312,24 @@ function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b } { input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) } { output: r3 = position of b in buf (-1 if not found) }
asm asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0 cmpli r4,0
mtctr r4 mtctr r4
subi r30,r3,4 subi r30,r3,4
mr r28,r3
{ assume not found } { assume not found }
li r3,-1 li r3,-1
beq LIndexDWordNotFound beq LIndexDWordDone
LIndexDWordLoop: LIndexDWordLoop:
lwzu r29,4(r30) lwzu r29,4(r30)
cmpl r29,r5 cmpl r29,r5
bdnzne LIndexDWordLoop bdnzf cr0*4+eq, LIndexDWordLoop
{ r3 still contains -1 here } { r3 still contains -1 here }
bne LIndexDWordDone bne LIndexDWordDone
sub r3,r29,r3 sub r3,r30,r28
LIndexDWordDone: LIndexDWordDone:
end ['r3','r29','r30','cr0','ctr']; end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_COMPAREBYTE} {$define FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(var buf1,buf2;len:longint):longint; assembler; function CompareByte(var buf1,buf2;len:longint):longint; assembler;
@ -326,13 +337,15 @@ function CompareByte(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 } { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc } { note: almost direct copy of strlcomp() from strings.inc }
asm asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result } { use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0 cmpl r5,0
mtctr r5
subi r28,r3,1 subi r28,r3,1
subi r4,r4,1
li r3,0 li r3,0
beq LCompByteDone beq LCompByteDone
mtctr r5
subi r4,r4,1
LCompByteLoop: LCompByteLoop:
{ load next chars } { load next chars }
lbzu r29,1(r28) lbzu r29,1(r28)
@ -340,7 +353,7 @@ LCompByteLoop:
{ calculate difference } { calculate difference }
sub. r3,r29,r30 sub. r3,r29,r30
{ if chars not equal or at the end, we're ready } { if chars not equal or at the end, we're ready }
bdnze LCompByteDone bdnzt cr0*4+eq, LCompByteLoop
LCompByteDone: LCompByteDone:
end ['r3','r4','r28','r29','r30','cr0','ctr']; end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -350,13 +363,15 @@ function CompareWord(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 } { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc } { note: almost direct copy of strlcomp() from strings.inc }
asm asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result } { use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0 cmpl r5,0
mtctr r5
subi r28,r3,2 subi r28,r3,2
subi r4,r4,2
li r3,0 li r3,0
beq LCompWordDone beq LCompWordDone
mtctr r5
subi r4,r4,2
LCompWordLoop: LCompWordLoop:
{ load next chars } { load next chars }
lhzu r29,2(r28) lhzu r29,2(r28)
@ -364,7 +379,7 @@ LCompWordLoop:
{ calculate difference } { calculate difference }
sub. r3,r29,r30 sub. r3,r29,r30
{ if chars not equal or at the end, we're ready } { if chars not equal or at the end, we're ready }
bdnze LCompWordDone bdnzt cr0*4+eq, LCompWordLoop
LCompWordDone: LCompWordDone:
end ['r3','r4','r28','r29','r30','cr0','ctr']; end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -375,13 +390,15 @@ function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 } { output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc } { note: almost direct copy of strlcomp() from strings.inc }
asm asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result } { use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0 cmpl r5,0
mtctr r5
subi r28,r3,4 subi r28,r3,4
subi r4,r4,4
li r3,0 li r3,0
beq LCompDWordDone beq LCompDWordDone
mtctr r5
subi r4,r4,4
LCompDWordLoop: LCompDWordLoop:
{ load next chars } { load next chars }
lwzu r29,4(r28) lwzu r29,4(r28)
@ -389,7 +406,7 @@ LCompDWordLoop:
{ calculate difference } { calculate difference }
sub. r3,r29,r30 sub. r3,r29,r30
{ if chars not equal or at the end, we're ready } { if chars not equal or at the end, we're ready }
bdnze LCompDWordDone bdnzt cr0*4+eq, LCompDWordLoop
LCompDWordDone: LCompDWordDone:
end ['r3','r4','r28','r29','r30','cr0','ctr']; end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -398,12 +415,15 @@ function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b } { input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of found position (-1 if not found) } { output: r3 = position of found position (-1 if not found) }
asm asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
{ length = 0? } { length = 0? }
cmpli r5,0 cmpli r5,0
mtctr r5
subi r29,r3,1 subi r29,r3,1
mr r28,r29
{ assume not found } { assume not found }
li r3,-1 li r3,-1
mtctr r5
{ if yes, do nothing } { if yes, do nothing }
beq LIndexChar0Done beq LIndexChar0Done
subi r3,r3,1 subi r3,r3,1
@ -412,11 +432,11 @@ LIndexChar0Loop:
cmpli cr1,r30,0 cmpli cr1,r30,0
cmpl r30,r4 cmpl r30,r4
beq cr1,LIndexChar0Done beq cr1,LIndexChar0Done
bdnzne LIndexChar0Loop bdnzf cr0*4+eq, LIndexChar0Loop
bne LIndexChar0Done bne LIndexChar0Done
sub r3,r29,r3 sub r3,r29,r28
LIndexCharDone: LIndexChar0Done:
end ['r3','r4','r29','r30','cr0','ctr']; end ['r3','r4','r28','r29','r30','cr0','ctr'];
{ all FPC_HELP_* are still missing (JM) } { all FPC_HELP_* are still missing (JM) }
@ -432,6 +452,8 @@ assembler;
asm asm
{ load length source } { load length source }
lbz r30,0(r4) lbz r30,0(r4)
{ load the begin of the dest buffer in the data cache }
dcbtst r0,r5
{ put min(length(sstr),len) in r3 } { put min(length(sstr),len) in r3 }
subc r29,r3,r30 { r29 := r3 - r30 } subc r29,r3,r30 { r29 := r3 - r30 }
@ -453,7 +475,10 @@ end ['r3','r4','r5','r29','r30','cr0','ctr'];
{ {
$Log$ $Log$
Revision 1.4 2001-03-03 13:53:36 jonas Revision 1.5 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.4 2001/03/03 13:53:36 jonas
* fixed small bug in move * fixed small bug in move
Revision 1.3 2001/03/02 13:24:10 jonas Revision 1.3 2001/03/02 13:24:10 jonas

View File

@ -140,6 +140,8 @@ asm
stw r7,saveR7 stw r7,saveR7
stw r8,saveR8 stw r8,saveR8
rlwinm r6,r4,32-3,0,31-2 // divide by 8 to get starting and ending byte- rlwinm r6,r4,32-3,0,31-2 // divide by 8 to get starting and ending byte-
{ load the set in the data cache }
dcbt r3,r6
rlwinm r7,r5,32-3,0,31-2 // address and clear two lowest bits to get rlwinm r7,r5,32-3,0,31-2 // address and clear two lowest bits to get
// start/end longint address // start/end longint address
sub. r7,r6,r7 // are bit lo and hi in the same longint? sub. r7,r6,r7 // are bit lo and hi in the same longint?
@ -215,6 +217,8 @@ procedure do_add_sets(set1,set2,dest : pointer);assembler;[public,alias:'FPC_SET
var var
saveR6, saveR7, saveR8: longint; saveR6, saveR7, saveR8: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6 stw r6,saveR6
stw r7,saveR7 stw r7,saveR7
subi r5,r5,4 subi r5,r5,4
@ -244,6 +248,8 @@ procedure do_mul_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_M
var var
saveR6, saveR7, saveR8: longint; saveR6, saveR7, saveR8: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6 stw r6,saveR6
stw r7,saveR7 stw r7,saveR7
subi r5,r5,4 subi r5,r5,4
@ -273,6 +279,8 @@ procedure do_sub_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_S
var var
saveR6, saveR7, saveR8: longint; saveR6, saveR7, saveR8: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6 stw r6,saveR6
stw r7,saveR7 stw r7,saveR7
subi r5,r5,4 subi r5,r5,4
@ -302,6 +310,8 @@ procedure do_symdif_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SE
var var
saveR6, saveR7, saveR8: longint; saveR6, saveR7, saveR8: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6 stw r6,saveR6
stw r7,saveR7 stw r7,saveR7
subi r5,r5,4 subi r5,r5,4
@ -331,6 +341,8 @@ procedure do_comp_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET_COM
var var
saveR5, saveR6, saveR7: longint; saveR5, saveR6, saveR7: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r5,saveR5 stw r5,saveR5
mfctr r5 mfctr r5
stw r6,saveR6 stw r6,saveR6
@ -359,6 +371,8 @@ procedure do_contains_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET
var var
saveR5, saveR6, saveR7: longint; saveR5, saveR6, saveR7: longint;
asm asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r5,saveR5 stw r5,saveR5
mfctr r5 mfctr r5
stw r6,saveR6 stw r6,saveR6
@ -544,7 +558,10 @@ end;
{ {
$Log$ $Log$
Revision 1.7 2001-03-03 13:54:26 jonas Revision 1.8 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.7 2001/03/03 13:54:26 jonas
* changed 'bdnzeq cr0' to 'bdnzt cr0*4+eq' * changed 'bdnzeq cr0' to 'bdnzt cr0*4+eq'
Revision 1.6 2000/10/07 14:42:16 jonas Revision 1.6 2000/10/07 14:42:16 jonas

View File

@ -24,6 +24,8 @@ function strcopy(dest,source : pchar) : pchar;assembler;
asm asm
{ in: dest in r3, source in r4 } { in: dest in r3, source in r4 }
{ out: result (dest) in r3 } { out: result (dest) in r3 }
{ load the begin of the source string in the data cache }
dcbt r0,r4
{ get # of misaligned bytes } { get # of misaligned bytes }
rlwinm. r30,r4,0,31-2,31 rlwinm. r30,r4,0,31-2,31
subfic r30,r30,4 subfic r30,r30,4
@ -51,17 +53,20 @@ LStrCopyAligned:
addis r27,r27,0x0feff addis r27,r27,0x0feff
li r26,0x08080 li r26,0x08080
addis r26,r26,0x08081 addis r26,r26,0x08081
LStrCopyAlignedLoop:
{ load next 4 bytes } { load first 4 bytes }
lwzu r28,4(r4) lwzu r28,4(r4)
LStrCopyAlignedLoop:
{ test for zero byte } { test for zero byte }
add r30,r28,r27 add r30,r28,r27
andc r30,r30,r28 andc r30,r30,r28
and. r30,r30,r26 and. r30,r30,r26
bne LStrCopyEndFound bne LStrCopyEndFound
stwu r28,4(r29) stwu r28,4(r29)
{ load next 4 bytes (do it here so the load can begin while the }
{ branch is processed) }
lwzu r28,4(r4)
b LStrCopyAlignedLoop b LStrCopyAlignedLoop
LStrCopyEndFound: LStrCopyEndFound:
{ result is either 0, 8, 16 or 24 depending on which byte is zero } { result is either 0, 8, 16 or 24 depending on which byte is zero }
@ -81,6 +86,8 @@ function strecopy(dest,source : pchar) : pchar;assembler;
{ in: dest in r3, source in r4 } { in: dest in r3, source in r4 }
{ out: result (end of new dest) in r3 } { out: result (end of new dest) in r3 }
asm asm
{ load the begin of the source string in the data cache }
dcbt r0,r4
{ get # of misaligned bytes } { get # of misaligned bytes }
rlwinm. r30,r4,0,31-2,31 rlwinm. r30,r4,0,31-2,31
subfic r30,r30,4 subfic r30,r30,4
@ -136,6 +143,8 @@ function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
{ in: dest in r3, source in r4, maxlen in r5 } { in: dest in r3, source in r4, maxlen in r5 }
{ out: result (dest) in r3 } { out: result (dest) in r3 }
asm asm
{ load the begin of the source string in the data cache }
dcbt r0,r4
mtctr r5 mtctr r5
subi r4,r4,1 subi r4,r4,1
subi r29,r3,1 subi r29,r3,1
@ -143,7 +152,7 @@ LStrlCopyLoop:
lbzu r30,1(r4) lbzu r30,1(r4)
cmpli r30,0 cmpli r30,0
stbu r30,1(r29) stbu r30,1(r29)
bdnzne LStrlCopyLoop bdnzf cr0*4+eq, LStrlCopyLoop
{ if we stopped because we copied a #0, we're done } { if we stopped because we copied a #0, we're done }
beq LStrlCopyDone beq LStrlCopyDone
{ otherwise add the #0 } { otherwise add the #0 }
@ -157,6 +166,8 @@ function strlen(p : pchar) : longint;assembler;
{ in: p in r3 } { in: p in r3 }
{ out: result (length) in r3 } { out: result (length) in r3 }
asm asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ empty/invalid string? } { empty/invalid string? }
cmpli r3,0 cmpli r3,0
{ if yes, do nothing } { if yes, do nothing }
@ -175,6 +186,8 @@ function strend(p : pchar) : pchar;assembler;
{ in: p in r3 } { in: p in r3 }
{ out: result (end of p) in r3 } { out: result (end of p) in r3 }
asm asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ empty/invalid string? } { empty/invalid string? }
cmpli r3,0 cmpli r3,0
{ if yes, do nothing } { if yes, do nothing }
@ -220,6 +233,8 @@ function strlcomp(str1,str2 : pchar;l : longint) : longint;assembler;
{ out: result (= 0 if strings equal, < 0 if str1 < str2, > 0 if str1 > str2 } { out: result (= 0 if strings equal, < 0 if str1 < str2, > 0 if str1 > str2 }
{ in r3 } { in r3 }
asm asm
{ load the begin of one of the strings in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for str1 since r3 contains result } { use r28 instead of r3 for str1 since r3 contains result }
cmpl r5,0 cmpl r5,0
subi r28,r3,1 subi r28,r3,1
@ -240,7 +255,7 @@ LStrlCompLoop:
{ if they are equal and one is zero, then the other one is zero too } { if they are equal and one is zero, then the other one is zero too }
{ and we're done as well (r3 also contains 0 then) } { and we're done as well (r3 also contains 0 then) }
{ otherwise loop (if ctr <> 0) } { otherwise loop (if ctr <> 0) }
bdnzne cr1,LStrlCompLoop bdnzf cr1*4+eq,LStrlCompLoop
LStrlCompDone: LStrlCompDone:
end ['r3','r4','r28','r29','r30','cr0','cr1','ctr']; end ['r3','r4','r28','r29','r30','cr0','cr1','ctr'];
@ -313,11 +328,13 @@ function strlicomp(str1,str2 : pchar;l : longint) : longint;assembler;
{ in: str1 in r3, str2 in r4, l in r5 } { in: str1 in r3, str2 in r4, l in r5 }
{ out: result of case insensitive comparison (< 0, = 0, > 0) } { out: result of case insensitive comparison (< 0, = 0, > 0) }
asm asm
{ load the begin of one of the strings in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for str1 since r3 contains result } { use r28 instead of r3 for str1 since r3 contains result }
cmpl r5,0 cmpl r5,0
subi r28,r3,1 subi r28,r3,1
li r3,0 li r3,0
beq LStrlCompDone beq- LStrlCompDone
mtctr r5 mtctr r5
subi r4,r4,1 subi r4,r4,1
LStriCompLoop: LStriCompLoop:
@ -359,7 +376,7 @@ LStriCompEqual:
{ if they are equal and one is zero, then the other one is zero too } { if they are equal and one is zero, then the other one is zero too }
{ and we're done as well (r3 also contains 0 then) } { and we're done as well (r3 also contains 0 then) }
{ otherwise loop (if ctr <> 0) } { otherwise loop (if ctr <> 0) }
bdnzne cr1,LStriCompLoop bdnzf cr1*4+eq,LStriCompLoop
LStriCompDone: LStriCompDone:
end ['r3','r4','r26','r27','r28','r29','r30','cr0','cr1','ctr']; end ['r3','r4','r26','r27','r28','r29','r30','cr0','cr1','ctr'];
@ -453,7 +470,10 @@ end ['r28','r29','r30','cr0','cr1'];
{ {
$Log$ $Log$
Revision 1.6 2001-02-23 14:05:33 jonas Revision 1.7 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.6 2001/02/23 14:05:33 jonas
* optimized strcopy/strecopy * optimized strcopy/strecopy
Revision 1.5 2001/02/11 17:59:14 jonas Revision 1.5 2001/02/11 17:59:14 jonas

View File

@ -18,6 +18,8 @@
function strpas(p : pchar) : string; assembler; function strpas(p : pchar) : string; assembler;
asm asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ load result address in r29 } { load result address in r29 }
li r29,__RESULT@l li r29,__RESULT@l
addis r29,__RESULT@ha addis r29,__RESULT@ha
@ -33,14 +35,13 @@ LStrPasLoop:
lbzu r30,1(r4) lbzu r30,1(r4)
cmpli r30,0 cmpli r30,0
stbu r30,1(r29) stbu r30,1(r29)
bdnzne LStrPasLoop bdnzf cr0*4+eq, LStrPasLoop
{ get remaining count for length } { get remaining count for length }
mfctr r30 mfctr r30
subfic r30,r30,255
{ if we stopped because of a terminating #0, decrease the length by 1 } { if we stopped because of a terminating #0, decrease the length by 1 }
mfcr r4 mfcr r4
subfic r30,r30,255
{ put "equal" condition bit of cr0 in bit position 31 (= rightmost) } { put "equal" condition bit of cr0 in bit position 31 (= rightmost) }
{ and clear other bits } { and clear other bits }
rlwinm r4,r4,cr0*4+eq+1,31,31 rlwinm r4,r4,cr0*4+eq+1,31,31
@ -58,21 +59,24 @@ asm
cmpli r30,0 cmpli r30,0
mtctr r30 mtctr r30
subi r29,r3,1 subi r29,r3,1
beq LStrCopyEmpty beq LStrPCopyEmpty
LStrPCopyLoop: LStrPCopyLoop:
{ copy everything } { copy everything }
lbzu r30,1(r4) lbzu r30,1(r4)
stbu r30,1(r29) stbu r30,1(r29)
bdnz LStrCopyLoop bdnz LStrPCopyLoop
{ add terminating #0 } { add terminating #0 }
li r30,0 li r30,0
LStrCopyEmpty: LStrPCopyEmpty:
stb r30,1(r29) stb r30,1(r29)
end ['r4','r29','r30','cr0','ctr']; end ['r4','r29','r30','cr0','ctr'];
{ {
$Log$ $Log$
Revision 1.2 2001-02-11 12:15:03 jonas Revision 1.3 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.2 2001/02/11 12:15:03 jonas
* some small optimizations and bugfixes * some small optimizations and bugfixes
Revision 1.1 2001/02/10 16:10:32 jonas Revision 1.1 2001/02/10 16:10:32 jonas