* some small bugfixes and cache optimizations

This commit is contained in:
Jonas Maebe 2001-07-07 12:46:12 +00:00
parent f4ca079850
commit 91a2f0d3f5
4 changed files with 112 additions and 46 deletions

View File

@ -1,7 +1,7 @@
{
$Id$
This file is part of the Free Pascal run time library.
Copyright (c) 1999 by the Free Pascal development team.
Copyright (c) 2000-2001 by the Free Pascal development team.
Portions Copyright (c) 2000 by Casey Duncan (casey.duncan@state.co.us)
@ -26,6 +26,8 @@
procedure Move(var source;var dest;count:longint);assembler;
asm
{ load the begin of the source in the data cache }
dcbt r0,r3
{ count <= 0 ? }
cmpwi cr0,r5,0
{ check if we have to do the move backwards because of overlap }
@ -93,9 +95,9 @@ LMove4ByteAlignLoop:
{ count >= 39 -> align to 8 byte boundary and then use the FPU }
{ since we're already at 4 byte alignment, use dword store }
lwz r29,0(r3)
add r3,r3,r30,
add r3,r3,r30
stw r29,0(r4)
add r4,r4,r30,
add r4,r4,r30
L8BytesAligned:
{ count div 32 ( >= 1, since count was >= 39 ) }
srwi r29,r5,5
@ -260,43 +262,49 @@ function IndexByte(var buf;len:longint;b:byte):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) }
asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0
mtctr r4
subi r30,r3,1
mr r28,r3
{ assume not found }
li r3,-1
beq LIndexByteNotFound
beq LIndexByteDone
LIndexByteLoop:
lbzu r29,1(r30)
cmpl r29,r5
bdnzne LIndexByteLoop
bdnzf cr0*4+eq,LIndexByteLoop
{ r3 still contains -1 here }
bne LIndexByteDone
sub r3,r29,r3
sub r3,r30,r28
LIndexByteDone:
end ['r3','r29','r30','cr0','ctr'];
end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(var buf;len:longint;b:word):longint; assembler;
function IndexWord(var buf;len:longint;b:word):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) }
asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0
mtctr r4
subi r30,r3,2
mr r28,r3
{ assume not found }
li r3,-1
beq LIndexWordNotFound
beq LIndexWordDone
LIndexWordLoop:
lhzu r29,2(r30)
cmpl r29,r5
bdnzne LIndexWordLoop
bdnzf cr0*4+eq,LIndexWordLoop
{ r3 still contains -1 here }
bne LIndexWordDone
sub r3,r29,r3
sub r3,r30,r28
LIndexWordDone:
end ['r3','r29','r30','cr0','ctr'];
end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_INDEXDWORD}
@ -304,21 +312,24 @@ function IndexDWord(var buf;len:longint;b:DWord):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) }
asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
cmpli r4,0
mtctr r4
subi r30,r3,4
mr r28,r3
{ assume not found }
li r3,-1
beq LIndexDWordNotFound
beq LIndexDWordDone
LIndexDWordLoop:
lwzu r29,4(r30)
cmpl r29,r5
bdnzne LIndexDWordLoop
bdnzf cr0*4+eq, LIndexDWordLoop
{ r3 still contains -1 here }
bne LIndexDWordDone
sub r3,r29,r3
sub r3,r30,r28
LIndexDWordDone:
end ['r3','r29','r30','cr0','ctr'];
end ['r3','r28','r29','r30','cr0','ctr'];
{$define FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(var buf1,buf2;len:longint):longint; assembler;
@ -326,13 +337,15 @@ function CompareByte(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc }
asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0
mtctr r5
subi r28,r3,1
subi r4,r4,1
li r3,0
beq LCompByteDone
mtctr r5
subi r4,r4,1
LCompByteLoop:
{ load next chars }
lbzu r29,1(r28)
@ -340,7 +353,7 @@ LCompByteLoop:
{ calculate difference }
sub. r3,r29,r30
{ if chars not equal or at the end, we're ready }
bdnze LCompByteDone
bdnzt cr0*4+eq, LCompByteLoop
LCompByteDone:
end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -350,13 +363,15 @@ function CompareWord(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc }
asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0
mtctr r5
subi r28,r3,2
subi r4,r4,2
li r3,0
beq LCompWordDone
mtctr r5
subi r4,r4,2
LCompWordLoop:
{ load next chars }
lhzu r29,2(r28)
@ -364,7 +379,7 @@ LCompWordLoop:
{ calculate difference }
sub. r3,r29,r30
{ if chars not equal or at the end, we're ready }
bdnze LCompWordDone
bdnzt cr0*4+eq, LCompWordLoop
LCompWordDone:
end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -375,13 +390,15 @@ function CompareDWord(var buf1,buf2;len:longint):longint; assembler;
{ output: r3 = 0 if equal, < 0 if buf1 < buf2, > 0 if buf1 > buf2 }
{ note: almost direct copy of strlcomp() from strings.inc }
asm
{ load the begin of the first buffer in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for buf1 since r3 contains result }
cmpl r5,0
mtctr r5
subi r28,r3,4
subi r4,r4,4
li r3,0
beq LCompDWordDone
mtctr r5
subi r4,r4,4
LCompDWordLoop:
{ load next chars }
lwzu r29,4(r28)
@ -389,7 +406,7 @@ LCompDWordLoop:
{ calculate difference }
sub. r3,r29,r30
{ if chars not equal or at the end, we're ready }
bdnze LCompDWordDone
bdnzt cr0*4+eq, LCompDWordLoop
LCompDWordDone:
end ['r3','r4','r28','r29','r30','cr0','ctr'];
@ -398,12 +415,15 @@ function IndexChar0(var buf;len:longint;b:Char):longint; assembler;
{ input: r3 = buf, r4 = len, r5 = b }
{ output: r3 = position of b in buf (-1 if not found) }
asm
{ load the begin of the buffer in the data cache }
dcbt r0,r3
{ length = 0? }
cmpli r5,0
mtctr r5
subi r29,r3,1
mr r28,r29
{ assume not found }
li r3,-1
mtctr r5
{ if yes, do nothing }
beq LIndexChar0Done
subi r3,r3,1
@ -412,11 +432,11 @@ LIndexChar0Loop:
cmpli cr1,r30,0
cmpl r30,r4
beq cr1,LIndexChar0Done
bdnzne LIndexChar0Loop
bdnzf cr0*4+eq, LIndexChar0Loop
bne LIndexChar0Done
sub r3,r29,r3
LIndexCharDone:
end ['r3','r4','r29','r30','cr0','ctr'];
sub r3,r29,r28
LIndexChar0Done:
end ['r3','r4','r28','r29','r30','cr0','ctr'];
{ all FPC_HELP_* are still missing (JM) }
@ -432,6 +452,8 @@ assembler;
asm
{ load length source }
lbz r30,0(r4)
{ load the begin of the dest buffer in the data cache }
dcbtst r0,r5
{ put min(length(sstr),len) in r3 }
subc r29,r3,r30 { r29 := r3 - r30 }
@ -453,7 +475,10 @@ end ['r3','r4','r5','r29','r30','cr0','ctr'];
{
$Log$
Revision 1.4 2001-03-03 13:53:36 jonas
Revision 1.5 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.4 2001/03/03 13:53:36 jonas
* fixed small bug in move
Revision 1.3 2001/03/02 13:24:10 jonas

View File

@ -140,6 +140,8 @@ asm
stw r7,saveR7
stw r8,saveR8
rlwinm r6,r4,32-3,0,31-2 // divide by 8 to get starting and ending byte-
{ load the set in the data cache }
dcbt r3,r6
rlwinm r7,r5,32-3,0,31-2 // address and clear two lowest bits to get
// start/end longint address
sub. r7,r6,r7 // are bit lo and hi in the same longint?
@ -215,6 +217,8 @@ procedure do_add_sets(set1,set2,dest : pointer);assembler;[public,alias:'FPC_SET
var
saveR6, saveR7, saveR8: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6
stw r7,saveR7
subi r5,r5,4
@ -244,6 +248,8 @@ procedure do_mul_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_M
var
saveR6, saveR7, saveR8: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6
stw r7,saveR7
subi r5,r5,4
@ -273,6 +279,8 @@ procedure do_sub_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SET_S
var
saveR6, saveR7, saveR8: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6
stw r7,saveR7
subi r5,r5,4
@ -302,6 +310,8 @@ procedure do_symdif_sets(set1,set2,dest:pointer);assembler;[public,alias:'FPC_SE
var
saveR6, saveR7, saveR8: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r6,saveR6
stw r7,saveR7
subi r5,r5,4
@ -331,6 +341,8 @@ procedure do_comp_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET_COM
var
saveR5, saveR6, saveR7: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r5,saveR5
mfctr r5
stw r6,saveR6
@ -359,6 +371,8 @@ procedure do_contains_sets(set1,set2 : pointer);assembler;[public,alias:'FPC_SET
var
saveR5, saveR6, saveR7: longint;
asm
{ load the begin of the first set in the data cache }
dcbt r0,r3
stw r5,saveR5
mfctr r5
stw r6,saveR6
@ -544,7 +558,10 @@ end;
{
$Log$
Revision 1.7 2001-03-03 13:54:26 jonas
Revision 1.8 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.7 2001/03/03 13:54:26 jonas
* changed 'bdnzeq cr0' to 'bdnzt cr0*4+eq'
Revision 1.6 2000/10/07 14:42:16 jonas

View File

@ -24,6 +24,8 @@ function strcopy(dest,source : pchar) : pchar;assembler;
asm
{ in: dest in r3, source in r4 }
{ out: result (dest) in r3 }
{ load the begin of the source string in the data cache }
dcbt r0,r4
{ get # of misaligned bytes }
rlwinm. r30,r4,0,31-2,31
subfic r30,r30,4
@ -51,17 +53,20 @@ LStrCopyAligned:
addis r27,r27,0x0feff
li r26,0x08080
addis r26,r26,0x08081
LStrCopyAlignedLoop:
{ load next 4 bytes }
{ load first 4 bytes }
lwzu r28,4(r4)
LStrCopyAlignedLoop:
{ test for zero byte }
add r30,r28,r27
andc r30,r30,r28
and. r30,r30,r26
bne LStrCopyEndFound
stwu r28,4(r29)
{ load next 4 bytes (do it here so the load can begin while the }
{ branch is processed) }
lwzu r28,4(r4)
b LStrCopyAlignedLoop
LStrCopyEndFound:
{ result is either 0, 8, 16 or 24 depending on which byte is zero }
@ -81,6 +86,8 @@ function strecopy(dest,source : pchar) : pchar;assembler;
{ in: dest in r3, source in r4 }
{ out: result (end of new dest) in r3 }
asm
{ load the begin of the source string in the data cache }
dcbt r0,r4
{ get # of misaligned bytes }
rlwinm. r30,r4,0,31-2,31
subfic r30,r30,4
@ -136,6 +143,8 @@ function strlcopy(dest,source : pchar;maxlen : longint) : pchar;assembler;
{ in: dest in r3, source in r4, maxlen in r5 }
{ out: result (dest) in r3 }
asm
{ load the begin of the source string in the data cache }
dcbt r0,r4
mtctr r5
subi r4,r4,1
subi r29,r3,1
@ -143,7 +152,7 @@ LStrlCopyLoop:
lbzu r30,1(r4)
cmpli r30,0
stbu r30,1(r29)
bdnzne LStrlCopyLoop
bdnzf cr0*4+eq, LStrlCopyLoop
{ if we stopped because we copied a #0, we're done }
beq LStrlCopyDone
{ otherwise add the #0 }
@ -157,6 +166,8 @@ function strlen(p : pchar) : longint;assembler;
{ in: p in r3 }
{ out: result (length) in r3 }
asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
@ -175,6 +186,8 @@ function strend(p : pchar) : pchar;assembler;
{ in: p in r3 }
{ out: result (end of p) in r3 }
asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ empty/invalid string? }
cmpli r3,0
{ if yes, do nothing }
@ -220,6 +233,8 @@ function strlcomp(str1,str2 : pchar;l : longint) : longint;assembler;
{ out: result (= 0 if strings equal, < 0 if str1 < str2, > 0 if str1 > str2 }
{ in r3 }
asm
{ load the begin of one of the strings in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for str1 since r3 contains result }
cmpl r5,0
subi r28,r3,1
@ -240,7 +255,7 @@ LStrlCompLoop:
{ if they are equal and one is zero, then the other one is zero too }
{ and we're done as well (r3 also contains 0 then) }
{ otherwise loop (if ctr <> 0) }
bdnzne cr1,LStrlCompLoop
bdnzf cr1*4+eq,LStrlCompLoop
LStrlCompDone:
end ['r3','r4','r28','r29','r30','cr0','cr1','ctr'];
@ -313,11 +328,13 @@ function strlicomp(str1,str2 : pchar;l : longint) : longint;assembler;
{ in: str1 in r3, str2 in r4, l in r5 }
{ out: result of case insensitive comparison (< 0, = 0, > 0) }
asm
{ load the begin of one of the strings in the data cache }
dcbt r0,r3
{ use r28 instead of r3 for str1 since r3 contains result }
cmpl r5,0
subi r28,r3,1
li r3,0
beq LStrlCompDone
beq- LStrlCompDone
mtctr r5
subi r4,r4,1
LStriCompLoop:
@ -359,7 +376,7 @@ LStriCompEqual:
{ if they are equal and one is zero, then the other one is zero too }
{ and we're done as well (r3 also contains 0 then) }
{ otherwise loop (if ctr <> 0) }
bdnzne cr1,LStriCompLoop
bdnzf cr1*4+eq,LStriCompLoop
LStriCompDone:
end ['r3','r4','r26','r27','r28','r29','r30','cr0','cr1','ctr'];
@ -453,7 +470,10 @@ end ['r28','r29','r30','cr0','cr1'];
{
$Log$
Revision 1.6 2001-02-23 14:05:33 jonas
Revision 1.7 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.6 2001/02/23 14:05:33 jonas
* optimized strcopy/strecopy
Revision 1.5 2001/02/11 17:59:14 jonas

View File

@ -18,6 +18,8 @@
function strpas(p : pchar) : string; assembler;
asm
{ load the begin of the string in the data cache }
dcbt r0,r3
{ load result address in r29 }
li r29,__RESULT@l
addis r29,__RESULT@ha
@ -33,14 +35,13 @@ LStrPasLoop:
lbzu r30,1(r4)
cmpli r30,0
stbu r30,1(r29)
bdnzne LStrPasLoop
bdnzf cr0*4+eq, LStrPasLoop
{ get remaining count for length }
mfctr r30
subfic r30,r30,255
{ if we stopped because of a terminating #0, decrease the length by 1 }
mfcr r4
subfic r30,r30,255
{ put "equal" condition bit of cr0 in bit position 31 (= rightmost) }
{ and clear other bits }
rlwinm r4,r4,cr0*4+eq+1,31,31
@ -58,21 +59,24 @@ asm
cmpli r30,0
mtctr r30
subi r29,r3,1
beq LStrCopyEmpty
beq LStrPCopyEmpty
LStrPCopyLoop:
{ copy everything }
lbzu r30,1(r4)
stbu r30,1(r29)
bdnz LStrCopyLoop
bdnz LStrPCopyLoop
{ add terminating #0 }
li r30,0
LStrCopyEmpty:
LStrPCopyEmpty:
stb r30,1(r29)
end ['r4','r29','r30','cr0','ctr'];
{
$Log$
Revision 1.2 2001-02-11 12:15:03 jonas
Revision 1.3 2001-07-07 12:46:12 jonas
* some small bugfixes and cache optimizations
Revision 1.2 2001/02/11 12:15:03 jonas
* some small optimizations and bugfixes
Revision 1.1 2001/02/10 16:10:32 jonas