* compile with -dppc603 to not use unaligned float loads in move() and

g_concatcopy, because the 603 and 604 take an exception for those
    (and netbsd doesn't even handle those in the kernel). There are
    still some of those left that could cause problems though (e.g.
    in the set helpers)
This commit is contained in:
Jonas Maebe 2003-05-29 21:17:27 +00:00
parent 737c927f6d
commit c70f75f1b7
2 changed files with 104 additions and 4 deletions

View File

@ -1782,6 +1782,14 @@ const
{ ************* concatcopy ************ }
{$ifndef ppc603}
const
maxmoveunit = 8;
{$else ppc603}
const
maxmoveunit = 4;
{$endif ppc603}
procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);
var
@ -1800,7 +1808,7 @@ const
{ make sure short loads are handled as optimally as possible }
if not loadref then
if (len <= 8) and
if (len <= maxmoveunit) and
(byte(len) in [1,2,4,8]) then
begin
if len < 8 then
@ -1828,7 +1836,8 @@ const
exit;
end;
count := len div 8;
count := len div maxmoveunit;
reference_reset(src);
reference_reset(dst);
{ load the address of source into src.base }
@ -1870,6 +1879,7 @@ const
orgdst := true;
end;
{$ifndef ppc603}
if count > 4 then
{ generate a loop }
begin
@ -1927,6 +1937,54 @@ const
inc(dst.offset,4);
a_reg_dealloc(list,r);
end;
{$else not ppc603}
if count > 4 then
{ generate a loop }
begin
{ the offsets are zero after the a_loadaddress_ref_reg and just }
{ have to be set to 4. I put an Inc there so debugging may be }
{ easier (should offset be different from zero here, it will be }
{ easy to notice in the generated assembler) }
inc(dst.offset,4);
inc(src.offset,4);
list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4));
list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4));
countreg := get_scratch_reg_int(list,OS_INT);
a_load_const_reg(list,OS_32,count,countreg);
{ explicitly allocate R_0 since it can be used safely here }
{ (for holding data that's being copied) }
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
objectlibrary.getlabel(lab);
a_label(list, lab);
list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1));
list.concat(taicpu.op_reg_ref(A_LWZU,r,src));
list.concat(taicpu.op_reg_ref(A_STWU,r,dst));
a_jmp(list,A_BC,C_NE,0,lab);
free_scratch_reg(list,countreg);
a_reg_dealloc(list,r);
len := len mod 4;
end;
count := len div 4;
if count > 0 then
{ unrolled loop }
begin
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
for count2 := 1 to count do
begin
a_load_ref_reg(list,OS_32,src,r);
a_load_reg_ref(list,OS_32,r,dst);
inc(src.offset,4);
inc(dst.offset,4);
end;
a_reg_dealloc(list,r);
len := len mod 4;
end;
{$endif not ppc603}
{ copy the leftovers }
if (len and 2) <> 0 then
begin
@ -2484,7 +2542,14 @@ begin
end.
{
$Log$
Revision 1.99 2003-05-29 10:06:09 jonas
Revision 1.100 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.99 2003/05/29 10:06:09 jonas
* also free temps in g_concatcopy if delsource is true
Revision 1.98 2003/05/28 23:58:18 jonas

View File

@ -178,8 +178,10 @@ LMove4ByteAlignLoop:
{ while not aligned, continue }
bne cr0,LMove4ByteAlignLoop
{$ifndef ppc603}
{ check for 32 byte alignment }
andi. r7,r4,31
{$endif not ppc603}
{ we are going to copy one byte again (the one at the newly }
{ aligned address), so increase count by 1 }
addi r5,r5,1
@ -188,6 +190,7 @@ LMove4ByteAlignLoop:
{ if 11 <= count < 63, copy using dwords }
blt cr7,LMoveDWords
{$ifndef ppc603}
{ # of dwords to copy to reach 32 byte alignment (*4) }
{ (depends on forward/backward copy) }
@ -202,6 +205,8 @@ LMove4ByteAlignLoop:
not r8, r6
add r7, r7, r8
xor r7, r7, r8
{$endif not ppc603}
{ multiply the update count with 4 }
slwi r10,r10,2
slwi r6,r6,2
@ -209,6 +214,7 @@ LMove4ByteAlignLoop:
add r3,r3,r6
add r4,r4,r6
{$ifndef ppc603}
beq cr0,LMove32BytesAligned
L32BytesAlignMoveLoop:
{ count >= 39 -> align to 8 byte boundary and then use the FPU }
@ -226,12 +232,17 @@ LMove32BytesAligned:
andi. r5,r5,31
{ to decide if we will do some dword stores (instead of only }
{ byte stores) afterwards or not }
{$else not ppc603}
srwi r0,r5,4
andi. r5,r5,15
{$endif not ppc603}
cmpwi cr1,r5,11
mtctr r0
{ r0 := count div 4, will be moved to ctr when copying dwords }
srwi r0,r5,2
{$ifndef ppc603}
{ adjust the update count: it will now be 8 or -8 depending on overlap }
slwi r10,r10,1
@ -271,6 +282,18 @@ LMove32ByteDcbz:
stfdux f3,r4,r10
bdnz LMove32ByteDcbz
LMove32ByteLoopDone:
{$else not ppc603}
LMove16ByteLoop:
lwzux r11,r3,r10
lwzux r7,r3,r10
lwzux r8,r3,r10
lwzux r9,r3,r10
stwux r11,r4,r10
stwux r7,r4,r10
stwux r8,r4,r10
stwux r9,r4,r10
bdnz LMove16ByteLoop
{$endif not ppc603}
{ cr0*4+eq is true if "count and 31" = 0 }
beq cr0,LMoveDone
@ -278,8 +301,13 @@ LMove32ByteLoopDone:
{ make r10 again -1 or 1, but first adjust source/dest pointers }
sub r3,r3,r6
sub r4,r4,r6
{$ifndef ppc603}
srawi r10,r10,3
srawi r6,r6,3
{$else not ppc603}
srawi r10,r10,2
srawi r6,r6,2
{$endif not ppc603}
{ cr1 contains whether count <= 11 }
ble cr1,LMoveBytes
@ -932,7 +960,14 @@ end ['R3','R10'];
{
$Log$
Revision 1.48 2003-05-29 14:32:54 jonas
Revision 1.49 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.48 2003/05/29 14:32:54 jonas
* changed dcbst to dcbtst (former means "flush cache block to memory,
the latter means "I will soon store something to that cache block")