* compile with -dppc603 to not use unaligned float loads in move() and

g_concatcopy, because the 603 and 604 take an exception for those
    (and netbsd doesn't even handle those in the kernel). There are
    still some of those left that could cause problems though (e.g.
    in the set helpers)
This commit is contained in:
Jonas Maebe 2003-05-29 21:17:27 +00:00
parent 737c927f6d
commit c70f75f1b7
2 changed files with 104 additions and 4 deletions

View File

@ -1782,6 +1782,14 @@ const
{ ************* concatcopy ************ } { ************* concatcopy ************ }
{$ifndef ppc603}
const
maxmoveunit = 8;
{$else ppc603}
const
maxmoveunit = 4;
{$endif ppc603}
procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean); procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);
var var
@ -1800,7 +1808,7 @@ const
{ make sure short loads are handled as optimally as possible } { make sure short loads are handled as optimally as possible }
if not loadref then if not loadref then
if (len <= 8) and if (len <= maxmoveunit) and
(byte(len) in [1,2,4,8]) then (byte(len) in [1,2,4,8]) then
begin begin
if len < 8 then if len < 8 then
@ -1828,7 +1836,8 @@ const
exit; exit;
end; end;
count := len div 8; count := len div maxmoveunit;
reference_reset(src); reference_reset(src);
reference_reset(dst); reference_reset(dst);
{ load the address of source into src.base } { load the address of source into src.base }
@ -1870,6 +1879,7 @@ const
orgdst := true; orgdst := true;
end; end;
{$ifndef ppc603}
if count > 4 then if count > 4 then
{ generate a loop } { generate a loop }
begin begin
@ -1927,6 +1937,54 @@ const
inc(dst.offset,4); inc(dst.offset,4);
a_reg_dealloc(list,r); a_reg_dealloc(list,r);
end; end;
{$else not ppc603}
if count > 4 then
{ generate a loop }
begin
{ the offsets are zero after the a_loadaddress_ref_reg and just }
{ have to be set to 4. I put an Inc there so debugging may be }
{ easier (should offset be different from zero here, it will be }
{ easy to notice in the generated assembler }
inc(dst.offset,4);
inc(src.offset,4);
list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4));
list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4));
countreg := get_scratch_reg_int(list,OS_INT);
a_load_const_reg(list,OS_32,count,countreg);
{ explicitly allocate R_0 since it can be used safely here }
{ (for holding data that's being copied) }
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
objectlibrary.getlabel(lab);
a_label(list, lab);
list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1));
list.concat(taicpu.op_reg_ref(A_LWZU,r,src));
list.concat(taicpu.op_reg_ref(A_STWU,r,dst));
a_jmp(list,A_BC,C_NE,0,lab);
free_scratch_reg(list,countreg);
a_reg_dealloc(list,r);
len := len mod 4;
end;
count := len div 4;
if count > 0 then
{ unrolled loop }
begin
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
for count2 := 1 to count do
begin
a_load_ref_reg(list,OS_32,src,r);
a_load_reg_ref(list,OS_32,r,dst);
inc(src.offset,4);
inc(dst.offset,4);
end;
a_reg_dealloc(list,r);
len := len mod 4;
end;
{$endif not ppc603}
{ copy the leftovers } { copy the leftovers }
if (len and 2) <> 0 then if (len and 2) <> 0 then
begin begin
@ -2484,7 +2542,14 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.99 2003-05-29 10:06:09 jonas Revision 1.100 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.99 2003/05/29 10:06:09 jonas
* also free temps in g_concatcopy if delsource is true * also free temps in g_concatcopy if delsource is true
Revision 1.98 2003/05/28 23:58:18 jonas Revision 1.98 2003/05/28 23:58:18 jonas

View File

@ -178,8 +178,10 @@ LMove4ByteAlignLoop:
{ while not aligned, continue } { while not aligned, continue }
bne cr0,LMove4ByteAlignLoop bne cr0,LMove4ByteAlignLoop
{$ifndef ppc603}
{ check for 32 byte alignment } { check for 32 byte alignment }
andi. r7,r4,31 andi. r7,r4,31
{$endif not ppc603}
{ we are going to copy one byte again (the one at the newly } { we are going to copy one byte again (the one at the newly }
{ aligned address), so increase count byte 1 } { aligned address), so increase count byte 1 }
addi r5,r5,1 addi r5,r5,1
@ -188,6 +190,7 @@ LMove4ByteAlignLoop:
{ if 11 <= count < 63, copy using dwords } { if 11 <= count < 63, copy using dwords }
blt cr7,LMoveDWords blt cr7,LMoveDWords
{$ifndef ppc603}
{ # of dwords to copy to reach 32 byte alignment (*4) } { # of dwords to copy to reach 32 byte alignment (*4) }
{ (depends on forward/backward copy) } { (depends on forward/backward copy) }
@ -202,6 +205,8 @@ LMove4ByteAlignLoop:
not r8, r6 not r8, r6
add r7, r7, r8 add r7, r7, r8
xor r7, r7, r8 xor r7, r7, r8
{$endif not ppc603}
{ multiply the update count with 4 } { multiply the update count with 4 }
slwi r10,r10,2 slwi r10,r10,2
slwi r6,r6,2 slwi r6,r6,2
@ -209,6 +214,7 @@ LMove4ByteAlignLoop:
add r3,r3,r6 add r3,r3,r6
add r4,r4,r6 add r4,r4,r6
{$ifndef ppc603}
beq cr0,LMove32BytesAligned beq cr0,LMove32BytesAligned
L32BytesAlignMoveLoop: L32BytesAlignMoveLoop:
{ count >= 39 -> align to 8 byte boundary and then use the FPU } { count >= 39 -> align to 8 byte boundary and then use the FPU }
@ -226,12 +232,17 @@ LMove32BytesAligned:
andi. r5,r5,31 andi. r5,r5,31
{ to decide if we will do some dword stores (instead of only } { to decide if we will do some dword stores (instead of only }
{ byte stores) afterwards or not } { byte stores) afterwards or not }
{$else not ppc603}
srwi r0,r5,4
andi. r5,r5,15
{$endif not ppc603}
cmpwi cr1,r5,11 cmpwi cr1,r5,11
mtctr r0 mtctr r0
{ r0 := count div 4, will be moved to ctr when copying dwords } { r0 := count div 4, will be moved to ctr when copying dwords }
srwi r0,r5,2 srwi r0,r5,2
{$ifndef ppc603}
{ adjust the update count: it will now be 8 or -8 depending on overlap } { adjust the update count: it will now be 8 or -8 depending on overlap }
slwi r10,r10,1 slwi r10,r10,1
@ -271,6 +282,18 @@ LMove32ByteDcbz:
stfdux f3,r4,r10 stfdux f3,r4,r10
bdnz LMove32ByteDcbz bdnz LMove32ByteDcbz
LMove32ByteLoopDone: LMove32ByteLoopDone:
{$else not ppc603}
LMove16ByteLoop:
lwzux r11,r3,r10
lwzux r7,r3,r10
lwzux r8,r3,r10
lwzux r9,r3,r10
stwux r11,r4,r10
stwux r7,r4,r10
stwux r8,r4,r10
stwux r9,r4,r10
bdnz LMove16ByteLoop
{$endif not ppc603}
{ cr0*4+eq is true if "count and 31" = 0 } { cr0*4+eq is true if "count and 31" = 0 }
beq cr0,LMoveDone beq cr0,LMoveDone
@ -278,8 +301,13 @@ LMove32ByteLoopDone:
{ make r10 again -1 or 1, but first adjust source/dest pointers } { make r10 again -1 or 1, but first adjust source/dest pointers }
sub r3,r3,r6 sub r3,r3,r6
sub r4,r4,r6 sub r4,r4,r6
{$ifndef ppc603}
srawi r10,r10,3 srawi r10,r10,3
srawi r6,r6,3 srawi r6,r6,3
{$else not ppc603}
srawi r10,r10,2
srawi r6,r6,2
{$endif not ppc603}
{ cr1 contains whether count <= 11 } { cr1 contains whether count <= 11 }
ble cr1,LMoveBytes ble cr1,LMoveBytes
@ -932,7 +960,14 @@ end ['R3','R10'];
{ {
$Log$ $Log$
Revision 1.48 2003-05-29 14:32:54 jonas Revision 1.49 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.48 2003/05/29 14:32:54 jonas
* changed dcbst to dcbtst (former means "flush cache block to memory, * changed dcbst to dcbtst (former means "flush cache block to memory,
the latter means "I will soon store something to that cache block") the latter means "I will soon store something to that cache block")