mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-19 23:19:24 +02:00
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those (and netbsd doesn't even handle those in the kernel). There are still some of those left that could cause problems though (e.g. in the set helpers)
This commit is contained in:
parent
737c927f6d
commit
c70f75f1b7
@ -1782,6 +1782,14 @@ const
|
||||
|
||||
{ ************* concatcopy ************ }
|
||||
|
||||
{$ifndef ppc603}
|
||||
const
|
||||
maxmoveunit = 8;
|
||||
{$else ppc603}
|
||||
const
|
||||
maxmoveunit = 4;
|
||||
{$endif ppc603}
|
||||
|
||||
procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);
|
||||
|
||||
var
|
||||
@ -1800,7 +1808,7 @@ const
|
||||
|
||||
{ make sure short loads are handled as optimally as possible }
|
||||
if not loadref then
|
||||
if (len <= 8) and
|
||||
if (len <= maxmoveunit) and
|
||||
(byte(len) in [1,2,4,8]) then
|
||||
begin
|
||||
if len < 8 then
|
||||
@ -1828,7 +1836,8 @@ const
|
||||
exit;
|
||||
end;
|
||||
|
||||
count := len div 8;
|
||||
count := len div maxmoveunit;
|
||||
|
||||
reference_reset(src);
|
||||
reference_reset(dst);
|
||||
{ load the address of source into src.base }
|
||||
@ -1870,6 +1879,7 @@ const
|
||||
orgdst := true;
|
||||
end;
|
||||
|
||||
{$ifndef ppc603}
|
||||
if count > 4 then
|
||||
{ generate a loop }
|
||||
begin
|
||||
@ -1927,6 +1937,54 @@ const
|
||||
inc(dst.offset,4);
|
||||
a_reg_dealloc(list,r);
|
||||
end;
|
||||
{$else not ppc603}
|
||||
if count > 4 then
|
||||
{ generate a loop }
|
||||
begin
|
||||
{ the offsets are zero after the a_loadaddress_ref_reg and just }
|
||||
{ have to be set to 4. I put an Inc there so debugging may be }
|
||||
{ easier (should offset be different from zero here, it will be }
|
||||
{ easy to notice in the generated assembler) }
|
||||
inc(dst.offset,4);
|
||||
inc(src.offset,4);
|
||||
list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4));
|
||||
list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4));
|
||||
countreg := get_scratch_reg_int(list,OS_INT);
|
||||
a_load_const_reg(list,OS_32,count,countreg);
|
||||
{ explicitly allocate R_0 since it can be used safely here }
|
||||
{ (for holding data that's being copied) }
|
||||
r.enum:=R_INTREGISTER;
|
||||
r.number:=NR_R0;
|
||||
a_reg_alloc(list,r);
|
||||
objectlibrary.getlabel(lab);
|
||||
a_label(list, lab);
|
||||
list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1));
|
||||
list.concat(taicpu.op_reg_ref(A_LWZU,r,src));
|
||||
list.concat(taicpu.op_reg_ref(A_STWU,r,dst));
|
||||
a_jmp(list,A_BC,C_NE,0,lab);
|
||||
free_scratch_reg(list,countreg);
|
||||
a_reg_dealloc(list,r);
|
||||
len := len mod 4;
|
||||
end;
|
||||
|
||||
count := len div 4;
|
||||
if count > 0 then
|
||||
{ unrolled loop }
|
||||
begin
|
||||
r.enum:=R_INTREGISTER;
|
||||
r.number:=NR_R0;
|
||||
a_reg_alloc(list,r);
|
||||
for count2 := 1 to count do
|
||||
begin
|
||||
a_load_ref_reg(list,OS_32,src,r);
|
||||
a_load_reg_ref(list,OS_32,r,dst);
|
||||
inc(src.offset,4);
|
||||
inc(dst.offset,4);
|
||||
end;
|
||||
a_reg_dealloc(list,r);
|
||||
len := len mod 4;
|
||||
end;
|
||||
{$endif not ppc603}
|
||||
{ copy the leftovers }
|
||||
if (len and 2) <> 0 then
|
||||
begin
|
||||
@ -2484,7 +2542,14 @@ begin
|
||||
end.
|
||||
{
|
||||
$Log$
|
||||
Revision 1.99 2003-05-29 10:06:09 jonas
|
||||
Revision 1.100 2003-05-29 21:17:27 jonas
|
||||
* compile with -dppc603 to not use unaligned float loads in move() and
|
||||
g_concatcopy, because the 603 and 604 take an exception for those
|
||||
(and netbsd doesn't even handle those in the kernel). There are
|
||||
still some of those left that could cause problems though (e.g.
|
||||
in the set helpers)
|
||||
|
||||
Revision 1.99 2003/05/29 10:06:09 jonas
|
||||
* also free temps in g_concatcopy if delsource is true
|
||||
|
||||
Revision 1.98 2003/05/28 23:58:18 jonas
|
||||
|
@ -178,8 +178,10 @@ LMove4ByteAlignLoop:
|
||||
{ while not aligned, continue }
|
||||
bne cr0,LMove4ByteAlignLoop
|
||||
|
||||
{$ifndef ppc603}
|
||||
{ check for 32 byte alignment }
|
||||
andi. r7,r4,31
|
||||
{$endif non ppc603}
|
||||
{ we are going to copy one byte again (the one at the newly }
|
||||
{ aligned address), so increase count by 1 }
|
||||
addi r5,r5,1
|
||||
@ -188,6 +190,7 @@ LMove4ByteAlignLoop:
|
||||
{ if 11 <= count < 63, copy using dwords }
|
||||
blt cr7,LMoveDWords
|
||||
|
||||
{$ifndef ppc603}
|
||||
{ # of dwords to copy to reach 32 byte alignment (*4) }
|
||||
{ (depends on forward/backward copy) }
|
||||
|
||||
@ -202,6 +205,8 @@ LMove4ByteAlignLoop:
|
||||
not r8, r6
|
||||
add r7, r7, r8
|
||||
xor r7, r7, r8
|
||||
{$endif not ppc603}
|
||||
|
||||
{ multiply the update count with 4 }
|
||||
slwi r10,r10,2
|
||||
slwi r6,r6,2
|
||||
@ -209,6 +214,7 @@ LMove4ByteAlignLoop:
|
||||
add r3,r3,r6
|
||||
add r4,r4,r6
|
||||
|
||||
{$ifndef ppc603}
|
||||
beq cr0,LMove32BytesAligned
|
||||
L32BytesAlignMoveLoop:
|
||||
{ count >= 39 -> align to 8 byte boundary and then use the FPU }
|
||||
@ -226,12 +232,17 @@ LMove32BytesAligned:
|
||||
andi. r5,r5,31
|
||||
{ to decide if we will do some dword stores (instead of only }
|
||||
{ byte stores) afterwards or not }
|
||||
{$else not ppc603}
|
||||
srwi r0,r5,4
|
||||
andi. r5,r5,15
|
||||
{$endif not ppc603}
|
||||
cmpwi cr1,r5,11
|
||||
mtctr r0
|
||||
|
||||
{ r0 := count div 4, will be moved to ctr when copying dwords }
|
||||
srwi r0,r5,2
|
||||
|
||||
{$ifndef ppc603}
|
||||
{ adjust the update count: it will now be 8 or -8 depending on overlap }
|
||||
slwi r10,r10,1
|
||||
|
||||
@ -271,6 +282,18 @@ LMove32ByteDcbz:
|
||||
stfdux f3,r4,r10
|
||||
bdnz LMove32ByteDcbz
|
||||
LMove32ByteLoopDone:
|
||||
{$else not ppc603}
|
||||
LMove16ByteLoop:
|
||||
lwzux r11,r3,r10
|
||||
lwzux r7,r3,r10
|
||||
lwzux r8,r3,r10
|
||||
lwzux r9,r3,r10
|
||||
stwux r11,r4,r10
|
||||
stwux r7,r4,r10
|
||||
stwux r8,r4,r10
|
||||
stwux r9,r4,r10
|
||||
bdnz LMove16ByteLoop
|
||||
{$endif not ppc603}
|
||||
|
||||
{ cr0*4+eq is true if "count and 31" = 0 (or "count and 15" = 0 with ppc603) }
|
||||
beq cr0,LMoveDone
|
||||
@ -278,8 +301,13 @@ LMove32ByteLoopDone:
|
||||
{ make r10 again -1 or 1, but first adjust source/dest pointers }
|
||||
sub r3,r3,r6
|
||||
sub r4,r4,r6
|
||||
{$ifndef ppc603}
|
||||
srawi r10,r10,3
|
||||
srawi r6,r6,3
|
||||
{$else not ppc603}
|
||||
srawi r10,r10,2
|
||||
srawi r6,r6,2
|
||||
{$endif not ppc603}
|
||||
|
||||
{ cr1 contains whether count <= 11 }
|
||||
ble cr1,LMoveBytes
|
||||
@ -932,7 +960,14 @@ end ['R3','R10'];
|
||||
|
||||
{
|
||||
$Log$
|
||||
Revision 1.48 2003-05-29 14:32:54 jonas
|
||||
Revision 1.49 2003-05-29 21:17:27 jonas
|
||||
* compile with -dppc603 to not use unaligned float loads in move() and
|
||||
g_concatcopy, because the 603 and 604 take an exception for those
|
||||
(and netbsd doesn't even handle those in the kernel). There are
|
||||
still some of those left that could cause problems though (e.g.
|
||||
in the set helpers)
|
||||
|
||||
Revision 1.48 2003/05/29 14:32:54 jonas
|
||||
* changed dcbst to dcbtst (former means "flush cache block to memory,
|
||||
the latter means "I will soon store something to that cache block")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user