* compile with -dppc603 to not use unaligned float loads in move() and

g_concatcopy, because the 603 and 604 take an exception for those
    (and netbsd doesn't even handle those in the kernel). There are
    still some of those left that could cause problems though (e.g.
    in the set helpers)
This commit is contained in:
Jonas Maebe 2003-05-29 21:17:27 +00:00
parent 737c927f6d
commit c70f75f1b7
2 changed files with 104 additions and 4 deletions

View File

@ -1782,6 +1782,14 @@ const
{ ************* concatcopy ************ } { ************* concatcopy ************ }
{$ifndef ppc603}
const
maxmoveunit = 8;
{$else ppc603}
const
maxmoveunit = 4;
{$endif ppc603}
procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean); procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);
var var
@ -1800,7 +1808,7 @@ const
{ make sure short loads are handled as optimally as possible } { make sure short loads are handled as optimally as possible }
if not loadref then if not loadref then
if (len <= 8) and if (len <= maxmoveunit) and
(byte(len) in [1,2,4,8]) then (byte(len) in [1,2,4,8]) then
begin begin
if len < 8 then if len < 8 then
@ -1828,7 +1836,8 @@ const
exit; exit;
end; end;
count := len div 8; count := len div maxmoveunit;
reference_reset(src); reference_reset(src);
reference_reset(dst); reference_reset(dst);
{ load the address of source into src.base } { load the address of source into src.base }
@ -1870,6 +1879,7 @@ const
orgdst := true; orgdst := true;
end; end;
{$ifndef ppc603}
if count > 4 then if count > 4 then
{ generate a loop } { generate a loop }
begin begin
@ -1927,6 +1937,54 @@ const
inc(dst.offset,4); inc(dst.offset,4);
a_reg_dealloc(list,r); a_reg_dealloc(list,r);
end; end;
{$else not ppc603}
if count > 4 then
{ generate a loop }
begin
{ the offsets are zero after the a_loadaddress_ref_reg and just }
{ have to be set to 4. I put an Inc there so debugging may be }
{ easier (should offset be different from zero here, it will be }
{ easy to notice in the generated assembler }
inc(dst.offset,4);
inc(src.offset,4);
list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4));
list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4));
countreg := get_scratch_reg_int(list,OS_INT);
a_load_const_reg(list,OS_32,count,countreg);
{ explicitly allocate R_0 since it can be used safely here }
{ (for holding data that's being copied) }
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
objectlibrary.getlabel(lab);
a_label(list, lab);
list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1));
list.concat(taicpu.op_reg_ref(A_LWZU,r,src));
list.concat(taicpu.op_reg_ref(A_STWU,r,dst));
a_jmp(list,A_BC,C_NE,0,lab);
free_scratch_reg(list,countreg);
a_reg_dealloc(list,r);
len := len mod 4;
end;
count := len div 4;
if count > 0 then
{ unrolled loop }
begin
r.enum:=R_INTREGISTER;
r.number:=NR_R0;
a_reg_alloc(list,r);
for count2 := 1 to count do
begin
a_load_ref_reg(list,OS_32,src,r);
a_load_reg_ref(list,OS_32,r,dst);
inc(src.offset,4);
inc(dst.offset,4);
end;
a_reg_dealloc(list,r);
len := len mod 4;
end;
{$endif not ppc603}
{ copy the leftovers } { copy the leftovers }
if (len and 2) <> 0 then if (len and 2) <> 0 then
begin begin
@ -2484,7 +2542,14 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.99 2003-05-29 10:06:09 jonas Revision 1.100 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.99 2003/05/29 10:06:09 jonas
* also free temps in g_concatcopy if delsource is true * also free temps in g_concatcopy if delsource is true
Revision 1.98 2003/05/28 23:58:18 jonas Revision 1.98 2003/05/28 23:58:18 jonas

View File

@ -178,8 +178,10 @@ LMove4ByteAlignLoop:
{ while not aligned, continue } { while not aligned, continue }
bne cr0,LMove4ByteAlignLoop bne cr0,LMove4ByteAlignLoop
{$ifndef ppc603}
{ check for 32 byte alignment } { check for 32 byte alignment }
andi. r7,r4,31 andi. r7,r4,31
{$endif not ppc603}
{ we are going to copy one byte again (the one at the newly } { we are going to copy one byte again (the one at the newly }
{ aligned address), so increase count byte 1 } { aligned address), so increase count byte 1 }
addi r5,r5,1 addi r5,r5,1
@ -188,6 +190,7 @@ LMove4ByteAlignLoop:
{ if 11 <= count < 63, copy using dwords } { if 11 <= count < 63, copy using dwords }
blt cr7,LMoveDWords blt cr7,LMoveDWords
{$ifndef ppc603}
{ # of dwords to copy to reach 32 byte alignment (*4) } { # of dwords to copy to reach 32 byte alignment (*4) }
{ (depends on forward/backward copy) } { (depends on forward/backward copy) }
@ -202,6 +205,8 @@ LMove4ByteAlignLoop:
not r8, r6 not r8, r6
add r7, r7, r8 add r7, r7, r8
xor r7, r7, r8 xor r7, r7, r8
{$endif not ppc603}
{ multiply the update count with 4 } { multiply the update count with 4 }
slwi r10,r10,2 slwi r10,r10,2
slwi r6,r6,2 slwi r6,r6,2
@ -209,6 +214,7 @@ LMove4ByteAlignLoop:
add r3,r3,r6 add r3,r3,r6
add r4,r4,r6 add r4,r4,r6
{$ifndef ppc603}
beq cr0,LMove32BytesAligned beq cr0,LMove32BytesAligned
L32BytesAlignMoveLoop: L32BytesAlignMoveLoop:
{ count >= 39 -> align to 8 byte boundary and then use the FPU } { count >= 39 -> align to 8 byte boundary and then use the FPU }
@ -226,12 +232,17 @@ LMove32BytesAligned:
andi. r5,r5,31 andi. r5,r5,31
{ to decide if we will do some dword stores (instead of only } { to decide if we will do some dword stores (instead of only }
{ byte stores) afterwards or not } { byte stores) afterwards or not }
{$else not ppc603}
srwi r0,r5,4
andi. r5,r5,15
{$endif not ppc603}
cmpwi cr1,r5,11 cmpwi cr1,r5,11
mtctr r0 mtctr r0
{ r0 := count div 4, will be moved to ctr when copying dwords } { r0 := count div 4, will be moved to ctr when copying dwords }
srwi r0,r5,2 srwi r0,r5,2
{$ifndef ppc603}
{ adjust the update count: it will now be 8 or -8 depending on overlap } { adjust the update count: it will now be 8 or -8 depending on overlap }
slwi r10,r10,1 slwi r10,r10,1
@ -271,6 +282,18 @@ LMove32ByteDcbz:
stfdux f3,r4,r10 stfdux f3,r4,r10
bdnz LMove32ByteDcbz bdnz LMove32ByteDcbz
LMove32ByteLoopDone: LMove32ByteLoopDone:
{$else not ppc603}
LMove16ByteLoop:
lwzux r11,r3,r10
lwzux r7,r3,r10
lwzux r8,r3,r10
lwzux r9,r3,r10
stwux r11,r4,r10
stwux r7,r4,r10
stwux r8,r4,r10
stwux r9,r4,r10
bdnz LMove16ByteLoop
{$endif not ppc603}
{ cr0*4+eq is true if "count and 31" = 0 } { cr0*4+eq is true if "count and 31" = 0 }
beq cr0,LMoveDone beq cr0,LMoveDone
@ -278,8 +301,13 @@ LMove32ByteLoopDone:
{ make r10 again -1 or 1, but first adjust source/dest pointers } { make r10 again -1 or 1, but first adjust source/dest pointers }
sub r3,r3,r6 sub r3,r3,r6
sub r4,r4,r6 sub r4,r4,r6
{$ifndef ppc603}
srawi r10,r10,3 srawi r10,r10,3
srawi r6,r6,3 srawi r6,r6,3
{$else not ppc603}
srawi r10,r10,2
srawi r6,r6,2
{$endif not ppc603}
{ cr1 contains whether count <= 11 } { cr1 contains whether count <= 11 }
ble cr1,LMoveBytes ble cr1,LMoveBytes
@ -932,7 +960,14 @@ end ['R3','R10'];
{ {
$Log$ $Log$
Revision 1.48 2003-05-29 14:32:54 jonas Revision 1.49 2003-05-29 21:17:27 jonas
* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)
Revision 1.48 2003/05/29 14:32:54 jonas
* changed dcbst to dcbtst (former means "flush cache block to memory, * changed dcbst to dcbtst (former means "flush cache block to memory,
the latter means "I will soon store something to that cache block") the latter means "I will soon store something to that cache block")