From 5fcb64f35034267b7ba265c745ad38a26037e3df Mon Sep 17 00:00:00 2001
From: tom_at_work <tom_at_work@idefix.freepascal.org>
Date: Wed, 19 Oct 2005 23:25:21 +0000
Subject: [PATCH] * parameter passing is now more ABI compliant * stack frame
 size optimization * optimized (64 bit) constant loading * some code generator
 code cleanup

git-svn-id: trunk@1539 -
---
 compiler/powerpc64/cgcpu.pas   | 231 +++++++++++++++------------------
 compiler/powerpc64/cpubase.pas |   2 +-
 compiler/powerpc64/cpupara.pas | 213 +++++++++---------------------
 compiler/powerpc64/cpupi.pas   |  12 +-
 4 files changed, 169 insertions(+), 289 deletions(-)

diff --git a/compiler/powerpc64/cgcpu.pas b/compiler/powerpc64/cgcpu.pas
index 09674709bc..70d1a1959a 100644
--- a/compiler/powerpc64/cgcpu.pas
+++ b/compiler/powerpc64/cgcpu.pas
@@ -139,6 +139,13 @@ type
     { of asmcondflags and destination addressing mode                }
     procedure a_jmp(list: taasmoutput; op: tasmop;
       c: tasmcondflag; crval: longint; l: tasmlabel);
+
+    { returns the lowest numbered FP register in use, and the number of used FP registers 
+      for the current procedure }
+    procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
+    { returns the lowest numbered GP register in use, and the number of used GP registers
+      for the current procedure }
+    procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
   end;
 
 const
@@ -350,26 +357,34 @@ end;
 procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
   reg: TRegister);
 
-var
-  scratchreg : TRegister;
-
-  procedure load32bitconstant(list : taasmoutput; size : TCGSize; a : longint;
-    reg : TRegister);
-  var is_half_signed : boolean;
+  { loads a 32 bit constant into the given register, using an optimal instruction sequence.
+    This is either LIS, LI or LI+ADDIS.
+    Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
+    sign extension was performed) }
+  function load32bitconstant(list : taasmoutput; size : TCGSize; a : longint;
+    reg : TRegister) : boolean;
+  var 
+    is_half_signed : byte;
   begin
-(*
-    // ts: test optimized code using LI/ADDIS
-
+    { if the lower 16 bits are zero, do a single LIS }
     if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
-      list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
+      list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
+      load32bitconstant := longint(a) < 0;
     end else begin
-      is_half_signed := smallint(a) < 0;
-      list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
-      if smallint((a shr 16) + ord(is_half_signed)) <> 0 then begin
-        list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint((a shr 16) + ord(is_half_signed))));
+      is_half_signed := ord(smallint(lo(a)) < 0);
+      list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
+      if smallint(hi(a) + is_half_signed) <> 0 then begin
+        list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint(hi(a) + is_half_signed)));
       end;
+      load32bitconstant := (smallint(a) < 0) or (a < 0);
     end;
-*)
+  end;
+
+  { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
+    the return value }
+  procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
+    reg : TRegister);
+  begin
     // only 16 bit constant? (-2^15 <= a <= +2^15-1)
     if (a >= low(smallint)) and (a <= high(smallint)) then begin
       list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
@@ -378,56 +393,52 @@ var
       if ((a and $FFFF) <> 0) then begin
         list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
         list.concat(taicpu.op_reg_reg_const(A_ORI, reg, reg, word(a)));
-
       end else begin
         list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
       end;
     end;
-
   end;
+
 var
+  extendssign : boolean;
+  {$IFDEF EXTDEBUG}
   astring : string;
+  {$ENDIF EXTDEBUG}
 
 begin
-  astring := 'a_load_const reg ' + inttostr(a) + ' ' + inttostr(tcgsize2size[size]);
+  {$IFDEF EXTDEBUG}
+  astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
   list.concat(tai_comment.create(strpnew(astring)));
+  {$ENDIF EXTDEBUG}
+
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
-  // load low 32 bit (as signed number)
-  load32bitconstant(list, size, lo(a), reg);
-
-  // load high 32 bit if needed :( (the second expression is optimization, to be enabled and tested later!)
-  if (size in [OS_64, OS_S64]) {and (hi(a) <> 0)} then begin
-    // allocate scratch reg (=R0 because it might be called at places where register
-    // allocation has already happened - either procedure entry/exit, and stack check
-    // code generation)
-    // Note: I hope this restriction can be lifted at some time
-
-    //scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-    // load high 32 bit
-    load32bitconstant(list, size, hi(a), NR_R0);
-    // combine both registers
-    list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
-  end;
-(*
-  // for 16/32 bit unsigned constants we need to make sure that the difference from this size to
-  // 32 bits is cleared (since we optimize loading them as signed 16 bit parts, but 32 bit ops are
-  // used for them.
-  // e.g. for 16 bit there's a problem if the (unsigned) constant is of the form
-  //   xx..xx xx..xx 00..00 1x..xx
-  // same problem as above for 32 bit: unsigned constants of the form
-  //   xx..xx xx..xx 00..00 1x..xx
-  // cause troubles. Signed are ok.
-  // for now, just clear the upper 48/32 bits (also because full 32 bit op usage isn't done yet)
-  if (size in [OS_16, OS_32]) {and (lo(a) < 0)} then begin
-    a_load_reg_reg(list, size, size, reg, reg);
-  end; *)
-  // need to clear MSB for unsigned 64 bit int because we did not load the upper
-  // 32 bit at all (second expression is optimization: enable and test later!)
-  // e.g. constants of the form 00..00 00..00 1x..xx xx..xx
-  if (size in [OS_64]) and (hi(a) = 0) then begin
-        list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, reg, reg, 0, 32));
-  end;
+  if (lo(a) = 0) and (hi(a) <> 0) then begin
+    { load only upper 32 bits, and shift }
+    load32bitconstant(list, size, hi(a), reg);
+    list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));    
+  end else begin
+    { load lower 32 bits }
+    extendssign := load32bitconstant(list, size, lo(a), reg);
+    if (extendssign) and (hi(a) = 0) then
+      { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic 
+        sign extension, clear those bits }
+      a_load_reg_reg(list, OS_32, OS_64, reg, reg)
+    else if (not 
+      ((extendssign and (longint(hi(a)) = -1)) or 
+       ((not extendssign) and (hi(a)=0)))
+      ) then begin
+      { only load the upper 32 bits, if the automatic sign extension is not okay,
+        that is, _not_ if 
+        - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper 
+         32 bits should contain -1
+        - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
+         32 bits should contain 0 }
+      load32bitconstantR0(list, size, hi(a), NR_R0);
+      { combine both registers }
+      list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
+    end;
+  end;  
 end;
 
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
@@ -794,6 +805,7 @@ var
   signed: boolean;
 
 begin
+  { todo: use 32 bit compares? }
   signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
   { in the following case, we generate more efficient code when }
   { signed is true                                              }
@@ -803,16 +815,14 @@ begin
   if signed then
     if (a >= low(smallint)) and (a <= high(smallint)) then
       list.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR0, reg, a))
-    else
-    begin
+    else begin
       scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
       a_load_const_reg(list, OS_64, a, scratch_register);
       list.concat(taicpu.op_reg_reg_reg(A_CMPD, NR_CR0, reg, scratch_register));
     end
   else if (aword(a) <= $FFFF) then
     list.concat(taicpu.op_reg_reg_const(A_CMPLDI, NR_CR0, reg, aword(a)))
-  else
-  begin
+  else begin
     scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
     a_load_const_reg(list, OS_64, a, scratch_register);
     list.concat(taicpu.op_reg_reg_reg(A_CMPLD, NR_CR0, reg,
@@ -930,6 +940,40 @@ begin
   { this work is done in g_proc_exit }
 end;
 
+procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
+var
+  reg : TSuperRegister;
+begin
+  fprcount := 0;
+  firstfpr := RS_F31;
+  if not (po_assembler in current_procinfo.procdef.procoptions) then begin
+    for reg := RS_F14 to RS_F31 do begin
+      if reg in rg[R_FPUREGISTER].used_in_proc then begin
+        fprcount := ord(RS_F31)-ord(reg)+1;
+        firstfpr := reg;
+        break;
+      end;
+    end;
+  end;
+end;
+
+procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
+var
+  reg : TSuperRegister;
+begin
+  gprcount := 0;
+  firstgpr := RS_R31;
+  if not (po_assembler in current_procinfo.procdef.procoptions) then begin
+    for reg := RS_R14 to RS_R31 do begin
+      if reg in rg[R_INTREGISTER].used_in_proc then begin
+        gprcount := ord(RS_R31)-ord(reg)+1;
+        firstgpr := reg;
+        break;
+      end;
+    end;
+  end;
+end;
+
 procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
   nostackframe: boolean);
 { generated the entry code of a procedure/function. Note: localsize is the }
@@ -939,40 +983,6 @@ procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
 { This procedure may be called before, as well as after g_return_from_proc }
 { is called. NOTE registers are not to be allocated through the register   }
 { allocator here, because the register colouring has already occured !!    }
-  procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
-  var
-    reg : TSuperRegister;
-  begin
-    fprcount := 0;
-    firstfpr := RS_F31;
-    if not (po_assembler in current_procinfo.procdef.procoptions) then begin
-      for reg := RS_F14 to RS_F31 do begin
-        if reg in rg[R_FPUREGISTER].used_in_proc then begin
-          fprcount := ord(RS_F31)-ord(reg)+1;
-          firstfpr := reg;
-          break;
-        end;
-      end;
-    end;
-  end;
-
-  procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
-  var
-    reg : TSuperRegister;
-  begin
-    gprcount := 0;
-    firstgpr := RS_R31;
-    if not (po_assembler in current_procinfo.procdef.procoptions) then begin
-      for reg := RS_R14 to RS_R31 do begin
-        if reg in rg[R_INTREGISTER].used_in_proc then begin
-          gprcount := ord(RS_R31)-ord(reg)+1;
-          firstgpr := reg;
-          break;
-        end;
-      end;
-    end;
-  end;
-
 var
   firstregfpu, firstreggpr: TSuperRegister;
   href: treference;
@@ -1009,7 +1019,6 @@ begin
     a_reg_alloc(list, NR_R12);
     list.concat(taicpu.op_reg_reg(A_MR, NR_R12, NR_STACK_POINTER_REG));
   end;
-
   // save registers, FPU first, then GPR
   reference_reset_base(href, NR_STACK_POINTER_REG, -8);
   if (fprcount > 0) then begin
@@ -1072,45 +1081,11 @@ end;
 
 procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
   boolean);
-
-  procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
-  var
-    reg : TSuperRegister;
-  begin
-    fprcount := 0;
-    firstfpr := RS_F31;
-    if not (po_assembler in current_procinfo.procdef.procoptions) then begin
-      for reg := RS_F14 to RS_F31 do begin
-        if reg in rg[R_FPUREGISTER].used_in_proc then begin
-          fprcount := ord(RS_F31)-ord(reg)+1;
-          firstfpr := reg;
-          break;
-        end;
-      end;
-    end;
-  end;
-
-  procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
-  var
-    reg : TSuperRegister;
-  begin
-    gprcount := 0;
-    firstgpr := RS_R31;
-    if not (po_assembler in current_procinfo.procdef.procoptions) then begin
-      for reg := RS_R14 to RS_R31 do begin
-        if reg in rg[R_INTREGISTER].used_in_proc then begin
-          gprcount := ord(RS_R31)-ord(reg)+1;
-          firstgpr := reg;
-          break;
-        end;
-      end;
-    end;
-  end;
-
 { This procedure may be called before, as well as after g_stackframe_entry }
 { is called. NOTE registers are not to be allocated through the register   }
 { allocator here, because the register colouring has already occured !!    }
 
+
 var
   regcount, firstregfpu, firstreggpr: TSuperRegister;
   href: treference;
@@ -1443,7 +1418,7 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
       procdef._class.vmtmethodoffset(procdef.extnumber));
     if not ((aint(href.offset) >= low(smallint)) and
       (aint(href.offset) <= high(smallint))) then begin
-      {$warning ts:adapt me}
+      {$warning ts:adapt me for offsets > 16 bit }
       list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
         smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
         0))));
diff --git a/compiler/powerpc64/cpubase.pas b/compiler/powerpc64/cpubase.pas
index 6b7f794e40..3af3693116 100644
--- a/compiler/powerpc64/cpubase.pas
+++ b/compiler/powerpc64/cpubase.pas
@@ -19,7 +19,7 @@
 
  ****************************************************************************
 }
-{ This Unit contains the base types for the PowerPC
+{ This Unit contains the base types for the PowerPC64
 }
 unit cpubase;
 
diff --git a/compiler/powerpc64/cpupara.pas b/compiler/powerpc64/cpupara.pas
index 268d87e05f..8ea067bb72 100644
--- a/compiler/powerpc64/cpupara.pas
+++ b/compiler/powerpc64/cpupara.pas
@@ -87,18 +87,14 @@ begin
   cgpara.intsize := tcgsize2size[OS_INT];
   cgpara.alignment := get_para_align(calloption);
   paraloc := cgpara.add_location;
-  with paraloc^ do
-  begin
+  with paraloc^ do begin
     size := OS_INT;
-    if (nr <= 8) then
-    begin
+    if (nr <= 8) then begin
       if nr = 0 then
         internalerror(200309271);
       loc := LOC_REGISTER;
       register := newreg(R_INTREGISTER, RS_R2 + nr, R_SUBWHOLE);
-    end
-    else
-    begin
+    end else begin
       loc := LOC_REFERENCE;
       paraloc^.reference.index := NR_STACK_POINTER_REG;
       if (target_info.abi <> abi_powerpc_aix) then
@@ -129,10 +125,7 @@ begin
     classrefdef:
       result := LOC_REGISTER;
     recorddef:
-      if (target_info.abi <> abi_powerpc_aix) then
-        result := LOC_REFERENCE
-      else
-        result := LOC_REGISTER;
+      result := LOC_REGISTER;
     objectdef:
       if is_object(p) then
         result := LOC_REFERENCE
@@ -183,7 +176,6 @@ begin
       result := true;
     recorddef:
       result :=
-        (target_info.abi <> abi_powerpc_aix) or
         ((varspez = vs_const) and
         ((calloption = pocall_mwpascal) or
         (not (calloption in [pocall_cdecl, pocall_cppdecl]) and
@@ -210,10 +202,11 @@ end;
 procedure tppcparamanager.init_values(var curintreg, curfloatreg, curmmreg:
   tsuperregister; var cur_stack_offset: aword);
 begin
+  { register parameter save area begins at 48(r2) }
   cur_stack_offset := 48;
   curintreg := RS_R3;
   curfloatreg := RS_F1;
-  curmmreg := RS_M1;
+  curmmreg := RS_M2;
 end;
 
 procedure tppcparamanager.create_funcretloc_info(p: tabstractprocdef; side:
@@ -230,36 +223,28 @@ begin
   location_reset(p.funcretloc[side], LOC_INVALID, OS_NO);
   p.funcretloc[side].size := retcgsize;
   { void has no location }
-  if is_void(p.rettype.def) then
-  begin
+  if is_void(p.rettype.def) then begin
     p.funcretloc[side].loc := LOC_VOID;
     exit;
   end;
 
   { Return in FPU register? }
-  if p.rettype.def.deftype = floatdef then
-  begin
+  if p.rettype.def.deftype = floatdef then begin
     p.funcretloc[side].loc := LOC_FPUREGISTER;
     p.funcretloc[side].register := NR_FPU_RESULT_REG;
     p.funcretloc[side].size := retcgsize;
-  end
-  else
-    { Return in register? } if not ret_in_param(p.rettype.def, p.proccalloption)
-      then
-    begin
-      begin
-        p.funcretloc[side].loc := LOC_REGISTER;
-        p.funcretloc[side].size := retcgsize;
-        if side = callerside then
-          p.funcretloc[side].register := newreg(R_INTREGISTER,
-            RS_FUNCTION_RESULT_REG, cgsize2subreg(retcgsize))
-        else
-          p.funcretloc[side].register := newreg(R_INTREGISTER,
-            RS_FUNCTION_RETURN_REG, cgsize2subreg(retcgsize));
-      end;
-    end
-    else
-    begin
+  end else
+    { Return in register? } 
+    if not ret_in_param(p.rettype.def, p.proccalloption) then begin
+      p.funcretloc[side].loc := LOC_REGISTER;
+      p.funcretloc[side].size := retcgsize;
+      if side = callerside then
+        p.funcretloc[side].register := newreg(R_INTREGISTER,
+          RS_FUNCTION_RESULT_REG, cgsize2subreg(retcgsize))
+      else
+        p.funcretloc[side].register := newreg(R_INTREGISTER,
+          RS_FUNCTION_RETURN_REG, cgsize2subreg(retcgsize));
+    end else begin
       p.funcretloc[side].loc := LOC_REFERENCE;
       p.funcretloc[side].size := retcgsize;
     end;
@@ -282,8 +267,8 @@ end;
 
 function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side:
   tcallercallee; paras: tparalist;
-  var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
-    aword): longint;
+var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
+  aword): longint;
 var
   stack_offset: longint;
   paralen: aint;
@@ -309,13 +294,11 @@ begin
 
   maxfpureg := RS_F13;
 
-  for i := 0 to paras.count - 1 do
-  begin
+  for i := 0 to paras.count - 1 do begin
     hp := tparavarsym(paras[i]);
     paradef := hp.vartype.def;
     { Syscall for Morphos can have already a paraloc set }
-    if (vo_has_explicit_paraloc in hp.varoptions) then
-    begin
+    if (vo_has_explicit_paraloc in hp.varoptions) then begin
       if not (vo_is_syscall_lib in hp.varoptions) then
         internalerror(200412153);
       continue;
@@ -323,8 +306,7 @@ begin
     hp.paraloc[side].reset;
     { currently only support C-style array of const }
     if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) and
-      is_array_of_const(paradef) then
-    begin
+      is_array_of_const(paradef) then begin
       paraloc := hp.paraloc[side].add_location;
       { hack: the paraloc must be valid, but is not actually used }
       paraloc^.loc := LOC_REGISTER;
@@ -336,50 +318,37 @@ begin
     if (hp.varspez in [vs_var, vs_out]) or
       push_addr_param(hp.varspez, paradef, p.proccalloption) or
       is_open_array(paradef) or
-      is_array_of_const(paradef) then
-    begin
+      is_array_of_const(paradef) then begin
       paradef := voidpointertype.def;
       loc := LOC_REGISTER;
       paracgsize := OS_ADDR;
       paralen := tcgsize2size[OS_ADDR];
-    end
-    else
-    begin
+    end else begin
       if not is_special_array(paradef) then
         paralen := paradef.size
       else
         paralen := tcgsize2size[def_cgsize(paradef)];
-      if (target_info.abi = abi_powerpc_aix) and
-        (paradef.deftype = recorddef) and
-        (hp.varspez in [vs_value, vs_const]) then
-      begin
+      if (paradef.deftype = recorddef) and
+        (hp.varspez in [vs_value, vs_const]) then begin
         { if a record has only one field and that field is }
         { non-composite (not array or record), it must be  }
         { passed according to the rules of that type.       }
         if (trecorddef(hp.vartype.def).symtable.symindex.count = 1) and
           (not trecorddef(hp.vartype.def).isunion) and
-          ((tabstractvarsym(trecorddef(hp.vartype.def).symtable.symindex.search(1)).vartype.def.deftype = floatdef) or
-          ((target_info.system = system_powerpc_darwin) and
-          (tabstractvarsym(trecorddef(hp.vartype.def).symtable.symindex.search(1)).vartype.def.deftype in [orddef, enumdef]))) then
-        begin
+          (tabstractvarsym(trecorddef(hp.vartype.def).symtable.symindex.search(1)).vartype.def.deftype = floatdef) then begin
           paradef :=
             tabstractvarsym(trecorddef(hp.vartype.def).symtable.symindex.search(1)).vartype.def;
           loc := getparaloc(paradef);
           paracgsize := def_cgsize(paradef);
-        end
-        else
-        begin
+        end else begin
           loc := LOC_REGISTER;
           paracgsize := int_cgsize(paralen);
         end;
-      end
-      else
-      begin
+      end else begin
         loc := getparaloc(paradef);
         paracgsize := def_cgsize(paradef);
         { for things like formaldef }
-        if (paracgsize = OS_NO) then
-        begin
+        if (paracgsize = OS_NO) then begin
           paracgsize := OS_ADDR;
           paralen := tcgsize2size[OS_ADDR];
         end;
@@ -389,20 +358,16 @@ begin
     hp.paraloc[side].size := paracgsize;
     hp.paraloc[side].intsize := paralen;
     if (paralen = 0) then
-      if (paradef.deftype = recorddef) then
-      begin
+      if (paradef.deftype = recorddef) then begin
         paraloc := hp.paraloc[side].add_location;
         paraloc^.loc := LOC_VOID;
-      end
-      else
+      end else
         internalerror(2005011310);
     { can become < 0 for e.g. 3-byte records }
-    while (paralen > 0) do
-    begin
+    while (paralen > 0) do begin
       paraloc := hp.paraloc[side].add_location;
       if (loc = LOC_REGISTER) and
-        (nextintreg <= RS_R10) then
-      begin
+        (nextintreg <= RS_R10) then begin
         paraloc^.loc := loc;
         { make sure we don't lose whether or not the type is signed }
         if (paradef.deftype <> orddef) then
@@ -414,29 +379,36 @@ begin
         paraloc^.register := newreg(R_INTREGISTER, nextintreg, R_SUBNONE);
         inc(nextintreg);
         dec(paralen, tcgsize2size[paraloc^.size]);
-        if target_info.abi = abi_powerpc_aix then
-          inc(stack_offset, tcgsize2size[paraloc^.size]);
-      end
-      else if (loc = LOC_FPUREGISTER) and
-        (nextfloatreg <= maxfpureg) then
-      begin
+
+        inc(stack_offset, tcgsize2size[paraloc^.size]);
+      end else if (loc = LOC_FPUREGISTER) and
+        (nextfloatreg <= maxfpureg) then begin
         paraloc^.loc := loc;
         paraloc^.size := paracgsize;
         paraloc^.register := newreg(R_FPUREGISTER, nextfloatreg, R_SUBWHOLE);
+        { the PPC64 ABI says that the GPR index is increased for every parameter, no matter
+        which type it is stored in }
+        inc(nextintreg);
         inc(nextfloatreg);
         dec(paralen, tcgsize2size[paraloc^.size]);
-        { if nextfpureg > maxfpureg, all intregs are already used, since there }
-        { are less of those available for parameter passing in the AIX abi     }
-      end
-      else { LOC_REFERENCE }
-      begin
+
+        inc(stack_offset, tcgsize2size[paraloc^.size]);
+      end else if (loc = LOC_MMREGISTER) then begin
+        { Altivec not supported }
+        internalerror(200510192);
+      end else begin 
+        { either LOC_REFERENCE, or one of the above which must be passed on the
+        stack because of insufficient registers }
         paraloc^.loc := LOC_REFERENCE;
         paraloc^.size := int_cgsize(paralen);
         if (side = callerside) then
           paraloc^.reference.index := NR_STACK_POINTER_REG
         else
+          { during procedure entry, R12 contains the old stack pointer }
           paraloc^.reference.index := NR_R12;
         paraloc^.reference.offset := stack_offset;
+
+        { TODO: change this to the next power of two (natural alignment) }
         inc(stack_offset, align(paralen, 8));
         paralen := 0;
       end;
@@ -470,9 +442,7 @@ begin
     result := create_paraloc_info_intern(p, callerside, varargspara, curintreg,
       curfloatreg, curmmreg, cur_stack_offset);
     { varargs routines have to reserve at least 64 bytes for the AIX abi }
-    { ts: dunno??? }
-    if (target_info.abi = abi_powerpc_aix) and
-      (result < 64) then
+    if (result < 64) then
       result := 64;
   end
   else
@@ -497,76 +467,9 @@ begin
 end;
 
 function tppcparamanager.parseparaloc(p: tparavarsym; const s: string): boolean;
-var
-  paraloc: pcgparalocation;
-  paracgsize: tcgsize;
 begin
-  result := false;
-  case target_info.system of
-    system_powerpc_morphos:
-      begin
-        paracgsize := def_cgsize(p.vartype.def);
-        p.paraloc[callerside].alignment := 8;
-        p.paraloc[callerside].size := paracgsize;
-        p.paraloc[callerside].intsize := tcgsize2size[paracgsize];
-        paraloc := p.paraloc[callerside].add_location;
-        paraloc^.loc := LOC_REFERENCE;
-        paraloc^.size := paracgsize;
-        paraloc^.reference.index := newreg(R_INTREGISTER, RS_R2, R_SUBWHOLE);
-        { pattern is always uppercase'd }
-        if s = 'D0' then
-          paraloc^.reference.offset := 0
-        else if s = 'D1' then
-          paraloc^.reference.offset := 8
-        else if s = 'D2' then
-          paraloc^.reference.offset := 16
-        else if s = 'D3' then
-          paraloc^.reference.offset := 24
-        else if s = 'D4' then
-          paraloc^.reference.offset := 32
-        else if s = 'D5' then
-          paraloc^.reference.offset := 40
-        else if s = 'D6' then
-          paraloc^.reference.offset := 48
-        else if s = 'D7' then
-          paraloc^.reference.offset := 56
-        else if s = 'A0' then
-          paraloc^.reference.offset := 64
-        else if s = 'A1' then
-          paraloc^.reference.offset := 72
-        else if s = 'A2' then
-          paraloc^.reference.offset := 80
-        else if s = 'A3' then
-          paraloc^.reference.offset := 88
-        else if s = 'A4' then
-          paraloc^.reference.offset := 96
-        else if s = 'A5' then
-          paraloc^.reference.offset := 104
-            { 'A6' (offset 56) is used by mossyscall as libbase, so API
-            never passes parameters in it,
-            Indeed, but this allows to declare libbase either explicitly
-            or let the compiler insert it }
-        else if s = 'A6' then
-          paraloc^.reference.offset := 112
-            { 'A7' is the stack pointer on 68k, can't be overwritten
-            by API calls, so it has no offset }
-          { 'R12' is special, used internally to support r12base sysv
-            calling convention }
-        else if s = 'R12' then
-        begin
-          paraloc^.loc := LOC_REGISTER;
-          paraloc^.size := OS_ADDR;
-          paraloc^.register := NR_R12;
-        end
-        else
-          exit;
-
-        { copy to callee side }
-        p.paraloc[calleeside].add_location^ := paraloc^;
-      end;
-  else
-    internalerror(200404182);
-  end;
+  { not supported/required for PowerPC64-linux target }
+  internalerror(200404182);
   result := true;
 end;
 
diff --git a/compiler/powerpc64/cpupi.pas b/compiler/powerpc64/cpupi.pas
index 2c67a7c894..bfde833b9c 100644
--- a/compiler/powerpc64/cpupi.pas
+++ b/compiler/powerpc64/cpupi.pas
@@ -65,11 +65,13 @@ var
   locals: longint;
 begin
   if not (po_assembler in procdef.procoptions) then begin
-    { always allocate space for 8 * 8 bytes for registers R3-R10 and stack header if
-      there's a stack frame }
-    if (maxpushedparasize < 64) then begin
-      maxpushedparasize := 64;
-    end;
+    { the ABI specification says that it is required to always allocate space for 8 * 8 bytes
+      for registers R3-R10 and stack header if there's a stack frame, but GCC doesn't do that,
+      so we don't that too. Uncomment the next three lines if this is required }
+    // if (maxpushedparasize < 64) then begin
+    //  maxpushedparasize := 64;
+    // end;
+    { align the stack properly }
     ofs := align(maxpushedparasize + LinkageAreaSizeELF, ELF_STACK_ALIGN);
     tg.setfirsttemp(ofs);
   end else begin