From dc70db9402d5637ec2a26e0e18e9890b387b2a99 Mon Sep 17 00:00:00 2001
From: Jonas Maebe <jonas@freepascal.org>
Date: Wed, 11 Apr 2012 18:02:13 +0000
Subject: [PATCH]   * parameter passing support for AIX:    o the last bytes of
 records passed by value whose size is not a multiple      of the register
 size must be passed in the upper (leftmost) bytes of      a register

git-svn-id: trunk@20804 -
---
 compiler/cgobj.pas           | 33 ++++++++++++++++++++++++++++-----
 compiler/parabase.pas        | 21 ++++++++++++++-------
 compiler/paramgr.pas         |  1 +
 compiler/powerpc/cpupara.pas | 22 +++++++++++++++++-----
 4 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas
index df286c95a8..1eb8ae257a 100644
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@@ -899,6 +899,8 @@ implementation
       begin
          cgpara.check_simple_location;
          paramanager.alloccgpara(list,cgpara);
+         if cgpara.location^.shiftval<0 then
+           a_op_const_reg(list,OP_SHL,cgpara.location^.size,-cgpara.location^.shiftval,r);
          case cgpara.location^.loc of
             LOC_REGISTER,LOC_CREGISTER:
               a_load_reg_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
@@ -974,6 +976,8 @@ implementation
                      begin
                        cgpara.check_simple_location;
                        a_load_ref_reg(list,size,location^.size,tmpref,location^.register);
+                       if location^.shiftval<0 then
+                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
                      end
                    { there's a lot more data left, and the current paraloc's
                      register is entirely filled with part of that data }
@@ -987,6 +991,8 @@ implementation
                    else if (sizeleft in [1,2{$ifndef cpu16bitalu},4{$endif}{$ifdef cpu64bitalu},8{$endif}]) then
                      begin
                        a_load_ref_reg(list,int_cgsize(sizeleft),location^.size,tmpref,location^.register);
+                       if location^.shiftval<0 then
+                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
                      end
                    { we're at the end of the data, and we need multiple loads
                      to get it in the register because it's an irregular size }
@@ -1047,6 +1053,8 @@ implementation
                              a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
                            inc(tmpref.offset);
                          end;
+                       if location^.shiftval<0 then
+                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
                        { the loop will already adjust the offset and sizeleft }
                        dec(tmpref.offset,orgsizeleft);
                        sizeleft:=orgsizeleft;
@@ -1128,15 +1136,28 @@ implementation
     procedure tcg.a_load_cgparaloc_ref(list : TAsmList;const paraloc : TCGParaLocation;const ref : treference;sizeleft : tcgint;align : longint);
       var
         href : treference;
+        hreg : tregister;
+        cgsize: tcgsize;
       begin
          case paraloc.loc of
            LOC_REGISTER :
              begin
-{$IFDEF POWERPC64}
-               if (paraloc.shiftval <> 0) then
-                 a_op_const_reg_reg(list, OP_SHL, OS_INT, paraloc.shiftval, paraloc.register, paraloc.register);
-{$ENDIF POWERPC64}
-               a_load_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref);
+               hreg:=paraloc.register;
+               cgsize:=paraloc.size;
+               if paraloc.shiftval>0 then
+                 a_op_const_reg_reg(list,OP_SHL,OS_INT,paraloc.shiftval,paraloc.register,paraloc.register)
+               else if (paraloc.shiftval<0) and
+                       (sizeleft in [1,2,4]) then
+                 begin
+                   a_op_const_reg_reg(list,OP_SHR,OS_INT,-paraloc.shiftval,paraloc.register,paraloc.register);
+                   { convert to a register of 1/2/4 bytes in size, since the
+                     original register had to be made larger to be able to hold
+                     the shifted value }
+                   cgsize:=int_cgsize(tcgsize2size[OS_INT]-(-paraloc.shiftval div 8));
+                   hreg:=getintregister(list,cgsize);
+                   a_load_reg_reg(list,OS_INT,cgsize,paraloc.register,hreg);
+                 end;
+               a_load_reg_ref(list,paraloc.size,cgsize,hreg,ref);
              end;
            LOC_MMREGISTER :
              begin
@@ -1175,6 +1196,8 @@ implementation
          case paraloc.loc of
            LOC_REGISTER :
              begin
+               if paraloc.shiftval<0 then
+                 a_op_const_reg_reg(list,OP_SHR,OS_INT,-paraloc.shiftval,paraloc.register,paraloc.register);
                case getregtype(reg) of
                  R_INTREGISTER:
                    a_load_reg_reg(list,paraloc.size,regsize,paraloc.register,reg);
diff --git a/compiler/parabase.pas b/compiler/parabase.pas
index 047f7f1a7e..07057d8dba 100644
--- a/compiler/parabase.pas
+++ b/compiler/parabase.pas
@@ -48,7 +48,11 @@ unit parabase;
            LOC_CMMREGISTER,
            LOC_REGISTER,
            LOC_CREGISTER : (
-             { The number of bits the value in the register must be shifted to the left before
+             {
+
+             * If shiftval > 0:
+
+             The number of bits the value in the register must be shifted to the left before
              it can be stored to memory in the function prolog.
              This is used for passing OS_NO memory blocks less than register size and of "odd"
              (3, 5, 6, 7) size on big endian machines, so that small memory blocks passed via
@@ -56,8 +60,15 @@ unit parabase;
 
              E.g. the value $5544433 is passed in bits 40-63 of the register (others are zero),
              but they should actually be stored in the first bits of the stack location reserved
-             for this value. So they have to be shifted left by this amount of bits before. }
-             {$IFDEF POWERPC64}shiftval : byte;{$ENDIF POWERPC64}
+             for this value. So they have to be shifted left by this amount of bits before.
+
+             * If shiftval < 0:
+
+             Similar to the above, but the shifting must always be done
+               1) for all parameter sizes < regsize, and
+               2) on the caller side
+             }
+             shiftval : shortint;
              register : tregister);
        end;
 
@@ -281,9 +292,7 @@ implementation
               LOC_REGISTER,
               LOC_CREGISTER :
                 begin
-{$ifdef powerpc64}
                   ppufile.putbyte(hparaloc^.shiftval);
-{$endif}
                   ppufile.putlongint(longint(hparaloc^.register));
                 end;
               { This seems to be required for systems using explicitparaloc (eg. MorphOS)
@@ -331,9 +340,7 @@ implementation
               LOC_REGISTER,
               LOC_CREGISTER :
                 begin
-{$ifdef powerpc64}
                   hparaloc^.shiftval:=ppufile.getbyte;
-{$endif}
                   hparaloc^.register:=tregister(ppufile.getlongint);
                 end;
               { This seems to be required for systems using explicitparaloc (eg. MorphOS)
diff --git a/compiler/paramgr.pas b/compiler/paramgr.pas
index 13d149b641..d812df6753 100644
--- a/compiler/paramgr.pas
+++ b/compiler/paramgr.pas
@@ -366,6 +366,7 @@ implementation
               len:=tcgsize2size[paraloc^.size];
             newparaloc:=cgpara.add_location;
             newparaloc^.size:=paraloc^.size;
+            newparaloc^.shiftval:=paraloc^.shiftval;
             { $warning maybe release this optimization for all targets?  }
             { released for all CPUs:
               i386 isn't affected anyways because it uses the stack to push parameters
diff --git a/compiler/powerpc/cpupara.pas b/compiler/powerpc/cpupara.pas
index 1ee35eb781..8c3266bc5e 100644
--- a/compiler/powerpc/cpupara.pas
+++ b/compiler/powerpc/cpupara.pas
@@ -137,9 +137,10 @@ unit cpupara;
               else
                 result:=LOC_REFERENCE;
             recorddef:
-              if (target_info.abi<>abi_powerpc_aix) or
-                 ((p.size >= 3) and
-                  ((p.size mod 4) <> 0)) then
+              if not(target_info.system in systems_aix) and
+                 ((target_info.abi<>abi_powerpc_aix) or
+                  ((p.size >= 3) and
+                   ((p.size mod 4) <> 0))) then
                 result:=LOC_REFERENCE
               else
                 result:=LOC_REGISTER;
@@ -501,6 +502,15 @@ unit cpupara;
                         paraloc^.size := OS_INT
                       else
                         paraloc^.size := paracgsize;
+                      { AIX requires record data stored in parameter
+                        registers to be left-aligned }
+                      if (target_info.system in systems_aix) and
+                         (paradef.typ = recorddef) and
+                         (tcgsize2size[paraloc^.size] <> sizeof(aint)) then
+                        begin
+                          paraloc^.shiftval := (sizeof(aint)-tcgsize2size[paraloc^.size])*(-8);
+                          paraloc^.size := OS_INT;
+                        end;
                       paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBNONE);
                       inc(nextintreg);
                       dec(paralen,tcgsize2size[paraloc^.size]);
@@ -561,9 +571,11 @@ unit cpupara;
                            tppcprocinfo(current_procinfo).needs_frame_pointer := true;
                          end;
 
-                       if (target_info.abi = abi_powerpc_aix) and
+                       if not((target_info.system in systems_aix) and
+                              (paradef.typ=recorddef)) and
+                          (target_info.abi = abi_powerpc_aix) and
                           (hp.paraloc[side].intsize < 3) then
-                           paraloc^.reference.offset:=stack_offset+(4-paralen)
+                         paraloc^.reference.offset:=stack_offset+(4-paralen)
                        else
                          paraloc^.reference.offset:=stack_offset;