From 9788b01d3113a43e9c4730e567f3a711f65ecf67 Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Sat, 14 Mar 2015 18:35:47 +0000 Subject: [PATCH] * rewrote most of the special case handling of parameter passing on ppc64, documenting the pecularities of the various calling conventions and marking what we do and don't support currently * also handle arrays for the ELFv2 ABI when determining whether an aggregate only contains floating values of a single type git-svn-id: trunk@30203 - --- compiler/powerpc64/cpupara.pas | 241 +++++++++++++++++++++++++-------- compiler/powerpc64/symcpu.pas | 29 ++++ 2 files changed, 215 insertions(+), 55 deletions(-) diff --git a/compiler/powerpc64/cpupara.pas b/compiler/powerpc64/cpupara.pas index f4d67dcfe2..65f06490cd 100644 --- a/compiler/powerpc64/cpupara.pas +++ b/compiler/powerpc64/cpupara.pas @@ -311,17 +311,22 @@ end; procedure tppcparamanager.create_paraloc_for_def(var para: TCGPara; varspez: tvarspez; paradef: tdef; var nextfloatreg, nextintreg: tsuperregister; var stack_offset: longint; const isVararg, forceintmem: boolean; const side: tcallercallee; const p: tabstractprocdef); var - adjusttail: boolean; - firstparaloc: boolean; paracgsize: tcgsize; loc: tcgloc; paraloc: pcgparalocation; + { def to use for all paralocs if <> nil } + alllocdef, + { def to use for the current paraloc } locdef, tmpdef: tdef; paralen: aint; fsym: tfieldvarsym; parashift: byte; + tailpadding, + firstparaloc, + paraaligned: boolean; begin + alllocdef:=nil; locdef:=nil; parashift := 0; para.reset; @@ -335,48 +340,134 @@ begin paralen := paradef.size else paralen := tcgsize2size[def_cgsize(paradef)]; - if (paradef.typ = recorddef) and - (varspez in [vs_value, vs_const]) then begin - { if a record has only one field and that field is } - { non-composite (not array or record), it must be } - { passed according to the rules of that type. 
} - if tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fsym) and - ((fsym.vardef.typ = floatdef) or - (not(target_info.system in systems_aix) and - (fsym.vardef.typ in [orddef, enumdef]))) then begin - paradef := fsym.vardef; - loc := getparaloc(paradef); - paracgsize := def_cgsize(paradef) - { With the new ABI, so-called "homogeneous" aggregates, i.e. struct, arrays, - or unions that (recursively) contain only elements of the same floating- - point or vector type are passed as if those elements were passed as - separate arguments. (This is done for up to 8 such elements.) } - end else if (target_info.abi=abi_powerpc_elfv2) and - tcpurecorddef(paradef).has_single_type_elfv2(tmpdef) and - ((8*tmpdef.size)<=paradef.size) then begin - locdef := tmpdef; - loc := getparaloc(locdef); - paracgsize := def_cgsize(locdef); - end else begin - loc := LOC_REGISTER; - paracgsize := int_cgsize(paralen); - if (paralen in [3, 5, 6, 7]) then - parashift := (8-paralen) * 8; - end; - end else begin - loc := getparaloc(paradef); - paracgsize := def_cgsize(paradef); - { for things like formaldef } - if (paracgsize = OS_NO) then begin - paracgsize := OS_ADDR; - paralen := tcgsize2size[OS_ADDR]; - end; - end + { default rules: + * integer parameters sign/zero-extended to 64 bit + * floating point register used -> skip equivalent GP register + * floating point parameters passed as is (32/64 bit) + * floating point parameters to variable arguments -> in int registers + * aggregates passed in consecutive integer registers + * all *aggregate* data in integer registers exactly mirrors the data + in memory -> on big endian it's left aligned (passed in most + significant part of the 64 bit word if it's < 64 bit), on little + endian it's right aligned (least significant part of the 64 bit + word) + + special rules: + +implemented + | + | * AIX/ELFv1/SysV ppc64 ABI (big endian only): + x a) single precision floats are stored in the second word of a 64 bit + location 
when passed on the stack + x b) aggregate with 1 floating point element passed like a floating + point parameter of the same size + x c) aggregates smaller than 64 bit are aligned in least significant bits + of a single 64bit location (incl. register) (AIX exception: it puts + them in the most significant bits) + + * ELFv2 ppc64 ABI: + x a) so-called "homogeneous" aggregates, i.e. struct, arrays, or unions + that (recursively) contain only elements of the same floating- + point or vector type, are passed as if those elements were passed as + separate arguments. This is done for up to 8 such elements. + x b) other than a), it's the same as the AIX ppc64 ABI + + * Darwin ppc64 ABI: + + - as in the general case, aggregates in registers mirror their place in + memory, so if e.g. a struct starts with a 32 bit integer, it's + placed in the upper 32 bits of the corresponding register. A plain + 32 bit integer para is however passed in the lower 32 bits, since it + is promoted to a 64 bit int first (see below) + + x a) aggregates with sizes 1, 2 and 4 bytes are padded with 0s on the left + (-> aligned in least significant bits of 64 bit word on big endian) to + a multiple of *4 bytes* (when passed by memory, don't occupy 8 bytes) + x b) other aggregates are padded with 0s on the right (-> aligned in most + significant bits of 64 bit word of integer register) to a multiple of + *4 bytes* + x c) all floating point parameters (not in aggregates) are promoted to + double (doesn't seem to be correct: 8 bytes are reserved in the + stack frame, but the compiler still stores a single in it (in the + lower 4 bytes -- like with SysV a) ) + x d) all integer parameters (not in aggregates) are promoted to 64 bit + (x) e) aggregates (incl. 
arrays) of exactly 16 bytes passed in two integer + registers + f) floats in *structures without unions* are processed per rule c) + (similar for vector fields) + g) other fields in *structures without unions* are processed + recursively according to e) / f) if they are aggragates, and h) + otherwise (i.e, without promotion!) + (x) h) everything else (structures with unions and size<>16, arrays with + size<>16, ...) is passed "normally" in integer registers + } + { should the tail be shifted into the most significant bits? } + tailpadding:=false; + { have we ensured that the next parameter location will be aligned to the + next 8 byte boundary? } + paraaligned:=false; + { ELFv2 a) } + if (target_info.abi=abi_powerpc_elfv2) and + (((paradef.typ=recorddef) and + tcpurecorddef(paradef).has_single_type_elfv2(tmpdef)) or + ((paradef.typ=arraydef) and + tcpuarraydef(paradef).has_single_type_elfv2(tmpdef))) and + (tmpdef.typ=floatdef { or vectordef }) and + (paradef.size<=(8*tmpdef.size)) then + begin + alllocdef:=tmpdef; + loc:=getparaloc(alllocdef); + paracgsize:=def_cgsize(paradef); + end + { AIX/ELFv1 b) } + else if (target_info.abi in [abi_powerpc_aix,abi_powerpc_sysv]) and + (paradef.typ=recorddef) and + tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fsym) and + (fsym.vardef.typ=floatdef) then + begin + paradef:=fsym.vardef; + loc:=getparaloc(paradef); + paracgsize:=def_cgsize(paradef) + end + else if (((paradef.typ=arraydef) and not + is_special_array(paradef)) or + (paradef.typ=recorddef)) then + begin + { should handle Darwin f/g/h) now, but can't model that yet } + + { general rule: aggregate data is aligned in the most significant bits + except for ELFv1 c) and Darwin a) } + if (target_info.endian=endian_big) and + ((target_info.abi in [abi_powerpc_aix,abi_powerpc_elfv2]) or + ((target_info.abi=abi_powerpc_sysv) and + (paralen>8)) or + ((target_info.abi=abi_powerpc_darwin) and + not(paralen in [1,2,4]))) then + tailpadding:=true 
+ { if we don't add tailpadding on the caller side, the callee will have + to shift the value in the register before it can store it to memory } + else if (target_info.endian=endian_big) and + (paralen in [3,5,6,7]) then + parashift:=(8-paralen)*8; + { general fallback rule: pass aggregate types in integer registers + without special adjustments (incl. Darwin h) } + loc:=LOC_REGISTER; + paracgsize:=int_cgsize(paralen); + end + else + begin + loc:=getparaloc(paradef); + paracgsize:=def_cgsize(paradef); + { for things like formaldef } + if (paracgsize=OS_NO) then + begin + paracgsize:=OS_ADDR; + paralen:=tcgsize2size[OS_ADDR]; + end; + end end; - { patch FPU values into integer registers if we currently have - to pass them as vararg parameters - } + { patch FPU values into integer registers if we are processing varargs } if (isVararg) and (paradef.typ = floatdef) then begin loc := LOC_REGISTER; if paracgsize = OS_F64 then @@ -385,6 +476,41 @@ begin paracgsize := OS_32; end; + { AIX/SysV a), Darwin c) -> skip 4 bytes in the stack frame } + if (target_info.endian=endian_big) and + (paradef.typ=floatdef) and + (tfloatdef(paradef).floattype=s32real) and + (nextfloatreg>RS_F13) then + begin + inc(stack_offset,4); + paraaligned:=true; + end; + + { Darwin d) } + if (target_info.abi=abi_powerpc_darwin) and + (paradef.typ in [orddef,enumdef]) and + (paralen<8) and + { we don't have to sign/zero extend the lower 8/16/32 bit on the callee + side since it's done on the caller side; however, if the value is + passed via memory, we do have to modify the stack offset since this + is big endian and otherwise we'll load/store the wrong bytes) } + ((side=callerside) or + forceintmem or + (nextintreg>RS_R10)) then + begin + if side=callerside then + begin + paralen:=8; + paradef:=s64inttype; + paracgsize:=OS_S64; + end + else + begin + inc(stack_offset,8-paralen); + paraaligned:=true; + end; + end; + para.alignment := std_param_align; para.size := paracgsize; para.intsize := paralen; 
@@ -395,9 +521,13 @@ begin paraloc^.loc := LOC_VOID; end else internalerror(2005011310); - adjusttail:=paralen>8; - if not assigned(locdef) then - locdef:=paradef; + if not assigned(alllocdef) then + locdef:=paradef + else + begin + locdef:=alllocdef; + paracgsize:=def_cgsize(locdef); + end; firstparaloc:=true; { can become < 0 for e.g. 3-byte records } while (paralen > 0) do begin @@ -411,20 +541,18 @@ begin paraloc^.shiftval := parashift; { make sure we don't lose whether or not the type is signed } - if (paracgsize <> OS_NO) and (paradef.typ <> orddef) then + if (paracgsize <> OS_NO) and + (paradef.typ <> orddef) and + not assigned(alllocdef) then begin paracgsize := int_cgsize(paralen); locdef:=get_paraloc_def(paradef, paralen, firstparaloc); end; - { aix requires that record data (including partial data) stored in - parameter registers is left-aligned. Other targets only do this if - the total size of the parameter was > 8 bytes. } - if (target_info.endian=endian_big) and - ((((target_info.system in systems_aix) and - (paradef.typ = recorddef)) or - adjusttail) and - (paralen < sizeof(aint))) then + { Partial aggregate data may have to be left-aligned. 
If so, add tail + padding } + if tailpadding and + (paralen < sizeof(aint)) then begin paraloc^.shiftval := (sizeof(aint)-paralen)*(-8); paraloc^.size := OS_INT; @@ -499,7 +627,10 @@ begin paraloc^.reference.offset := stack_offset; { align temp contents to next register size } - inc(stack_offset, align(paralen, 8)); + if not paraaligned then + inc(stack_offset, align(paralen, 8)) + else + inc(stack_offset, paralen); paralen := 0; end; firstparaloc:=false; diff --git a/compiler/powerpc64/symcpu.pas b/compiler/powerpc64/symcpu.pas index 12e14ca87f..479026934c 100644 --- a/compiler/powerpc64/symcpu.pas +++ b/compiler/powerpc64/symcpu.pas @@ -79,6 +79,8 @@ type tcpuclassrefdefclass = class of tcpuclassrefdef; tcpuarraydef = class(tarraydef) + { see tcpurecorddef.has_single_type_elfv2 } + function has_single_type_elfv2(out def: tdef): boolean; end; tcpuarraydefclass = class of tcpuarraydef; @@ -218,6 +220,33 @@ implementation result:=true; end; + + { tcpuarraydef } + { Walks from this array type down through every nested array level for which is_special_array is false and inspects the element type reached: a floatdef element is stored in def and the result is true; a recorddef element is delegated to tcpurecorddef.has_single_type_elfv2 (which receives def); any other element type yields false. NOTE(review): on the false/else path def is never assigned here, so callers should only read def after a true result -- confirm the record variant behaves the same. } + function tcpuarraydef.has_single_type_elfv2(out def: tdef): boolean; + var + checkdef: tdef; + begin + result:=false; { default for the early exit in the else branch below } + checkdef:=self; + while (checkdef.typ=arraydef) and + not is_special_array(checkdef) do + checkdef:=tarraydef(checkdef).elementdef; { step into the element type; the loop stops at the first non-array def or at an array for which is_special_array is true } + case checkdef.typ of + recorddef: + result:=tcpurecorddef(checkdef).has_single_type_elfv2(def); + floatdef: + begin + def:=checkdef; + result:=true; + exit; + end; + else + exit; { any other def, including an array that ended the loop because is_special_array was true: result stays false } + end; + end; + + begin { used tdef classes } cfiledef:=tcpufiledef;