From 93bb997d8fc392b845873d58671603a6a1da007f Mon Sep 17 00:00:00 2001 From: tom_at_work Date: Thu, 17 Nov 2005 21:35:01 +0000 Subject: [PATCH] * ppc64/linux: enhanced varargs support (passes tprintf test now) * ppc64/linux: some small tweak for small memory locations copy code git-svn-id: trunk@1778 - --- compiler/ncgcal.pas | 20 ++++++++++++ compiler/powerpc64/cgcpu.pas | 60 +++++++++++++++++++++++++--------- compiler/powerpc64/cpupara.pas | 30 +++++++++++------ 3 files changed, 84 insertions(+), 26 deletions(-) diff --git a/compiler/ncgcal.pas b/compiler/ncgcal.pas index 96dd127eb1..7fec4067fc 100644 --- a/compiler/ncgcal.pas +++ b/compiler/ncgcal.pas @@ -231,6 +231,17 @@ implementation cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara); end; {$endif powerpc} +{$ifdef powerpc64} + LOC_REGISTER, + LOC_CREGISTER : + begin + { ppc64 abi passes floats of varargs in integer registers, so force a store } + location_force_mem(exprasmlist,left.location); + { force integer size } + left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]); + cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara) + end; +{$endif powerpc64} {$if defined(sparc) or defined(arm)} { sparc and arm pass floats in normal registers } LOC_REGISTER, @@ -273,6 +284,15 @@ implementation cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara); end; {$endif powerpc} +{$ifdef powerpc64} + LOC_REGISTER, + LOC_CREGISTER : + begin + { force integer size } + left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]); + cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara) + end; +{$endif powerpc64} {$if defined(sparc) or defined(arm) } { sparc and arm pass floats in normal registers } LOC_REGISTER, diff --git a/compiler/powerpc64/cgcpu.pas b/compiler/powerpc64/cgcpu.pas index c6c10243ea..5237d93a06 100644 --- a/compiler/powerpc64/cgcpu.pas +++ b/compiler/powerpc64/cgcpu.pas @@ -357,11 +357,13 @@ var tmpref, ref: treference; location: pcgparalocation; sizeleft: aint; + adjusttail : boolean; begin location := paraloc.location; tmpref := r; sizeleft := paraloc.intsize; + adjusttail := false; while assigned(location) do begin case location^.loc of LOC_REGISTER, LOC_CREGISTER: @@ -370,6 +372,10 @@ begin a_load_ref_reg(list, size, location^.size, tmpref, location^.register) else + {$IFDEF extdebug} + list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO'))); + {$ENDIF extdebug} + { load non-integral sized memory location into register. This memory location be 1-sizeleft byte sized. Always assume that this memory area is properly aligned, eg. start @@ -421,7 +427,18 @@ begin { still > 8 bytes to load, so load data single register now } a_load_ref_reg(list, location^.size, location^.size, tmpref, location^.register); + { the block is > 8 bytes, so we have to store any bytes not + a multiple of the register size beginning with the MSB } + adjusttail := true; end; +(* + { Comment this in (for gcc compat) and be prepared for a whole bunch of errors :/ } + + if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then + a_op_const_reg(list, OP_SHL, OS_INT, + (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT], + location^.register); +*) end; LOC_REFERENCE: begin @@ -1489,23 +1506,33 @@ begin {$IFDEF extdebug} if len > high(aint) then internalerror(2002072704); + list.concat(tai_comment.create(strpnew('g_concatcopy'))); {$ENDIF extdebug} - { make sure short loads are handled as optimally as possible } + { make sure short loads are handled as optimally as possible; + note that the data here never overlaps, so we can do a forward + copy at all times. + NOTE: maybe use some scratch registers to pair load/store instructions + } - if (len <= maxmoveunit) and - (byte(len) in [1, 2, 4, 8]) then - begin - if len < 8 then - begin - size := int_cgsize(len); - a_load_ref_ref(list, size, size, source, dest); - end - else - begin - a_reg_alloc(list, NR_F0); - a_loadfpu_ref_reg(list, OS_F64, source, NR_F0); - a_loadfpu_reg_ref(list, OS_F64, NR_F0, dest); - a_reg_dealloc(list, NR_F0); + if (len <= maxmoveunit) then begin + src := source; dst := dest; + while (len <> 0) do begin + if (len = 8) then begin + a_load_ref_ref(list, OS_64, OS_64, src, dst); + dec(len, 8); + end else if (len >= 4) then begin + a_load_ref_ref(list, OS_32, OS_32, src, dst); + inc(src.offset, 4); inc(dst.offset, 4); + dec(len, 4); + end else if (len >= 2) then begin + a_load_ref_ref(list, OS_16, OS_16, src, dst); + inc(src.offset, 2); inc(dst.offset, 2); + dec(len, 2); + end else begin + a_load_ref_ref(list, OS_8, OS_8, src, dst); + inc(src.offset, 1); inc(dst.offset, 1); + dec(len, 1); + end; end; exit; end; @@ -1546,7 +1573,7 @@ begin list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8)); list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8)); countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); - a_load_const_reg(list, OS_32, count, countreg); + a_load_const_reg(list, OS_64, count, countreg); { explicitely allocate F0 since it can be used safely here (for holding date that's being copied) } a_reg_alloc(list, NR_F0); @@ -1793,6 +1820,7 @@ begin tmpref.symbol := ref.symbol; tmpref.relsymbol := ref.relsymbol; tmpref.offset := ref.offset; + if (ref.base <> NR_NO) then begin { As long as the TOC isn't working we try to achieve highest speed (in this case by allowing instructions execute in parallel) as possible at the cost diff --git a/compiler/powerpc64/cpupara.pas b/compiler/powerpc64/cpupara.pas index 38ed3d9927..a0eef43692 100644 --- a/compiler/powerpc64/cpupara.pas +++ b/compiler/powerpc64/cpupara.pas @@ -42,8 +42,7 @@ type procedure getintparaloc(calloption: tproccalloption; nr: longint; var cgpara: TCGPara); override; - function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): - longint; override; + function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override; function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist): longint; override; procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee); @@ -54,7 +53,7 @@ type function create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; var curintreg, curfloatreg, curmmreg: tsuperregister; var - cur_stack_offset: aword): longint; + cur_stack_offset: aword; isVararg : boolean): longint; function parseparaloc(p: tparavarsym; const s: string): boolean; override; end; @@ -169,7 +168,7 @@ begin end; case def.deftype of variantdef, - formaldef: + formaldef: result := true; recorddef: result := @@ -256,7 +255,7 @@ begin init_values(curintreg, curfloatreg, curmmreg, cur_stack_offset); result := create_paraloc_info_intern(p, side, p.paras, curintreg, curfloatreg, - curmmreg, cur_stack_offset); + curmmreg, cur_stack_offset, false); create_funcretloc_info(p, side); end; @@ -264,7 +263,7 @@ end; function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: - aword): longint; + aword; isVararg : boolean): longint; var stack_offset: longint; paralen: aint; @@ -348,6 +347,18 @@ begin end; end end; + + { patch FPU values into integer registers if we currently have + to pass them as vararg parameters + } + if (isVararg) and (paradef.deftype = floatdef) then begin + loc := LOC_REGISTER; + if paracgsize = OS_F64 then + paracgsize := OS_64 + else + paracgsize := OS_32; + end; + hp.paraloc[side].alignment := std_param_align; hp.paraloc[side].size := paracgsize; hp.paraloc[side].intsize := paralen; @@ -360,8 +371,7 @@ begin { can become < 0 for e.g. 3-byte records } while (paralen > 0) do begin paraloc := hp.paraloc[side].add_location; - if (loc = LOC_REGISTER) and - (nextintreg <= RS_R10) then begin + if (loc = LOC_REGISTER) and (nextintreg <= RS_R10) then begin paraloc^.loc := loc; { make sure we don't lose whether or not the type is signed } if (paradef.deftype <> orddef) then @@ -430,11 +440,11 @@ begin firstfloatreg := curfloatreg; result := create_paraloc_info_intern(p, callerside, p.paras, curintreg, - curfloatreg, curmmreg, cur_stack_offset); + curfloatreg, curmmreg, cur_stack_offset, false); if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) then begin { just continue loading the parameters in the registers } result := create_paraloc_info_intern(p, callerside, varargspara, curintreg, - curfloatreg, curmmreg, cur_stack_offset); + curfloatreg, curmmreg, cur_stack_offset, true); { varargs routines have to reserve at least 64 bytes for the PPC64 ABI } if (result < 64) then result := 64;