* ppc64/linux: enhanced varargs support (passes tprintf test now)

* ppc64/linux: some small tweaks to the copy code for small memory locations

git-svn-id: trunk@1778 -
This commit is contained in:
tom_at_work 2005-11-17 21:35:01 +00:00
parent dcbf526530
commit 93bb997d8f
3 changed files with 84 additions and 26 deletions

View File

@ -231,6 +231,17 @@ implementation
cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
end;
{$endif powerpc}
{$ifdef powerpc64}
LOC_REGISTER,
LOC_CREGISTER :
begin
{ ppc64 abi passes floats of varargs in integer registers, so force a store }
location_force_mem(exprasmlist,left.location);
{ force integer size }
left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
end;
{$endif powerpc64}
{$if defined(sparc) or defined(arm)}
{ sparc and arm pass floats in normal registers }
LOC_REGISTER,
@ -273,6 +284,15 @@ implementation
cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
end;
{$endif powerpc}
{$ifdef powerpc64}
LOC_REGISTER,
LOC_CREGISTER :
begin
{ force integer size }
left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
end;
{$endif powerpc64}
{$if defined(sparc) or defined(arm) }
{ sparc and arm pass floats in normal registers }
LOC_REGISTER,

View File

@ -357,11 +357,13 @@ var
tmpref, ref: treference;
location: pcgparalocation;
sizeleft: aint;
adjusttail : boolean;
begin
location := paraloc.location;
tmpref := r;
sizeleft := paraloc.intsize;
adjusttail := false;
while assigned(location) do begin
case location^.loc of
LOC_REGISTER, LOC_CREGISTER:
@ -370,6 +372,10 @@ begin
a_load_ref_reg(list, size, location^.size, tmpref,
location^.register)
else
{$IFDEF extdebug}
list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO')));
{$ENDIF extdebug}
{ load non-integral sized memory location into register. This
memory location may be 1-sizeleft bytes in size.
Always assume that this memory area is properly aligned, eg. start
@ -421,7 +427,18 @@ begin
{ still > 8 bytes to load, so load data single register now }
a_load_ref_reg(list, location^.size, location^.size, tmpref,
location^.register);
{ the block is > 8 bytes, so we have to store any bytes not
a multiple of the register size beginning with the MSB }
adjusttail := true;
end;
(*
{ Uncomment this (for gcc compat) and be prepared for a whole bunch of errors :/ }
if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
a_op_const_reg(list, OP_SHL, OS_INT,
(tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT],
location^.register);
*)
end;
LOC_REFERENCE:
begin
@ -1489,23 +1506,33 @@ begin
{$IFDEF extdebug}
if len > high(aint) then
internalerror(2002072704);
list.concat(tai_comment.create(strpnew('g_concatcopy')));
{$ENDIF extdebug}
{ make sure short loads are handled as optimally as possible }
{ make sure short loads are handled as optimally as possible;
note that the data here never overlaps, so we can do a forward
copy at all times.
NOTE: maybe use some scratch registers to pair load/store instructions
}
if (len <= maxmoveunit) and
(byte(len) in [1, 2, 4, 8]) then
begin
if len < 8 then
begin
size := int_cgsize(len);
a_load_ref_ref(list, size, size, source, dest);
end
else
begin
a_reg_alloc(list, NR_F0);
a_loadfpu_ref_reg(list, OS_F64, source, NR_F0);
a_loadfpu_reg_ref(list, OS_F64, NR_F0, dest);
a_reg_dealloc(list, NR_F0);
if (len <= maxmoveunit) then begin
src := source; dst := dest;
while (len <> 0) do begin
if (len = 8) then begin
a_load_ref_ref(list, OS_64, OS_64, src, dst);
dec(len, 8);
end else if (len >= 4) then begin
a_load_ref_ref(list, OS_32, OS_32, src, dst);
inc(src.offset, 4); inc(dst.offset, 4);
dec(len, 4);
end else if (len >= 2) then begin
a_load_ref_ref(list, OS_16, OS_16, src, dst);
inc(src.offset, 2); inc(dst.offset, 2);
dec(len, 2);
end else begin
a_load_ref_ref(list, OS_8, OS_8, src, dst);
inc(src.offset, 1); inc(dst.offset, 1);
dec(len, 1);
end;
end;
exit;
end;
@ -1546,7 +1573,7 @@ begin
list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
a_load_const_reg(list, OS_32, count, countreg);
a_load_const_reg(list, OS_64, count, countreg);
{ explicitly allocate F0 since it can be used safely here
(for holding data that's being copied) }
a_reg_alloc(list, NR_F0);
@ -1793,6 +1820,7 @@ begin
tmpref.symbol := ref.symbol;
tmpref.relsymbol := ref.relsymbol;
tmpref.offset := ref.offset;
if (ref.base <> NR_NO) then begin
{ As long as the TOC isn't working we try to achieve the highest possible speed (in this
case by allowing instructions to execute in parallel) at the cost

View File

@ -42,8 +42,7 @@ type
procedure getintparaloc(calloption: tproccalloption; nr: longint; var
cgpara: TCGPara); override;
function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):
longint; override;
function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
tvarargsparalist): longint; override;
procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
@ -54,7 +53,7 @@ type
function create_paraloc_info_intern(p: tabstractprocdef; side:
tcallercallee; paras: tparalist;
var curintreg, curfloatreg, curmmreg: tsuperregister; var
cur_stack_offset: aword): longint;
cur_stack_offset: aword; isVararg : boolean): longint;
function parseparaloc(p: tparavarsym; const s: string): boolean; override;
end;
@ -169,7 +168,7 @@ begin
end;
case def.deftype of
variantdef,
formaldef:
formaldef:
result := true;
recorddef:
result :=
@ -256,7 +255,7 @@ begin
init_values(curintreg, curfloatreg, curmmreg, cur_stack_offset);
result := create_paraloc_info_intern(p, side, p.paras, curintreg, curfloatreg,
curmmreg, cur_stack_offset);
curmmreg, cur_stack_offset, false);
create_funcretloc_info(p, side);
end;
@ -264,7 +263,7 @@ end;
function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side:
tcallercallee; paras: tparalist;
var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
aword): longint;
aword; isVararg : boolean): longint;
var
stack_offset: longint;
paralen: aint;
@ -348,6 +347,18 @@ begin
end;
end
end;
{ patch FPU values into integer registers if we currently have
to pass them as vararg parameters
}
if (isVararg) and (paradef.deftype = floatdef) then begin
loc := LOC_REGISTER;
if paracgsize = OS_F64 then
paracgsize := OS_64
else
paracgsize := OS_32;
end;
hp.paraloc[side].alignment := std_param_align;
hp.paraloc[side].size := paracgsize;
hp.paraloc[side].intsize := paralen;
@ -360,8 +371,7 @@ begin
{ can become < 0 for e.g. 3-byte records }
while (paralen > 0) do begin
paraloc := hp.paraloc[side].add_location;
if (loc = LOC_REGISTER) and
(nextintreg <= RS_R10) then begin
if (loc = LOC_REGISTER) and (nextintreg <= RS_R10) then begin
paraloc^.loc := loc;
{ make sure we don't lose whether or not the type is signed }
if (paradef.deftype <> orddef) then
@ -430,11 +440,11 @@ begin
firstfloatreg := curfloatreg;
result := create_paraloc_info_intern(p, callerside, p.paras, curintreg,
curfloatreg, curmmreg, cur_stack_offset);
curfloatreg, curmmreg, cur_stack_offset, false);
if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) then begin
{ just continue loading the parameters in the registers }
result := create_paraloc_info_intern(p, callerside, varargspara, curintreg,
curfloatreg, curmmreg, cur_stack_offset);
curfloatreg, curmmreg, cur_stack_offset, true);
{ varargs routines have to reserve at least 64 bytes for the PPC64 ABI }
if (result < 64) then
result := 64;