* Reworked the optimization of unused $parentfp for nested routines.

- Do not remove the $parentfp parameter as was done in the previous optimization approach. Instead when $parentfp is unused to the following:
      - On the caller side: Omit passing the value for $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=false (classic CPU targets). 
          Pass 0/nil as $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=true;
      - On the callee side: Prevent allocation of registers/resources for $parentfp.
  - When possible keep $parentfp in a register.
  - Set the pio_nested_access flag in tprocinfo.set_needs_parentfp() to properly handle deep nesting levels;

git-svn-id: trunk@45436 -
This commit is contained in:
yury 2020-05-19 13:17:47 +00:00
parent a501be1b2b
commit 2808873d1b
12 changed files with 150 additions and 63 deletions

View File

@ -103,7 +103,8 @@ implementation
uses
cutils,
verbose;
verbose,
cgbase;
constructor TDebugInfo.Create;
@ -430,7 +431,8 @@ implementation
localvarsym :
appendsym_localvar(list,tlocalvarsym(sym));
paravarsym :
appendsym_paravar(list,tparavarsym(sym));
if tparavarsym(sym).localloc.loc<>LOC_VOID then
appendsym_paravar(list,tparavarsym(sym));
constsym :
appendsym_const(list,tconstsym(sym));
typesym :

View File

@ -2366,8 +2366,7 @@ implementation
not is_nested_pd(def2))) or
((def1.typ=procdef) and { d) }
is_nested_pd(def1) and
((not(po_delphi_nested_cc in def1.procoptions) and
(pio_needs_parentfp in tprocdef(def1).implprocoptions)) or
(not(po_delphi_nested_cc in def1.procoptions) or
not is_nested_pd(def2))) or
((def1.typ=procvardef) and { e) }
(is_nested_pd(def1)<>is_nested_pd(def2))) then

View File

@ -41,6 +41,7 @@ interface
procedure push_value_para;virtual;
procedure push_formal_para;virtual;
procedure push_copyout_para;virtual;abstract;
function maybe_push_unused_para:boolean;virtual;
public
tempcgpara : tcgpara;
@ -60,6 +61,7 @@ interface
procedure release_para_temps;
procedure reorder_parameters;
procedure freeparas;
function is_parentfp_pushed:boolean;
protected
retloc: tcgpara;
paralocs: array of pcgpara;
@ -169,6 +171,8 @@ implementation
begin
if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
internalerror(200304235);
if maybe_push_unused_para then
exit;
{ see the call to keep_para_array_range in ncal: if that call returned
true, we overwrite the resultdef of left with its original resultdef
(to keep track of the range of the original array); we inserted a type
@ -262,6 +266,9 @@ implementation
not push_zero_sized_value_para then
exit;
if maybe_push_unused_para then
exit;
{ Move flags and jump in register to make it less complex }
if left.location.loc in [LOC_FLAGS,LOC_JUMP,LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
@ -273,6 +280,8 @@ implementation
procedure tcgcallparanode.push_formal_para;
begin
if maybe_push_unused_para then
exit;
{ allow passing of a constant to a const formaldef }
if (parasym.varspez=vs_const) and
not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
@ -281,6 +290,35 @@ implementation
end;
function tcgcallparanode.maybe_push_unused_para: boolean;
begin
{ Check if the parameter is unused.
Only the $parentfp parameter is supported for now. }
result:=(vo_is_parentfp in parasym.varoptions) and (parasym.varstate<=vs_initialised);
if not result then
exit;
{ The parameter is unused.
We can skip loading of the parameter when:
- the target does not strictly require all parameters (push_zero_sized_value_para = false)
and
- fixed stack is used
or the parameter is in a register
or the parameter is $parentfp. }
if not push_zero_sized_value_para and
(paramanager.use_fixed_stack or
(vo_is_parentfp in parasym.varoptions) or
(parasym.paraloc[callerside].Location^.Loc in [LOC_REGISTER,LOC_CREGISTER])) then
begin
{ Skip loading }
parasym.paraloc[callerside].Location^.Loc:=LOC_VOID;
tempcgpara.Location^.Loc:=LOC_VOID;
end
else
{ Load the dummy nil/0 value }
hlcg.a_load_const_cgpara(current_asmdata.CurrAsmList,left.resultdef,0,tempcgpara);
end;
procedure tcgcallparanode.secondcallparan;
var
pushaddr: boolean;
@ -909,6 +947,20 @@ implementation
end;
function tcgcallnode.is_parentfp_pushed: boolean;
var
i : longint;
begin
for i:=0 to procdefinition.paras.Count-1 do
with tparavarsym(procdefinition.paras[i]) do
if vo_is_parentfp in varoptions then
begin
result:=paraloc[callerside].Location^.Loc in [LOC_REFERENCE,LOC_CREFERENCE];
exit;
end;
result:=false;
end;
procedure tcgcallnode.pass_generate_code;
var
@ -1258,9 +1310,10 @@ implementation
pop_parasize(0);
end
{ frame pointer parameter is popped by the caller when it's passed the
Delphi way }
Delphi way and $parentfp is used }
else if (po_delphi_nested_cc in procdefinition.procoptions) and
not paramanager.use_fixed_stack then
not paramanager.use_fixed_stack and
is_parentfp_pushed() then
pop_parasize(sizeof(pint));
if procdefinition.generate_safecall_wrapper then

View File

@ -170,9 +170,7 @@ implementation
location_reset(location,LOC_REGISTER,def_cgsize(parentfpvoidpointertype));
currpi:=current_procinfo;
{ load framepointer of current proc }
hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp'));
if not assigned(hsym) then
internalerror(200309281);
hsym:=parentfpsym;
if (currpi.procdef.owner.symtablelevel=parentpd.parast.symtablelevel) and (hsym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) then
location.register:=hsym.localloc.register
else

View File

@ -105,7 +105,7 @@ implementation
of the current routine (and hence it has not been moved into the
nestedfp struct), get the original nestedfp parameter }
useparentfppara:=not assigned(current_procinfo.procdef.parentfpstruct);
hsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp'));
hsym:=parentfpsym;
if current_procinfo.procdef.parast.symtablelevel>parentpd.parast.symtablelevel then
useparentfppara:=
useparentfppara or

View File

@ -865,6 +865,13 @@ implementation
location_reset(vs.initialloc,LOC_REGISTER,OS_ADDR);
vs.initialloc.register:=NR_FRAME_POINTER_REG;
end
{ Unused parameters ($parentfp for now) need to be kept in the original location
to prevent allocation of registers/resources for them. }
else if (vs.varstate <= vs_initialised) and
(vo_is_parentfp in vs.varoptions) then
begin
tparavarsym(vs).paraloc[calleeside].get_location(vs.initialloc);
end
else
begin
{ if an open array is used, also its high parameter is used,
@ -1055,6 +1062,9 @@ implementation
loadn:
if (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) then
add_regvars(rv^,tabstractnormalvarsym(tloadnode(n).symtableentry).localloc);
loadparentfpn:
if current_procinfo.procdef.parast.symtablelevel>tloadparentfpnode(n).parentpd.parast.symtablelevel then
add_regvars(rv^,tloadparentfpnode(n).parentfpsym.localloc);
vecn:
begin
{ range checks sometimes need the high parameter }

View File

@ -357,8 +357,6 @@ implementation
if assigned(left) then
internalerror(200309289);
left:=cloadparentfpnode.create(tprocdef(symtable.defowner),lpf_forload);
{ we can't inline the referenced parent procedure }
include(tprocdef(symtable.defowner).implprocoptions,pio_nested_access);
{ reference in nested procedures, variable needs to be in memory }
{ and behaves as if its address escapes its parent block }
make_not_regable(self,[ra_different_scope]);

View File

@ -53,6 +53,10 @@ interface
lpf_forload
);
tloadparentfpnode = class(tunarynode)
private
_parentfpsym: tparavarsym;
function getparentfpsym: tparavarsym;
public
parentpd : tprocdef;
parentpdderef : tderef;
kind: tloadparentfpkind;
@ -65,6 +69,7 @@ interface
function pass_typecheck:tnode;override;
function docompare(p: tnode): boolean; override;
function dogetcopy : tnode;override;
property parentfpsym: tparavarsym read getparentfpsym;
end;
tloadparentfpnodeclass = class of tloadparentfpnode;
@ -372,32 +377,9 @@ implementation
function tloadparentfpnode.pass_typecheck:tnode;
{$ifdef dummy}
var
currpi : tprocinfo;
hsym : tparavarsym;
{$endif dummy}
begin
result:=nil;
resultdef:=parentfpvoidpointertype;
{$ifdef dummy}
{ currently parentfps are never loaded in registers (FK) }
if (current_procinfo.procdef.parast.symtablelevel<>parentpd.parast.symtablelevel) then
begin
currpi:=current_procinfo;
{ walk parents }
while (currpi.procdef.owner.symtablelevel>parentpd.parast.symtablelevel) do
begin
currpi:=currpi.parent;
if not assigned(currpi) then
internalerror(2005040602);
hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp'));
if not assigned(hsym) then
internalerror(2005040601);
hsym.varregable:=vr_none;
end;
end;
{$endif dummy}
end;
@ -408,6 +390,17 @@ implementation
end;
function tloadparentfpnode.getparentfpsym: tparavarsym;
begin
if not assigned(_parentfpsym) then
begin
_parentfpsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp'));
if not assigned(_parentfpsym) then
internalerror(200309281);
end;
result:=_parentfpsym;
end;
{*****************************************************************************
TADDRNODE
*****************************************************************************}

View File

@ -156,7 +156,8 @@ implementation
begin
vs:=cparavarsym.create('$parentfp',paranr,vs_value
,parentfpvoidpointertype,[vo_is_parentfp,vo_is_hidden_para]);
vs.varregable:=vr_none;
{ Mark $parentfp as used by default }
vs.varstate:=vs_read;
end
else
begin

View File

@ -202,8 +202,9 @@ unit procinfo;
procedure start_eh(list : TAsmList); virtual;
{ called to insert needed eh info into the exit code }
procedure end_eh(list : TAsmList); virtual;
{ Sets the pio_needs_parentfp flag for the current nested procedure and
all its parent procedures until parent_level }
{ Sets the pio_needs_parentfp flag for the current nested procedure.
Sets both pio_needs_parentfp and pio_nested_access for all parent
procedures until parent_level }
procedure set_needs_parentfp(parent_level: byte);
end;
tcprocinfo = class of tprocinfo;
@ -442,11 +443,15 @@ implementation
Internalerror(2020050302);
if parent_level<normal_function_level then
parent_level:=normal_function_level;
{ Set pio_needs_parentfp for the current proc }
pi:=Self;
repeat
include(pi.procdef.implprocoptions, pio_needs_parentfp);
pi:=pi.parent;
until pi.procdef.parast.symtablelevel<=parent_level;
include(pi.procdef.implprocoptions, pio_needs_parentfp);
{ Set both pio_needs_parentfp and pio_nested_access for all parent procs until parent_level }
while pi.procdef.parast.symtablelevel>parent_level do
begin
pi:=pi.parent;
pi.procdef.implprocoptions:=pi.procdef.implprocoptions+[pio_needs_parentfp,pio_nested_access];
end;
end;
end.

View File

@ -2309,8 +2309,6 @@ implementation
parentfpinitblock: tnode;
old_parse_generic: boolean;
recordtokens : boolean;
parentfp_sym: TSymEntry;
begin
old_current_procinfo:=current_procinfo;
old_block_type:=block_type;
@ -2386,25 +2384,6 @@ implementation
{ parse the code ... }
code:=block(current_module.islibrary);
{ If this is a nested procedure which does not access its parent's frame
pointer, we can optimize it by removing the hidden $parentfp parameter.
Do not perform this for:
- targets which use a special struct to access parent's variables;
- pure assembler procedures (for compatibility with old code).
}
if not (target_info.system in systems_fpnestedstruct) and
is_nested_pd(procdef) and
not (pio_needs_parentfp in procdef.implprocoptions) and
not (po_assembler in procdef.procoptions) then
begin
exclude(procdef.procoptions, po_delphi_nested_cc);
parentfp_sym:=procdef.parast.Find('parentfp');
if parentfp_sym = nil then
Internalerror(2020050301);
procdef.parast.Delete(parentfp_sym);
procdef.calcparas;
end;
if recordtokens then
begin
{ stop token recorder for generic template }

View File

@ -702,6 +702,8 @@ interface
private
procedure count_para(p:TObject;arg:pointer);
procedure insert_para(p:TObject;arg:pointer);
protected
procedure handle_unused_paras(side: tcallercallee); virtual;
end;
tprocvardef = class(tabstractprocdef)
@ -812,6 +814,7 @@ interface
procedure SetIsEmpty(AValue: boolean);
function GetHasInliningInfo: boolean;
procedure SetHasInliningInfo(AValue: boolean);
procedure handle_unused_paras(side: tcallercallee); override;
public
messageinf : tmessageinf;
dispid : longint;
@ -5273,6 +5276,11 @@ implementation
end;
procedure tabstractprocdef.handle_unused_paras(side: tcallercallee);
begin
end;
procedure tabstractprocdef.calcparas;
var
paracount : longint;
@ -5706,6 +5714,7 @@ implementation
has_paraloc_info:=callbothsides
else
has_paraloc_info:=callerside;
handle_unused_paras(callerside);
end;
if (side in [calleeside,callbothsides]) and
not(has_paraloc_info in [calleeside,callbothsides]) then
@ -5718,6 +5727,7 @@ implementation
has_paraloc_info:=callbothsides
else
has_paraloc_info:=calleeside;
handle_unused_paras(calleeside);
end;
end;
@ -6011,6 +6021,45 @@ implementation
end;
procedure tprocdef.handle_unused_paras(side: tcallercallee);
var
i : longint;
begin
{ Optimize unused parameters by preventing loading them on the callee side
and, if possible, preventing passing them on the caller side.
The caller side optimization is handled by tcgcallparanode.maybe_push_unused_para().
}
if (proctypeoption = potype_exceptfilter) or
(po_assembler in procoptions) then
exit;
{ Only $parentfp is optmized for now. }
if not is_nested_pd(self) then
exit;
{ Handle unused parameters }
for i:=0 to paras.Count-1 do
with tparavarsym(paras[i]) do
if vo_is_parentfp in varoptions then
begin
if pio_needs_parentfp in implprocoptions then
begin
{ If this routine is accessed from other nested routine,
$parentfp must be in a memory location. }
if pio_nested_access in implprocoptions then
varregable:=vr_none;
end
else
begin
{ Mark $parentfp as unused, since it has vs_read by default }
varstate:=vs_initialised;
if side=calleeside then
{ Set LOC_VOID as the parameter's location on the callee side }
paraloc[side].location^.Loc:=LOC_VOID;
break;
end;
end;
end;
procedure tprocdef.Setinterfacedef(AValue: boolean);
begin
if not assigned(implprocdefinfo) then