* Reworked the optimization of unused $parentfp for nested routines.

- Do not remove the $parentfp parameter as was done in the previous optimization approach. Instead when $parentfp is unused to the following:
      - On the caller side: Omit passing the value for $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=false (classic CPU targets). 
          Pass 0/nil as $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=true;
      - On the callee side: Prevent allocation of registers/resources for $parentfp.
  - When possible keep $parentfp in a register.
  - Set the pio_nested_access flag in tprocinfo.set_needs_parentfp() to properly handle deep nesting levels;

git-svn-id: trunk@45436 -
This commit is contained in:
yury 2020-05-19 13:17:47 +00:00
parent a501be1b2b
commit 2808873d1b
12 changed files with 150 additions and 63 deletions

View File

@ -103,7 +103,8 @@ implementation
uses uses
cutils, cutils,
verbose; verbose,
cgbase;
constructor TDebugInfo.Create; constructor TDebugInfo.Create;
@ -430,7 +431,8 @@ implementation
localvarsym : localvarsym :
appendsym_localvar(list,tlocalvarsym(sym)); appendsym_localvar(list,tlocalvarsym(sym));
paravarsym : paravarsym :
appendsym_paravar(list,tparavarsym(sym)); if tparavarsym(sym).localloc.loc<>LOC_VOID then
appendsym_paravar(list,tparavarsym(sym));
constsym : constsym :
appendsym_const(list,tconstsym(sym)); appendsym_const(list,tconstsym(sym));
typesym : typesym :

View File

@ -2366,8 +2366,7 @@ implementation
not is_nested_pd(def2))) or not is_nested_pd(def2))) or
((def1.typ=procdef) and { d) } ((def1.typ=procdef) and { d) }
is_nested_pd(def1) and is_nested_pd(def1) and
((not(po_delphi_nested_cc in def1.procoptions) and (not(po_delphi_nested_cc in def1.procoptions) or
(pio_needs_parentfp in tprocdef(def1).implprocoptions)) or
not is_nested_pd(def2))) or not is_nested_pd(def2))) or
((def1.typ=procvardef) and { e) } ((def1.typ=procvardef) and { e) }
(is_nested_pd(def1)<>is_nested_pd(def2))) then (is_nested_pd(def1)<>is_nested_pd(def2))) then

View File

@ -41,6 +41,7 @@ interface
procedure push_value_para;virtual; procedure push_value_para;virtual;
procedure push_formal_para;virtual; procedure push_formal_para;virtual;
procedure push_copyout_para;virtual;abstract; procedure push_copyout_para;virtual;abstract;
function maybe_push_unused_para:boolean;virtual;
public public
tempcgpara : tcgpara; tempcgpara : tcgpara;
@ -60,6 +61,7 @@ interface
procedure release_para_temps; procedure release_para_temps;
procedure reorder_parameters; procedure reorder_parameters;
procedure freeparas; procedure freeparas;
function is_parentfp_pushed:boolean;
protected protected
retloc: tcgpara; retloc: tcgpara;
paralocs: array of pcgpara; paralocs: array of pcgpara;
@ -169,6 +171,8 @@ implementation
begin begin
if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
internalerror(200304235); internalerror(200304235);
if maybe_push_unused_para then
exit;
{ see the call to keep_para_array_range in ncal: if that call returned { see the call to keep_para_array_range in ncal: if that call returned
true, we overwrite the resultdef of left with its original resultdef true, we overwrite the resultdef of left with its original resultdef
(to keep track of the range of the original array); we inserted a type (to keep track of the range of the original array); we inserted a type
@ -262,6 +266,9 @@ implementation
not push_zero_sized_value_para then not push_zero_sized_value_para then
exit; exit;
if maybe_push_unused_para then
exit;
{ Move flags and jump in register to make it less complex } { Move flags and jump in register to make it less complex }
if left.location.loc in [LOC_FLAGS,LOC_JUMP,LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then if left.location.loc in [LOC_FLAGS,LOC_JUMP,LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false); hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
@ -273,6 +280,8 @@ implementation
procedure tcgcallparanode.push_formal_para; procedure tcgcallparanode.push_formal_para;
begin begin
if maybe_push_unused_para then
exit;
{ allow passing of a constant to a const formaldef } { allow passing of a constant to a const formaldef }
if (parasym.varspez=vs_const) and if (parasym.varspez=vs_const) and
not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
@ -281,6 +290,35 @@ implementation
end; end;
function tcgcallparanode.maybe_push_unused_para: boolean;
begin
{ Check if the parameter is unused.
Only the $parentfp parameter is supported for now. }
result:=(vo_is_parentfp in parasym.varoptions) and (parasym.varstate<=vs_initialised);
if not result then
exit;
{ The parameter is unused.
We can skip loading of the parameter when:
- the target does not strictly require all parameters (push_zero_sized_value_para = false)
and
- fixed stack is used
or the parameter is in a register
or the parameter is $parentfp. }
if not push_zero_sized_value_para and
(paramanager.use_fixed_stack or
(vo_is_parentfp in parasym.varoptions) or
(parasym.paraloc[callerside].Location^.Loc in [LOC_REGISTER,LOC_CREGISTER])) then
begin
{ Skip loading }
parasym.paraloc[callerside].Location^.Loc:=LOC_VOID;
tempcgpara.Location^.Loc:=LOC_VOID;
end
else
{ Load the dummy nil/0 value }
hlcg.a_load_const_cgpara(current_asmdata.CurrAsmList,left.resultdef,0,tempcgpara);
end;
procedure tcgcallparanode.secondcallparan; procedure tcgcallparanode.secondcallparan;
var var
pushaddr: boolean; pushaddr: boolean;
@ -909,6 +947,20 @@ implementation
end; end;
function tcgcallnode.is_parentfp_pushed: boolean;
var
i : longint;
begin
for i:=0 to procdefinition.paras.Count-1 do
with tparavarsym(procdefinition.paras[i]) do
if vo_is_parentfp in varoptions then
begin
result:=paraloc[callerside].Location^.Loc in [LOC_REFERENCE,LOC_CREFERENCE];
exit;
end;
result:=false;
end;
procedure tcgcallnode.pass_generate_code; procedure tcgcallnode.pass_generate_code;
var var
@ -1258,9 +1310,10 @@ implementation
pop_parasize(0); pop_parasize(0);
end end
{ frame pointer parameter is popped by the caller when it's passed the { frame pointer parameter is popped by the caller when it's passed the
Delphi way } Delphi way and $parentfp is used }
else if (po_delphi_nested_cc in procdefinition.procoptions) and else if (po_delphi_nested_cc in procdefinition.procoptions) and
not paramanager.use_fixed_stack then not paramanager.use_fixed_stack and
is_parentfp_pushed() then
pop_parasize(sizeof(pint)); pop_parasize(sizeof(pint));
if procdefinition.generate_safecall_wrapper then if procdefinition.generate_safecall_wrapper then

View File

@ -170,9 +170,7 @@ implementation
location_reset(location,LOC_REGISTER,def_cgsize(parentfpvoidpointertype)); location_reset(location,LOC_REGISTER,def_cgsize(parentfpvoidpointertype));
currpi:=current_procinfo; currpi:=current_procinfo;
{ load framepointer of current proc } { load framepointer of current proc }
hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp')); hsym:=parentfpsym;
if not assigned(hsym) then
internalerror(200309281);
if (currpi.procdef.owner.symtablelevel=parentpd.parast.symtablelevel) and (hsym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) then if (currpi.procdef.owner.symtablelevel=parentpd.parast.symtablelevel) and (hsym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) then
location.register:=hsym.localloc.register location.register:=hsym.localloc.register
else else

View File

@ -105,7 +105,7 @@ implementation
of the current routine (and hence it has not been moved into the of the current routine (and hence it has not been moved into the
nestedfp struct), get the original nestedfp parameter } nestedfp struct), get the original nestedfp parameter }
useparentfppara:=not assigned(current_procinfo.procdef.parentfpstruct); useparentfppara:=not assigned(current_procinfo.procdef.parentfpstruct);
hsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp')); hsym:=parentfpsym;
if current_procinfo.procdef.parast.symtablelevel>parentpd.parast.symtablelevel then if current_procinfo.procdef.parast.symtablelevel>parentpd.parast.symtablelevel then
useparentfppara:= useparentfppara:=
useparentfppara or useparentfppara or

View File

@ -865,6 +865,13 @@ implementation
location_reset(vs.initialloc,LOC_REGISTER,OS_ADDR); location_reset(vs.initialloc,LOC_REGISTER,OS_ADDR);
vs.initialloc.register:=NR_FRAME_POINTER_REG; vs.initialloc.register:=NR_FRAME_POINTER_REG;
end end
{ Unused parameters ($parentfp for now) need to be kept in the original location
to prevent allocation of registers/resources for them. }
else if (vs.varstate <= vs_initialised) and
(vo_is_parentfp in vs.varoptions) then
begin
tparavarsym(vs).paraloc[calleeside].get_location(vs.initialloc);
end
else else
begin begin
{ if an open array is used, also its high parameter is used, { if an open array is used, also its high parameter is used,
@ -1055,6 +1062,9 @@ implementation
loadn: loadn:
if (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) then if (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) then
add_regvars(rv^,tabstractnormalvarsym(tloadnode(n).symtableentry).localloc); add_regvars(rv^,tabstractnormalvarsym(tloadnode(n).symtableentry).localloc);
loadparentfpn:
if current_procinfo.procdef.parast.symtablelevel>tloadparentfpnode(n).parentpd.parast.symtablelevel then
add_regvars(rv^,tloadparentfpnode(n).parentfpsym.localloc);
vecn: vecn:
begin begin
{ range checks sometimes need the high parameter } { range checks sometimes need the high parameter }

View File

@ -357,8 +357,6 @@ implementation
if assigned(left) then if assigned(left) then
internalerror(200309289); internalerror(200309289);
left:=cloadparentfpnode.create(tprocdef(symtable.defowner),lpf_forload); left:=cloadparentfpnode.create(tprocdef(symtable.defowner),lpf_forload);
{ we can't inline the referenced parent procedure }
include(tprocdef(symtable.defowner).implprocoptions,pio_nested_access);
{ reference in nested procedures, variable needs to be in memory } { reference in nested procedures, variable needs to be in memory }
{ and behaves as if its address escapes its parent block } { and behaves as if its address escapes its parent block }
make_not_regable(self,[ra_different_scope]); make_not_regable(self,[ra_different_scope]);

View File

@ -53,6 +53,10 @@ interface
lpf_forload lpf_forload
); );
tloadparentfpnode = class(tunarynode) tloadparentfpnode = class(tunarynode)
private
_parentfpsym: tparavarsym;
function getparentfpsym: tparavarsym;
public
parentpd : tprocdef; parentpd : tprocdef;
parentpdderef : tderef; parentpdderef : tderef;
kind: tloadparentfpkind; kind: tloadparentfpkind;
@ -65,6 +69,7 @@ interface
function pass_typecheck:tnode;override; function pass_typecheck:tnode;override;
function docompare(p: tnode): boolean; override; function docompare(p: tnode): boolean; override;
function dogetcopy : tnode;override; function dogetcopy : tnode;override;
property parentfpsym: tparavarsym read getparentfpsym;
end; end;
tloadparentfpnodeclass = class of tloadparentfpnode; tloadparentfpnodeclass = class of tloadparentfpnode;
@ -372,32 +377,9 @@ implementation
function tloadparentfpnode.pass_typecheck:tnode; function tloadparentfpnode.pass_typecheck:tnode;
{$ifdef dummy}
var
currpi : tprocinfo;
hsym : tparavarsym;
{$endif dummy}
begin begin
result:=nil; result:=nil;
resultdef:=parentfpvoidpointertype; resultdef:=parentfpvoidpointertype;
{$ifdef dummy}
{ currently parentfps are never loaded in registers (FK) }
if (current_procinfo.procdef.parast.symtablelevel<>parentpd.parast.symtablelevel) then
begin
currpi:=current_procinfo;
{ walk parents }
while (currpi.procdef.owner.symtablelevel>parentpd.parast.symtablelevel) do
begin
currpi:=currpi.parent;
if not assigned(currpi) then
internalerror(2005040602);
hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp'));
if not assigned(hsym) then
internalerror(2005040601);
hsym.varregable:=vr_none;
end;
end;
{$endif dummy}
end; end;
@ -408,6 +390,17 @@ implementation
end; end;
function tloadparentfpnode.getparentfpsym: tparavarsym;
begin
if not assigned(_parentfpsym) then
begin
_parentfpsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp'));
if not assigned(_parentfpsym) then
internalerror(200309281);
end;
result:=_parentfpsym;
end;
{***************************************************************************** {*****************************************************************************
TADDRNODE TADDRNODE
*****************************************************************************} *****************************************************************************}

View File

@ -156,7 +156,8 @@ implementation
begin begin
vs:=cparavarsym.create('$parentfp',paranr,vs_value vs:=cparavarsym.create('$parentfp',paranr,vs_value
,parentfpvoidpointertype,[vo_is_parentfp,vo_is_hidden_para]); ,parentfpvoidpointertype,[vo_is_parentfp,vo_is_hidden_para]);
vs.varregable:=vr_none; { Mark $parentfp as used by default }
vs.varstate:=vs_read;
end end
else else
begin begin

View File

@ -202,8 +202,9 @@ unit procinfo;
procedure start_eh(list : TAsmList); virtual; procedure start_eh(list : TAsmList); virtual;
{ called to insert needed eh info into the exit code } { called to insert needed eh info into the exit code }
procedure end_eh(list : TAsmList); virtual; procedure end_eh(list : TAsmList); virtual;
{ Sets the pio_needs_parentfp flag for the current nested procedure and { Sets the pio_needs_parentfp flag for the current nested procedure.
all its parent procedures until parent_level } Sets both pio_needs_parentfp and pio_nested_access for all parent
procedures until parent_level }
procedure set_needs_parentfp(parent_level: byte); procedure set_needs_parentfp(parent_level: byte);
end; end;
tcprocinfo = class of tprocinfo; tcprocinfo = class of tprocinfo;
@ -442,11 +443,15 @@ implementation
Internalerror(2020050302); Internalerror(2020050302);
if parent_level<normal_function_level then if parent_level<normal_function_level then
parent_level:=normal_function_level; parent_level:=normal_function_level;
{ Set pio_needs_parentfp for the current proc }
pi:=Self; pi:=Self;
repeat include(pi.procdef.implprocoptions, pio_needs_parentfp);
include(pi.procdef.implprocoptions, pio_needs_parentfp); { Set both pio_needs_parentfp and pio_nested_access for all parent procs until parent_level }
pi:=pi.parent; while pi.procdef.parast.symtablelevel>parent_level do
until pi.procdef.parast.symtablelevel<=parent_level; begin
pi:=pi.parent;
pi.procdef.implprocoptions:=pi.procdef.implprocoptions+[pio_needs_parentfp,pio_nested_access];
end;
end; end;
end. end.

View File

@ -2309,8 +2309,6 @@ implementation
parentfpinitblock: tnode; parentfpinitblock: tnode;
old_parse_generic: boolean; old_parse_generic: boolean;
recordtokens : boolean; recordtokens : boolean;
parentfp_sym: TSymEntry;
begin begin
old_current_procinfo:=current_procinfo; old_current_procinfo:=current_procinfo;
old_block_type:=block_type; old_block_type:=block_type;
@ -2386,25 +2384,6 @@ implementation
{ parse the code ... } { parse the code ... }
code:=block(current_module.islibrary); code:=block(current_module.islibrary);
{ If this is a nested procedure which does not access its parent's frame
pointer, we can optimize it by removing the hidden $parentfp parameter.
Do not perform this for:
- targets which use a special struct to access parent's variables;
- pure assembler procedures (for compatibility with old code).
}
if not (target_info.system in systems_fpnestedstruct) and
is_nested_pd(procdef) and
not (pio_needs_parentfp in procdef.implprocoptions) and
not (po_assembler in procdef.procoptions) then
begin
exclude(procdef.procoptions, po_delphi_nested_cc);
parentfp_sym:=procdef.parast.Find('parentfp');
if parentfp_sym = nil then
Internalerror(2020050301);
procdef.parast.Delete(parentfp_sym);
procdef.calcparas;
end;
if recordtokens then if recordtokens then
begin begin
{ stop token recorder for generic template } { stop token recorder for generic template }

View File

@ -702,6 +702,8 @@ interface
private private
procedure count_para(p:TObject;arg:pointer); procedure count_para(p:TObject;arg:pointer);
procedure insert_para(p:TObject;arg:pointer); procedure insert_para(p:TObject;arg:pointer);
protected
procedure handle_unused_paras(side: tcallercallee); virtual;
end; end;
tprocvardef = class(tabstractprocdef) tprocvardef = class(tabstractprocdef)
@ -812,6 +814,7 @@ interface
procedure SetIsEmpty(AValue: boolean); procedure SetIsEmpty(AValue: boolean);
function GetHasInliningInfo: boolean; function GetHasInliningInfo: boolean;
procedure SetHasInliningInfo(AValue: boolean); procedure SetHasInliningInfo(AValue: boolean);
procedure handle_unused_paras(side: tcallercallee); override;
public public
messageinf : tmessageinf; messageinf : tmessageinf;
dispid : longint; dispid : longint;
@ -5273,6 +5276,11 @@ implementation
end; end;
procedure tabstractprocdef.handle_unused_paras(side: tcallercallee);
begin
end;
procedure tabstractprocdef.calcparas; procedure tabstractprocdef.calcparas;
var var
paracount : longint; paracount : longint;
@ -5706,6 +5714,7 @@ implementation
has_paraloc_info:=callbothsides has_paraloc_info:=callbothsides
else else
has_paraloc_info:=callerside; has_paraloc_info:=callerside;
handle_unused_paras(callerside);
end; end;
if (side in [calleeside,callbothsides]) and if (side in [calleeside,callbothsides]) and
not(has_paraloc_info in [calleeside,callbothsides]) then not(has_paraloc_info in [calleeside,callbothsides]) then
@ -5718,6 +5727,7 @@ implementation
has_paraloc_info:=callbothsides has_paraloc_info:=callbothsides
else else
has_paraloc_info:=calleeside; has_paraloc_info:=calleeside;
handle_unused_paras(calleeside);
end; end;
end; end;
@ -6011,6 +6021,45 @@ implementation
end; end;
procedure tprocdef.handle_unused_paras(side: tcallercallee);
var
i : longint;
begin
{ Optimize unused parameters by preventing loading them on the callee side
and, if possible, preventing passing them on the caller side.
The caller side optimization is handled by tcgcallparanode.maybe_push_unused_para().
}
if (proctypeoption = potype_exceptfilter) or
(po_assembler in procoptions) then
exit;
{ Only $parentfp is optmized for now. }
if not is_nested_pd(self) then
exit;
{ Handle unused parameters }
for i:=0 to paras.Count-1 do
with tparavarsym(paras[i]) do
if vo_is_parentfp in varoptions then
begin
if pio_needs_parentfp in implprocoptions then
begin
{ If this routine is accessed from other nested routine,
$parentfp must be in a memory location. }
if pio_nested_access in implprocoptions then
varregable:=vr_none;
end
else
begin
{ Mark $parentfp as unused, since it has vs_read by default }
varstate:=vs_initialised;
if side=calleeside then
{ Set LOC_VOID as the parameter's location on the callee side }
paraloc[side].location^.Loc:=LOC_VOID;
break;
end;
end;
end;
procedure tprocdef.Setinterfacedef(AValue: boolean); procedure tprocdef.Setinterfacedef(AValue: boolean);
begin begin
if not assigned(implprocdefinfo) then if not assigned(implprocdefinfo) then