Add a bound parameter to node_count(_weighted).

This commit is contained in:
Rika Ichinose 2022-01-12 11:00:51 +03:00 committed by FPK
parent 2d1ab3410d
commit 11d16be702
4 changed files with 91 additions and 43 deletions

View File

@ -88,6 +88,7 @@ interface
procedure add_done_statement(n:tnode); procedure add_done_statement(n:tnode);
procedure convert_carg_array_of_const; procedure convert_carg_array_of_const;
procedure order_parameters; procedure order_parameters;
function heuristics_favors_inlining:boolean;
procedure check_inlining; procedure check_inlining;
function pass1_normal:tnode; function pass1_normal:tnode;
procedure register_created_object_types; procedure register_created_object_types;
@ -4753,6 +4754,30 @@ implementation
end; end;
function tcallnode.heuristics_favors_inlining:boolean;
  var
    node_budget : cardinal;
    callee_body : tnode;
  begin
    { Size heuristic that guards against overly deep inlining recursion and
      against code bloat caused by inlining.

      The callee is considered small enough when

          node count < 10000^(1/(inlinelevel/3+1))

      i.e. its node count is below the (inlinelevel/3+1)-th root of 10000.
      This permits exponential growth of the code only up to a certain limit.

      Remarks
      - The inlining level is calculated top down, so outer call nodes (nodes
        closer to the leaf) might not be inlined once the maximum complexity
        is reached. This keeps the implementation simple, and there may be
        situations where it is more beneficial to inline the inner nodes and
        keep the calls to the outer nodes, e.g. if the outer nodes are in a
        rarely used code path.
      - Functions from the math unit are avoided, which is why the root is
        computed via exp/ln.
      - The budget is also passed to node_count as its bound, so the comparison
        below can be decided without counting past the budget. }
    node_budget:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
    callee_body:=tprocdef(procdefinition).inlininginfo^.code;
    { node_count returns at most node_budget, hence "<" holds exactly when the
      real node count is below the budget }
    result:=node_count(callee_body,node_budget)<node_budget;
  end;
procedure tcallnode.check_inlining; procedure tcallnode.check_inlining;
var var
st : tsymtable; st : tsymtable;
@ -4762,22 +4787,7 @@ implementation
if (po_inline in procdefinition.procoptions) and if (po_inline in procdefinition.procoptions) and
(procdefinition.typ=procdef) and (procdefinition.typ=procdef) and
tprocdef(procdefinition).has_inlininginfo and tprocdef(procdefinition).has_inlininginfo and
{ Prevent too deep inlining recursion and code bloat by inlining heuristics_favors_inlining then
The actual formula is
node count < 10000^(1/(inlinelevel/3+1)), i.e. the (inlinelevel/3+1)-th root of 10000
This allows exponential growth of the code only up to a certain limit.
Remarks
- The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
if the max. complexity is reached. This is done because it makes the implementation easier and because
there might be situations where it is more beneficial to inline inner nodes and do the calls to the outer nodes
if the outer nodes are in a rarely used code path
- The code avoids using functions from the math unit
}
(node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then
begin begin
include(callnodeflags,cnf_do_inline); include(callnodeflags,cnf_do_inline);
{ Check if we can inline the procedure when it references proc/var that { Check if we can inline the procedure when it references proc/var that

View File

@ -134,10 +134,11 @@ interface
function has_conditional_nodes(n : tnode) : boolean; function has_conditional_nodes(n : tnode) : boolean;
{ count the number of nodes in the node tree, { count the number of nodes in the node tree,
rough estimation how large the tree "node" is } rough estimation how large the tree "node" is
function node_count(node : tnode) : dword; If the tree has more than max nodes, returns max, so node_count(n, 10 + 1) <= 10 answers whether the tree has at most 10 nodes but avoids traversing the rest of a larger tree (e.g. the remaining 990 nodes of a 1000-node tree). }
function node_count(node : tnode; max : dword = High(dword)) : dword;
function node_count_weighted(node : tnode) : dword; function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
{ returns true, if the value described by node is constant/immutable, this approximation is safe { returns true, if the value described by node is constant/immutable, this approximation is safe
if no dirty tricks like buffer overflows or pointer magic are used } if no dirty tricks like buffer overflows or pointer magic are used }
@ -1438,37 +1439,49 @@ implementation
result:=foreachnodestatic(n,@check_for_conditional_nodes,nil); result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
end; end;
var
nodecount : dword;
function donodecount(var n: tnode; arg: pointer): foreachnoderesult; function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
begin begin
inc(nodecount); if PDWord(arg)^>0 then
result:=fen_false; begin
dec(PDWord(arg)^);
result:=fen_false;
end
else
result:=fen_norecurse_false;
end; end;
function node_count(node : tnode) : dword; function node_count(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin begin
nodecount:=0; left:=max;
foreachnodestatic(node,@donodecount,nil); foreachnodestatic(node,@donodecount,@left);
result:=nodecount; result:=max-left;
end; end;
function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult; function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
begin begin
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then if PDWord(arg)^>0 then
inc(nodecount); begin
result:=fen_false; if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
dec(PDWord(arg)^);
result:=fen_false;
end
else
result:=fen_norecurse_false;
end; end;
function node_count_weighted(node : tnode) : dword; function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin begin
nodecount:=0; left:=max;
foreachnodestatic(node,@donodecount_weighted,nil); foreachnodestatic(node,@donodecount_weighted,@left);
result:=nodecount; result:=max-left;
end; end;

View File

@ -52,6 +52,8 @@ unit optloop;
procinfo; procinfo;
function number_unrolls(node : tnode) : cardinal; function number_unrolls(node : tnode) : cardinal;
var
nodeCount : cardinal;
begin begin
{ calculate how often a loop shall be unrolled. { calculate how often a loop shall be unrolled.
@ -60,10 +62,22 @@ unit optloop;
{$ifdef i386} {$ifdef i386}
{ multiply by 2 for CPUs with a long pipeline } { multiply by 2 for CPUs with a long pipeline }
if current_settings.optimizecputype in [cpu_Pentium4] then if current_settings.optimizecputype in [cpu_Pentium4] then
number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1))) begin
{ See the common branch below for an explanation. }
nodeCount:=node_count_weighted(node,41);
number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
end
else else
{$endif i386} {$endif i386}
number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1))); begin
{ If nodeCount >= 15, numerator will be 30,
and the largest number (starting from 15) that makes sense as its denominator
(the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
so there's no point in counting for more than 21 nodes.
"Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
nodeCount:=node_count_weighted(node,21);
number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
end;
if number_unrolls=0 then if number_unrolls=0 then
number_unrolls:=1; number_unrolls:=1;

View File

@ -1829,6 +1829,23 @@ implementation
end; end;
end; end;
function heuristics_favors_autoinlining(code: tnode): boolean;
  var
    count_budget : integer;
  begin
    { Rough approximation of whether the routine should be auto-inlined:
      - the tree has to be simple enough
      - the tree must not be too big
      A bigger but simpler tree may be auto-inlined, and so may a smaller but
      more complex one, therefore the sum of both measures is used.

      This is a short-circuited form of
        "result:=node_count(code)+node_complexity(code)<=25":
      whatever node_complexity leaves of the 25 is the budget for node_count,
      and node_count is asked to count only up to budget+1. }
    count_budget:=25-node_complexity(code);
    if count_budget<=0 then
      { the complexity alone already uses up the whole budget }
      result:=false
    else
      result:=node_count(code,count_budget+1)<=dword(count_budget);
  end;
var var
old_current_procinfo : tprocinfo; old_current_procinfo : tprocinfo;
oldmaxfpuregisters : longint; oldmaxfpuregisters : longint;
@ -1911,13 +1928,7 @@ implementation
potype_destructor,potype_class_constructor,potype_class_destructor]) and potype_destructor,potype_class_constructor,potype_class_destructor]) and
((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and ((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
(not(procdef.proccalloption in [pocall_safecall])) and (not(procdef.proccalloption in [pocall_safecall])) and
{ rough approximation if we should auto inline: heuristics_favors_autoinlining(code) then
- if the tree is simple enough
- if the tree is not too big
A bigger tree which is simpler might be autoinlined otoh
a smaller and complexer tree as well: so we use the sum of
both measures here }
(node_count(code)+node_complexity(code)<=25) then
begin begin
{ Can we inline this procedure? } { Can we inline this procedure? }
if checknodeinlining(procdef) then if checknodeinlining(procdef) then