Add a bound parameter to node_count(_weighted).

This commit is contained in:
Rika Ichinose 2022-01-12 11:00:51 +03:00 committed by FPK
parent 2d1ab3410d
commit 11d16be702
4 changed files with 91 additions and 43 deletions

View File

@ -88,6 +88,7 @@ interface
procedure add_done_statement(n:tnode);
procedure convert_carg_array_of_const;
procedure order_parameters;
function heuristics_favors_inlining:boolean;
procedure check_inlining;
function pass1_normal:tnode;
procedure register_created_object_types;
@ -4753,6 +4754,30 @@ implementation
end;
function tcallnode.heuristics_favors_inlining:boolean;
  var
    nodeLimit: cardinal;
  begin
    { Guards against too deep inlining recursion and the code bloat it causes.

      The callee is accepted for inlining only while

          node count < 10000 ^ (1 / (inlinelevel/3 + 1))

      i.e. while it is smaller than the (inlinelevel/3+1)-th root of 10000.
      This allows exponential growth of the code only up to a certain limit.

      Remarks:
      - The inlining level is calculated top down, so outer call nodes (nodes
        closer to the leaf) might not be inlined once the maximum complexity
        is reached. This keeps the implementation simple, and there might be
        situations where it is more beneficial to inline the inner nodes and
        keep the calls to the outer nodes, if the outer nodes are on a seldom
        used code path.
      - The root is written with exp/ln to avoid functions from the math unit.
      - The limit is also handed to node_count as its second argument, so the
        comparison below only has to distinguish "below the limit" from
        "limit reached". }
    nodeLimit:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
    result:=nodeLimit>node_count(tprocdef(procdefinition).inlininginfo^.code,nodeLimit);
  end;
procedure tcallnode.check_inlining;
var
st : tsymtable;
@ -4762,22 +4787,7 @@ implementation
if (po_inline in procdefinition.procoptions) and
(procdefinition.typ=procdef) and
tprocdef(procdefinition).has_inlininginfo and
{ Prevent too deep inlining recursion and code bloat by inlining
The actual formuala is
inlinelevel/3+1 /-------
node count < -----------------\/ 10000
This allows exponential grow of the code only to a certain limit.
Remarks
- The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
if the max. complexity is reached. This is done because it makes the implementation easier and because
there might be situations were it is more beneficial to inline inner nodes and do the calls to the outer nodes
if the outer nodes are in a seldomly used code path
- The code avoids to use functions from the math unit
}
(node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then
heuristics_favors_inlining then
begin
include(callnodeflags,cnf_do_inline);
{ Check if we can inline the procedure when it references proc/var that

View File

@ -134,10 +134,11 @@ interface
function has_conditional_nodes(n : tnode) : boolean;
{ count the number of nodes in the node tree,
rough estimation how large the tree "node" is }
function node_count(node : tnode) : dword;
rough estimation how large the tree "node" is
If the tree has more than max nodes, max is returned, so node_count(n, 10 + 1) <= 10 answers whether the tree has at most 10 nodes while avoiding the traversal of the rest of a larger tree (e.g. the remaining 990 nodes of a 1000-node tree). }
function node_count(node : tnode; max : dword = High(dword)) : dword;
function node_count_weighted(node : tnode) : dword;
function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
{ returns true, if the value described by node is constant/immutable, this approximation is safe
if no dirty tricks like buffer overflows or pointer magic are used }
@ -1438,37 +1439,49 @@ implementation
result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
end;
var
nodecount : dword;
{ Per-node callback handed to foreachnodestatic by node_count.
  arg is dereferenced as a PDWord: a counter of how many nodes may still be
  counted. While it is positive, it is decremented and fen_false is returned;
  once it has reached zero, fen_norecurse_false is returned instead
  (presumably telling the traversal not to descend any further - confirm
  against foreachnodestatic).
  NOTE(review): the two statements directly after "begin" look like the
  pre-change body kept by the diff view this text was taken from - confirm
  before relying on them. }
function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
begin
inc(nodecount);
result:=fen_false;
{ budget left: consume one unit for this node and keep walking }
if PDWord(arg)^>0 then
begin
dec(PDWord(arg)^);
result:=fen_false;
end
else
{ budget exhausted: stop counting }
result:=fen_norecurse_false;
end;
function node_count(node : tnode) : dword;
function node_count(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin
nodecount:=0;
foreachnodestatic(node,@donodecount,nil);
result:=nodecount;
left:=max;
foreachnodestatic(node,@donodecount,@left);
result:=max-left;
end;
{ Per-node callback handed to foreachnodestatic by node_count_weighted.
  Works like donodecount, except that nodes of type blockn, statementn,
  callparan and nothingn are not counted: arg is dereferenced as a PDWord
  holding the remaining budget, which is decremented only for the other node
  types. fen_false is returned while the budget is positive and
  fen_norecurse_false once it has reached zero.
  NOTE(review): the three lines directly after "begin" look like the
  pre-change body kept by the diff view this text was taken from - confirm
  before relying on them. }
function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
begin
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
inc(nodecount);
result:=fen_false;
{ budget left: only the node types not listed below consume it }
if PDWord(arg)^>0 then
begin
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
dec(PDWord(arg)^);
result:=fen_false;
end
else
{ budget exhausted: stop counting }
result:=fen_norecurse_false;
end;
function node_count_weighted(node : tnode) : dword;
function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin
nodecount:=0;
foreachnodestatic(node,@donodecount_weighted,nil);
result:=nodecount;
left:=max;
foreachnodestatic(node,@donodecount_weighted,@left);
result:=max-left;
end;

View File

@ -52,6 +52,8 @@ unit optloop;
procinfo;
function number_unrolls(node : tnode) : cardinal;
var
nodeCount : cardinal;
begin
{ calculate how often a loop shall be unrolled.
@ -60,10 +62,22 @@ unit optloop;
{$ifdef i386}
{ multiply by 2 for CPUs with a long pipeline }
if current_settings.optimizecputype in [cpu_Pentium4] then
number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)))
begin
{ See the common branch below for an explanation. }
nodeCount:=node_count_weighted(node,41);
number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
end
else
{$endif i386}
number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)));
begin
{ If nodeCount >= 15, numerator will be 30,
and the largest number (starting from 15) that makes sense as its denominator
(the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
so there's no point in counting for more than 21 nodes.
"Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
nodeCount:=node_count_weighted(node,21);
number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
end;
if number_unrolls=0 then
number_unrolls:=1;

View File

@ -1829,6 +1829,23 @@ implementation
end;
end;
function heuristics_favors_autoinlining(code: tnode): boolean;
  var
    nodeBudget : integer;
  begin
    { Rough approximation of whether a routine should be auto inlined:
        - the tree must be simple enough
        - the tree must not be too big
      A bigger but simpler tree may be auto inlined, and so may a smaller but
      more complex one, hence the sum of both measures is what is limited.

      This is a short-cut version of
        "result:=node_count(code)+node_complexity(code)<=25":
      the complexity is subtracted from 25 first, and whatever is left is the
      number of nodes the tree may have. node_count is told to stop at one
      node above that budget, as that is already enough to decide. }
    nodeBudget:=25-node_complexity(code);
    if nodeBudget<=0 then
      { the complexity alone uses up the whole allowance }
      result:=false
    else
      result:=node_count(code,nodeBudget+1)<=dword(nodeBudget);
  end;
var
old_current_procinfo : tprocinfo;
oldmaxfpuregisters : longint;
@ -1911,13 +1928,7 @@ implementation
potype_destructor,potype_class_constructor,potype_class_destructor]) and
((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
(not(procdef.proccalloption in [pocall_safecall])) and
{ rough approximation if we should auto inline:
- if the tree is simple enough
- if the tree is not too big
A bigger tree which is simpler might be autoinlined otoh
a smaller and complexer tree as well: so we use the sum of
both measures here }
(node_count(code)+node_complexity(code)<=25) then
heuristics_favors_autoinlining(code) then
begin
{ Can we inline this procedure? }
if checknodeinlining(procdef) then