Add a bound parameter to node_count(_weighted).

This commit is contained in:
Rika Ichinose 2022-01-12 11:00:51 +03:00 committed by FPK
parent 2d1ab3410d
commit 11d16be702
4 changed files with 91 additions and 43 deletions

View File

@ -88,6 +88,7 @@ interface
procedure add_done_statement(n:tnode); procedure add_done_statement(n:tnode);
procedure convert_carg_array_of_const; procedure convert_carg_array_of_const;
procedure order_parameters; procedure order_parameters;
function heuristics_favors_inlining:boolean;
procedure check_inlining; procedure check_inlining;
function pass1_normal:tnode; function pass1_normal:tnode;
procedure register_created_object_types; procedure register_created_object_types;
@ -4753,15 +4754,10 @@ implementation
end; end;
procedure tcallnode.check_inlining; function tcallnode.heuristics_favors_inlining:boolean;
var var
st : tsymtable; limExcluding: cardinal;
para : tcallparanode;
begin begin
{ Can we inline the procedure? }
if (po_inline in procdefinition.procoptions) and
(procdefinition.typ=procdef) and
tprocdef(procdefinition).has_inlininginfo and
{ Prevent too deep inlining recursion and code bloat by inlining { Prevent too deep inlining recursion and code bloat by inlining
The actual formula is The actual formula is
@ -4777,7 +4773,21 @@ implementation
if the outer nodes are in a seldom used code path if the outer nodes are in a seldom used code path
- The code avoids using functions from the math unit - The code avoids using functions from the math unit
} }
(node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then limExcluding:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
result:=node_count(tprocdef(procdefinition).inlininginfo^.code,limExcluding)<limExcluding;
end;
procedure tcallnode.check_inlining;
var
st : tsymtable;
para : tcallparanode;
begin
{ Can we inline the procedure? }
if (po_inline in procdefinition.procoptions) and
(procdefinition.typ=procdef) and
tprocdef(procdefinition).has_inlininginfo and
heuristics_favors_inlining then
begin begin
include(callnodeflags,cnf_do_inline); include(callnodeflags,cnf_do_inline);
{ Check if we can inline the procedure when it references proc/var that { Check if we can inline the procedure when it references proc/var that

View File

@ -134,10 +134,11 @@ interface
function has_conditional_nodes(n : tnode) : boolean; function has_conditional_nodes(n : tnode) : boolean;
{ count the number of nodes in the node tree, { count the number of nodes in the node tree,
rough estimation how large the tree "node" is } rough estimation how large the tree "node" is
function node_count(node : tnode) : dword; If the tree has more than max nodes, returns max, so node_count(n, 10 + 1) <= 10 answers whether the tree has at most 10 nodes without counting the rest of a larger tree (e.g. the remaining 990 nodes of a 1000-node tree). }
function node_count(node : tnode; max : dword = High(dword)) : dword;
function node_count_weighted(node : tnode) : dword; function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
{ returns true, if the value described by node is constant/immutable, this approximation is safe { returns true, if the value described by node is constant/immutable, this approximation is safe
if no dirty tricks like buffer overflows or pointer magic are used } if no dirty tricks like buffer overflows or pointer magic are used }
@ -1438,37 +1439,49 @@ implementation
result:=foreachnodestatic(n,@check_for_conditional_nodes,nil); result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
end; end;
var
nodecount : dword;
function donodecount(var n: tnode; arg: pointer): foreachnoderesult; function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
begin begin
inc(nodecount); if PDWord(arg)^>0 then
begin
dec(PDWord(arg)^);
result:=fen_false; result:=fen_false;
end
else
result:=fen_norecurse_false;
end; end;
function node_count(node : tnode) : dword; function node_count(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin begin
nodecount:=0; left:=max;
foreachnodestatic(node,@donodecount,nil); foreachnodestatic(node,@donodecount,@left);
result:=nodecount; result:=max-left;
end; end;
function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult; function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
begin
if PDWord(arg)^>0 then
begin begin
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
inc(nodecount); dec(PDWord(arg)^);
result:=fen_false; result:=fen_false;
end
else
result:=fen_norecurse_false;
end; end;
function node_count_weighted(node : tnode) : dword; function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
var
left : dword;
begin begin
nodecount:=0; left:=max;
foreachnodestatic(node,@donodecount_weighted,nil); foreachnodestatic(node,@donodecount_weighted,@left);
result:=nodecount; result:=max-left;
end; end;

View File

@ -52,6 +52,8 @@ unit optloop;
procinfo; procinfo;
function number_unrolls(node : tnode) : cardinal; function number_unrolls(node : tnode) : cardinal;
var
nodeCount : cardinal;
begin begin
{ calculate how often a loop shall be unrolled. { calculate how often a loop shall be unrolled.
@ -60,10 +62,22 @@ unit optloop;
{$ifdef i386} {$ifdef i386}
{ multiply by 2 for CPUs with a long pipeline } { multiply by 2 for CPUs with a long pipeline }
if current_settings.optimizecputype in [cpu_Pentium4] then if current_settings.optimizecputype in [cpu_Pentium4] then
number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1))) begin
{ See the common branch below for an explanation. }
nodeCount:=node_count_weighted(node,41);
number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
end
else else
{$endif i386} {$endif i386}
number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1))); begin
{ If nodeCount >= 15, numerator will be 30,
and the largest number (starting from 15) that makes sense as its denominator
(the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
so there's no point in counting for more than 21 nodes.
"Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
nodeCount:=node_count_weighted(node,21);
number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
end;
if number_unrolls=0 then if number_unrolls=0 then
number_unrolls:=1; number_unrolls:=1;

View File

@ -1829,6 +1829,23 @@ implementation
end; end;
end; end;
function heuristics_favors_autoinlining(code: tnode): boolean;
  var
    nodeBudget : integer;
  begin
    { Rough approximation of whether a routine should be auto-inlined.
      Two measures are summed: the size of the tree (node_count) and its
      complexity (node_complexity). A larger but simpler tree may qualify,
      and so may a smaller but more complex one, as long as
        node_count(code)+node_complexity(code)<=25.
      The sum is not computed directly: the complexity is subtracted from
      the limit first, and what remains is the number of nodes the tree may
      still have. }
    nodeBudget:=25-node_complexity(code);
    if nodeBudget<=0 then
      { The complexity alone already uses up the whole limit. }
      result:=false
    else
      { Count with a bound of nodeBudget+1: a result above nodeBudget is
        enough to reject, so counting further is pointless. }
      result:=node_count(code,nodeBudget+1)<=dword(nodeBudget);
  end;
var var
old_current_procinfo : tprocinfo; old_current_procinfo : tprocinfo;
oldmaxfpuregisters : longint; oldmaxfpuregisters : longint;
@ -1911,13 +1928,7 @@ implementation
potype_destructor,potype_class_constructor,potype_class_destructor]) and potype_destructor,potype_class_constructor,potype_class_destructor]) and
((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and ((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
(not(procdef.proccalloption in [pocall_safecall])) and (not(procdef.proccalloption in [pocall_safecall])) and
{ rough approximation if we should auto inline: heuristics_favors_autoinlining(code) then
- if the tree is simple enough
- if the tree is not too big
A bigger tree which is simpler might be autoinlined otoh
a smaller and complexer tree as well: so we use the sum of
both measures here }
(node_count(code)+node_complexity(code)<=25) then
begin begin
{ Can we inline this procedure? } { Can we inline this procedure? }
if checknodeinlining(procdef) then if checknodeinlining(procdef) then