mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 21:07:58 +02:00
Add a bound parameter to node_count(_weighted).
This commit is contained in:
parent
2d1ab3410d
commit
11d16be702
@ -88,6 +88,7 @@ interface
|
||||
procedure add_done_statement(n:tnode);
|
||||
procedure convert_carg_array_of_const;
|
||||
procedure order_parameters;
|
||||
function heuristics_favors_inlining:boolean;
|
||||
procedure check_inlining;
|
||||
function pass1_normal:tnode;
|
||||
procedure register_created_object_types;
|
||||
@ -4753,6 +4754,30 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
function tcallnode.heuristics_favors_inlining:boolean;
|
||||
var
|
||||
limExcluding: cardinal;
|
||||
begin
|
||||
{ Prevent too deep inlining recursion and code bloat by inlining
|
||||
|
||||
The actual formula is
|
||||
node count < $\sqrt[\mathit{inlinelevel}/3+1]{10000}$
|
||||
(equivalently, node count < $10000^{1/(\mathit{inlinelevel}/3+1)}$)
|
||||
|
||||
This allows exponential growth of the code only up to a certain limit.
|
||||
|
||||
Result
|
||||
|
||||
True if the node count of the callee's inlining code is strictly below the limit.
|
||||
|
||||
limExcluding holds that (rounded) exclusive limit; it is also passed to node_count as the
|
||||
|
||||
counting bound, so node_count reports at most limExcluding and the "<" test below still decides correctly.
|
||||
|
||||
Remarks
|
||||
|
||||
- The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
|
||||
if the max. complexity is reached. This is done because it makes the implementation easier and because
|
||||
there might be situations where it is more beneficial to inline inner nodes and do the calls to the outer nodes
|
||||
if the outer nodes are in a seldom used code path
|
||||
- The code avoids using functions from the math unit (the root is computed as exp(ln(10000)/(inlinelevel/3+1)))
|
||||
}
|
||||
limExcluding:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
|
||||
result:=node_count(tprocdef(procdefinition).inlininginfo^.code,limExcluding)<limExcluding;
|
||||
end;
|
||||
|
||||
|
||||
procedure tcallnode.check_inlining;
|
||||
var
|
||||
st : tsymtable;
|
||||
@ -4762,22 +4787,7 @@ implementation
|
||||
if (po_inline in procdefinition.procoptions) and
|
||||
(procdefinition.typ=procdef) and
|
||||
tprocdef(procdefinition).has_inlininginfo and
|
||||
{ Prevent too deep inlining recursion and code bloat by inlining
|
||||
|
||||
The actual formula is
|
||||
node count < $\sqrt[\mathit{inlinelevel}/3+1]{10000}$
|
||||
(equivalently, node count < $10000^{1/(\mathit{inlinelevel}/3+1)}$)
|
||||
|
||||
This allows exponential growth of the code only up to a certain limit.
|
||||
|
||||
Remarks
|
||||
- The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
|
||||
if the max. complexity is reached. This is done because it makes the implementation easier and because
|
||||
there might be situations where it is more beneficial to inline inner nodes and do the calls to the outer nodes
|
||||
if the outer nodes are in a seldom used code path
|
||||
- The code avoids using functions from the math unit
|
||||
}
|
||||
(node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then
|
||||
heuristics_favors_inlining then
|
||||
begin
|
||||
include(callnodeflags,cnf_do_inline);
|
||||
{ Check if we can inline the procedure when it references proc/var that
|
||||
|
@ -134,10 +134,11 @@ interface
|
||||
function has_conditional_nodes(n : tnode) : boolean;
|
||||
|
||||
{ count the number of nodes in the node tree,
|
||||
rough estimation how large the tree "node" is }
|
||||
function node_count(node : tnode) : dword;
|
||||
rough estimation how large the tree "node" is
|
||||
If the tree has more than max nodes, returns max, so node_count(n, 10 + 1) <= 10 answers whether the tree has ≤10 nodes without traversing the rest of a larger tree (e.g. the remaining 990 nodes of a 1000-node tree). }
|
||||
function node_count(node : tnode; max : dword = High(dword)) : dword;
|
||||
|
||||
function node_count_weighted(node : tnode) : dword;
|
||||
function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
|
||||
|
||||
{ returns true, if the value described by node is constant/immutable, this approximation is safe
|
||||
if no dirty tricks like buffer overflows or pointer magic are used }
|
||||
@ -1438,37 +1439,49 @@ implementation
|
||||
result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
|
||||
end;
|
||||
|
||||
var
|
||||
nodecount : dword;
|
||||
|
||||
function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
|
||||
begin
|
||||
inc(nodecount);
|
||||
result:=fen_false;
|
||||
if PDWord(arg)^>0 then
|
||||
begin
|
||||
dec(PDWord(arg)^);
|
||||
result:=fen_false;
|
||||
end
|
||||
else
|
||||
result:=fen_norecurse_false;
|
||||
end;
|
||||
|
||||
|
||||
function node_count(node : tnode) : dword;
|
||||
function node_count(node : tnode; max : dword = High(dword)) : dword;
|
||||
var
|
||||
left : dword;
|
||||
begin
|
||||
nodecount:=0;
|
||||
foreachnodestatic(node,@donodecount,nil);
|
||||
result:=nodecount;
|
||||
left:=max;
|
||||
foreachnodestatic(node,@donodecount,@left);
|
||||
result:=max-left;
|
||||
end;
|
||||
|
||||
|
||||
function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
|
||||
begin
|
||||
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
|
||||
inc(nodecount);
|
||||
result:=fen_false;
|
||||
if PDWord(arg)^>0 then
|
||||
begin
|
||||
if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
|
||||
dec(PDWord(arg)^);
|
||||
result:=fen_false;
|
||||
end
|
||||
else
|
||||
result:=fen_norecurse_false;
|
||||
end;
|
||||
|
||||
|
||||
function node_count_weighted(node : tnode) : dword;
|
||||
function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
|
||||
var
|
||||
left : dword;
|
||||
begin
|
||||
nodecount:=0;
|
||||
foreachnodestatic(node,@donodecount_weighted,nil);
|
||||
result:=nodecount;
|
||||
left:=max;
|
||||
foreachnodestatic(node,@donodecount_weighted,@left);
|
||||
result:=max-left;
|
||||
end;
|
||||
|
||||
|
||||
|
@ -52,6 +52,8 @@ unit optloop;
|
||||
procinfo;
|
||||
|
||||
function number_unrolls(node : tnode) : cardinal;
|
||||
var
|
||||
nodeCount : cardinal;
|
||||
begin
|
||||
{ calculate how often a loop shall be unrolled.
|
||||
|
||||
@ -60,10 +62,22 @@ unit optloop;
|
||||
{$ifdef i386}
|
||||
{ multiply by 2 for CPUs with a long pipeline }
|
||||
if current_settings.optimizecputype in [cpu_Pentium4] then
|
||||
number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)))
|
||||
begin
|
||||
{ See the common branch below for an explanation. }
|
||||
nodeCount:=node_count_weighted(node,41);
|
||||
number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
|
||||
end
|
||||
else
|
||||
{$endif i386}
|
||||
number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)));
|
||||
begin
|
||||
{ If nodeCount >= 15, numerator will be 30,
|
||||
and the largest number (starting from 15) that makes sense as its denominator
|
||||
(the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
|
||||
so there's no point in counting for more than 21 nodes.
|
||||
"Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
|
||||
nodeCount:=node_count_weighted(node,21);
|
||||
number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
|
||||
end;
|
||||
|
||||
if number_unrolls=0 then
|
||||
number_unrolls:=1;
|
||||
|
@ -1829,6 +1829,23 @@ implementation
|
||||
end;
|
||||
end;
|
||||
|
||||
function heuristics_favors_autoinlining(code: tnode): boolean;
|
||||
var
|
||||
complexityAvail : integer;
|
||||
begin
|
||||
{ Rough approximation of whether we should auto inline:
|
||||
- if the tree is simple enough
|
||||
- if the tree is not too big
|
||||
A bigger tree which is simpler might be autoinlined; on the other hand,
|
||||
a smaller and more complex tree might be as well: so we use the sum of
|
||||
both measures here }
|
||||
|
||||
{ This is a short-cut version of
|
||||
"result:=node_count(code)+node_complexity(code)<=25".
|
||||
|
||||
complexityAvail is what remains of the budget of 25 after subtracting node_complexity(code).
|
||||
|
||||
node_count is then asked to count at most complexityAvail+1 nodes, which is enough to decide
|
||||
|
||||
whether the count is <= complexityAvail.
|
||||
|
||||
The complexityAvail>0 test comes first so that a non-positive budget yields false; presumably it also
|
||||
|
||||
keeps the dword cast away from negative values (relies on short-circuit boolean evaluation - TODO confirm).
|
||||
|
||||
NOTE(review): for node_complexity(code)=25 this returns false, whereas the quoted expression would
|
||||
|
||||
return true for a tree whose node_count is 0 - presumably irrelevant in practice; confirm. }
|
||||
complexityAvail:=25-node_complexity(code);
|
||||
result:=(complexityAvail>0) and (node_count(code,complexityAvail+1)<=dword(complexityAvail));
|
||||
end;
|
||||
|
||||
var
|
||||
old_current_procinfo : tprocinfo;
|
||||
oldmaxfpuregisters : longint;
|
||||
@ -1911,13 +1928,7 @@ implementation
|
||||
potype_destructor,potype_class_constructor,potype_class_destructor]) and
|
||||
((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
|
||||
(not(procdef.proccalloption in [pocall_safecall])) and
|
||||
{ rough approximation if we should auto inline:
|
||||
- if the tree is simple enough
|
||||
- if the tree is not too big
|
||||
A bigger tree which is simpler might be autoinlined; on the other hand,
|
||||
a smaller and more complex tree might be as well: so we use the sum of
|
||||
both measures here }
|
||||
(node_count(code)+node_complexity(code)<=25) then
|
||||
heuristics_favors_autoinlining(code) then
|
||||
begin
|
||||
{ Can we inline this procedure? }
|
||||
if checknodeinlining(procdef) then
|
||||
|
Loading…
Reference in New Issue
Block a user