Add a bound parameter to node_count(_weighted).

2025-08-29 07:23:19 +02:00 · 2022-01-12 11:00:51 +03:00 · 2022-01-12 11:00:51 +03:00 · 11d16be702
commit 11d16be702
parent 2d1ab3410d
4 changed files with 91 additions and 43 deletions
--- a/compiler/ncal.pas
+++ b/compiler/ncal.pas
@ -88,6 +88,7 @@ interface
          procedure add_done_statement(n:tnode);
          procedure convert_carg_array_of_const;
          procedure order_parameters;
          function heuristics_favors_inlining:boolean;
          procedure check_inlining;
          function  pass1_normal:tnode;
          procedure register_created_object_types;
@ -4753,15 +4754,10 @@ implementation
      end;
-    procedure tcallnode.check_inlining;
+    function tcallnode.heuristics_favors_inlining:boolean;
      var
-        st   : tsymtable;
+        limExcluding: cardinal;
        para : tcallparanode;
      begin
        { Can we inline the procedure? }
        if (po_inline in procdefinition.procoptions) and
           (procdefinition.typ=procdef) and
           tprocdef(procdefinition).has_inlininginfo and
        {  Prevent too deep inlining recursion and code bloat by inlining
           The actual formula is
@ -4777,7 +4773,21 @@ implementation
              if the outer nodes are in a seldom-used code path
            - The code avoids using functions from the math unit
        }
-           (node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then
+        limExcluding:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
        result:=node_count(tprocdef(procdefinition).inlininginfo^.code,limExcluding)<limExcluding;
      end;
    procedure tcallnode.check_inlining;
      var
        st   : tsymtable;
        para : tcallparanode;
      begin
        { Can we inline the procedure? }
        if (po_inline in procdefinition.procoptions) and
           (procdefinition.typ=procdef) and
           tprocdef(procdefinition).has_inlininginfo and
           heuristics_favors_inlining then
          begin
            include(callnodeflags,cnf_do_inline);
            { Check if we can inline the procedure when it references proc/var that
--- a/compiler/nutils.pas
+++ b/compiler/nutils.pas
@ -134,10 +134,11 @@ interface
    function has_conditional_nodes(n : tnode) : boolean;
    { count the number of nodes in the node tree,
-      rough estimation how large the tree "node" is }
+      rough estimation how large the tree "node" is
-    function node_count(node : tnode) : dword;
+      If the tree has more than max nodes, max is returned, so node_count(n, 10 + 1) <= 10 answers whether the tree has at most 10 nodes without counting the rest of a larger tree (e.g. the other 990 nodes of a 1000-node tree). }
    function node_count(node : tnode; max : dword = High(dword)) : dword;
-    function node_count_weighted(node : tnode) : dword;
+    function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
    { returns true, if the value described by node is constant/immutable, this approximation is safe
      if no dirty tricks like buffer overflows or pointer magic are used }
@ -1438,37 +1439,49 @@ implementation
        result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
      end;
    var
      nodecount : dword;
    { Callback passed to foreachnodestatic by node_count.
      arg is read as a pointer to a dword holding how many nodes may still be counted. }
    function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
      begin
-        inc(nodecount);
+        if PDWord(arg)^>0 then
          begin
            { budget left: count this node by consuming one unit of it }
            dec(PDWord(arg)^);
            result:=fen_false;
          end
        else
          { budget used up: presumably fen_norecurse_false keeps the traversal from
            descending below this node - confirm against foreachnodestatic }
          result:=fen_norecurse_false;
      end;
-    function node_count(node : tnode) : dword;
+    function node_count(node : tnode; max : dword = High(dword)) : dword;
      { Counts the nodes of the tree "node" by running donodecount over it with
        foreachnodestatic. At most max nodes are counted, so the result never
        exceeds max; max defaults to High(dword). }
      var
        { remaining budget; donodecount decrements it once per counted node }
        left : dword;
      begin
-        nodecount:=0;
+        left:=max;
-        foreachnodestatic(node,@donodecount,nil);
+        foreachnodestatic(node,@donodecount,@left);
-        result:=nodecount;
+        result:=max-left;
        { max-left is the part of the budget that was consumed, i.e. the nodes counted }
      end;
    { Callback passed to foreachnodestatic by node_count_weighted.
      arg is read as a pointer to a dword holding how many nodes may still be counted.
      Nodes of type blockn, statementn, callparan and nothingn are visited but not counted. }
    function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
      begin
        if PDWord(arg)^>0 then
          begin
            { only node types outside the listed set consume budget }
            if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
-          inc(nodecount);
+              dec(PDWord(arg)^);
            result:=fen_false;
          end
        else
          { budget used up: presumably fen_norecurse_false keeps the traversal from
            descending below this node - confirm against foreachnodestatic }
          result:=fen_norecurse_false;
      end;
-    function node_count_weighted(node : tnode) : dword;
+    function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
      { Same scheme as node_count, but uses donodecount_weighted as the callback,
        which does not count blockn, statementn, callparan and nothingn nodes.
        At most max nodes are counted, so the result never exceeds max;
        max defaults to High(dword). }
      var
        { remaining budget; donodecount_weighted decrements it once per counted node }
        left : dword;
      begin
-        nodecount:=0;
+        left:=max;
-        foreachnodestatic(node,@donodecount_weighted,nil);
+        foreachnodestatic(node,@donodecount_weighted,@left);
-        result:=nodecount;
+        result:=max-left;
        { max-left is the part of the budget that was consumed, i.e. the nodes counted }
      end;
--- a/compiler/optloop.pas
+++ b/compiler/optloop.pas
@ -52,6 +52,8 @@ unit optloop;
      procinfo;
    function number_unrolls(node : tnode) : cardinal;
      var
        nodeCount : cardinal;
      begin
        { calculate how often a loop shall be unrolled.
@ -60,10 +62,22 @@ unit optloop;
 {$ifdef i386}
        { multiply by 2 for CPUs with a long pipeline }
        if current_settings.optimizecputype in [cpu_Pentium4] then
-          number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)))
+          begin
            { See the common branch below for an explanation. }
            nodeCount:=node_count_weighted(node,41);
            number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
          end
        else
 {$endif i386}
-          number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)));
+          begin
            { If nodeCount >= 15, the numerator is 30. The quotient rounds to 1 as soon as
              it drops below 1.5, i.e. from denominator 21 = trunc(30/1.5+1) onwards, and any
              larger denominator still ends up as number_unrolls = 1 (a result of 0 is raised to 1 below),
              so there's no point in counting more than 21 nodes.
              The "long pipeline" variant above is the same with numerator = 60 and max denominator = 41. }
            nodeCount:=node_count_weighted(node,21);
            number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
          end;
        if number_unrolls=0 then
          number_unrolls:=1;
--- a/compiler/psub.pas
+++ b/compiler/psub.pas
@ -1829,6 +1829,23 @@ implementation
             end;
         end;
       function heuristics_favors_autoinlining(code: tnode): boolean;
         var
           nodeBudget : integer;
         begin
           { Auto-inlining heuristic: the tree has to be both simple enough and
             small enough. A larger but simpler tree may still qualify, and so
             may a smaller but more complex one, hence both measures are summed
             and compared against a single limit:
               node_count(code)+node_complexity(code)<=25
             Here the complexity is subtracted from the limit first; what is
             left bounds the node count, which is passed to node_count as its
             max argument (plus one, so that "too many" is still detectable). }
           result:=false;
           nodeBudget:=25-node_complexity(code);
           if nodeBudget>0 then
             result:=node_count(code,nodeBudget+1)<=dword(nodeBudget);
         end;
      var
        old_current_procinfo : tprocinfo;
        oldmaxfpuregisters : longint;
@ -1911,13 +1928,7 @@ implementation
                                           potype_destructor,potype_class_constructor,potype_class_destructor]) and
            ((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
            (not(procdef.proccalloption in [pocall_safecall])) and
-            { rough approximation if we should auto inline:
+            heuristics_favors_autoinlining(code) then
              - if the tree is simple enough
              - if the tree is not too big
              A bigger tree which is simpler might be autoinlined otoh
              a smaller and complexer tree as well: so we use the sum of
              both measures here }
            (node_count(code)+node_complexity(code)<=25) then
          begin
            { Can we inline this procedure? }
            if checknodeinlining(procdef) then