Merge branch 'struct-regvar' into 'main'

[Cross-platform / Prototype] Record field optimisation in loops See merge request freepascal.org/fpc/source!948
2025-04-05 13:38:31 +02:00 · 2025-04-04 14:32:51 +00:00 · 2025-04-04 14:32:51 +00:00 · 317e6470ee
commit 317e6470ee
parent 6c4d218b8d eb12e7a7c7
1 changed files with 402 additions and 0 deletions
--- a/compiler/nflw.pas
+++ b/compiler/nflw.pas
@ -85,6 +85,7 @@ interface
          constructor create(l,r:Tnode;tab,cn:boolean);virtual;reintroduce;
          function pass_typecheck:tnode;override;
          function pass_1 : tnode;override;
+          function simplify(forinline : boolean) : tnode;override;
 {$ifdef state_tracking}
          function track_state_pass(exec_known:boolean):boolean;override;
 {$endif}
@ -1269,6 +1270,407 @@ implementation
          t2.isequal(tloopnode(p).t2);
      end;

+
+    function recorddirectaccess(var n: tnode; arg: pointer): foreachnoderesult;
+      begin
+        result:=fen_false;
+        case n.nodetype of
+          subscriptn:
+            if (TSubscriptNode(n).left.nodetype=loadn) and
+              (TLoadNode(TSubscriptNode(n).left).symtableentry=TSymEntry(arg)) then
+              { It's fine if the record is loaded to access a single field }
+              result:=fen_norecurse_false;
+          loadn:
+            if (TLoadNode(n).symtableentry=TSymEntry(arg)) then
+              result:=fen_norecurse_true;
+          else
+            ;
+        end;
+      end;
+
+
+    type
+      TFieldTempPair = class(TLinkedListItem)
+        BaseSymbol: TAbstractVarSym;
+        Field: TFieldVarSym;
+        TempCreate: TTempCreateNode;
+        InitialRead: Boolean;
+        FieldRead: Boolean;
+        FieldWritten: Boolean;
+        Refs: LongInt;
+        FirstDepth: Integer;
+      end;
+
+      PRecordData = ^TRecordData;
+      TRecordData = record
+        BaseSymbol: TAbstractVarSym;
+        Fields: TLinkedList;
+        Depth: Integer;
+      end;
+
+    function recordloopfindrefs(var n: tnode; arg: pointer): foreachnoderesult; forward;
+
+    { Needed as we can't reference recordloopfindrefs directly within itself }
+    function recordloopfindrefs_recursive(var n: tnode; arg: pointer): foreachnoderesult;
+      begin
+        result:=recordloopfindrefs(n, arg);
+      end;
+
+    function recordloopfindrefs(var n: tnode; arg: pointer): foreachnoderesult;
+      var
+        ThisTemp: TFieldTempPair;
+      begin
+        case n.nodetype of
+          subscriptn:
+            if (TSubscriptNode(n).left.nodetype=loadn) and
+              (TLoadNode(TSubscriptNode(n).left).symtableentry=PRecordData(arg)^.BaseSymbol) and
+              { Needs to be a basic type }
+              not is_string(TSubscriptNode(n).vs.vardef) and
+              not is_object(TSubscriptNode(n).vs.vardef) and
+              not is_managed_type(TSubscriptNode(n).vs.vardef) and
+              (
+                (
+                  tstoreddef(TSubscriptNode(n).vs.vardef).is_intregable and
+                  (TSubscriptNode(n).vs.vardef.size<=sizeof(aint))
+                ) or
+                tstoreddef(TSubscriptNode(n).vs.vardef).is_fpuregable or
+                (
+                  is_vector(tstoreddef(TSubscriptNode(n).vs.vardef)) and
+                  fits_in_mm_register(tstoreddef(TSubscriptNode(n).vs.vardef))
+                )
+              ) then
+              begin
+                { See if we've defined this field already }
+                ThisTemp:=TFieldTempPair(PRecordData(arg)^.Fields.First);
+                while Assigned(ThisTemp) do
+                  begin
+                    if (ThisTemp.BaseSymbol=PRecordData(arg)^.BaseSymbol) and
+                      (ThisTemp.Field=TSubscriptNode(n).vs) then
+                      Break;
+                    ThisTemp:=TFieldTempPair(ThisTemp.Next);
+                  end;
+
+                if not Assigned(ThisTemp) then
+                  begin
+                    ThisTemp:=TFieldTempPair.Create;
+                    ThisTemp.BaseSymbol:=PRecordData(arg)^.BaseSymbol;
+                    ThisTemp.Field:=TSubscriptNode(n).vs;
+                    ThisTemp.TempCreate:=CTempCreateNode.Create(TSubscriptNode(n).vs.vardef,TSubscriptNode(n).vs.vardef.size,tt_persistent,True);
+                    ThisTemp.InitialRead:=(nf_modify in TLoadNode(TSubscriptNode(n).left).flags) or not (nf_write in TLoadNode(TSubscriptNode(n).left).flags);
+                    ThisTemp.FieldWritten:=False;
+                    ThisTemp.Refs:=0;
+                    ThisTemp.FirstDepth:=PRecordData(arg)^.Depth;
+                    if not Assigned(PRecordData(arg)^.Fields.Last) then
+                      PRecordData(arg)^.Fields.Insert(ThisTemp)
+                    else
+                      PRecordData(arg)^.Fields.InsertAfter(ThisTemp,PRecordData(arg)^.Fields.Last);
+                  end;
+
+                if TLoadNode(TSubscriptNode(n).left).flags*[nf_write,nf_modify]<>[] then
+                  begin
+                    ThisTemp.FieldWritten:=True;
+                    if nf_modify in TLoadNode(TSubscriptNode(n).left).flags then
+                      ThisTemp.FieldRead:=True;
+                  end
+                else
+                  ThisTemp.FieldRead:=True;
+
+                Inc(ThisTemp.Refs);
+                result:=fen_true;
+                Exit;
+              end;
+          else
+            if n.InheritsFrom(TLoopNode) then
+              begin
+                if foreachnodestatic(pm_postprocess, TLoopNode(n).left, @recordloopfindrefs_recursive, arg) then
+                  result:=fen_true;
+
+                { Writes inside loops may not get executed, so we need to read an initial value to be safe,
+                  hence the incrementation of Depth prior to analysing the right and t1 nodes }
+                Inc(PRecordData(arg)^.Depth);
+                if foreachnodestatic(pm_postprocess, TLoopNode(n).right, @recordloopfindrefs_recursive, arg) then
+                  result:=fen_true;
+                if foreachnodestatic(pm_postprocess, TLoopNode(n).t1, @recordloopfindrefs_recursive, arg) then
+                  result:=fen_true;
+
+                Dec(PRecordData(arg)^.Depth);
+              end;
+        end;
+        result:=fen_false;
+      end;
+
+
+    function recordloopreplacerefs(var n: tnode; arg: pointer): foreachnoderesult;
+      var
+        ThisTemp: TFieldTempPair;
+        NewNode: TNode;
+      begin
+        case n.nodetype of
+          subscriptn:
+            if (TSubscriptNode(n).left.nodetype=loadn) and
+              (TLoadNode(TSubscriptNode(n).left).symtableentry.typ in [localvarsym, paravarsym]) then
+              begin
+                { See if this field has been defined }
+                ThisTemp:=TFieldTempPair(PRecordData(arg)^.Fields.First);
+                while Assigned(ThisTemp) do
+                  begin
+                    if (ThisTemp.BaseSymbol=TLoadNode(TSubscriptNode(n).left).symtableentry) and
+                      (ThisTemp.Field=TSubscriptNode(n).vs) then
+                      Break;
+                    ThisTemp:=TFieldTempPair(ThisTemp.Next);
+                  end;
+
+                if not Assigned(ThisTemp) then
+                  begin
+                    { The field should not be replaced }
+                    result:=fen_norecurse_false;
+                    Exit;
+                  end;
+
+                { Now actually replace the node }
+                NewNode:=CTempRefNode.Create(ThisTemp.TempCreate);
+                NewNode.fileinfo:=n.fileinfo;
+                NewNode.flags:=NewNode.flags+(TLoadNode(TSubscriptNode(n).left).flags*[nf_write,nf_modify]);
+                n.Free;
+                n:=NewNode;
+                n.pass_typecheck;
+                result:=fen_true;
+                Exit;
+              end;
+          else
+            ;
+        end;
+        result:=fen_false;
+      end;
+
+
+    { Estimate a per-platform register limit to prevent too much register pressure. }
+    const
+{$if defined(i386) or defined(i8086)}
+      RECORD_TEMP_LIMIT = 3;
+{$elseif defined(aarch64) or defined(riscv64)}
+      RECORD_TEMP_LIMIT = 15;
+{$else}
+      RECORD_TEMP_LIMIT = 7;
+{$endif}
+
+
+    function twhilerepeatnode.simplify(forinline : boolean) : tnode;
+      var
+        X, Y, SymCount: Integer;
+        MinRefs: LongInt;
+        CurrentSym: TSym;
+        RecordData: TRecordData;
+        AbortRecord: Boolean;
+        NewBlock: TBlockNode;
+        NewWrapper: TStatementNode;
+        ThisTemp, NextTemp: TFieldTempPair;
+        NewCopy, NewNode: TNode;
+      begin
+        result:=nil;
+
+        { Record promotion }
+        if not (nf_internal in flags) and
+          { Slow and not debugger-friendly }
+          (current_settings.optimizerswitches*[cs_opt_level3,cs_opt_regvar]=[cs_opt_level3,cs_opt_regvar]) then
+          begin
+            RecordData.Fields:=nil;
+            { Check to see if local record-types can have individual fields
+              promoted to registers }
+            if current_procinfo.procdef.localst.symtabletype = localsymtable then
+              begin
+                RecordData.Fields:=TLinkedList.Create;
+                SymCount:=current_procinfo.procdef.localst.SymList.Count-1;
+                for X:=0 to SymCount do
+                  begin
+                    CurrentSym:=TSym(current_procinfo.procdef.localst.SymList[X]);
+                    if (CurrentSym.typ=localvarsym) and
+                      { Don't optimise records whose address has been taken,
+                        since there may be some multithreaded access going on }
+                      (TAbstractVarSym(CurrentSym).varsymaccess*[vsa_addr_taken,vsa_different_scope]=[]) and
+                      is_record(TAbstractVarSym(CurrentSym).vardef) and
+                      { TODO: Support unions in a limited fashion later }
+                      not TRecordDef(TAbstractVarSym(CurrentSym).vardef).isunion and
+                      { Ignore records with only a single field }
+                      (TRecordDef(TAbstractVarSym(CurrentSym).vardef).symtable.SymList.Count > 1) then
+                      begin
+                        AbortRecord:=False;
+                        { Make sure an absolute variable doesn't alias to it }
+                        for Y:=0 to SymCount do
+                          if (X<>Y) and
+                            (TSym(current_procinfo.procdef.localst.SymList[X]).typ=absolutevarsym) and
+                            (TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).abstyp=tovar) and
+                            (TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).ref.firstsym^.sltype=sl_load) and
+                            (TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).ref.firstsym^.sym=CurrentSym) then
+                            begin
+                              { Don't take any chances }
+                              AbortRecord:=True;
+                              Break;
+                            end;
+
+                        if AbortRecord then
+                          Continue;
+
+                        { Check to see that the symbol isn't directly accessed as one }
+                        if foreachnodestatic(pm_postprocess, TNode(Self), @recorddirectaccess, CurrentSym) then
+                          Continue;
+
+                        RecordData.BaseSymbol:=TAbstractVarSym(CurrentSym);
+                        RecordData.Depth:=0;
+
+                        foreachnodestatic(pm_postprocess, TNode(Self), @recordloopfindrefs, @RecordData);
+                      end;
+                  end;
+
+                if (RecordData.Fields.Count > 0) then
+                  begin
+                    { Remove any read-only fields with too low a reference count,
+                      since the saving isn't really worth it }
+                    ThisTemp:=TFieldTempPair(RecordData.Fields.First);
+                    while Assigned(ThisTemp) do
+                      begin
+                        NextTemp:=TFieldTempPair(ThisTemp.Next);
+                        if not ThisTemp.FieldWritten and (ThisTemp.Refs<3) then
+                          begin
+                            { Exclude, as saving is minimal at best and
+                              it risks too much register pressure }
+                            ThisTemp.TempCreate.Free;
+                            RecordData.Fields.Remove(ThisTemp);
+                          end;
+                        ThisTemp:=NextTemp;
+                      end;
+
+                    { If we have too many record fields to potentially optimise,
+                      start excluding read-only ones that give a low return }
+                    while (RecordData.Fields.Count > RECORD_TEMP_LIMIT) do
+                      begin
+                        MinRefs:=$7FFFFFFF;
+                        NextTemp:=nil;
+
+                        ThisTemp:=TFieldTempPair(RecordData.Fields.First);
+                        while Assigned(ThisTemp) do
+                          begin
+                            if not ThisTemp.FieldWritten and (ThisTemp.Refs<MinRefs) then
+                              begin
+                                NextTemp:=ThisTemp;
+                                MinRefs:=ThisTemp.Refs;
+                              end;
+
+                            ThisTemp:=TFieldTempPair(ThisTemp.Next);
+                          end;
+
+                        if not Assigned(NextTemp) then
+                          { No more read-only temps }
+                          Break;
+
+                        TFieldTempPair(NextTemp).TempCreate.Free;
+                        RecordData.Fields.Remove(NextTemp);
+                      end;
+
+                    { If we're still over the limit, start removing ones that write
+                      back to the records }
+                    while (RecordData.Fields.Count > RECORD_TEMP_LIMIT) do
+                      begin
+                        MinRefs:=$7FFFFFFF;
+                        NextTemp:=nil;
+
+                        ThisTemp:=TFieldTempPair(RecordData.Fields.First);
+                        while Assigned(ThisTemp) do
+                          begin
+                            if (ThisTemp.Refs<MinRefs) then
+                              begin
+                                NextTemp:=ThisTemp;
+                                MinRefs:=ThisTemp.Refs;
+                              end;
+
+                            ThisTemp:=TFieldTempPair(ThisTemp.Next);
+                          end;
+
+                        TFieldTempPair(NextTemp).TempCreate.Free;
+                        RecordData.Fields.Remove(NextTemp);
+                      end;
+
+                    { Now that inefficient ones have been removed, replace the subscript nodes }
+                    if (RecordData.Fields.Count > 0) and
+                      foreachnodestatic(pm_postprocess, TNode(Self), @recordloopreplacerefs, @RecordData) then
+                      begin
+                        { Since the for-loop has had temprefs inserted, put
+                          the relevant tempcreates and tempdeletes before
+                          and after it. }
+                        NewBlock:=internalstatements(NewWrapper);
+                        ThisTemp:=TFieldTempPair(RecordData.Fields.First);
+                        while Assigned(ThisTemp) do
+                          begin
+                            ThisTemp.TempCreate.fileinfo:=fileinfo;
+                            addstatement(NewWrapper, ThisTemp.TempCreate);
+                            if ThisTemp.InitialRead or (ThisTemp.FirstDepth<>0) then
+                              begin
+                                NewNode:=cassignmentnode.create_internal( { Suppress uninitialized value warning }
+                                  ctemprefnode.create(
+                                    ThisTemp.TempCreate
+                                  ),
+                                  csubscriptnode.create(
+                                    ThisTemp.Field,
+                                    cloadnode.create(ThisTemp.BaseSymbol,current_procinfo.procdef.localst)
+                                  )
+                                );
+                                NewNode.fileinfo:=fileinfo;
+                                addstatement(NewWrapper,NewNode);
+                              end;
+                            ThisTemp:=TFieldTempPair(ThisTemp.Next);
+                          end;
+
+                        { If NewCopy is assigned, then it contains a block
+                          created during a previous iteration of this
+                          function's for-loop, which includes the original
+                          loop node, so insert that instead }
+                        NewCopy:=getcopy();
+                        node_reset_flags(NewCopy,[],[tnf_pass1_done]);
+                        Include(NewCopy.flags, nf_internal); { Prevents this simplification pass from happening again }
+                        addstatement(NewWrapper, NewCopy);
+
+                        ThisTemp:=TFieldTempPair(RecordData.Fields.Last);
+                        while Assigned(ThisTemp) do
+                          begin
+                            if ThisTemp.FieldWritten then
+                              begin
+                                { Write the value back to the record }
+
+                                NewNode:=cassignmentnode.create(
+                                  csubscriptnode.create(
+                                    ThisTemp.Field,
+                                    cloadnode.create(ThisTemp.BaseSymbol,current_procinfo.procdef.localst)
+                                  ),
+                                  ctemprefnode.create(
+                                    ThisTemp.TempCreate
+                                  )
+                                );
+                                NewNode.pass_typecheck;
+                                NewNode.fileinfo:=fileinfo;
+                                addstatement(NewWrapper, NewNode);
+                              end
+                            else
+                              { Might produce a more efficient temp }
+                              ThisTemp.TempCreate.tempflags:=ThisTemp.TempCreate.tempflags+[ti_const];
+
+                            NewNode:=CTempDeleteNode.create(ThisTemp.TempCreate);
+                            NewNode.fileinfo:=fileinfo;
+                            addstatement(NewWrapper, NewNode);
+                            ThisTemp:=TFieldTempPair(ThisTemp.Previous);
+                          end;
+
+                        Result:=NewBlock;
+
+                        { Keep track of the old block in case more than one
+                          local record appears in the loop }
+                      end;
+                  end;
+              end;
+
+            RecordData.Fields.Free;
+          end;
+      end;
+
 {****************************************************************************
                               TWHILEREPEATNODE
 *****************************************************************************}