Merge branch 'struct-regvar' into 'main'

[Cross-platform / Prototype] Record field optimisation in loops

See merge request freepascal.org/fpc/source!948
This commit is contained in:
J. Gareth "Kit" Moreton 2025-04-04 14:32:51 +00:00
commit 317e6470ee

View File

@ -85,6 +85,7 @@ interface
constructor create(l,r:Tnode;tab,cn:boolean);virtual;reintroduce;
function pass_typecheck:tnode;override;
function pass_1 : tnode;override;
function simplify(forinline : boolean) : tnode;override;
{$ifdef state_tracking}
function track_state_pass(exec_known:boolean):boolean;override;
{$endif}
@ -1269,6 +1270,407 @@ implementation
t2.isequal(tloopnode(p).t2);
end;
function recorddirectaccess(var n: tnode; arg: pointer): foreachnoderesult;
begin
result:=fen_false;
case n.nodetype of
subscriptn:
if (TSubscriptNode(n).left.nodetype=loadn) and
(TLoadNode(TSubscriptNode(n).left).symtableentry=TSymEntry(arg)) then
{ It's fine if the record is loaded to access a single field }
result:=fen_norecurse_false;
loadn:
if (TLoadNode(n).symtableentry=TSymEntry(arg)) then
result:=fen_norecurse_true;
else
;
end;
end;
type
TFieldTempPair = class(TLinkedListItem)
BaseSymbol: TAbstractVarSym;
Field: TFieldVarSym;
TempCreate: TTempCreateNode;
InitialRead: Boolean;
FieldRead: Boolean;
FieldWritten: Boolean;
Refs: LongInt;
FirstDepth: Integer;
end;
PRecordData = ^TRecordData;
TRecordData = record
BaseSymbol: TAbstractVarSym;
Fields: TLinkedList;
Depth: Integer;
end;
function recordloopfindrefs(var n: tnode; arg: pointer): foreachnoderesult; forward;
{ Needed as we can't reference recordloopfindrefs directly within itself }
function recordloopfindrefs_recursive(var n: tnode; arg: pointer): foreachnoderesult;
begin
result:=recordloopfindrefs(n, arg);
end;
function recordloopfindrefs(var n: tnode; arg: pointer): foreachnoderesult;
var
ThisTemp: TFieldTempPair;
begin
case n.nodetype of
subscriptn:
if (TSubscriptNode(n).left.nodetype=loadn) and
(TLoadNode(TSubscriptNode(n).left).symtableentry=PRecordData(arg)^.BaseSymbol) and
{ Needs to be a basic type }
not is_string(TSubscriptNode(n).vs.vardef) and
not is_object(TSubscriptNode(n).vs.vardef) and
not is_managed_type(TSubscriptNode(n).vs.vardef) and
(
(
tstoreddef(TSubscriptNode(n).vs.vardef).is_intregable and
(TSubscriptNode(n).vs.vardef.size<=sizeof(aint))
) or
tstoreddef(TSubscriptNode(n).vs.vardef).is_fpuregable or
(
is_vector(tstoreddef(TSubscriptNode(n).vs.vardef)) and
fits_in_mm_register(tstoreddef(TSubscriptNode(n).vs.vardef))
)
) then
begin
{ See if we've defined this field already }
ThisTemp:=TFieldTempPair(PRecordData(arg)^.Fields.First);
while Assigned(ThisTemp) do
begin
if (ThisTemp.BaseSymbol=PRecordData(arg)^.BaseSymbol) and
(ThisTemp.Field=TSubscriptNode(n).vs) then
Break;
ThisTemp:=TFieldTempPair(ThisTemp.Next);
end;
if not Assigned(ThisTemp) then
begin
ThisTemp:=TFieldTempPair.Create;
ThisTemp.BaseSymbol:=PRecordData(arg)^.BaseSymbol;
ThisTemp.Field:=TSubscriptNode(n).vs;
ThisTemp.TempCreate:=CTempCreateNode.Create(TSubscriptNode(n).vs.vardef,TSubscriptNode(n).vs.vardef.size,tt_persistent,True);
ThisTemp.InitialRead:=(nf_modify in TLoadNode(TSubscriptNode(n).left).flags) or not (nf_write in TLoadNode(TSubscriptNode(n).left).flags);
ThisTemp.FieldWritten:=False;
ThisTemp.Refs:=0;
ThisTemp.FirstDepth:=PRecordData(arg)^.Depth;
if not Assigned(PRecordData(arg)^.Fields.Last) then
PRecordData(arg)^.Fields.Insert(ThisTemp)
else
PRecordData(arg)^.Fields.InsertAfter(ThisTemp,PRecordData(arg)^.Fields.Last);
end;
if TLoadNode(TSubscriptNode(n).left).flags*[nf_write,nf_modify]<>[] then
begin
ThisTemp.FieldWritten:=True;
if nf_modify in TLoadNode(TSubscriptNode(n).left).flags then
ThisTemp.FieldRead:=True;
end
else
ThisTemp.FieldRead:=True;
Inc(ThisTemp.Refs);
result:=fen_true;
Exit;
end;
else
if n.InheritsFrom(TLoopNode) then
begin
if foreachnodestatic(pm_postprocess, TLoopNode(n).left, @recordloopfindrefs_recursive, arg) then
result:=fen_true;
{ Writes inside loops may not get executed, so we need to read an initial value to be safe,
hence the incrementation of Depth prior to analysing the right and t1 nodes }
Inc(PRecordData(arg)^.Depth);
if foreachnodestatic(pm_postprocess, TLoopNode(n).right, @recordloopfindrefs_recursive, arg) then
result:=fen_true;
if foreachnodestatic(pm_postprocess, TLoopNode(n).t1, @recordloopfindrefs_recursive, arg) then
result:=fen_true;
Dec(PRecordData(arg)^.Depth);
end;
end;
result:=fen_false;
end;
function recordloopreplacerefs(var n: tnode; arg: pointer): foreachnoderesult;
var
ThisTemp: TFieldTempPair;
NewNode: TNode;
begin
case n.nodetype of
subscriptn:
if (TSubscriptNode(n).left.nodetype=loadn) and
(TLoadNode(TSubscriptNode(n).left).symtableentry.typ in [localvarsym, paravarsym]) then
begin
{ See if this field has been defined }
ThisTemp:=TFieldTempPair(PRecordData(arg)^.Fields.First);
while Assigned(ThisTemp) do
begin
if (ThisTemp.BaseSymbol=TLoadNode(TSubscriptNode(n).left).symtableentry) and
(ThisTemp.Field=TSubscriptNode(n).vs) then
Break;
ThisTemp:=TFieldTempPair(ThisTemp.Next);
end;
if not Assigned(ThisTemp) then
begin
{ The field should not be replaced }
result:=fen_norecurse_false;
Exit;
end;
{ Now actually replace the node }
NewNode:=CTempRefNode.Create(ThisTemp.TempCreate);
NewNode.fileinfo:=n.fileinfo;
NewNode.flags:=NewNode.flags+(TLoadNode(TSubscriptNode(n).left).flags*[nf_write,nf_modify]);
n.Free;
n:=NewNode;
n.pass_typecheck;
result:=fen_true;
Exit;
end;
else
;
end;
result:=fen_false;
end;
{ Estimate a per-platform register limit to prevent too much register pressure. }
const
{$if defined(i386) or defined(i8086)}
RECORD_TEMP_LIMIT = 3;
{$elseif defined(aarch64) or defined(riscv64)}
RECORD_TEMP_LIMIT = 15;
{$else}
RECORD_TEMP_LIMIT = 7;
{$endif}
function twhilerepeatnode.simplify(forinline : boolean) : tnode;
var
X, Y, SymCount: Integer;
MinRefs: LongInt;
CurrentSym: TSym;
RecordData: TRecordData;
AbortRecord: Boolean;
NewBlock: TBlockNode;
NewWrapper: TStatementNode;
ThisTemp, NextTemp: TFieldTempPair;
NewCopy, NewNode: TNode;
begin
result:=nil;
{ Record promotion }
if not (nf_internal in flags) and
{ Slow and not debugger-friendly }
(current_settings.optimizerswitches*[cs_opt_level3,cs_opt_regvar]=[cs_opt_level3,cs_opt_regvar]) then
begin
RecordData.Fields:=nil;
{ Check to see if local record-types can have individual fields
promoted to registers }
if current_procinfo.procdef.localst.symtabletype = localsymtable then
begin
RecordData.Fields:=TLinkedList.Create;
SymCount:=current_procinfo.procdef.localst.SymList.Count-1;
for X:=0 to SymCount do
begin
CurrentSym:=TSym(current_procinfo.procdef.localst.SymList[X]);
if (CurrentSym.typ=localvarsym) and
{ Don't optimise records whose address has been taken,
since there may be some multithreaded access going on }
(TAbstractVarSym(CurrentSym).varsymaccess*[vsa_addr_taken,vsa_different_scope]=[]) and
is_record(TAbstractVarSym(CurrentSym).vardef) and
{ TODO: Support unions in a limited fashion later }
not TRecordDef(TAbstractVarSym(CurrentSym).vardef).isunion and
{ Ignore records with only a single field }
(TRecordDef(TAbstractVarSym(CurrentSym).vardef).symtable.SymList.Count > 1) then
begin
AbortRecord:=False;
{ Make sure an absolute variable doesn't alias to it }
for Y:=0 to SymCount do
if (X<>Y) and
(TSym(current_procinfo.procdef.localst.SymList[X]).typ=absolutevarsym) and
(TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).abstyp=tovar) and
(TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).ref.firstsym^.sltype=sl_load) and
(TAbsoluteVarSym(current_procinfo.procdef.localst.SymList[X]).ref.firstsym^.sym=CurrentSym) then
begin
{ Don't take any chances }
AbortRecord:=True;
Break;
end;
if AbortRecord then
Continue;
{ Check to see that the symbol isn't directly accessed as one }
if foreachnodestatic(pm_postprocess, TNode(Self), @recorddirectaccess, CurrentSym) then
Continue;
RecordData.BaseSymbol:=TAbstractVarSym(CurrentSym);
RecordData.Depth:=0;
foreachnodestatic(pm_postprocess, TNode(Self), @recordloopfindrefs, @RecordData);
end;
end;
if (RecordData.Fields.Count > 0) then
begin
{ Remove any read-only fields with too low a reference count,
since the saving isn't really worth it }
ThisTemp:=TFieldTempPair(RecordData.Fields.First);
while Assigned(ThisTemp) do
begin
NextTemp:=TFieldTempPair(ThisTemp.Next);
if not ThisTemp.FieldWritten and (ThisTemp.Refs<3) then
begin
{ Exclude, as saving is minimal at best and
it risks too much register pressure }
ThisTemp.TempCreate.Free;
RecordData.Fields.Remove(ThisTemp);
end;
ThisTemp:=NextTemp;
end;
{ If we have too many record fields to potentially optimise,
start excluding read-only ones that give a low return }
while (RecordData.Fields.Count > RECORD_TEMP_LIMIT) do
begin
MinRefs:=$7FFFFFFF;
NextTemp:=nil;
ThisTemp:=TFieldTempPair(RecordData.Fields.First);
while Assigned(ThisTemp) do
begin
if not ThisTemp.FieldWritten and (ThisTemp.Refs<MinRefs) then
begin
NextTemp:=ThisTemp;
MinRefs:=ThisTemp.Refs;
end;
ThisTemp:=TFieldTempPair(ThisTemp.Next);
end;
if not Assigned(NextTemp) then
{ No more read-only temps }
Break;
TFieldTempPair(NextTemp).TempCreate.Free;
RecordData.Fields.Remove(NextTemp);
end;
{ If we're still over the limit, start removing ones that write
back to the records }
while (RecordData.Fields.Count > RECORD_TEMP_LIMIT) do
begin
MinRefs:=$7FFFFFFF;
NextTemp:=nil;
ThisTemp:=TFieldTempPair(RecordData.Fields.First);
while Assigned(ThisTemp) do
begin
if (ThisTemp.Refs<MinRefs) then
begin
NextTemp:=ThisTemp;
MinRefs:=ThisTemp.Refs;
end;
ThisTemp:=TFieldTempPair(ThisTemp.Next);
end;
TFieldTempPair(NextTemp).TempCreate.Free;
RecordData.Fields.Remove(NextTemp);
end;
{ Now that inefficient ones have been removed, replace the subscript nodes }
if (RecordData.Fields.Count > 0) and
foreachnodestatic(pm_postprocess, TNode(Self), @recordloopreplacerefs, @RecordData) then
begin
{ Since the for-loop has had temprefs inserted, put
the relevant tempcreates and tempdeletes before
and after it. }
NewBlock:=internalstatements(NewWrapper);
ThisTemp:=TFieldTempPair(RecordData.Fields.First);
while Assigned(ThisTemp) do
begin
ThisTemp.TempCreate.fileinfo:=fileinfo;
addstatement(NewWrapper, ThisTemp.TempCreate);
if ThisTemp.InitialRead or (ThisTemp.FirstDepth<>0) then
begin
NewNode:=cassignmentnode.create_internal( { Suppress uninitialized value warning }
ctemprefnode.create(
ThisTemp.TempCreate
),
csubscriptnode.create(
ThisTemp.Field,
cloadnode.create(ThisTemp.BaseSymbol,current_procinfo.procdef.localst)
)
);
NewNode.fileinfo:=fileinfo;
addstatement(NewWrapper,NewNode);
end;
ThisTemp:=TFieldTempPair(ThisTemp.Next);
end;
{ If NewCopy is assigned, then it contains a block
created during a previous iteration of this
function's for-loop, which includes the original
loop node, so insert that instead }
NewCopy:=getcopy();
node_reset_flags(NewCopy,[],[tnf_pass1_done]);
Include(NewCopy.flags, nf_internal); { Prevents this simplification pass from happening again }
addstatement(NewWrapper, NewCopy);
ThisTemp:=TFieldTempPair(RecordData.Fields.Last);
while Assigned(ThisTemp) do
begin
if ThisTemp.FieldWritten then
begin
{ Write the value back to the record }
NewNode:=cassignmentnode.create(
csubscriptnode.create(
ThisTemp.Field,
cloadnode.create(ThisTemp.BaseSymbol,current_procinfo.procdef.localst)
),
ctemprefnode.create(
ThisTemp.TempCreate
)
);
NewNode.pass_typecheck;
NewNode.fileinfo:=fileinfo;
addstatement(NewWrapper, NewNode);
end
else
{ Might produce a more efficient temp }
ThisTemp.TempCreate.tempflags:=ThisTemp.TempCreate.tempflags+[ti_const];
NewNode:=CTempDeleteNode.create(ThisTemp.TempCreate);
NewNode.fileinfo:=fileinfo;
addstatement(NewWrapper, NewNode);
ThisTemp:=TFieldTempPair(ThisTemp.Previous);
end;
Result:=NewBlock;
{ Keep track of the old block in case more than one
local record appears in the loop }
end;
end;
end;
RecordData.Fields.Free;
end;
end;
{****************************************************************************
TWHILEREPEATNODE
*****************************************************************************}