Merge branch 'sliding-window' into 'main'

[x86 (currently)] New Sliding Window-based assembly-level CSE subsystem

See merge request freepascal.org/fpc/source!191
This commit is contained in:
J. Gareth "Kit" Moreton 2025-04-04 05:20:32 +00:00
commit 23559204c1
7 changed files with 1220 additions and 9 deletions

View File

@ -99,6 +99,9 @@ unit aoptbase;
{ returns true if reg is modified by any instruction between p1 and p2 }
function RegModifiedBetween(reg: TRegister; p1, p2: tai): Boolean;
{ returns true if reg1 or reg2 is modified by any instruction strictly between p1 and p2 (p1 and p2 themselves are not checked); a register passed as NR_NO is ignored }
function RegPairModifiedBetween(reg1,reg2: TRegister; p1, p2: tai): Boolean;
{ returns true if reg is loaded with a new value by hp }
function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; Virtual;
@ -318,6 +321,19 @@ unit aoptbase;
end;
{ Steps from p1 towards p2 with GetNextInstruction and reports whether any
  instruction reached before p2 modifies reg1 or reg2.  Neither p1 itself nor
  p2 is tested.  A register equal to NR_NO is never tested.  Returns False
  when p1 or p2 is nil, or when GetNextInstruction fails before a modifying
  instruction is found. }
Function TAOptBase.RegPairModifiedBetween(reg1,reg2 : TRegister;p1,p2 : tai) : Boolean;
  Begin
    Result:=false;
    while assigned(p1) and assigned(p2) and GetNextInstruction(p1,p1) and (p1<>p2) do
      begin
        { Result stays False after every non-matching instruction, so falling
          out of the loop still yields False }
        Result:=((reg1<>NR_NO) and RegModifiedByInstruction(reg1,p1)) or
                ((reg2<>NR_NO) and RegModifiedByInstruction(reg2,p1));
        if Result then
          exit;
      end;
  End;
function TAoptBase.RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean;
begin
result:=false;

View File

@ -272,6 +272,7 @@ Unit AoptObj;
Procedure ClearUsedRegs;
Procedure UpdateUsedRegs(p : Tai); {$ifdef USEINLINE}inline;{$endif USEINLINE}
class procedure UpdateUsedRegs(var Regs: TAllUsedRegs; p: Tai); static;
class procedure UpdateUsedRegsIgnoreNew(var Regs: TAllUsedRegs; p: Tai); static;
{ UpdateUsedRegsBetween updates the given TUsedRegs from p1 to p2 exclusive, calling GetNextInstruction
to move between instructions and sending p1.Next to UpdateUsedRegs }
@ -283,8 +284,14 @@ Unit AoptObj;
function UpdateUsedRegsAndOptimize(p : Tai): Tai;
Function CopyUsedRegs(var dest : TAllUsedRegs) : boolean;
class function CopyUsedRegs(var source: TAllUsedRegs; var dest : TAllUsedRegs) : boolean; static;
{ Merges the registers marked as used into UsedRegs }
procedure MergeUsedRegs(const Regs : TAllUsedRegs);
procedure RestoreUsedRegs(const Regs : TAllUsedRegs);
procedure TransferUsedRegs(var dest: TAllUsedRegs);
class procedure TransferUsedRegs(var source: TAllUsedRegs; var dest: TAllUsedRegs); static;
class procedure ReleaseUsedRegs(const regs : TAllUsedRegs); static;
class function RegInUsedRegs(reg : TRegister;var regs : TAllUsedRegs) : boolean; static;
class procedure IncludeRegInUsedRegs(reg : TRegister;var regs : TAllUsedRegs); static; {$ifdef USEINLINE}inline;{$endif USEINLINE}
@ -440,6 +447,14 @@ Unit AoptObj;
{ Actually updates a used register }
class procedure UpdateReg(var Regs : TAllUsedRegs; p: tai_regalloc); static; {$ifdef USEINLINE}inline;{$endif USEINLINE}
{ Called whenever a new iteration of pass 1 starts. Override for
platform-specific behaviour }
procedure Pass1Initialize; virtual;
{ Called whenever a new iteration of pass 2 starts. Override for
platform-specific behaviour }
procedure Pass2Initialize; virtual;
private
procedure DebugMsg(const s: string; p: tai);
@ -1150,6 +1165,37 @@ Unit AoptObj;
end;
{ Walks forward from p over skippable list entries and register allocation
  entries, and removes from Regs every register named by an ait_RegAlloc
  entry whose ratype is ra_dealloc.  Every other ratype falls into the empty
  else branch of the case, so this routine never adds a register to Regs.

  Regs: the used-register sets to update, indexed by register type.
  p:    the list entry at which the walk starts; may be nil. }
class procedure TAOptObj.UpdateUsedRegsIgnoreNew(var Regs: TAllUsedRegs; p: Tai);
begin
{ this code is based on TUsedRegs.Update to avoid multiple passes through the asmlist,
the code is duplicated here }
repeat
{ Step over entries in SkipInstr other than register allocations, labels for
  which labelCanBeSkipped returns true, and the three listed marker kinds }
while assigned(p) and
((p.typ in (SkipInstr - [ait_RegAlloc])) or
((p.typ = ait_label) and
labelCanBeSkipped(tai_label(p))) or
((p.typ = ait_marker) and
(tai_Marker(p).Kind in [mark_AsmBlockEnd,mark_NoLineInfoStart,mark_NoLineInfoEnd]))) do
p := tai(p.next);
{ Process a run of consecutive register allocation entries }
while assigned(p) and
(p.typ=ait_RegAlloc) Do
begin
{ Prefetch hint for the entry that will be visited next }
prefetch(pointer(p.Next)^);
case tai_regalloc(p).ratype of
ra_dealloc :
{ Drop the deallocated register from the set belonging to its register type }
Exclude(Regs[getregtype(tai_regalloc(p).reg)].UsedRegs, getsupreg(tai_regalloc(p).reg));
else
{ Any other ratype is deliberately ignored here }
;
end;
p := tai(p.next);
end;
{ Stop at the end of the list, or at the first entry that is neither in
  SkipInstr nor a skippable label; otherwise run both inner loops again }
until not(assigned(p)) or
(not(p.typ in SkipInstr) and
not((p.typ = ait_label) and
labelCanBeSkipped(tai_label(p))));
end;
class procedure TAOptObj.UpdateUsedRegsBetween(var Regs: TAllUsedRegs; p1, p2: Tai);
begin
{ this code is based on TUsedRegs.Update to avoid multiple passes through the asmlist,
@ -1191,6 +1237,26 @@ Unit AoptObj;
end;
{ Fills every entry of dest with a TUsedRegs object constructed through
  TUsedRegs.Create_Regset from the register set that the matching entry of
  source reports via GetUsedRegs.  Whatever dest held before is overwritten.
  Always returns True. }
class function TAOptObj.CopyUsedRegs(var source: TAllUsedRegs; var dest: TAllUsedRegs): boolean;
  var
    RegType : TRegisterType;
  begin
    for RegType:=low(TRegisterType) to high(TRegisterType) do
      dest[RegType]:=TUsedRegs.Create_Regset(RegType,source[RegType].GetUsedRegs);
    Result:=true;
  end;
{ Forms, for each register type, the union of the optimizer's own UsedRegs
  set and the set held in Regs, and stores it back into UsedRegs.  Registers
  already marked in UsedRegs stay marked; Regs is left unchanged. }
procedure TAOptObj.MergeUsedRegs(const Regs : TAllUsedRegs);
  var
    RegType : TRegisterType;
  begin
    for RegType:=low(TRegisterType) to high(TRegisterType) do
      UsedRegs[RegType].UsedRegs:=UsedRegs[RegType].UsedRegs+Regs[RegType].UsedRegs;
  end;
procedure TAOptObj.RestoreUsedRegs(const Regs: TAllUsedRegs);
var
i : TRegisterType;
@ -1215,6 +1281,18 @@ Unit AoptObj;
end;
{ Overwrites the state of each existing dest entry with the register set that
  the matching source entry reports via GetUsedRegs. }
class procedure TAOptObj.TransferUsedRegs(var source: TAllUsedRegs; var dest: TAllUsedRegs); static;
  var
    RegType : TRegisterType;
  begin
    { Create_Regset is a constructor, but it is deliberately invoked here on
      the already existing dest entries like an ordinary method, so no new
      object is instantiated.  It is the only published means of replacing
      the internal state in one go. [Kit] }
    for RegType:=low(TRegisterType) to high(TRegisterType) do
      dest[RegType].Create_Regset(RegType, source[RegType].GetUsedRegs);
  end;
class procedure TAOptObj.ReleaseUsedRegs(const regs: TAllUsedRegs);
var
i : TRegisterType;
@ -2694,6 +2772,7 @@ Unit AoptObj;
p := StartPoint;
FirstInstruction := True;
ClearUsedRegs;
Pass1Initialize;
while Assigned(p) and (p <> BlockEnd) Do
begin
@ -2767,6 +2846,7 @@ Unit AoptObj;
stoploop := True;
p := BlockStart;
ClearUsedRegs;
Pass2Initialize;
while (p <> BlockEnd) Do
begin
prefetch(pointer(p.Next)^);
@ -2829,6 +2909,18 @@ Unit AoptObj;
end;
{ Default implementation of the virtual pass 1 initialisation hook.  The
  pass 1 loop calls it after ClearUsedRegs each time it restarts from the
  beginning of the block.  Platform-specific optimizers override it; the
  base version intentionally has no effect. }
procedure TAOptObj.Pass1Initialize;
begin
{ Do nothing by default }
end;
{ Default implementation of the virtual pass 2 initialisation hook.  The
  pass 2 loop calls it after ClearUsedRegs each time it restarts from the
  beginning of the block.  Platform-specific optimizers override it; the
  base version intentionally has no effect. }
procedure TAOptObj.Pass2Initialize;
begin
{ Do nothing by default }
end;
procedure TAOptObj.Debug_InsertInstrRegisterDependencyInfo;
var
p: tai;

View File

@ -378,7 +378,8 @@ interface
cs_opt_use_load_modify_store,
cs_opt_unused_para,
cs_opt_consts,
cs_opt_forloop
cs_opt_forloop,
cs_opt_asmcse
);
toptimizerswitches = set of toptimizerswitch;
@ -452,7 +453,8 @@ interface
'ORDERFIELDS','FASTMATH','DEADVALUES','REMOVEEMPTYPROCS',
'CONSTPROP',
'DEADSTORE','FORCENOSTACKFRAME','USELOADMODIFYSTORE',
'UNUSEDPARA','CONSTS','FORLOOP'
'UNUSEDPARA','CONSTS','FORLOOP',
'ASMCSE'
);
WPOptimizerSwitchStr : array [twpoptimizerswitch] of string[14] = (
'DEVIRTCALLS','OPTVMTS','SYMBOLLIVENESS'
@ -486,7 +488,7 @@ interface
genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_empty_proc,cs_opt_unused_para];
genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa,cs_opt_loopstrength
{$ifndef llvm},cs_opt_use_load_modify_store{$endif},
cs_opt_loopunroll,cs_opt_forloop];
cs_opt_loopunroll,cs_opt_forloop,cs_opt_asmcse];
genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
{ whole program optimizations whose information generation requires

View File

@ -26,6 +26,7 @@ unit aoptcpu;
{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$define DEBUG_AOPTCSE}
{$endif EXTDEBUG}
Interface
@ -37,6 +38,7 @@ unit aoptcpu;
Type
TCpuAsmOptimizer = class(TX86AsmOptimizer)
procedure DebugSWMsg(const s : string; p : tai);inline;
function PrePeepHoleOptsCpu(var p: tai): boolean; override;
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
@ -49,10 +51,10 @@ unit aoptcpu;
Implementation
uses
verbose,globtype,globals,
verbose,globtype,globals,cutils,
cpuinfo,
aasmcpu,
aoptutils,
aoptbase,aoptutils,
aasmcfi,
procinfo,
cgutils,
@ -60,6 +62,26 @@ unit aoptcpu;
symsym,symconst;
{ Debug output support for the assembly-level CSE (sliding window) code.
  Two variants are provided and exactly one is compiled, selected by the
  DEBUG_AOPTCSE define, which this unit sets when EXTDEBUG is defined. }
{$ifdef DEBUG_AOPTCSE}
const
{ Prefix that callers prepend to every sliding window debug message }
SSlidingWindow: shortstring = 'Assembly CSE: ';
{ Debug variant: inserts the text s as a tai_comment entry into asml
  directly before p.  The text is passed through strpnew first. }
procedure TCpuAsmOptimizer.DebugSWMsg(const s: string;p : tai);
begin
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
end;
{$else DEBUG_AOPTCSE}
{ Empty strings help the optimizer to remove string concatenations that won't
ever appear to the user on release builds. [Kit] }
const
SSlidingWindow = '';
{ Release variant: inline and empty, so s and p are ignored and nothing is
  inserted into the assembler list. }
procedure TCpuAsmOptimizer.DebugSWMsg(const s: string;p : tai);inline;
begin
end;
{$endif DEBUG_AOPTCSE}
{ Checks if the register is a 32 bit general purpose register }
function isgp32reg(reg: TRegister): boolean;
begin
@ -260,10 +282,34 @@ unit aoptcpu;
A_CLC,
A_STC:
Result:=OptPass1STCCLC(p);
A_CALL:
if (cs_opt_asmcse in current_settings.optimizerswitches) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon CALL', p);
ResetSW;
end;
else
;
end;
{ If an unsafe reference is found, clear the sliding window }
if not Result and
(cs_opt_asmcse in current_settings.optimizerswitches) and
{ Saves doing it twice }
(taicpu(p).opcode <> A_CALL) and
IsWriteToMemory(taicpu(p)) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon memory write', p);
ResetSW;
end;
end;
ait_label:
if (cs_opt_asmcse in current_settings.optimizerswitches) and
not labelCanBeSkipped(tai_label(p)) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon finding label', p);
ResetSW;
end;
else
;
end;

View File

@ -2524,7 +2524,8 @@ const
'cs_opt_use_load_modify_store',
'cs_opt_unused_para',
'cs_opt_consts',
'cs_opt_forloop'
'cs_opt_forloop',
'cs_opt_asmcse'
);
var
globalswitch : tglobalswitch;

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,7 @@ implementation
uses
globals,
aoptbase,cutils,
globtype,
aasmcpu;
@ -183,10 +184,34 @@ uses
A_CLC,
A_STC:
Result:=OptPass1STCCLC(p);
A_CALL:
if (cs_opt_asmcse in current_settings.optimizerswitches) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon CALL', p);
ResetSW;
end;
else
;
end;
{ If an unsafe reference is found, clear the sliding window }
if not Result and
(cs_opt_asmcse in current_settings.optimizerswitches) and
{ Saves doing it twice }
(taicpu(p).opcode <> A_CALL) and
IsWriteToMemory(taicpu(p)) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon memory write', p);
ResetSW;
end;
end;
ait_label:
if (cs_opt_asmcse in current_settings.optimizerswitches) and
not labelCanBeSkipped(tai_label(p)) then
begin
DebugSWMsg(SSlidingWindow + 'Reset sliding window upon finding label', p);
ResetSW;
end;
else
;
end;