Refactoring procedures in aoptx86 to gain speed boosts

This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-01-12 08:52:35 +00:00 committed by J. Gareth "Kit" Moreton
parent c85bef5494
commit d209d3ba84

View File

@ -50,7 +50,7 @@ unit aoptx86;
OptsToCheck : set of TOptsToCheck; OptsToCheck : set of TOptsToCheck;
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override; function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override; function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
function RegReadByInstruction(reg : TRegister; hp : tai) : boolean; class function RegReadByInstruction(reg : TRegister; hp : tai) : boolean; static;
function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override; function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean; function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
@ -98,12 +98,12 @@ unit aoptx86;
function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister; function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
{ checks whether loading a new value in reg1 overwrites the entirety of reg2 } { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean; class function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean; static;
{ checks whether reading the value in reg1 depends on the value of reg2. This { checks whether reading the value in reg1 depends on the value of reg2. This
is very similar to SuperRegisterEquals, except it takes into account that is very similar to SuperRegisterEquals, except it takes into account that
R_SUBH and R_SUBL are independent (e.g. reading from AL does not R_SUBH and R_SUBL are independent (e.g. reading from AL does not
depend on the value in AH). } depend on the value in AH). }
function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean; class function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean; static;
{ Replaces all references to AOldReg in a memory reference to ANewReg } { Replaces all references to AOldReg in a memory reference to ANewReg }
class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static; class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
@ -353,13 +353,12 @@ unit aoptx86;
function RefsEqual(const r1, r2: treference): boolean; function RefsEqual(const r1, r2: treference): boolean;
begin begin
RefsEqual := RefsEqual :=
(r1.offset = r2.offset) and
(r1.segment = r2.segment) and (r1.base = r2.base) and
(r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
(r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
(r1.relsymbol = r2.relsymbol) and (r1.relsymbol = r2.relsymbol) and
(r1.volatility=[]) and (r1.segment = r2.segment) and (r1.base = r2.base) and
(r2.volatility=[]); (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
(r1.offset = r2.offset) and
(r1.volatility + r2.volatility = []);
end; end;
@ -476,7 +475,7 @@ unit aoptx86;
end; end;
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean; class function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
var var
p: taicpu; p: taicpu;
opcount: longint; opcount: longint;
@ -1069,7 +1068,6 @@ unit aoptx86;
Currentp: tai; Currentp: tai;
Breakout: Boolean; Breakout: Boolean;
begin begin
{ TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
Result := NR_NO; Result := NR_NO;
RegSet := RegSet :=
paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) + paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
@ -1143,7 +1141,6 @@ unit aoptx86;
Currentp: tai; Currentp: tai;
Breakout: Boolean; Breakout: Boolean;
begin begin
{ TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
Result := NR_NO; Result := NR_NO;
RegSet := RegSet :=
paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption) + paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption) +
@ -1198,7 +1195,7 @@ unit aoptx86;
end; end;
function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean; class function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
begin begin
if not SuperRegistersEqual(reg1,reg2) then if not SuperRegistersEqual(reg1,reg2) then
exit(false); exit(false);
@ -1235,7 +1232,7 @@ unit aoptx86;
end; end;
function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean; class function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
begin begin
if not SuperRegistersEqual(reg1,reg2) then if not SuperRegistersEqual(reg1,reg2) then
exit(false); exit(false);
@ -1426,7 +1423,7 @@ unit aoptx86;
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
var var
p: taicpu absolute hp; p: taicpu absolute hp; { Implicit typecast }
i: Integer; i: Integer;
begin begin
Result := False; Result := False;
@ -1434,7 +1431,6 @@ unit aoptx86;
(hp.typ <> ait_instruction) then (hp.typ <> ait_instruction) then
Exit; Exit;
// p := taicpu(hp);
Prefetch(insprop[p.opcode]); Prefetch(insprop[p.opcode]);
if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
with insprop[p.opcode] do with insprop[p.opcode] do
@ -1442,12 +1438,14 @@ unit aoptx86;
case getsubreg(reg) of case getsubreg(reg) of
R_SUBW,R_SUBD,R_SUBQ: R_SUBW,R_SUBD,R_SUBQ:
Result:= Result:=
RegLoadedWithNewValue(NR_CARRYFLAG,hp) and { ZF, CF, OF, SF, PF and AF must all be set in some way (ordered so the most
RegLoadedWithNewValue(NR_PARITYFLAG,hp) and uncommon flags are checked first) }
RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and ([Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags] * Ch <> []) and
RegLoadedWithNewValue(NR_ZEROFLAG,hp) and ([Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch <> []) and
RegLoadedWithNewValue(NR_SIGNFLAG,hp) and ([Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch <> []) and
RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp); ([Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch <> []) and
([Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch <> []) and
([Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch <> []);
R_SUBFLAGCARRY: R_SUBFLAGCARRY:
Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[]; Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGPARITY: R_SUBFLAGPARITY:
@ -1465,10 +1463,7 @@ unit aoptx86;
R_SUBFLAGDIRECTION: R_SUBFLAGDIRECTION:
Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[]; Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
else else
begin
writeln(getsubreg(reg));
internalerror(2017050501); internalerror(2017050501);
end;
end; end;
exit; exit;
end; end;
@ -1731,7 +1726,7 @@ unit aoptx86;
end; end;
end; end;
{ Don't do these ones first in case an input operand is equal to an explicit output registers } { Don't do these ones first in case an input operand is equal to an explicit output register }
case getsupreg(reg) of case getsupreg(reg) of
RS_EAX: RS_EAX:
if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
@ -5002,7 +4997,8 @@ unit aoptx86;
((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
(taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO) { Segment register of p.oper[0]^.ref will be NR_NO already }
(taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64} {$ifdef x86_64}
and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff) and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
@ -5053,10 +5049,9 @@ unit aoptx86;
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
(taicpu(p).oper[0]^.ref^.segment = NR_NO) and
(taicpu(p).oper[0]^.ref^.symbol = nil) and (taicpu(p).oper[0]^.ref^.symbol = nil) and
(taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
(taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and { Since we're merging two LEA instructions, the segment registers don't matter }
(taicpu(hp1).oper[0]^.ref^.symbol = nil) and (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
( (
(taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily } (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
@ -5855,7 +5850,7 @@ unit aoptx86;
if (TmpRef.base = NR_NO) and if (TmpRef.base = NR_NO) and
(taicpu(hp1).oper[0]^.ref^.symbol=nil) and (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
(taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
(taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and { Segment register isn't a concern here }
((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
(taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
begin begin
@ -12200,7 +12195,6 @@ unit aoptx86;
(taicpu(p).oper[0]^.ref^.offset=-24)) and (taicpu(p).oper[0]^.ref^.offset=-24)) and
(taicpu(p).oper[0]^.ref^.symbol=nil) and (taicpu(p).oper[0]^.ref^.symbol=nil) and
(taicpu(p).oper[0]^.ref^.relsymbol=nil) and (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
(taicpu(p).oper[0]^.ref^.segment=NR_NO) and
(taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
GetNextInstruction(p, hp1) and GetNextInstruction(p, hp1) and
{ Take a copy of hp1 } { Take a copy of hp1 }
@ -12217,7 +12211,7 @@ unit aoptx86;
(taicpu(hp2).oper[0]^.ref^.index=NR_NO) and (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
(taicpu(hp2).oper[0]^.ref^.symbol=nil) and (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
(taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
(taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and { Segment register will be NR_NO }
(taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
GetNextInstruction(hp2, hp3) and GetNextInstruction(hp2, hp3) and
{ trick to skip label } { trick to skip label }