mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-18 18:29:18 +02:00
* (modified) patch by J. Gareth Moreton to unify ldr/str optimizations on Aarch64/ARM, part of #38841
git-svn-id: trunk@49338 -
This commit is contained in:
parent
3c205f895c
commit
d936280c6b
@ -44,6 +44,10 @@ Interface
|
|||||||
function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
|
function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
|
||||||
function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
|
function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
|
||||||
function LookForPostindexedPattern(var p : tai) : boolean;
|
function LookForPostindexedPattern(var p : tai) : boolean;
|
||||||
|
public
|
||||||
|
{ With these routines, there's optimisation code that's general for all ARM platforms }
|
||||||
|
function OptPass1LDR(var p: tai): Boolean; override;
|
||||||
|
function OptPass1STR(var p: tai): Boolean; override;
|
||||||
private
|
private
|
||||||
function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
|
function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
|
||||||
function OptPass1Shift(var p: tai): boolean;
|
function OptPass1Shift(var p: tai): boolean;
|
||||||
@ -291,6 +295,24 @@ Implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  begin
    { Try the inherited LDR optimisations first and fall back to the
      post-indexed pattern search.  The function reports True as soon as
      either of them succeeds.  Operand evaluation follows the same Boolean
      evaluation mode as the former "if a or b then Exit(True)" form, so the
      second call is made under exactly the same circumstances as before. }
    Result := inherited OptPass1LDR(p) or LookForPostindexedPattern(p);
  end;
|
||||||
|
|
||||||
|
|
||||||
|
function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  begin
    { Try the inherited STR optimisations first and fall back to the
      post-indexed pattern search.  The function reports True as soon as
      either of them succeeds.  Operand evaluation follows the same Boolean
      evaluation mode as the former "if a or b then Exit(True)" form, so the
      second call is made under exactly the same circumstances as before. }
    Result := inherited OptPass1STR(p) or LookForPostindexedPattern(p);
  end;
|
||||||
|
|
||||||
|
|
||||||
function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
|
function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
|
||||||
var
|
var
|
||||||
hp1,hp2: tai;
|
hp1,hp2: tai;
|
||||||
@ -764,9 +786,10 @@ Implementation
|
|||||||
if p.typ=ait_instruction then
|
if p.typ=ait_instruction then
|
||||||
begin
|
begin
|
||||||
case taicpu(p).opcode of
|
case taicpu(p).opcode of
|
||||||
A_LDR,
|
A_LDR:
|
||||||
|
Result:=OptPass1LDR(p);
|
||||||
A_STR:
|
A_STR:
|
||||||
Result:=LookForPostindexedPattern(p);
|
Result:=OptPass1STR(p);
|
||||||
A_MOV:
|
A_MOV:
|
||||||
Result:=OptPass1Mov(p);
|
Result:=OptPass1Mov(p);
|
||||||
A_STP:
|
A_STP:
|
||||||
|
@ -59,7 +59,11 @@ Type
|
|||||||
function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
|
function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
|
||||||
|
|
||||||
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
|
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
|
||||||
function OptPass1And(var p: tai): Boolean; override; { There's optimisation code that's general for all ARM platforms }
|
|
||||||
|
{ With these routines, there's optimisation code that's general for all ARM platforms }
|
||||||
|
function OptPass1And(var p: tai): Boolean; override;
|
||||||
|
function OptPass1LDR(var p: tai): Boolean; override;
|
||||||
|
function OptPass1STR(var p: tai): Boolean; override;
|
||||||
protected
|
protected
|
||||||
function LookForPreindexedPattern(p: taicpu): boolean;
|
function LookForPreindexedPattern(p: taicpu): boolean;
|
||||||
function LookForPostindexedPattern(p: taicpu): boolean;
|
function LookForPostindexedPattern(p: taicpu): boolean;
|
||||||
@ -69,9 +73,7 @@ Type
|
|||||||
function OptPass1DataCheckMov(var p: tai): Boolean;
|
function OptPass1DataCheckMov(var p: tai): Boolean;
|
||||||
function OptPass1ADDSUB(var p: tai): Boolean;
|
function OptPass1ADDSUB(var p: tai): Boolean;
|
||||||
function OptPass1CMP(var p: tai): Boolean;
|
function OptPass1CMP(var p: tai): Boolean;
|
||||||
function OptPass1LDR(var p: tai): Boolean;
|
|
||||||
function OptPass1STM(var p: tai): Boolean;
|
function OptPass1STM(var p: tai): Boolean;
|
||||||
function OptPass1STR(var p: tai): Boolean;
|
|
||||||
function OptPass1MOV(var p: tai): Boolean;
|
function OptPass1MOV(var p: tai): Boolean;
|
||||||
function OptPass1MUL(var p: tai): Boolean;
|
function OptPass1MUL(var p: tai): Boolean;
|
||||||
function OptPass1MVN(var p: tai): Boolean;
|
function OptPass1MVN(var p: tai): Boolean;
|
||||||
@ -834,7 +836,9 @@ Implementation
|
|||||||
var
|
var
|
||||||
hp1: tai;
|
hp1: tai;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := inherited OptPass1LDR(p);
|
||||||
|
if Result then
|
||||||
|
Exit;
|
||||||
|
|
||||||
{ change
|
{ change
|
||||||
ldr reg1,ref
|
ldr reg1,ref
|
||||||
@ -1022,7 +1026,9 @@ Implementation
|
|||||||
var
|
var
|
||||||
hp1: tai;
|
hp1: tai;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := inherited OptPass1STR(p);
|
||||||
|
if Result then
|
||||||
|
Exit;
|
||||||
|
|
||||||
{ Common conditions }
|
{ Common conditions }
|
||||||
if (taicpu(p).oper[1]^.typ = top_ref) and
|
if (taicpu(p).oper[1]^.typ = top_ref) and
|
||||||
|
@ -41,12 +41,15 @@ Type
|
|||||||
|
|
||||||
function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
|
function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
|
||||||
function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
|
function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
|
||||||
function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
|
function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
|
||||||
|
|
||||||
function OptPass1UXTB(var p: tai): Boolean;
|
function OptPass1UXTB(var p: tai): Boolean;
|
||||||
function OptPass1UXTH(var p: tai): Boolean;
|
function OptPass1UXTH(var p: tai): Boolean;
|
||||||
function OptPass1SXTB(var p: tai): Boolean;
|
function OptPass1SXTB(var p: tai): Boolean;
|
||||||
function OptPass1SXTH(var p: tai): Boolean;
|
function OptPass1SXTH(var p: tai): Boolean;
|
||||||
|
|
||||||
|
function OptPass1LDR(var p: tai): Boolean; virtual;
|
||||||
|
function OptPass1STR(var p: tai): Boolean; virtual;
|
||||||
function OptPass1And(var p: tai): Boolean; virtual;
|
function OptPass1And(var p: tai): Boolean; virtual;
|
||||||
End;
|
End;
|
||||||
|
|
||||||
@ -69,15 +72,23 @@ Implementation
|
|||||||
systems,
|
systems,
|
||||||
cpuinfo,
|
cpuinfo,
|
||||||
cgobj,procinfo,
|
cgobj,procinfo,
|
||||||
aasmbase,aasmdata;
|
aasmbase,aasmdata,itcpugas;
|
||||||
|
|
||||||
|
|
||||||
{$ifdef DEBUG_AOPTCPU}
|
{$ifdef DEBUG_AOPTCPU}
|
||||||
|
const
|
||||||
|
SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
|
||||||
|
|
||||||
procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
|
procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
|
||||||
begin
|
begin
|
||||||
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
|
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
|
||||||
end;
|
end;
|
||||||
{$else DEBUG_AOPTCPU}
|
{$else DEBUG_AOPTCPU}
|
||||||
|
{ Empty strings help the optimizer to remove string concatenations that won't
|
||||||
|
ever appear to the user on release builds. [Kit] }
|
||||||
|
const
|
||||||
|
SPeepholeOptimization = '';
|
||||||
|
|
||||||
procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
|
procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
|
||||||
begin
|
begin
|
||||||
end;
|
end;
|
||||||
@ -179,7 +190,7 @@ Implementation
|
|||||||
|
|
||||||
|
|
||||||
function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
|
function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
|
||||||
Out Next: tai; reg: TRegister): Boolean;
|
Out Next: tai; const reg: TRegister): Boolean;
|
||||||
var
|
var
|
||||||
gniResult: Boolean;
|
gniResult: Boolean;
|
||||||
begin
|
begin
|
||||||
@ -395,7 +406,14 @@ Implementation
|
|||||||
UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
|
||||||
LDRChange := False;
|
LDRChange := False;
|
||||||
|
|
||||||
if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2) then
|
if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
|
||||||
|
{$ifdef AARCH64}
|
||||||
|
{ If r0 is the zero register, then this sequence of instructions will cause
|
||||||
|
an access violation, but that's better than an assembler error caused by
|
||||||
|
changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
|
||||||
|
and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
|
||||||
|
{$endif AARCH64}
|
||||||
|
then
|
||||||
begin
|
begin
|
||||||
|
|
||||||
{ Change the registers from r1 to r0 }
|
{ Change the registers from r1 to r0 }
|
||||||
@ -1018,6 +1036,201 @@ Implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  { Peephole optimisations for an unconditional, two-operand LDR at p that
    is directly followed by an unconditional LDR/STR with the same postfix
    and an equal memory reference:

      ldr reg1,[ref] / str reg1,[ref]  ->  the str is removed
      ldr reg1,[ref] / ldr reg1,[ref]  ->  the second ldr is removed
      ldr reg1,[ref] / ldr reg2,[ref]  ->  the second ldr becomes "mov reg2,reg1"

    p itself is never modified.  Returns True if the following instruction
    was removed or rewritten, False otherwise. }
  var
    hp1: tai;
    Reference: TReference;
  begin
    Result := False;
    if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
      Exit;

    Reference := taicpu(p).oper[1]^.ref^;
    if (Reference.addressmode = AM_OFFSET) and
      { If the loaded register is part of the reference, the address is
        different after the load, so the next access cannot be compared }
      not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
      { Delay calling GetNextInstruction for as long as possible }
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).condition = C_None) and
      (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
      begin
        if (taicpu(hp1).opcode = A_STR) and
          RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
          (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
          begin
            { With:
                ldr reg1,[ref]
                str reg2,[ref]

              If reg1 = reg2, Remove str
            }
            if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
              begin
                DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
                RemoveInstruction(hp1);
                Result := True;
                Exit;
              end;
          end
        else if (taicpu(hp1).opcode = A_LDR) and
          RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
          begin
            { With:
                ldr reg1,[ref]
                ldr reg2,[ref]

              If reg1 = reg2, delete the second ldr
              If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
                a dependency, but it will likely open up new optimisations, so
                do it for now and handle any new dependencies later.
            }
            if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
              begin
                DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
                RemoveInstruction(hp1);
                Result := True;
                Exit;
              end
            else if
              { Only integer registers of the same sub-register size are
                turned into a register-to-register move }
              (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
              (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
              (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
              begin
                DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
                taicpu(hp1).opcode := A_MOV;
                taicpu(hp1).oppostfix := PF_None;
                taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
                { reg1 is now read by hp1, so it has to be marked as
                  allocated over the range from p to hp1 }
                AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
                Result := True;
                Exit;
              end;
          end;
      end;
  end;
|
||||||
|
|
||||||
|
|
||||||
|
function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  { Peephole optimisations for an unconditional, two-operand STR at p that
    is directly followed by an unconditional LDR/STR with the same postfix
    and an equal memory reference:

      str reg1,[ref] / ldr reg1,[ref]  ->  the ldr is removed (no postfix only)
      str reg1,[ref] / ldr reg2,[ref]  ->  the ldr becomes a mov or an
                                           extension (uxtb, sxtb, ...) from reg1
      str reg1,[ref] / str reg1,[ref]  ->  the second str is removed
      str reg1,[ref] / str reg2,[ref]  ->  the first str is removed

    Returns True if one of the two instructions was removed or rewritten,
    False otherwise. }
  var
    hp1: tai;
    Reference: TReference;
    SrcReg: TRegister;
    NewOp: TAsmOp;
  begin
    Result := False;
    if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
      Exit;

    Reference := taicpu(p).oper[1]^.ref^;
    { A single guard is sufficient here: a second, nested test that called
      GetNextInstruction again and repeated the type and condition checks
      was redundant and has been merged into this one. }
    if (Reference.addressmode = AM_OFFSET) and
      not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
      { Delay calling GetNextInstruction for as long as possible }
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).condition = C_None) and
      (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
      begin
        { Saves constant dereferencing and makes it easier to change the size if necessary }
        SrcReg := taicpu(p).oper[0]^.reg;

        if (taicpu(hp1).opcode = A_LDR) and
          RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
          { NOTE(review): the guard above already demands identical
            postfixes, so the unsigned-store/signed-load alternatives below
            cannot currently match; they are kept so that relaxing the
            guard later does not require touching this test. }
          (
            (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
            ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
            ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
{$ifdef AARCH64}
            or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
{$endif AARCH64}
          ) then
          begin
            { With:
                str reg1,[ref]
                ldr reg2,[ref]

              If reg1 = reg2, Remove ldr.
              If reg1 <> reg2, replace ldr with "mov reg2,reg1"
            }

            if (SrcReg = taicpu(hp1).oper[0]^.reg) and
              { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
              (taicpu(p).oppostfix=PF_None) then
              begin
                DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
                RemoveInstruction(hp1);
                Result := True;
                Exit;
              end
            else if (getregtype(SrcReg) = R_INTREGISTER) and
              (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
              (getsubreg(SrcReg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
              begin
                { Pick the register-to-register instruction that reproduces
                  the zero/sign extension the load would have performed }
                case taicpu(hp1).oppostfix of
                  PF_B:
                    NewOp := A_UXTB;
                  PF_SB:
                    NewOp := A_SXTB;
                  PF_H:
                    NewOp := A_UXTH;
                  PF_SH:
                    NewOp := A_SXTH;
{$ifdef AARCH64}
                  PF_SW:
                    NewOp := A_SXTW;
                  PF_W,
{$endif AARCH64}
                  PF_None:
                    NewOp := A_MOV;
                  else
                    InternalError(2021043001);
                end;

                DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);

                taicpu(hp1).oppostfix := PF_None;
                taicpu(hp1).opcode := NewOp;
                taicpu(hp1).loadreg(1, SrcReg);
                { reg1 is now read by hp1, so it has to be marked as
                  allocated over the range from p to hp1 }
                AllocRegBetween(SrcReg, p, hp1, UsedRegs);
                Result := True;
                Exit;
              end;
          end
        else if (taicpu(hp1).opcode = A_STR) and
          RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
          begin
            { With:
                str reg1,[ref]
                str reg2,[ref]

              If reg1 <> reg2, delete the first str
              If reg1 = reg2, delete the second str
            }
            if SrcReg = taicpu(hp1).oper[0]^.reg then
              begin
                DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
                RemoveInstruction(hp1);
                Result := True;
                Exit;
              end
            else if
              { Registers same byte size? }
              (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]) then
              begin
                DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
                { The first store is overwritten by the second one; drop p
                  (RemoveCurrentP presumably continues with hp1 as the new
                  current instruction - confirm against its definition) }
                RemoveCurrentP(p, hp1);
                Result := True;
                Exit;
              end;
          end;
      end;
  end;
|
||||||
|
|
||||||
|
|
||||||
function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
|
function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
|
||||||
var
|
var
|
||||||
hp1, hp2: tai;
|
hp1, hp2: tai;
|
||||||
|
Loading…
Reference in New Issue
Block a user