mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-23 18:49:49 +02:00
* patch by Nico Erfurth:
Improve ARM-Peephole Optimizers 1.) Introduce a ARM-specific RegUsedAfterInstruction which analyzes instructions and reg allocation information to see if a register is really needed afterwards to decide if some special optimizations can be done. 2.) Introduce "RemoveSuperfluousMove" This tries to fold mov into a previous Data-Instruction (ADD, ORR, etc) or LDR-Instruction. 3.) Introduce new Optimizer "DataMov2Data" and modify LdrMov2Ldr to use RemoveSuperfluousMove 4.) Expand Ldr* and Str* Optimizers to also work on {Ldr,Str}{,b,h} git-svn-id: trunk@21314 -
This commit is contained in:
parent
ad4876dcb5
commit
05a8783b1e
@ -39,6 +39,9 @@ Type
|
||||
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
|
||||
procedure PeepHoleOptPass2;override;
|
||||
Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
|
||||
procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
|
||||
function RegUsedAfterInstruction(reg: Tregister; p: tai;
|
||||
var AllUsedRegs: TAllUsedRegs): Boolean;
|
||||
End;
|
||||
|
||||
TCpuPreRegallocScheduler = class(TAsmOptimizer)
|
||||
@ -106,7 +109,7 @@ Implementation
|
||||
result := (oper.typ = top_reg) and (oper.reg = reg);
|
||||
end;
|
||||
|
||||
procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList) ;
|
||||
procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
|
||||
begin
|
||||
if (taicpu(movp).condition = C_EQ) and
|
||||
(taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
|
||||
@ -118,11 +121,91 @@ Implementation
|
||||
end;
|
||||
end;
|
||||
|
||||
function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
|
||||
var
|
||||
p: taicpu;
|
||||
begin
|
||||
p := taicpu(hp);
|
||||
regLoadedWithNewValue :=
|
||||
(assigned(hp)) and
|
||||
(hp.typ = ait_instruction) and
|
||||
(not(p.opcode in [A_STR, A_STRB, A_STRH, A_CMP, A_CMN, A_TST, A_TEQ,
|
||||
A_B, A_BL, A_BX, A_BLX])) and
|
||||
(p.oper[0]^.typ = top_reg) and
|
||||
(p.oper[0]^.reg = reg);
|
||||
end;
|
||||
|
||||
function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
|
||||
var
|
||||
p: taicpu;
|
||||
i: longint;
|
||||
begin
|
||||
instructionLoadsFromReg := false;
|
||||
if not (assigned(hp) and (hp.typ = ait_instruction)) then
|
||||
exit;
|
||||
p:=taicpu(hp);
|
||||
|
||||
i:=1;
|
||||
{For these instructions we have to start on oper[0]}
|
||||
if (p.opcode in [A_STR, A_STRB, A_STRH, A_CMP, A_CMN, A_TST, A_TEQ,
|
||||
A_B, A_BL, A_BX, A_BLX]) then i:=0;
|
||||
|
||||
while(i<p.ops) do
|
||||
begin
|
||||
case p.oper[I]^.typ of
|
||||
top_reg:
|
||||
instructionLoadsFromReg := p.oper[I]^.reg = reg;
|
||||
top_regset:
|
||||
instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
|
||||
top_shifterop:
|
||||
instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
|
||||
top_ref:
|
||||
instructionLoadsFromReg :=
|
||||
(p.oper[I]^.ref^.base = reg) or
|
||||
(p.oper[I]^.ref^.index = reg);
|
||||
end;
|
||||
if instructionLoadsFromReg then exit; {Bailout if we found something}
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
|
||||
function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
|
||||
var AllUsedRegs: TAllUsedRegs): Boolean;
|
||||
begin
|
||||
AllUsedRegs[getregtype(reg)].Update(tai(p.Next));
|
||||
RegUsedAfterInstruction :=
|
||||
(AllUsedRegs[getregtype(reg)].IsUsed(reg)) and
|
||||
(not(getNextInstruction(p,p)) or
|
||||
instructionLoadsFromReg(reg,p) or
|
||||
not(regLoadedWithNewValue(reg,p)));
|
||||
end;
|
||||
|
||||
procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
|
||||
var
|
||||
TmpUsedRegs: TAllUsedRegs;
|
||||
begin
|
||||
if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
|
||||
(taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
|
||||
MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) then
|
||||
begin
|
||||
CopyUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,movp,TmpUsedRegs)) then
|
||||
begin
|
||||
asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
|
||||
taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
|
||||
asml.remove(movp);
|
||||
movp.free;
|
||||
end;
|
||||
ReleaseUsedRegs(TmpUsedRegs);
|
||||
end;
|
||||
end;
|
||||
|
||||
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
||||
var
|
||||
hp1,hp2: tai;
|
||||
i: longint;
|
||||
TmpUsedRegs: TAllUsedRegs;
|
||||
|
||||
begin
|
||||
result := false;
|
||||
case p.typ of
|
||||
@ -160,7 +243,9 @@ Implementation
|
||||
else
|
||||
*)
|
||||
case taicpu(p).opcode of
|
||||
A_STR:
|
||||
A_STR,
|
||||
A_STRH,
|
||||
A_STRB:
|
||||
begin
|
||||
{ change
|
||||
str reg1,ref
|
||||
@ -171,7 +256,17 @@ Implementation
|
||||
}
|
||||
if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
||||
GetNextInstruction(p,hp1) and
|
||||
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
|
||||
(
|
||||
( (taicpu(p).opcode = A_STR) and
|
||||
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None])
|
||||
) or
|
||||
( (taicpu(p).opcode = A_STRH) and
|
||||
MatchInstruction(hp1, A_LDRH, [taicpu(p).condition, C_None], [PF_None])
|
||||
) or
|
||||
( (taicpu(p).opcode = A_STRB) and
|
||||
MatchInstruction(hp1, A_LDRB, [taicpu(p).condition, C_None], [PF_None])
|
||||
)
|
||||
) and
|
||||
RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
|
||||
(taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
|
||||
begin
|
||||
@ -190,7 +285,11 @@ Implementation
|
||||
result := true;
|
||||
end;
|
||||
end;
|
||||
A_LDR:
|
||||
A_LDR,
|
||||
A_LDRH,
|
||||
A_LDRB,
|
||||
A_LDRSH,
|
||||
A_LDRSB:
|
||||
begin
|
||||
{ change
|
||||
ldr reg1,ref
|
||||
@ -201,7 +300,7 @@ Implementation
|
||||
}
|
||||
if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
||||
GetNextInstruction(p,hp1) and
|
||||
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
|
||||
MatchInstruction(hp1, taicpu(p).opcode, [taicpu(p).condition, C_None], [PF_None]) and
|
||||
RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
|
||||
(taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
|
||||
(taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
|
||||
@ -221,8 +320,7 @@ Implementation
|
||||
taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
|
||||
end;
|
||||
result := true;
|
||||
end
|
||||
else
|
||||
end;
|
||||
{ Remove superfluous mov after ldr
|
||||
changes
|
||||
ldr reg1, ref
|
||||
@ -236,22 +334,8 @@ Implementation
|
||||
* ldr+mov have the same conditions
|
||||
* mov does not set flags
|
||||
}
|
||||
if GetNextInstruction(p, hp1) and
|
||||
MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
|
||||
(taicpu(hp1).ops=2) and {We can't optimize if there is a shiftop}
|
||||
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
||||
begin
|
||||
CopyUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
If not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
|
||||
begin
|
||||
asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrMov2Ldr removed superfluous mov')), hp1);
|
||||
taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);
|
||||
asml.remove(hp1);
|
||||
hp1.free;
|
||||
end;
|
||||
ReleaseUsedRegs(TmpUsedRegs);
|
||||
end;
|
||||
if GetNextInstruction(p, hp1) then
|
||||
RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
|
||||
end;
|
||||
A_MOV:
|
||||
begin
|
||||
@ -311,7 +395,8 @@ Implementation
|
||||
(taicpu(p).oper[1]^.typ = top_const) and
|
||||
GetNextInstruction(p,hp1) then
|
||||
begin
|
||||
while MatchInstruction(hp1, A_STR, [], []) and
|
||||
while (tai(p).typ = ait_instruction) and
|
||||
(taicpu(p).opcode in [A_STR, A_STRH, A_STRB]) and
|
||||
MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) and
|
||||
GetNextInstruction(hp1, hp2) and
|
||||
MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
||||
@ -332,6 +417,8 @@ Implementation
|
||||
add r1, r1, #1
|
||||
to
|
||||
add r1, r0, #1
|
||||
|
||||
Todo: Make it work for mov+cmp too
|
||||
}
|
||||
if (taicpu(p).ops = 2) and
|
||||
(taicpu(p).oper[1]^.typ = top_reg) and
|
||||
@ -365,7 +452,16 @@ Implementation
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
A_AND:
|
||||
A_ADD,
|
||||
A_ADC,
|
||||
A_RSB,
|
||||
A_RSC,
|
||||
A_SUB,
|
||||
A_SBC,
|
||||
A_AND,
|
||||
A_BIC,
|
||||
A_EOR,
|
||||
A_ORR:
|
||||
begin
|
||||
{
|
||||
change
|
||||
@ -374,7 +470,8 @@ Implementation
|
||||
to
|
||||
and reg2,reg1,(const1 and const2)
|
||||
}
|
||||
if (taicpu(p).oper[1]^.typ = top_reg) and
|
||||
if (taicpu(p).opcode = A_AND) and
|
||||
(taicpu(p).oper[1]^.typ = top_reg) and
|
||||
(taicpu(p).oper[2]^.typ = top_const) and
|
||||
GetNextInstruction(p, hp1) and
|
||||
MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
|
||||
@ -388,6 +485,15 @@ Implementation
|
||||
asml.remove(hp1);
|
||||
hp1.free;
|
||||
end;
|
||||
{
|
||||
change
|
||||
add reg1, ...
|
||||
mov reg2, reg1
|
||||
to
|
||||
add reg2, ...
|
||||
}
|
||||
if GetNextInstruction(p, hp1) then
|
||||
RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
|
||||
end;
|
||||
A_CMP:
|
||||
begin
|
||||
|
Loading…
Reference in New Issue
Block a user