fpc/compiler/arm/aoptcpu.pas
masta 073cab8d86 Optimize FoldShiftLdrStr in ARM Peephole optimizer
The optimizer now juggles around the base and index register if that opens
up the possibility of folding the shift into the instruction.

This can only be done in the case of addressmode=AM_OFFSET, in case of
[AM_POSTINDEXED, AM_PREINDEXED] we can not move the base register, as this
would cause havoc and destruction.

git-svn-id: trunk@24645 -
2013-05-30 16:13:58 +00:00

2653 lines
120 KiB
ObjectPascal

{
Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
Development Team
This unit implements the ARM optimizer object
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit aoptcpu;
{$i fpcdefs.inc}
{$define DEBUG_PREREGSCHEDULER}
{$define DEBUG_AOPTCPU}
Interface
uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
{ uses the same constructor as TAopObj }
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
procedure PeepHoleOptPass2;override;
Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
function RegUsedAfterInstruction(reg: Tregister; p: tai;
var AllUsedRegs: TAllUsedRegs): Boolean;
{ returns true if reg reaches it's end of life at p, this means it is either
reloaded with a new value or it is deallocated afterwards }
function RegEndOfLife(reg: TRegister;p: taicpu): boolean;
{ gets the next tai object after current that contains info relevant
to the optimizer in p1 which used the given register or does a
change in program flow.
If there is none, it returns false and
sets p1 to nil }
Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
{ outputs a debug message into the assembler file }
procedure DebugMsg(const s: string; p: tai);
private
function SkipEntryExitMarker(current: tai; var next: tai): boolean;
protected
function LookForPostindexedPattern(p: taicpu): boolean;
End;
TCpuPreRegallocScheduler = class(TAsmScheduler)
function SchedulerPass1Cpu(var p: tai): boolean;override;
procedure SwapRegLive(p, hp1: taicpu);
end;
TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
{ uses the same constructor as TAopObj }
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
procedure PeepHoleOptPass2;override;
End;
function MustBeLast(p : tai) : boolean;
Implementation
uses
cutils,verbose,globtype,globals,
systems,
cpuinfo,
cgobj,cgutils,procinfo,
aasmbase,aasmdata;
function CanBeCond(p : tai) : boolean;
begin
result:=
not(current_settings.cputype in cpu_thumb) and
(p.typ=ait_instruction) and
(taicpu(p).condition=C_None) and
((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
(taicpu(p).opcode<>A_CBZ) and
(taicpu(p).opcode<>A_CBNZ) and
(taicpu(p).opcode<>A_PLD) and
((taicpu(p).opcode<>A_BLX) or
(taicpu(p).oper[0]^.typ=top_reg));
end;
function RefsEqual(const r1, r2: treference): boolean;
begin
refsequal :=
(r1.offset = r2.offset) and
(r1.base = r2.base) and
(r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
(r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
(r1.relsymbol = r2.relsymbol) and
(r1.signindex = r2.signindex) and
(r1.shiftimm = r2.shiftimm) and
(r1.addressmode = r2.addressmode) and
(r1.shiftmode = r2.shiftmode);
end;
function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
begin
result :=
(instr.typ = ait_instruction) and
((op = []) or ((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op))) and
((cond = []) or (taicpu(instr).condition in cond)) and
((postfix = []) or (taicpu(instr).oppostfix in postfix));
end;
function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
begin
result :=
(instr.typ = ait_instruction) and
(taicpu(instr).opcode = op) and
((cond = []) or (taicpu(instr).condition in cond)) and
((postfix = []) or (taicpu(instr).oppostfix in postfix));
end;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
begin
result := oper1.typ = oper2.typ;
if result then
case oper1.typ of
top_const:
Result:=oper1.val = oper2.val;
top_reg:
Result:=oper1.reg = oper2.reg;
top_conditioncode:
Result:=oper1.cc = oper2.cc;
top_ref:
Result:=RefsEqual(oper1.ref^, oper2.ref^);
else Result:=false;
end
end;
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
begin
result := (oper.typ = top_reg) and (oper.reg = reg);
end;
procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
begin
if (taicpu(movp).condition = C_EQ) and
(taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
(taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
begin
asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
asml.remove(movp);
movp.free;
end;
end;
function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
var
p: taicpu;
begin
p := taicpu(hp);
regLoadedWithNewValue := false;
if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
exit;
case p.opcode of
{ These operands do not write into a register at all }
A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
exit;
{Take care of post/preincremented store and loads, they will change their base register}
A_STR, A_LDR:
begin
regLoadedWithNewValue :=
(taicpu(p).oper[1]^.typ=top_ref) and
(taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
(taicpu(p).oper[1]^.ref^.base = reg);
{STR does not load into it's first register}
if p.opcode = A_STR then exit;
end;
{ These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
regLoadedWithNewValue :=
(p.oper[1]^.typ = top_reg) and
(p.oper[1]^.reg = reg);
{Loads to oper2 from coprocessor}
{
MCR/MRC is currently not supported in FPC
A_MRC:
regLoadedWithNewValue :=
(p.oper[2]^.typ = top_reg) and
(p.oper[2]^.reg = reg);
}
{Loads to all register in the registerset}
A_LDM:
regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
end;
if regLoadedWithNewValue then
exit;
case p.oper[0]^.typ of
{This is the case}
top_reg:
regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
{ LDRD }
(p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
{LDM/STM might write a new value to their index register}
top_ref:
regLoadedWithNewValue :=
(taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
(taicpu(p).oper[0]^.ref^.base = reg);
end;
end;
function AlignedToQWord(const ref : treference) : boolean;
begin
{ (safe) heuristics to ensure alignment }
result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
(((ref.offset>=0) and
((ref.offset mod 8)=0) and
((ref.base=NR_R13) or
(ref.index=NR_R13))
) or
((ref.offset<=0) and
{ when using NR_R11, it has always a value of <qword align>+4 }
((abs(ref.offset+4) mod 8)=0) and
(current_procinfo.framepointer=NR_R11) and
((ref.base=NR_R11) or
(ref.index=NR_R11))
)
);
end;
function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
var
p: taicpu;
i: longint;
begin
instructionLoadsFromReg := false;
if not (assigned(hp) and (hp.typ = ait_instruction)) then
exit;
p:=taicpu(hp);
i:=1;
{For these instructions we have to start on oper[0]}
if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
A_CMP, A_CMN, A_TST, A_TEQ,
A_B, A_BL, A_BX, A_BLX,
A_SMLAL, A_UMLAL]) then i:=0;
while(i<p.ops) do
begin
case p.oper[I]^.typ of
top_reg:
instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
{ STRD }
((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
top_regset:
instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
top_shifterop:
instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
top_ref:
instructionLoadsFromReg :=
(p.oper[I]^.ref^.base = reg) or
(p.oper[I]^.ref^.index = reg);
end;
if instructionLoadsFromReg then exit; {Bailout if we found something}
Inc(I);
end;
end;
function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
begin
if current_settings.cputype in cpu_thumb2 then
result := (aoffset<4096) and (aoffset>-256)
else
result := ((pf in [PF_None,PF_B]) and
(abs(aoffset)<4096)) or
(abs(aoffset)<256);
end;
function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
var AllUsedRegs: TAllUsedRegs): Boolean;
begin
AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
RegUsedAfterInstruction :=
AllUsedRegs[getregtype(reg)].IsUsed(reg) and
not(regLoadedWithNewValue(reg,p)) and
(
not(GetNextInstruction(p,p)) or
instructionLoadsFromReg(reg,p) or
not(regLoadedWithNewValue(reg,p))
);
end;
function TCpuAsmOptimizer.RegEndOfLife(reg : TRegister;p : taicpu) : boolean;
begin
Result:=assigned(FindRegDealloc(reg,tai(p.Next))) or
RegLoadedWithNewValue(reg,p);
end;
function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
var Next: tai; reg: TRegister): Boolean;
begin
Next:=Current;
repeat
Result:=GetNextInstruction(Next,Next);
until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
(is_calljmp(taicpu(Next).opcode)) or (RegInInstruction(NR_PC,Next));
end;
{$ifdef DEBUG_AOPTCPU}
procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
begin
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
end;
{$else DEBUG_AOPTCPU}
procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
begin
end;
{$endif DEBUG_AOPTCPU}
procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
var
alloc,
dealloc : tai_regalloc;
hp1 : tai;
begin
if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
{ don't mess with moves to pc }
(taicpu(movp).oper[0]^.reg<>NR_PC) and
{ don't mess with moves to lr }
(taicpu(movp).oper[0]^.reg<>NR_R14) and
{ the destination register of the mov might not be used beween p and movp }
not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
{ cb[n]z are thumb instructions which require specific registers, with no wide forms }
(taicpu(p).opcode<>A_CBZ) and
(taicpu(p).opcode<>A_CBNZ) and
{There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
not (
(taicpu(p).opcode in [A_MLA, A_MUL]) and
(taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
(current_settings.cputype < cpu_armv6)
) and
{ Take care to only do this for instructions which REALLY load to the first register.
Otherwise
str reg0, [reg1]
mov reg2, reg0
will be optimized to
str reg2, [reg1]
}
regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
begin
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
if assigned(dealloc) then
begin
DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
{ taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
and remove it if possible }
GetLastInstruction(p,hp1);
asml.Remove(dealloc);
alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
if assigned(alloc) then
begin
asml.Remove(alloc);
alloc.free;
dealloc.free;
end
else
asml.InsertAfter(dealloc,p);
{ try to move the allocation of the target register }
GetLastInstruction(movp,hp1);
alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
if assigned(alloc) then
begin
asml.Remove(alloc);
asml.InsertBefore(alloc,p);
{ adjust used regs }
IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
end;
{ finally get rid of the mov }
taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
asml.remove(movp);
movp.free;
end;
end;
end;
{
optimize
ldr/str regX,[reg1]
...
add/sub reg1,reg1,regY/const
into
ldr/str regX,[reg1], regY/const
}
function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
var
hp1 : tai;
begin
Result:=false;
if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
(p.oper[1]^.ref^.index=NR_NO) and
(p.oper[1]^.ref^.offset=0) and
GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
{ we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
(taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
(taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
(
(taicpu(hp1).oper[2]^.typ=top_reg) or
{ valid offset? }
((taicpu(hp1).oper[2]^.typ=top_const) and
((abs(taicpu(hp1).oper[2]^.val)<256) or
((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
)
)
) and
{ don't apply the optimization if the base register is loaded }
(p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
{ don't apply the optimization if the (new) index register is loaded }
(p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
not(current_settings.cputype in cpu_thumb) then
begin
DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
if taicpu(hp1).oper[2]^.typ=top_const then
begin
if taicpu(hp1).opcode=A_ADD then
p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
else
p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
end
else
begin
p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
if taicpu(hp1).opcode=A_ADD then
p.oper[1]^.ref^.signindex:=1
else
p.oper[1]^.ref^.signindex:=-1;
end;
asml.Remove(hp1);
hp1.Free;
Result:=true;
end;
end;
{ skip harmless marker marking entry/exit code, so it can be optimized as well }
function TCpuAsmOptimizer.SkipEntryExitMarker(current : tai;var next : tai) : boolean;
begin
result:=true;
if current.typ<>ait_marker then
exit;
next:=current;
while GetNextInstruction(next,next) do
begin
if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
exit;
end;
result:=false;
end;
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
hp1,hp2,hp3,hp4: tai;
i, i2: longint;
TmpUsedRegs: TAllUsedRegs;
tempop: tasmop;
function IsPowerOf2(const value: DWord): boolean; inline;
begin
Result:=(value and (value - 1)) = 0;
end;
begin
result := false;
case p.typ of
ait_instruction:
begin
{
change
<op> reg,x,y
cmp reg,#0
into
<op>s reg,x,y
}
{ this optimization can applied only to the currently enabled operations because
the other operations do not update all flags and FPC does not track flag usage }
if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
(taicpu(hp1).oper[1]^.typ = top_const) and
(taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
(taicpu(hp1).oper[1]^.val = 0) and
GetNextInstruction(hp1, hp2) and
{ be careful here, following instructions could use other flags
however after a jump fpc never depends on the value of flags }
{ All above instructions set Z and N according to the following
Z := result = 0;
N := result[31];
EQ = Z=1; NE = Z=0;
MI = N=1; PL = N=0; }
MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
begin
DebugMsg('Peephole OpCmp2OpS done', p);
taicpu(p).oppostfix:=PF_S;
{ move flag allocation if possible }
GetLastInstruction(hp1, hp2);
hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
if assigned(hp2) then
begin
asml.Remove(hp2);
asml.insertbefore(hp2, p);
end;
asml.remove(hp1);
hp1.free;
end
else
case taicpu(p).opcode of
A_STR:
begin
{ change
str reg1,ref
ldr reg2,ref
into
str reg1,ref
mov reg2,reg1
}
if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
(taicpu(p).oppostfix=PF_None) and
GetNextInstruction(p,hp1) and
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
(taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
begin
if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
begin
DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
asml.remove(hp1);
hp1.free;
end
else
begin
taicpu(hp1).opcode:=A_MOV;
taicpu(hp1).oppostfix:=PF_None;
taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
end;
result := true;
end
{ change
str reg1,ref
str reg2,ref
into
strd reg1,ref
}
else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
(taicpu(p).oppostfix=PF_None) and
(taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
GetNextInstruction(p,hp1) and
MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
(getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
{ str ensures that either base or index contain no register, else ldr wouldn't
use an offset either
}
(taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
(taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
(taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
(abs(taicpu(p).oper[1]^.ref^.offset)<256) and
AlignedToQWord(taicpu(p).oper[1]^.ref^) then
begin
DebugMsg('Peephole StrStr2Strd done', p);
taicpu(p).oppostfix:=PF_D;
asml.remove(hp1);
hp1.free;
end;
LookForPostindexedPattern(taicpu(p));
end;
A_LDR:
begin
{ change
ldr reg1,ref
ldr reg2,ref
into ...
}
if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
GetNextInstruction(p,hp1) and
{ ldrd is not allowed here }
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
begin
{
...
ldr reg1,ref
mov reg2,reg1
}
if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
(taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
(taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
(taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
begin
if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
begin
DebugMsg('Peephole LdrLdr2Ldr done', hp1);
asml.remove(hp1);
hp1.free;
end
else
begin
DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
taicpu(hp1).opcode:=A_MOV;
taicpu(hp1).oppostfix:=PF_None;
taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
end;
result := true;
end
{
...
ldrd reg1,ref
}
else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
{ ldrd does not allow any postfixes ... }
(taicpu(p).oppostfix=PF_None) and
not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
(getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
{ ldr ensures that either base or index contain no register, else ldr wouldn't
use an offset either
}
(taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
(taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
(taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
(abs(taicpu(p).oper[1]^.ref^.offset)<256) and
AlignedToQWord(taicpu(p).oper[1]^.ref^) then
begin
DebugMsg('Peephole LdrLdr2Ldrd done', p);
taicpu(p).oppostfix:=PF_D;
asml.remove(hp1);
hp1.free;
end;
end;
LookForPostindexedPattern(taicpu(p));
{ Remove superfluous mov after ldr
changes
ldr reg1, ref
mov reg2, reg1
to
ldr reg2, ref
conditions are:
* no ldrd usage
* reg1 must be released after mov
* mov can not contain shifterops
* ldr+mov have the same conditions
* mov does not set flags
}
if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
end;
A_MOV:
begin
{ fold
mov reg1,reg0, shift imm1
mov reg1,reg1, shift imm2
}
if (taicpu(p).ops=3) and
(taicpu(p).oper[2]^.typ = top_shifterop) and
(taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
getnextinstruction(p,hp1) and
MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(hp1).ops=3) and
MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
(taicpu(hp1).oper[2]^.typ = top_shifterop) and
(taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
begin
{ fold
mov reg1,reg0, lsl 16
mov reg1,reg1, lsr 16
strh reg1, ...
dealloc reg1
to
strh reg1, ...
dealloc reg1
}
if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
(taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
(taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
(taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
getnextinstruction(hp1,hp2) and
MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
begin
DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
asml.remove(p);
asml.remove(hp1);
p.free;
hp1.free;
p:=hp2;
end;
ReleaseUsedRegs(TmpUsedRegs);
end
{ fold
mov reg1,reg0, shift imm1
mov reg1,reg1, shift imm2
to
mov reg1,reg0, shift imm1+imm2
}
else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
{ asr makes no use after a lsr, the asr can be foled into the lsr }
((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
begin
inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
{ avoid overflows }
if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
case taicpu(p).oper[2]^.shifterop^.shiftmode of
SM_ROR:
taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
SM_ASR:
taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
SM_LSR,
SM_LSL:
begin
hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
InsertLLItem(p.previous, p.next, hp1);
p.free;
p:=hp1;
end;
else
internalerror(2008072803);
end;
DebugMsg('Peephole ShiftShift2Shift 1 done', p);
asml.remove(hp1);
hp1.free;
result := true;
end
{ fold
mov reg1,reg0, shift imm1
mov reg1,reg1, shift imm2
mov reg1,reg1, shift imm3 ...
mov reg2,reg1, shift imm3 ...
}
else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(hp2).ops=3) and
MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
(taicpu(hp2).oper[2]^.typ = top_shifterop) and
(taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
begin
{ mov reg1,reg0, lsl imm1
mov reg1,reg1, lsr/asr imm2
mov reg2,reg1, lsl imm3 ...
to
mov reg1,reg0, lsl imm1
mov reg2,reg1, lsr/asr imm2-imm3
if
imm1>=imm2
}
if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
(taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
(taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
begin
if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
begin
if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
begin
DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
asml.remove(hp1);
asml.remove(hp2);
hp1.free;
hp2.free;
if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
begin
taicpu(p).freeop(1);
taicpu(p).freeop(2);
taicpu(p).loadconst(1,0);
end;
result := true;
end;
end
else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
begin
DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
asml.remove(hp2);
hp2.free;
result := true;
end;
end
{ mov reg1,reg0, lsr/asr imm1
mov reg1,reg1, lsl imm2
mov reg1,reg1, lsr/asr imm3 ...
if imm3>=imm1 and imm2>=imm1
to
mov reg1,reg0, lsl imm2-imm1
mov reg1,reg1, lsr/asr imm3 ...
}
else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
(taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
(taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
(taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
begin
dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
asml.remove(p);
p.free;
p:=hp2;
if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
begin
taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
asml.remove(hp1);
hp1.free;
p:=hp2;
end;
result := true;
end;
end;
end;
{ Change the common
mov r0, r0, lsr #xxx
and r0, r0, #yyy/bic r0, r0, #xxx
and remove the superfluous and/bic if possible
This could be extended to handle more cases.
}
if (taicpu(p).ops=3) and
(taicpu(p).oper[2]^.typ = top_shifterop) and
(taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
(taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
(assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then
begin
if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
(taicpu(hp1).ops=3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
(taicpu(hp1).oper[2]^.typ = top_const) and
{ Check if the AND actually would only mask out bits beeing already zero because of the shift
For LSR #25 and an AndConst of 255 that whould go like this:
255 and ((2 shl (32-25))-1)
which results in 127, which is one less a power-of-2, meaning all lower bits are set.
LSR #25 and AndConst of 254:
254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
}
ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
begin
DebugMsg('Peephole LsrAnd2Lsr done', hp1);
taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
asml.remove(hp1);
hp1.free;
result:=true;
end
else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
(taicpu(hp1).ops=3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
(taicpu(hp1).oper[2]^.typ = top_const) and
{ Check if the BIC actually would only mask out bits beeing already zero because of the shift }
(taicpu(hp1).oper[2]^.val<>0) and
(BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
begin
DebugMsg('Peephole LsrBic2Lsr done', hp1);
taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
asml.remove(hp1);
hp1.free;
result:=true;
end;
end;
{
optimize
mov rX, yyyy
....
}
if (taicpu(p).ops = 2) and
GetNextInstruction(p,hp1) and
(tai(hp1).typ = ait_instruction) then
begin
{
This changes the very common
mov r0, #0
str r0, [...]
mov r0, #0
str r0, [...]
and removes all superfluous mov instructions
}
if (taicpu(p).oper[1]^.typ = top_const) and
(taicpu(hp1).opcode=A_STR) then
while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
GetNextInstruction(hp1, hp2) and
MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(hp2).ops = 2) and
MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
begin
DebugMsg('Peephole MovStrMov done', hp2);
GetNextInstruction(hp2,hp1);
asml.remove(hp2);
hp2.free;
if not assigned(hp1) then break;
end
{
This removes the first mov from
mov rX,...
mov rX,...
}
else if taicpu(hp1).opcode=A_MOV then
while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
(taicpu(hp1).ops = 2) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
{ don't remove the first mov if the second is a mov rX,rX }
not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
begin
DebugMsg('Peephole MovMov done', p);
asml.remove(p);
p.free;
p:=hp1;
GetNextInstruction(hp1,hp1);
if not assigned(hp1) then
break;
end;
end;
{
change
mov r1, r0
add r1, r1, #1
to
add r1, r0, #1
Todo: Make it work for mov+cmp too
CAUTION! If this one is successful p might not be a mov instruction anymore!
}
if (taicpu(p).ops = 2) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oppostfix = PF_NONE) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
[taicpu(p).condition], []) and
{MOV and MVN might only have 2 ops}
(taicpu(hp1).ops >= 2) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(
(taicpu(hp1).ops = 2) or
(taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
) then
begin
{ When we get here we still don't know if the registers match}
for I:=1 to 2 do
{
If the first loop was successful p will be replaced with hp1.
The checks will still be ok, because all required information
will also be in hp1 then.
}
if (taicpu(hp1).ops > I) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
begin
DebugMsg('Peephole RedundantMovProcess done', hp1);
taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
if p<>hp1 then
begin
asml.remove(p);
p.free;
p:=hp1;
end;
end;
end;
{ This folds shifterops into following instructions
mov r0, r1, lsl #8
add r2, r3, r0
to
add r2, r3, r1, lsl #8
CAUTION! If this one is successful p might not be a mov instruction anymore!
}
if (taicpu(p).opcode = A_MOV) and
(taicpu(p).ops = 3) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oper[2]^.typ = top_shifterop) and
(taicpu(p).oppostfix = PF_NONE) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
A_CMP, A_CMN],
[taicpu(p).condition], [PF_None]) and
(not ((current_settings.cputype in cpu_thumb2) and
(taicpu(hp1).opcode in [A_SBC]) and
(((taicpu(hp1).ops=3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
((taicpu(hp1).ops=2) and
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
(assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and
(taicpu(hp1).ops >= 2) and
{Currently we can't fold into another shifterop}
(taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
{Folding rrx is problematic because of the C-Flag, as we currently can't check
NR_DEFAULTFLAGS for modification}
(
{Everything is fine if we don't use RRX}
(taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
(
{If it is RRX, then check if we're just accessing the next instruction}
GetNextInstruction(p, hp2) and
(hp1 = hp2)
)
) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
{ The shifterop can contain a register, might not be modified}
(
(taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
) and
(
{Only ONE of the two src operands is allowed to match}
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
) then
begin
if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
I2:=0
else
I2:=1;
for I:=I2 to taicpu(hp1).ops-1 do
if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
begin
{ If the parameter matched on the second op from the RIGHT
we have to switch the parameters, this will not happen for CMP
were we're only evaluating the most right parameter
}
if I <> taicpu(hp1).ops-1 then
begin
{The SUB operators need to be changed when we swap parameters}
case taicpu(hp1).opcode of
A_SUB: tempop:=A_RSB;
A_SBC: tempop:=A_RSC;
A_RSB: tempop:=A_SUB;
A_RSC: tempop:=A_SBC;
else tempop:=taicpu(hp1).opcode;
end;
if taicpu(hp1).ops = 3 then
hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
else
hp2:=taicpu.op_reg_reg_shifterop(tempop,
taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
taicpu(p).oper[2]^.shifterop^);
end
else
if taicpu(hp1).ops = 3 then
hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
else
hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
taicpu(p).oper[2]^.shifterop^);
asml.insertbefore(hp2, hp1);
asml.remove(p);
asml.remove(hp1);
p.free;
hp1.free;
p:=hp2;
GetNextInstruction(p,hp1);
DebugMsg('Peephole FoldShiftProcess done', p);
break;
end;
end;
{
Fold
mov r1, r1, lsl #2
ldr/ldrb r0, [r0, r1]
to
ldr/ldrb r0, [r0, r1, lsl #2]
XXX: This still needs some work, as we quite often encounter something like
mov r1, r2, lsl #2
add r2, r3, #imm
ldr r0, [r2, r1]
which can't be folded because r2 is overwritten between the shift and the ldr.
We could try to shuffle the registers around and fold it into.
add r1, r3, #imm
ldr r0, [r1, r2, lsl #2]
}
if (taicpu(p).opcode = A_MOV) and
(taicpu(p).ops = 3) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oper[2]^.typ = top_shifterop) and
{ RRX is tough to handle, because it requires tracking the C-Flag,
it is also extremly unlikely to be emitted this way}
(taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
(taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
(taicpu(p).oppostfix = PF_NONE) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
{Only LDR, LDRB, STR, STRB can handle scaled register indexing}
MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition],
[PF_None, PF_B]) and
(
{If this is address by offset, one of the two registers can be used}
((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
(
(taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
(taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
)
) or
{For post and preindexed only the index register can be used}
((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
(
(taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
(taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
)
)
) and
{ Only fold if there isn't another shifterop already. }
(taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
(assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then
begin
{ If the register we want to do the shift for resides in base, we need to swap that}
if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
DebugMsg('Peephole FoldShiftLdrStr done', hp1);
asml.remove(p);
p.free;
p:=hp1;
end;
{
Often we see shifts and then a superfluous mov to another register
In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
}
if (taicpu(p).opcode = A_MOV) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
end;
A_ADD,
A_ADC,
A_RSB,
A_RSC,
A_SUB,
A_SBC,
A_AND,
A_BIC,
A_EOR,
A_ORR,
A_MLA,
A_MUL:
begin
{
optimize
and reg2,reg1,const1
...
}
if (taicpu(p).opcode = A_AND) and
(taicpu(p).ops>2) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oper[2]^.typ = top_const) then
begin
{
change
and reg2,reg1,const1
...
and reg3,reg2,const2
to
and reg3,reg1,(const1 and const2)
}
if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
(taicpu(hp1).oper[2]^.typ = top_const) then
begin
if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
begin
DebugMsg('Peephole AndAnd2And done', p);
taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
asml.remove(hp1);
hp1.free;
Result:=true;
end
else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole AndAnd2And done', hp1);
taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
Result:=true;
end;
end
{
change
and reg2,reg1,$xxxxxxFF
strb reg2,[...]
dealloc reg2
to
strb reg1,[...]
}
else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
MatchInstruction(p, A_AND, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole AndStrb2Strb done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
{
change
and reg2,reg1,255
uxtb/uxth reg3,reg2
dealloc reg2
to
and reg3,reg1,x
}
else if (taicpu(p).oper[2]^.val = $FF) and
MatchInstruction(p, A_AND, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole AndUxt2And done', p);
taicpu(hp1).opcode:=A_AND;
taicpu(hp1).ops:=3;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
taicpu(hp1).loadconst(2,255);
GetNextInstruction(p,hp1);
asml.remove(p);
p.Free;
p:=hp1;
result:=true;
end
{
from
and reg1,reg0,2^n-1
mov reg2,reg1, lsl imm1
(mov reg3,reg2, lsr/asr imm1)
remove either the and or the lsl/xsr sequence if possible
}
else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(hp1).ops=3) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
(taicpu(hp1).oper[2]^.typ = top_shifterop) and
(taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
(taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
begin
{
and reg1,reg0,2^n-1
mov reg2,reg1, lsl imm1
mov reg3,reg2, lsr/asr imm1
=>
and reg1,reg0,2^n-1
if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
}
if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
(taicpu(hp2).ops=3) and
MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
(taicpu(hp2).oper[2]^.typ = top_shifterop) and
(taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
(taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
(taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
(taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
begin
DebugMsg('Peephole AndLslXsr2And done', p);
taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
asml.Remove(hp1);
asml.Remove(hp2);
hp1.free;
hp2.free;
result:=true;
end
{
and reg1,reg0,2^n-1
mov reg2,reg1, lsl imm1
=>
mov reg2,reg1, lsl imm1
if imm1>i
}
else if i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm then
begin
DebugMsg('Peephole AndLsl2Lsl done', p);
taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[0]^.reg;
asml.Remove(p);
p.free;
p:=hp1;
result:=true;
end
end;
end;
{
change
add/sub reg2,reg1,const1
str/ldr reg3,[reg2,const2]
dealloc reg2
to
str/ldr reg3,[reg1,const2+/-const1]
}
if (taicpu(p).opcode in [A_ADD,A_SUB]) and
(taicpu(p).ops>2) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oper[2]^.typ = top_const) then
begin
hp1:=p;
while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
{ we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
(taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
{ don't optimize if the register is stored/overwritten }
(taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
(taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
{ new offset must be valid: either in the range of 8 or 12 bit, depend on the
ldr postfix }
(((taicpu(p).opcode=A_ADD) and
isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
) or
((taicpu(p).opcode=A_SUB) and
isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
)
) do
begin
{ neither reg1 nor reg2 might be changed inbetween }
if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
break;
{ reg2 must be either overwritten by the ldr or it is deallocated afterwards }
if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
begin
{ remember last instruction }
hp2:=hp1;
DebugMsg('Peephole Add/SubLdr2Ldr done', p);
hp1:=p;
{ fix all ldr/str }
while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
begin
taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
if taicpu(p).opcode=A_ADD then
inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
else
dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
if hp1=hp2 then
break;
end;
GetNextInstruction(p,hp1);
asml.remove(p);
p.free;
p:=hp1;
break;
end;
end;
end;
{
change
add reg1, ...
mov reg2, reg1
to
add reg2, ...
}
if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
begin
if (taicpu(p).ops=3) then
RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
end;
end;
{$ifdef dummy}
A_MVN:
begin
{
change
mvn reg2,reg1
and reg3,reg4,reg2
dealloc reg2
to
bic reg3,reg4,reg1
}
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1,A_AND,[],[]) and
(((taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_reg) and
(MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
((taicpu(hp1).ops=2) and
(taicpu(hp1).oper[1]^.typ=top_reg) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole MvnAnd2Bic done', p);
taicpu(hp1).opcode:=A_BIC;
if taicpu(hp1).ops=3 then
begin
if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
end
else
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
end;
end;
{$endif dummy}
A_UXTB:
begin
{
change
uxtb reg2,reg1
strb reg2,[...]
dealloc reg2
to
strb reg1,[...]
}
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxtbStrb2Strb done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
{
change
uxtb reg2,reg1
uxth reg3,reg2
dealloc reg2
to
uxtb reg3,reg1
}
else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxtbUxth2Uxtb done', p);
taicpu(hp1).opcode:=A_UXTB;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
{
change
uxtb reg2,reg1
uxtb reg3,reg2
dealloc reg2
to
uxtb reg3,reg1
}
else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
taicpu(hp1).opcode:=A_UXTB;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
{
change
uxtb reg2,reg1
and reg3,reg2,#0x*FF
dealloc reg2
to
uxtb reg3,reg1
}
else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
(taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_const) and
((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
taicpu(hp1).opcode:=A_UXTB;
taicpu(hp1).ops:=2;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
begin
//if (taicpu(p).ops=3) then
RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data');
end;
end;
A_UXTH:
begin
{
change
uxth reg2,reg1
strh reg2,[...]
dealloc reg2
to
strh reg1,[...]
}
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UXTHStrh2Strh done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
{
change
uxth reg2,reg1
uxth reg3,reg2
dealloc reg2
to
uxth reg3,reg1
}
else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxthUxth2Uxth done', p);
taicpu(hp1).opcode:=A_UXTH;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
{
change
uxth reg2,reg1
and reg3,reg2,#65535
dealloc reg2
to
uxth reg3,reg1
}
else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
(taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_const) and
((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole UxthAndImm2Uxth done', p);
taicpu(hp1).opcode:=A_UXTH;
taicpu(hp1).ops:=2;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
begin
//if (taicpu(p).ops=3) then
RemoveSuperfluousMove(p, hp1, 'UxthMov2Data');
end;
end;
A_CMP:
begin
{
change
cmp reg,const1
moveq reg,const1
movne reg,const2
to
cmp reg,const1
movne reg,const2
}
if (taicpu(p).oper[1]^.typ = top_const) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
(taicpu(hp1).oper[1]^.typ = top_const) and
GetNextInstruction(hp1, hp2) and
MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
(taicpu(hp1).oper[1]^.typ = top_const) then
begin
RemoveRedundantMove(p, hp1, asml);
RemoveRedundantMove(p, hp2, asml);
end;
end;
A_STM:
begin
{
change
stmfd r13!,[r14]
sub r13,r13,#4
bl abc
add r13,r13,#4
ldmfd r13!,[r15]
into
b abc
}
if not(ts_thumb_interworking in current_settings.targetswitches) and
MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
GetNextInstruction(p, hp1) and
GetNextInstruction(hp1, hp2) and
SkipEntryExitMarker(hp2, hp2) and
GetNextInstruction(hp2, hp3) and
SkipEntryExitMarker(hp3, hp3) and
GetNextInstruction(hp3, hp4) and
(taicpu(p).oper[0]^.typ = top_ref) and
(taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
(taicpu(p).oper[0]^.ref^.base=NR_NO) and
(taicpu(p).oper[0]^.ref^.offset=0) and
(taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
(taicpu(p).oper[1]^.typ = top_regset) and
(taicpu(p).oper[1]^.regset^ = [RS_R14]) and
MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
(taicpu(hp1).oper[2]^.typ = top_const) and
MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
(taicpu(hp2).oper[0]^.typ = top_ref) and
MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
(taicpu(hp4).oper[1]^.typ = top_regset) and
(taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
begin
asml.Remove(p);
asml.Remove(hp1);
asml.Remove(hp3);
asml.Remove(hp4);
taicpu(hp2).opcode:=A_B;
p.free;
hp1.free;
hp3.free;
hp4.free;
p:=hp2;
DebugMsg('Peephole Bl2B done', p);
end;
end;
end;
end;
end;
end;
{ instructions modifying the CPSR can be only the last instruction }
function MustBeLast(p : tai) : boolean;
begin
Result:=(p.typ=ait_instruction) and
((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
(taicpu(p).oppostfix=PF_S));
end;
procedure TCpuAsmOptimizer.PeepHoleOptPass2;
var
p,hp1,hp2: tai;
l : longint;
condition : tasmcond;
hp3: tai;
WasLast: boolean;
{ UsedRegs, TmpUsedRegs: TRegSet; }
begin
p := BlockStart;
{ UsedRegs := []; }
while (p <> BlockEnd) Do
begin
{ UpdateUsedRegs(UsedRegs, tai(p.next)); }
case p.Typ Of
Ait_Instruction:
begin
case taicpu(p).opcode Of
A_B:
if (taicpu(p).condition<>C_None) and
not(current_settings.cputype in cpu_thumb) then
begin
{ check for
Bxx xxx
<several instructions>
xxx:
}
l:=0;
WasLast:=False;
GetNextInstruction(p, hp1);
while assigned(hp1) and
(l<=4) and
CanBeCond(hp1) and
{ stop on labels }
not(hp1.typ=ait_label) do
begin
inc(l);
if MustBeLast(hp1) then
begin
WasLast:=True;
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
end;
if assigned(hp1) then
begin
if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
begin
if (l<=4) and (l>0) then
begin
condition:=inverse_cond(taicpu(p).condition);
hp2:=p;
GetNextInstruction(p,hp1);
p:=hp1;
repeat
if hp1.typ=ait_instruction then
taicpu(hp1).condition:=condition;
if MustBeLast(hp1) then
begin
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
until not(assigned(hp1)) or
not(CanBeCond(hp1)) or
(hp1.typ=ait_label);
{ wait with removing else GetNextInstruction could
ignore the label if it was the only usage in the
jump moved away }
tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
asml.remove(hp2);
hp2.free;
continue;
end;
end
else
{ do not perform further optimizations if there is inctructon
in block #1 which can not be optimized.
}
if not WasLast then
begin
{ check further for
Bcc xxx
<several instructions 1>
B yyy
xxx:
<several instructions 2>
yyy:
}
{ hp2 points to jmp yyy }
hp2:=hp1;
{ skip hp1 to xxx }
GetNextInstruction(hp1, hp1);
if assigned(hp2) and
assigned(hp1) and
(l<=3) and
(hp2.typ=ait_instruction) and
(taicpu(hp2).is_jmp) and
(taicpu(hp2).condition=C_None) and
{ real label and jump, no further references to the
label are allowed }
(tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
begin
l:=0;
{ skip hp1 to <several moves 2> }
GetNextInstruction(hp1, hp1);
while assigned(hp1) and
CanBeCond(hp1) do
begin
inc(l);
GetNextInstruction(hp1, hp1);
end;
{ hp1 points to yyy: }
if assigned(hp1) and
FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
begin
condition:=inverse_cond(taicpu(p).condition);
GetNextInstruction(p,hp1);
hp3:=p;
p:=hp1;
repeat
if hp1.typ=ait_instruction then
taicpu(hp1).condition:=condition;
GetNextInstruction(hp1,hp1);
until not(assigned(hp1)) or
not(CanBeCond(hp1));
{ hp2 is still at jmp yyy }
GetNextInstruction(hp2,hp1);
{ hp2 is now at xxx: }
condition:=inverse_cond(condition);
GetNextInstruction(hp1,hp1);
{ hp1 is now at <several movs 2> }
repeat
taicpu(hp1).condition:=condition;
GetNextInstruction(hp1,hp1);
until not(assigned(hp1)) or
not(CanBeCond(hp1)) or
(hp1.typ=ait_label);
{
asml.remove(hp1.next)
hp1.next.free;
asml.remove(hp1);
hp1.free;
}
{ remove Bcc }
tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
asml.remove(hp3);
hp3.free;
{ remove jmp }
tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
asml.remove(hp2);
hp2.free;
continue;
end;
end;
end;
end;
end;
end;
end;
end;
p := tai(p.next)
end;
end;
function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
begin
If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
Result:=true
else
Result:=inherited RegInInstruction(Reg, p1);
end;
const
{ set of opcode which might or do write to memory }
{ TODO : extend armins.dat to contain r/w info }
opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
{ adjust the register live information when swapping the two instructions p and hp1,
they must follow one after the other }
procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
procedure CheckLiveEnd(reg : tregister);
var
supreg : TSuperRegister;
regtype : TRegisterType;
begin
if reg=NR_NO then
exit;
regtype:=getregtype(reg);
supreg:=getsupreg(reg);
if (cg.rg[regtype].live_end[supreg]=hp1) and
RegInInstruction(reg,p) then
cg.rg[regtype].live_end[supreg]:=p;
end;
procedure CheckLiveStart(reg : TRegister);
var
supreg : TSuperRegister;
regtype : TRegisterType;
begin
if reg=NR_NO then
exit;
regtype:=getregtype(reg);
supreg:=getsupreg(reg);
if (cg.rg[regtype].live_start[supreg]=p) and
RegInInstruction(reg,hp1) then
cg.rg[regtype].live_start[supreg]:=hp1;
end;
var
i : longint;
r : TSuperRegister;
begin
{ assumption: p is directly followed by hp1 }
{ if live of any reg used by p starts at p and hp1 uses this register then
set live start to hp1 }
for i:=0 to p.ops-1 do
case p.oper[i]^.typ of
Top_Reg:
CheckLiveStart(p.oper[i]^.reg);
Top_Ref:
begin
CheckLiveStart(p.oper[i]^.ref^.base);
CheckLiveStart(p.oper[i]^.ref^.index);
end;
Top_Shifterop:
CheckLiveStart(p.oper[i]^.shifterop^.rs);
Top_RegSet:
for r:=RS_R0 to RS_R15 do
if r in p.oper[i]^.regset^ then
CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
end;
{ if live of any reg used by hp1 ends at hp1 and p uses this register then
set live end to p }
for i:=0 to hp1.ops-1 do
case hp1.oper[i]^.typ of
Top_Reg:
CheckLiveEnd(hp1.oper[i]^.reg);
Top_Ref:
begin
CheckLiveEnd(hp1.oper[i]^.ref^.base);
CheckLiveEnd(hp1.oper[i]^.ref^.index);
end;
Top_Shifterop:
CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
Top_RegSet:
for r:=RS_R0 to RS_R15 do
if r in hp1.oper[i]^.regset^ then
CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
end;
end;
function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
{ TODO : schedule also forward }
{ TODO : schedule distance > 1 }
var
hp1,hp2,hp3,hp4,hp5 : tai;
list : TAsmList;
begin
result:=true;
list:=TAsmList.Create;
p:=BlockStart;
while p<>BlockEnd Do
begin
if (p.typ=ait_instruction) and
GetNextInstruction(p,hp1) and
(hp1.typ=ait_instruction) and
(taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
{ for now we don't reschedule if the previous instruction changes potentially a memory location }
( (not(taicpu(p).opcode in opcode_could_mem_write) and
not(RegModifiedByInstruction(NR_PC,p))
) or
((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
(assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
(taicpu(hp1).oper[1]^.ref^.offset=0)
)
) or
{ try to prove that the memory accesses don't overlapp }
((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
(taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
(taicpu(p).oppostfix=PF_None) and
(taicpu(hp1).oppostfix=PF_None) and
(taicpu(p).oper[1]^.ref^.index=NR_NO) and
(taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
{ get operand sizes and check if the offset distance is large enough to ensure no overlapp }
(abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
)
)
) and
GetNextInstruction(hp1,hp2) and
(hp2.typ=ait_instruction) and
{ loaded register used by next instruction? }
(RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
{ loaded register not used by previous instruction? }
not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
{ same condition? }
(taicpu(p).condition=taicpu(hp1).condition) and
{ first instruction might not change the register used as base }
((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
) and
{ first instruction might not change the register used as index }
((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
) then
begin
hp3:=tai(p.Previous);
hp5:=tai(p.next);
asml.Remove(p);
{ if there is a reg. dealloc instruction associated with p, move it together with p }
{ before the instruction? }
while assigned(hp3) and (hp3.typ<>ait_instruction) do
begin
if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
RegInInstruction(tai_regalloc(hp3).reg,p) then
begin
hp4:=hp3;
hp3:=tai(hp3.Previous);
asml.Remove(hp4);
list.Concat(hp4);
end
else
hp3:=tai(hp3.Previous);
end;
list.Concat(p);
SwapRegLive(taicpu(p),taicpu(hp1));
{ after the instruction? }
while assigned(hp5) and (hp5.typ<>ait_instruction) do
begin
if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
RegInInstruction(tai_regalloc(hp5).reg,p) then
begin
hp4:=hp5;
hp5:=tai(hp5.next);
asml.Remove(hp4);
list.Concat(hp4);
end
else
hp5:=tai(hp5.Next);
end;
asml.Remove(hp1);
{$ifdef DEBUG_PREREGSCHEDULER}
asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
{$endif DEBUG_PREREGSCHEDULER}
asml.InsertBefore(hp1,hp2);
asml.InsertListBefore(hp2,list);
p:=tai(p.next)
end
else if p.typ=ait_instruction then
p:=hp1
else
p:=tai(p.next);
end;
list.Free;
end;
procedure DecrementPreceedingIT(list: TAsmList; p: tai);
var
hp : tai;
l : longint;
begin
hp := tai(p.Previous);
l := 1;
while assigned(hp) and
(l <= 4) do
begin
if hp.typ=ait_instruction then
begin
if (taicpu(hp).opcode>=A_IT) and
(taicpu(hp).opcode <= A_ITTTT) then
begin
if (taicpu(hp).opcode = A_IT) and
(l=1) then
list.Remove(hp)
else
case taicpu(hp).opcode of
A_ITE:
if l=2 then taicpu(hp).opcode := A_IT;
A_ITT:
if l=2 then taicpu(hp).opcode := A_IT;
A_ITEE:
if l=3 then taicpu(hp).opcode := A_ITE;
A_ITTE:
if l=3 then taicpu(hp).opcode := A_ITT;
A_ITET:
if l=3 then taicpu(hp).opcode := A_ITE;
A_ITTT:
if l=3 then taicpu(hp).opcode := A_ITT;
A_ITEEE:
if l=4 then taicpu(hp).opcode := A_ITEE;
A_ITTEE:
if l=4 then taicpu(hp).opcode := A_ITTE;
A_ITETE:
if l=4 then taicpu(hp).opcode := A_ITET;
A_ITTTE:
if l=4 then taicpu(hp).opcode := A_ITTT;
A_ITEET:
if l=4 then taicpu(hp).opcode := A_ITEE;
A_ITTET:
if l=4 then taicpu(hp).opcode := A_ITTE;
A_ITETT:
if l=4 then taicpu(hp).opcode := A_ITET;
A_ITTTT:
if l=4 then taicpu(hp).opcode := A_ITTT;
end;
break;
end;
{else if (taicpu(hp).condition<>taicpu(p).condition) or
(taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
break;}
inc(l);
end;
hp := tai(hp.Previous);
end;
end;
function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
hp : taicpu;
hp1,hp2 : tai;
begin
result:=false;
if inherited PeepHoleOptPass1Cpu(p) then
result:=true
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
(taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
(taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
begin
DebugMsg('Peephole Stm2Push done', p);
hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
AsmL.InsertAfter(hp, p);
asml.Remove(p);
p:=hp;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_STR, [C_None], [PF_None]) and
(taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
(taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
(taicpu(p).oper[1]^.ref^.offset=-4) and
(getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
begin
DebugMsg('Peephole Str2Push done', p);
hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
asml.InsertAfter(hp, p);
asml.Remove(p);
p.Free;
p:=hp;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
(taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
(taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
begin
DebugMsg('Peephole Ldm2Pop done', p);
hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
asml.InsertBefore(hp, p);
asml.Remove(p);
p.Free;
p:=hp;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
(taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
(taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
(taicpu(p).oper[1]^.ref^.offset=4) and
(getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
begin
DebugMsg('Peephole Ldr2Pop done', p);
hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
asml.InsertBefore(hp, p);
asml.Remove(p);
p.Free;
p:=hp;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
(taicpu(p).oper[1]^.typ=top_const) and
(taicpu(p).oper[1]^.val >= 0) and
(taicpu(p).oper[1]^.val < 256) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
DebugMsg('Peephole Mov2Movs done', p);
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).oppostfix:=PF_S;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
(taicpu(p).oper[1]^.typ=top_reg) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
DebugMsg('Peephole Mvn2Mvns done', p);
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).oppostfix:=PF_S;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
(taicpu(p).ops = 3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
(not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
(taicpu(p).oper[2]^.typ=top_const) and
(taicpu(p).oper[2]^.val >= 0) and
(taicpu(p).oper[2]^.val < 256) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
DebugMsg('Peephole AddSub2*s done', p);
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
taicpu(p).oppostfix:=PF_S;
taicpu(p).ops := 2;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
(taicpu(p).ops = 3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
(taicpu(p).oper[2]^.typ=top_reg) then
begin
DebugMsg('Peephole AddRRR2AddRR done', p);
taicpu(p).ops := 2;
taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
(taicpu(p).ops = 3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
(taicpu(p).oper[2]^.typ=top_reg) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).ops := 2;
taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
taicpu(p).oppostfix:=PF_S;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
(taicpu(p).ops = 3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
(taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
begin
taicpu(p).ops := 2;
if taicpu(p).oper[2]^.typ=top_reg then
taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
else
taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
(taicpu(p).ops = 3) and
MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).oppostfix:=PF_S;
taicpu(p).ops := 2;
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
(taicpu(p).ops=3) and
(taicpu(p).oper[2]^.typ=top_shifterop) and
(taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
//MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
begin
DebugMsg('Peephole Mov2Shift done', p);
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
taicpu(p).oppostfix:=PF_S;
//taicpu(p).ops := 2;
case taicpu(p).oper[2]^.shifterop^.shiftmode of
SM_LSL: taicpu(p).opcode:=A_LSL;
SM_LSR: taicpu(p).opcode:=A_LSR;
SM_ASR: taicpu(p).opcode:=A_ASR;
SM_ROR: taicpu(p).opcode:=A_ROR;
end;
if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
else
taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
result:=true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_AND], [], [PF_None]) and
(taicpu(p).ops = 2) and
(taicpu(p).oper[1]^.typ=top_const) and
((taicpu(p).oper[1]^.val=255) or
(taicpu(p).oper[1]^.val=65535)) then
begin
DebugMsg('Peephole AndR2Uxt done', p);
if taicpu(p).oper[1]^.val=255 then
taicpu(p).opcode:=A_UXTB
else
taicpu(p).opcode:=A_UXTH;
taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
result := true;
end
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_AND], [], [PF_None]) and
(taicpu(p).ops = 3) and
(taicpu(p).oper[2]^.typ=top_const) and
((taicpu(p).oper[2]^.val=255) or
(taicpu(p).oper[2]^.val=65535)) then
begin
DebugMsg('Peephole AndRR2Uxt done', p);
if taicpu(p).oper[2]^.val=255 then
taicpu(p).opcode:=A_UXTB
else
taicpu(p).opcode:=A_UXTH;
taicpu(p).ops:=2;
result := true;
end
{
Turn
mul reg0, z,w
sub/add x, y, reg0
dealloc reg0
into
mls/mla x,y,z,w
}
{
According to Jeppe Johansen this currently uses operands in the wrong order.
else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
(taicpu(p).ops=3) and
(taicpu(p).oper[0]^.typ = top_reg) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).oper[2]^.typ = top_reg) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
(((taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_reg) and
(MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
(taicpu(hp1).opcode=A_ADD)))) or
((taicpu(hp1).ops=2) and
(taicpu(hp1).oper[1]^.typ=top_reg) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then
begin
if taicpu(hp1).opcode=A_ADD then
begin
taicpu(hp1).opcode:=A_MLA;
if taicpu(hp1).ops=3 then
if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg);
taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
DebugMsg('MulAdd2MLA done', p);
taicpu(hp1).ops:=4;
asml.remove(p);
p.free;
p:=hp1;
end
else
begin
taicpu(hp1).opcode:=A_MLS;
if taicpu(hp1).ops=2 then
taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
DebugMsg('MulSub2MLS done', p);
taicpu(hp1).ops:=4;
asml.remove(p);
p.free;
p:=hp1;
end;
result:=true;
end
}
{else if (p.typ=ait_instruction) and
MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
(taicpu(p).oper[1]^.typ=top_const) and
(taicpu(p).oper[1]^.val=0) and
GetNextInstruction(p,hp1) and
(taicpu(hp1).opcode=A_B) and
(taicpu(hp1).condition in [C_EQ,C_NE]) then
begin
if taicpu(hp1).condition = C_EQ then
hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
else
hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
taicpu(hp2).is_jmp := true;
asml.InsertAfter(hp2, hp1);
asml.Remove(hp1);
hp1.Free;
asml.Remove(p);
p.Free;
p := hp2;
result := true;
end}
end;
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
var
p,hp1,hp2: tai;
l,l2 : longint;
condition : tasmcond;
hp3: tai;
WasLast: boolean;
{ UsedRegs, TmpUsedRegs: TRegSet; }
begin
p := BlockStart;
{ UsedRegs := []; }
while (p <> BlockEnd) Do
begin
{ UpdateUsedRegs(UsedRegs, tai(p.next)); }
case p.Typ Of
Ait_Instruction:
begin
case taicpu(p).opcode Of
A_B:
if taicpu(p).condition<>C_None then
begin
{ check for
Bxx xxx
<several instructions>
xxx:
}
l:=0;
GetNextInstruction(p, hp1);
while assigned(hp1) and
(l<=4) and
CanBeCond(hp1) and
{ stop on labels }
not(hp1.typ=ait_label) do
begin
inc(l);
if MustBeLast(hp1) then
begin
//hp1:=nil;
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
end;
if assigned(hp1) then
begin
if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
begin
if (l<=4) and (l>0) then
begin
condition:=inverse_cond(taicpu(p).condition);
hp2:=p;
GetNextInstruction(p,hp1);
p:=hp1;
repeat
if hp1.typ=ait_instruction then
taicpu(hp1).condition:=condition;
if MustBeLast(hp1) then
begin
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
until not(assigned(hp1)) or
not(CanBeCond(hp1)) or
(hp1.typ=ait_label);
{ wait with removing else GetNextInstruction could
ignore the label if it was the only usage in the
jump moved away }
asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
DecrementPreceedingIT(asml, hp2);
case l of
1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
end;
tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
asml.remove(hp2);
hp2.free;
continue;
end;
end;
end;
end;
end;
end;
end;
p := tai(p.next)
end;
end;
begin
casmoptimizer:=TCpuAsmOptimizer;
cpreregallocscheduler:=TCpuPreRegallocScheduler;
End.