mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-07 11:48:04 +02:00
1501 lines
48 KiB
ObjectPascal
1501 lines
48 KiB
ObjectPascal
{
|
|
Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
|
|
Development Team
|
|
|
|
This unit implements the ARM optimizer object
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
|
|
|
|
Unit aoptcpu;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
{ $define DEBUG_AOPTCPU}
|
|
|
|
Interface
|
|
|
|
uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
|
|
|
|
Type
|
|
TCpuAsmOptimizer = class(TAsmOptimizer)
|
|
{ outputs a debug message into the assembler file }
|
|
procedure DebugMsg(const s: string; p: tai);
|
|
|
|
Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
|
|
function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
|
|
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
|
|
function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
|
|
|
|
function InvertSkipInstruction(var p: tai): boolean;
|
|
|
|
{ uses the same constructor as TAopObj }
|
|
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
|
|
function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
|
|
private
|
|
function OptPass1ADD(var p : tai) : boolean;
|
|
function OptPass1ANDI(var p : tai) : boolean;
|
|
function OptPass1CALL(var p : tai) : boolean;
|
|
function OptPass1CLR(var p : tai) : boolean;
|
|
function OptPass1IN(var p : tai) : boolean;
|
|
function OptPass1LDI(var p : tai) : boolean;
|
|
function OptPass1LDS(var p : tai) : boolean;
|
|
function OptPass1MOV(var p : tai) : boolean;
|
|
function OptPass1PUSH(var p : tai) : boolean;
|
|
function OptPass1RCALL(var p : tai) : boolean;
|
|
function OptPass1SBI(var p : tai) : boolean;
|
|
function OptPass1SBR(var p : tai) : boolean;
|
|
function OptPass1STS(var p : tai) : boolean;
|
|
function OptPass1SUB(var p : tai) : boolean;
|
|
|
|
function OptPass2MOV(var p : tai) : boolean;
|
|
End;
|
|
|
|
Implementation
|
|
|
|
uses
|
|
cutils,
|
|
verbose,
|
|
cpuinfo,
|
|
aasmbase,aasmcpu,aasmdata,
|
|
aoptutils,
|
|
globals,globtype,
|
|
cgutils;
|
|
|
|
type
|
|
TAsmOpSet = set of TAsmOp;
|
|
|
|
function CanBeCond(p : tai) : boolean;
|
|
begin
|
|
result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
|
|
end;
|
|
|
|
|
|
function RefsEqual(const r1, r2: treference): boolean;
|
|
begin
|
|
refsequal :=
|
|
(r1.offset = r2.offset) and
|
|
(r1.base = r2.base) and
|
|
(r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
|
|
(r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
|
|
(r1.relsymbol = r2.relsymbol) and
|
|
(r1.addressmode = r2.addressmode) and
|
|
(r1.volatility=[]) and
|
|
(r2.volatility=[]);
|
|
end;
|
|
|
|
|
|
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
|
|
begin
|
|
result:=oper1.typ=oper2.typ;
|
|
|
|
if result then
|
|
case oper1.typ of
|
|
top_const:
|
|
Result:=oper1.val = oper2.val;
|
|
top_reg:
|
|
Result:=oper1.reg = oper2.reg;
|
|
top_ref:
|
|
Result:=RefsEqual(oper1.ref^, oper2.ref^);
|
|
else Result:=false;
|
|
end
|
|
end;
|
|
|
|
|
|
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
|
|
begin
|
|
result := (oper.typ = top_reg) and (oper.reg = reg);
|
|
end;
|
|
|
|
|
|
function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
|
|
begin
|
|
result :=
|
|
(instr.typ = ait_instruction) and
|
|
(taicpu(instr).opcode = op);
|
|
end;
|
|
|
|
|
|
function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
|
|
begin
|
|
result :=
|
|
(instr.typ = ait_instruction) and
|
|
(taicpu(instr).opcode in ops);
|
|
end;
|
|
|
|
|
|
function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
|
|
begin
|
|
result :=
|
|
(instr.typ = ait_instruction) and
|
|
(taicpu(instr).opcode in ops) and
|
|
(taicpu(instr).ops=opcount);
|
|
end;
|
|
|
|
|
|
{$ifdef DEBUG_AOPTCPU}
|
|
procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
|
|
begin
|
|
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
|
|
end;
|
|
{$else DEBUG_AOPTCPU}
|
|
procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
|
|
begin
|
|
end;
|
|
{$endif DEBUG_AOPTCPU}
|
|
|
|
|
|
function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
|
|
begin
|
|
If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
|
|
((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
|
|
Result:=true
|
|
else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_MOVW) and
|
|
((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
|
|
(taicpu(p1).oper[0]^.reg=reg) or (taicpu(p1).oper[1]^.reg=reg)) then
|
|
Result:=true
|
|
else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_ADIW) and
|
|
((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (taicpu(p1).oper[0]^.reg=reg)) then
|
|
Result:=true
|
|
else
|
|
Result:=inherited RegInInstruction(Reg, p1);
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
|
|
var Next: tai; reg: TRegister): Boolean;
|
|
begin
|
|
Next:=Current;
|
|
repeat
|
|
Result:=GetNextInstruction(Next,Next);
|
|
until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
|
|
(is_calljmp(taicpu(Next).opcode));
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
|
|
var
|
|
p: taicpu;
|
|
begin
|
|
if not assigned(hp) or
|
|
(hp.typ <> ait_instruction) then
|
|
begin
|
|
Result := false;
|
|
exit;
|
|
end;
|
|
p := taicpu(hp);
|
|
Result := ((p.opcode in [A_LDI,A_MOV,A_LDS]) and (reg=p.oper[0]^.reg) and ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg))) or
|
|
((p.opcode in [A_LD,A_LDD,A_LPM]) and (reg=p.oper[0]^.reg) and not(RegInRef(reg,p.oper[1]^.ref^))) or
|
|
((p.opcode in [A_MOVW]) and ((reg=p.oper[0]^.reg) or (TRegister(ord(reg)+1)=p.oper[0]^.reg)) and not(reg=p.oper[1]^.reg) and not(TRegister(ord(reg)+1)=p.oper[1]^.reg)) or
|
|
((p.opcode in [A_POP]) and (reg=p.oper[0]^.reg));
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
|
|
var
|
|
p: taicpu;
|
|
i: longint;
|
|
begin
|
|
Result := false;
|
|
|
|
if not (assigned(hp) and (hp.typ = ait_instruction)) then
|
|
exit;
|
|
p:=taicpu(hp);
|
|
|
|
i:=0;
|
|
|
|
{ we do not care about the stack pointer }
|
|
if p.opcode in [A_POP] then
|
|
exit;
|
|
|
|
{ first operand only written?
|
|
then skip it }
|
|
if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
|
|
i:=1;
|
|
|
|
while i<p.ops do
|
|
begin
|
|
case p.oper[i]^.typ of
|
|
top_reg:
|
|
Result := (p.oper[i]^.reg = reg) or
|
|
{ MOVW }
|
|
((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[i]^.reg)+1=getsupreg(reg))) or
|
|
{ ADIW }
|
|
((i=0) and (p.opcode=A_ADIW) and (getsupreg(p.oper[i]^.reg)+1=getsupreg(reg)));
|
|
top_ref:
|
|
Result :=
|
|
(p.oper[i]^.ref^.base = reg) or
|
|
(p.oper[i]^.ref^.index = reg);
|
|
end;
|
|
{ Bailout if we found something }
|
|
if Result then
|
|
exit;
|
|
Inc(i);
|
|
end;
|
|
end;
|
|
|
|
|
|
{
|
|
Turns
|
|
sbis ?
|
|
jmp .Lx
|
|
op
|
|
.Lx:
|
|
|
|
Into
|
|
sbic ?
|
|
op
|
|
|
|
For all types of skip instructions
|
|
}
|
|
function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;
|
|
|
|
function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
|
|
begin
|
|
repeat
|
|
result:=GetNextInstruction(p,next);
|
|
p:=next;
|
|
until
|
|
(not result) or
|
|
(not assigned(next)) or
|
|
(next.typ in [ait_instruction]);
|
|
|
|
result:=assigned(next) and (next.typ in [ait_instruction]);
|
|
end;
|
|
|
|
var
|
|
hp1, hp2, hp3: tai;
|
|
begin
|
|
result:=false;
|
|
|
|
if GetNextInstruction(taicpu(p),hp1) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
|
|
(taicpu(hp1).ops=1) and
|
|
(taicpu(hp1).oper[0]^.typ=top_ref) and
|
|
(taicpu(hp1).oper[0]^.ref^.offset=0) and
|
|
(taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
|
|
GetNextInstructionWithoutLabel(hp1,hp2) and
|
|
(hp2.typ=ait_instruction) and
|
|
(not taicpu(hp2).is_jmp) and
|
|
GetNextInstruction(hp2,hp3) and
|
|
FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
|
|
begin
|
|
DebugMsg('SkipJump2InvertedSkip', p);
|
|
|
|
case taicpu(p).opcode of
|
|
A_SBIS: taicpu(p).opcode:=A_SBIC;
|
|
A_SBIC: taicpu(p).opcode:=A_SBIS;
|
|
A_SBRS: taicpu(p).opcode:=A_SBRC;
|
|
A_SBRC: taicpu(p).opcode:=A_SBRS;
|
|
end;
|
|
|
|
TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
|
|
|
|
asml.remove(hp1);
|
|
hp1.free;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
|
|
var
|
|
hp1 : tai;
|
|
alloc ,dealloc: tai_regalloc;
|
|
begin
|
|
Result:=false;
|
|
{ turn
|
|
ldi reg0, imm
|
|
<op> reg1, reg0
|
|
dealloc reg0
|
|
into
|
|
<op>i reg1, imm
|
|
}
|
|
if MatchOpType(taicpu(p),top_reg,top_const) and
|
|
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
|
|
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
(getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
|
|
(taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
|
|
not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
|
|
begin
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
UpdateUsedRegs(TmpUsedRegs,tai(p.next));
|
|
UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
|
|
if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
|
|
begin
|
|
case taicpu(hp1).opcode of
|
|
A_CP:
|
|
taicpu(hp1).opcode:=A_CPI;
|
|
A_MOV:
|
|
taicpu(hp1).opcode:=A_LDI;
|
|
A_AND:
|
|
taicpu(hp1).opcode:=A_ANDI;
|
|
A_SUB:
|
|
taicpu(hp1).opcode:=A_SUBI;
|
|
else
|
|
internalerror(2016111901);
|
|
end;
|
|
taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
DebugMsg('Peephole LdiOp2Opi performed', p);
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
|
|
begin
|
|
Result:=false;
|
|
if (taicpu(p).oper[0]^.ref^.symbol=nil) and
|
|
(taicpu(p).oper[0]^.ref^.relsymbol=nil) and
|
|
(getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
|
|
(getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
|
|
(taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
|
|
(((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
|
|
(taicpu(p).oper[0]^.ref^.offset>=0) and
|
|
(taicpu(p).oper[0]^.ref^.offset<=63)) or
|
|
(not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
|
|
(taicpu(p).oper[0]^.ref^.offset>=32) and
|
|
(taicpu(p).oper[0]^.ref^.offset<=95))) then
|
|
begin
|
|
DebugMsg('Peephole Sts2Out performed', p);
|
|
|
|
taicpu(p).opcode:=A_OUT;
|
|
if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
|
|
taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
|
|
else
|
|
taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
|
|
result:=true;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2, hp3, alloc, dealloc: tai;
|
|
begin
|
|
Result:=false;
|
|
if (taicpu(p).oper[1]^.ref^.symbol=nil) and
|
|
(taicpu(p).oper[1]^.ref^.relsymbol=nil) and
|
|
(getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
|
|
(getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
|
|
(taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
|
|
(((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
|
|
(taicpu(p).oper[1]^.ref^.offset>=0) and
|
|
(taicpu(p).oper[1]^.ref^.offset<=63)) or
|
|
(not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
|
|
(taicpu(p).oper[1]^.ref^.offset>=32) and
|
|
(taicpu(p).oper[1]^.ref^.offset<=95))) then
|
|
begin
|
|
DebugMsg('Peephole Lds2In performed', p);
|
|
|
|
taicpu(p).opcode:=A_IN;
|
|
if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
|
|
taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
|
|
else
|
|
taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
|
|
|
|
result:=true;
|
|
end
|
|
|
|
{ turn
|
|
alloc reg0
|
|
alloc reg1
|
|
lds reg0, label
|
|
lds reg1, label
|
|
mov reg2, reg0
|
|
mov reg3, reg1
|
|
dealloc reg0
|
|
dealloc reg1
|
|
|
|
into
|
|
lds reg2, label
|
|
lds reg3, label
|
|
}
|
|
else if not(cs_opt_level3 in current_settings.optimizerswitches) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
assigned(FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous))) and
|
|
(GetNextInstruction(p,hp1)) and MatchInstruction(hp1,A_LDS) and
|
|
(taicpu(hp1).oper[0]^.typ=top_reg) and
|
|
assigned(FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous))) and
|
|
(GetNextInstruction(hp1, hp2)) and MatchInstruction(hp2,A_MOV) and
|
|
(taicpu(hp2).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next))) and
|
|
(GetNextInstruction(hp2, hp3)) and MatchInstruction(hp3,A_MOV) and
|
|
(taicpu(hp3).oper[1]^.reg=taicpu(hp1).oper[0]^.reg) and
|
|
assigned(FindRegDeAlloc(taicpu(hp1).oper[0]^.reg,tai(hp3.Next))) then
|
|
begin
|
|
DebugMsg('Peephole LdsLdsMovMov2LdsLds performed', p);
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
RemoveInstruction(hp2);
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(hp1).oper[0]^.reg,tai(hp3.Next));
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
taicpu(hp1).oper[0]^.reg:=taicpu(hp3).oper[0]^.reg;
|
|
RemoveInstruction(hp3);
|
|
Result:=true;
|
|
end
|
|
|
|
{ turn
|
|
alloc reg0
|
|
lds reg0, label
|
|
...
|
|
mov reg1, reg0
|
|
dealloc reg0
|
|
|
|
into
|
|
lds reg1, label
|
|
}
|
|
else if (cs_opt_level3 in current_settings.optimizerswitches) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
(GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg)) and
|
|
MatchInstruction(hp1,A_MOV) and
|
|
(taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
(not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) then
|
|
begin
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
DebugMsg('Peephole LdsMov2Lds performed', p);
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
|
|
RemoveInstruction(hp1);
|
|
result:=true;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2: tai;
|
|
l : TAsmLabel;
|
|
begin
|
|
Result:=false;
|
|
if GetNextInstruction(p,hp1) then
|
|
begin
|
|
{
|
|
in rX,Y
|
|
ori rX,n
|
|
out Y,rX
|
|
|
|
into
|
|
sbi rX,lg(n)
|
|
}
|
|
if (taicpu(p).oper[1]^.val<=31) and
|
|
MatchInstruction(hp1,A_ORI) and
|
|
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
|
|
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
|
|
GetNextInstruction(hp1,hp2) and
|
|
MatchInstruction(hp2,A_OUT) and
|
|
MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
|
|
MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
|
|
begin
|
|
DebugMsg('Peephole InOriOut2Sbi performed', p);
|
|
|
|
taicpu(p).opcode:=A_SBI;
|
|
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
|
|
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
asml.Remove(hp2);
|
|
hp2.Free;
|
|
result:=true;
|
|
end
|
|
{
|
|
in rX,Y
|
|
andi rX,not(n)
|
|
out Y,rX
|
|
|
|
into
|
|
cbi rX,lg(n)
|
|
}
|
|
else if (taicpu(p).oper[1]^.val<=31) and
|
|
MatchInstruction(hp1,A_ANDI) and
|
|
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
|
|
(PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
|
|
GetNextInstruction(hp1,hp2) and
|
|
MatchInstruction(hp2,A_OUT) and
|
|
MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
|
|
MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
|
|
begin
|
|
DebugMsg('Peephole InAndiOut2Cbi performed', p);
|
|
|
|
taicpu(p).opcode:=A_CBI;
|
|
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
|
|
taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
asml.Remove(hp2);
|
|
hp2.Free;
|
|
result:=true;
|
|
end
|
|
{
|
|
in rX,Y
|
|
andi rX,n
|
|
breq/brne L1
|
|
|
|
into
|
|
sbis/sbic Y,lg(n)
|
|
jmp L1
|
|
.Ltemp:
|
|
}
|
|
else if (taicpu(p).oper[1]^.val<=31) and
|
|
MatchInstruction(hp1,A_ANDI) and
|
|
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
|
|
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
|
|
GetNextInstruction(hp1,hp2) and
|
|
MatchInstruction(hp2,A_BRxx) and
|
|
(taicpu(hp2).condition in [C_EQ,C_NE]) then
|
|
begin
|
|
if taicpu(hp2).condition=C_EQ then
|
|
taicpu(p).opcode:=A_SBIS
|
|
else
|
|
taicpu(p).opcode:=A_SBIC;
|
|
|
|
DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);
|
|
|
|
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
|
|
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
|
|
taicpu(hp2).condition:=C_None;
|
|
if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
|
|
taicpu(hp2).opcode:=A_JMP
|
|
else
|
|
taicpu(hp2).opcode:=A_RJMP;
|
|
|
|
current_asmdata.getjumplabel(l);
|
|
l.increfs;
|
|
asml.InsertAfter(tai_label.create(l), hp2);
|
|
|
|
result:=true;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
|
|
var
|
|
hp1 : tai;
|
|
begin
|
|
Result:=false;
|
|
{
|
|
Turn
|
|
in rx, y
|
|
sbr* rx, z
|
|
Into
|
|
sbi* y, z
|
|
}
|
|
if (taicpu(p).ops=2) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
|
|
GetLastInstruction(p,hp1) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode=A_IN) and
|
|
(taicpu(hp1).ops=2) and
|
|
(taicpu(hp1).oper[1]^.typ=top_const) and
|
|
(taicpu(hp1).oper[1]^.val in [0..31]) and
|
|
MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, p)) then
|
|
begin
|
|
if taicpu(p).opcode=A_SBRS then
|
|
taicpu(p).opcode:=A_SBIS
|
|
else
|
|
taicpu(p).opcode:=A_SBIC;
|
|
|
|
taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
|
|
|
|
DebugMsg('Peephole InSbrx2Sbix performed', p);
|
|
|
|
asml.Remove(hp1);
|
|
hp1.free;
|
|
|
|
result:=true;
|
|
end;
|
|
|
|
if InvertSkipInstruction(p) then
|
|
result:=true;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2, hp3, hp4, hp5: tai;
|
|
begin
|
|
Result:=false;
|
|
{
|
|
Turn
|
|
sbic/sbis X, y
|
|
jmp .L1
|
|
op
|
|
.L1:
|
|
|
|
into
|
|
sbis/sbic X,y
|
|
op
|
|
.L1:
|
|
}
|
|
if InvertSkipInstruction(p) then
|
|
result:=true
|
|
{
|
|
Turn
|
|
sbiX X, y
|
|
jmp .L1
|
|
jmp .L2
|
|
.L1:
|
|
op
|
|
.L2:
|
|
|
|
into
|
|
sbiX X,y
|
|
.L1:
|
|
op
|
|
.L2:
|
|
}
|
|
else if GetNextInstruction(p, hp1) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
|
|
(taicpu(hp1).ops>0) and
|
|
(taicpu(hp1).oper[0]^.typ = top_ref) and
|
|
(taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
|
|
|
|
GetNextInstruction(hp1, hp2) and
|
|
(hp2.typ=ait_instruction) and
|
|
(taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
|
|
(taicpu(hp2).ops>0) and
|
|
(taicpu(hp2).oper[0]^.typ = top_ref) and
|
|
(taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
|
|
|
|
GetNextInstruction(hp2, hp3) and
|
|
(hp3.typ=ait_label) and
|
|
(taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
|
|
|
|
GetNextInstruction(hp3, hp4) and
|
|
(hp4.typ=ait_instruction) and
|
|
|
|
GetNextInstruction(hp4, hp5) and
|
|
(hp3.typ=ait_label) and
|
|
(taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
|
|
begin
|
|
DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);
|
|
|
|
tai_label(hp3).labsym.decrefs;
|
|
tai_label(hp5).labsym.decrefs;
|
|
|
|
AsmL.remove(hp1);
|
|
taicpu(hp1).Free;
|
|
|
|
AsmL.remove(hp2);
|
|
taicpu(hp2).Free;
|
|
|
|
result:=true;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2, hp3: tai;
|
|
i : longint;
|
|
begin
|
|
Result:=false;
|
|
{
|
|
Turn
|
|
andi rx, #pow2
|
|
brne l
|
|
<op>
|
|
l:
|
|
Into
|
|
sbrs rx, #(1 shl imm)
|
|
<op>
|
|
l:
|
|
}
|
|
if (taicpu(p).ops=2) and
|
|
(taicpu(p).oper[1]^.typ=top_const) and
|
|
ispowerof2(taicpu(p).oper[1]^.val,i) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
|
|
GetNextInstruction(p,hp1) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode=A_BRxx) and
|
|
(taicpu(hp1).condition in [C_EQ,C_NE]) and
|
|
(taicpu(hp1).ops>0) and
|
|
(taicpu(hp1).oper[0]^.typ = top_ref) and
|
|
(taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
|
|
GetNextInstruction(hp1,hp2) and
|
|
(hp2.typ=ait_instruction) and
|
|
GetNextInstruction(hp2,hp3) and
|
|
(hp3.typ=ait_label) and
|
|
(taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
|
|
begin
|
|
DebugMsg('Peephole AndiBr2Sbr performed', p);
|
|
|
|
taicpu(p).oper[1]^.val:=i;
|
|
|
|
if taicpu(hp1).condition=C_NE then
|
|
taicpu(p).opcode:=A_SBRS
|
|
else
|
|
taicpu(p).opcode:=A_SBRC;
|
|
|
|
asml.Remove(hp1);
|
|
hp1.free;
|
|
|
|
result:=true;
|
|
end
|
|
{
|
|
Remove
|
|
andi rx, #y
|
|
dealloc rx
|
|
}
|
|
else if (taicpu(p).ops=2) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
|
|
(assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
|
|
(not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
|
|
begin
|
|
DebugMsg('Redundant Andi removed', p);
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
|
|
var
|
|
hp1: tai;
|
|
begin
|
|
Result:=false;
|
|
if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
|
|
GetNextInstruction(p, hp1) and
|
|
MatchInstruction(hp1,A_ADC) then
|
|
begin
|
|
DebugMsg('Peephole AddAdc2Add performed', p);
|
|
|
|
RemoveCurrentP(p, hp1);
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
|
|
var
|
|
hp1: tai;
|
|
begin
|
|
Result:=false;
|
|
if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
|
|
GetNextInstruction(p, hp1) and
|
|
MatchInstruction(hp1,A_SBC) then
|
|
begin
|
|
DebugMsg('Peephole SubSbc2Sub performed', p);
|
|
|
|
taicpu(hp1).opcode:=A_SUB;
|
|
|
|
RemoveCurrentP(p, hp1);
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
|
|
var
|
|
hp1: tai;
|
|
begin
|
|
result:=false;
|
|
{ fold
|
|
mov reg2,reg0
|
|
mov reg3,reg1
|
|
to
|
|
movw reg2,reg0
|
|
}
|
|
if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
|
|
(taicpu(p).ops=2) and
|
|
(taicpu(p).oper[0]^.typ = top_reg) and
|
|
(taicpu(p).oper[1]^.typ = top_reg) and
|
|
getnextinstruction(p,hp1) and
|
|
(hp1.typ = ait_instruction) and
|
|
(taicpu(hp1).opcode = A_MOV) and
|
|
(taicpu(hp1).ops=2) and
|
|
(taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
|
|
((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
|
|
((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
|
|
(getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
|
|
begin
|
|
DebugMsg('Peephole MovMov2Movw performed', p);
|
|
|
|
AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
|
|
AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
|
|
|
|
taicpu(p).opcode:=A_MOVW;
|
|
asml.remove(hp1);
|
|
hp1.free;
|
|
result:=true;
|
|
end
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
|
|
var
|
|
hp1: tai;
|
|
alloc, dealloc: tai_regalloc;
|
|
begin
|
|
Result:=false;
|
|
{ turn the common
|
|
clr rX
|
|
mov/ld rX, rY
|
|
into
|
|
mov/ld rX, rY
|
|
}
|
|
if (taicpu(p).ops=1) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode in [A_MOV,A_LD]) and
|
|
(taicpu(hp1).ops>0) and
|
|
(taicpu(hp1).oper[0]^.typ=top_reg) and
|
|
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
|
|
begin
|
|
DebugMsg('Peephole ClrMov2Mov performed', p);
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end
|
|
{ turn
|
|
clr rX
|
|
...
|
|
adc rY, rX
|
|
into
|
|
...
|
|
adc rY, r1
|
|
}
|
|
else if (taicpu(p).ops=1) and
|
|
(taicpu(p).oper[0]^.typ=top_reg) and
|
|
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).opcode in [A_ADC,A_SBC]) and
|
|
(taicpu(hp1).ops=2) and
|
|
(taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
(taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
|
|
(taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
|
|
begin
|
|
DebugMsg('Peephole ClrAdc2Adc performed', p);
|
|
|
|
taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2, hp3: tai;
|
|
begin
|
|
Result:=false;
|
|
{ turn
|
|
push reg0
|
|
push reg1
|
|
pop reg3
|
|
pop reg2
|
|
|
|
into
|
|
|
|
movw reg2,reg0
|
|
|
|
or
|
|
|
|
mov reg3,reg1
|
|
mov reg2,reg0
|
|
|
|
}
|
|
if GetNextInstruction(p,hp1) and
|
|
MatchInstruction(hp1,A_PUSH) and
|
|
|
|
GetNextInstruction(hp1,hp2) and
|
|
MatchInstruction(hp2,A_POP) and
|
|
|
|
GetNextInstruction(hp2,hp3) and
|
|
MatchInstruction(hp3,A_POP) then
|
|
begin
|
|
if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
|
|
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
|
|
((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
|
|
(getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
|
|
((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
|
|
begin
|
|
DebugMsg('Peephole PushPushPopPop2Movw performed', p);
|
|
|
|
taicpu(hp3).ops:=2;
|
|
taicpu(hp3).opcode:=A_MOVW;
|
|
|
|
taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
|
|
|
|
{ We're removing 3 concurrent instructions. Remove hp1
|
|
and hp2 manually instead of calling RemoveCurrentP
|
|
as this means we won't be calling UpdateUsedRegs 3 times }
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
|
|
asml.Remove(hp2);
|
|
hp2.Free;
|
|
|
|
{ By removing p last, we've guaranteed that p.Next is
|
|
valid (storing it prior to removing the instructions
|
|
may result in a dangling pointer if hp1 immediately
|
|
follows p), and because hp1, hp2 and hp3 came from
|
|
sequential calls to GetNextInstruction, it is
|
|
guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
|
|
RemoveCurrentP(p, hp3);
|
|
Result := True;
|
|
end
|
|
else
|
|
begin
|
|
DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
|
|
|
|
taicpu(p).ops:=2;
|
|
taicpu(p).opcode:=A_MOV;
|
|
|
|
taicpu(hp1).ops:=2;
|
|
taicpu(hp1).opcode:=A_MOV;
|
|
|
|
taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
|
|
taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
|
|
|
|
taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
|
|
taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
|
|
|
|
{ life range of reg2 and reg3 is increased, fix register allocation entries }
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
|
|
AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
|
|
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
|
|
|
|
IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
|
|
UpdateUsedRegs(tai(p.Next));
|
|
|
|
asml.Remove(hp2);
|
|
hp2.Free;
|
|
asml.Remove(hp3);
|
|
hp3.Free;
|
|
|
|
result:=true;
|
|
end
|
|
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
|
|
var
|
|
hp1: tai;
|
|
begin
|
|
Result:=false;
|
|
if (cs_opt_level4 in current_settings.optimizerswitches) and
|
|
GetNextInstruction(p,hp1) and
|
|
MatchInstruction(hp1,A_RET) then
|
|
begin
|
|
DebugMsg('Peephole CallReg2Jmp performed', p);
|
|
|
|
taicpu(p).opcode:=A_JMP;
|
|
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
|
|
result:=true;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
|
|
var
|
|
hp1: tai;
|
|
begin
|
|
Result:=false;
|
|
if (cs_opt_level4 in current_settings.optimizerswitches) and
|
|
GetNextInstruction(p,hp1) and
|
|
MatchInstruction(hp1,A_RET) then
|
|
begin
|
|
DebugMsg('Peephole RCallReg2RJmp performed', p);
|
|
|
|
taicpu(p).opcode:=A_RJMP;
|
|
|
|
asml.Remove(hp1);
|
|
hp1.Free;
|
|
|
|
result:=true;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
|
|
var
|
|
hp1, hp2: tai;
|
|
i : Integer;
|
|
alloc, dealloc: tai_regalloc;
|
|
begin
|
|
Result:=false;
|
|
{ change
|
|
mov reg0, reg1
|
|
dealloc reg0
|
|
into
|
|
dealloc reg0
|
|
}
|
|
if MatchOpType(taicpu(p),top_reg,top_reg) then
|
|
begin
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
|
|
if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
|
|
{ reg. allocation information before calls is not perfect, so don't do this before
|
|
calls/icalls }
|
|
GetNextInstruction(p,hp1) and
|
|
not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
|
|
begin
|
|
DebugMsg('Peephole Mov2Nop performed', p);
|
|
RemoveCurrentP(p, hp1);
|
|
Result := True;
|
|
exit;
|
|
end;
|
|
end;
|
|
|
|
{ turn
|
|
mov reg0, reg1
|
|
<op> reg2,reg0
|
|
dealloc reg0
|
|
into
|
|
<op> reg2,reg1
|
|
}
|
|
if MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
|
|
(MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
|
|
A_OUT,A_IN]) or
|
|
{ the reference register of ST/STD cannot be replaced }
|
|
(MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
|
|
(not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
|
|
{(taicpu(hp1).ops=1) and
|
|
(taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
|
|
begin
|
|
DebugMsg('Peephole MovOp2Op 1 performed', p);
|
|
|
|
for i := 0 to taicpu(hp1).ops-1 do
|
|
if taicpu(hp1).oper[i]^.typ=top_reg then
|
|
if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
|
|
taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
{ life range of reg1 is increased }
|
|
AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
|
|
{ p will be removed, update used register as we continue
|
|
with the next instruction after p }
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end
|
|
{ turn
|
|
mov reg1, reg0
|
|
<op> reg1,xxxx
|
|
dealloc reg1
|
|
into
|
|
<op> reg1,xxx
|
|
}
|
|
else if MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
|
|
MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
|
|
begin
|
|
DebugMsg('Peephole MovOp2Op 2 performed', p);
|
|
|
|
for i := 0 to taicpu(hp1).ops-1 do
|
|
if taicpu(hp1).oper[i]^.typ=top_reg then
|
|
if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
|
|
taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
{ life range of reg1 is increased }
|
|
AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
|
|
{ p will be removed, update used register as we continue
|
|
with the next instruction after p }
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end
|
|
{ remove
|
|
mov reg0,reg0
|
|
}
|
|
else if (taicpu(p).ops=2) and
|
|
(taicpu(p).oper[0]^.typ = top_reg) and
|
|
(taicpu(p).oper[1]^.typ = top_reg) and
|
|
(taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
|
|
begin
|
|
DebugMsg('Peephole RedundantMov performed', p);
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end
|
|
{
|
|
Turn
|
|
mov rx,ry
|
|
op rx,rz
|
|
mov ry, rx
|
|
Into
|
|
op ry,rz
|
|
}
|
|
else if (taicpu(p).ops=2) and
|
|
MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).ops >= 1) and
|
|
(taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
|
|
MatchInstruction(hp2,A_MOV) and
|
|
MatchOpType(taicpu(hp2),top_reg,top_reg) and
|
|
(taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
|
|
(taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
|
|
(taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
|
|
(taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
|
|
A_INC,A_DEC,
|
|
A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
|
|
begin
|
|
DebugMsg('Peephole MovOpMov2Op performed', p);
|
|
|
|
if (taicpu(hp1).ops=2) and
|
|
(taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
|
|
taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
|
taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
asml.remove(hp2);
|
|
hp2.free;
|
|
|
|
result:=RemoveCurrentP(p);
|
|
end
|
|
{
|
|
Turn
|
|
mov rx,ry
|
|
op rx,rw
|
|
mov rw,rx
|
|
Into
|
|
op rw,ry
|
|
}
|
|
else if (taicpu(p).ops=2) and
|
|
MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
(hp1.typ=ait_instruction) and
|
|
(taicpu(hp1).ops = 2) and
|
|
MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
|
|
(hp2.typ=ait_instruction) and
|
|
(taicpu(hp2).opcode=A_MOV) and
|
|
MatchOpType(taicpu(hp2),top_reg,top_reg) and
|
|
(taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
|
|
(taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
|
|
(taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
|
(not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
|
|
(taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
|
|
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
|
|
begin
|
|
DebugMsg('Peephole MovOpMov2Op2 performed', p);
|
|
|
|
taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
|
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
|
|
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
|
|
|
|
if assigned(alloc) and assigned(dealloc) then
|
|
begin
|
|
asml.Remove(alloc);
|
|
alloc.Free;
|
|
asml.Remove(dealloc);
|
|
dealloc.Free;
|
|
end;
|
|
|
|
result:=RemoveCurrentP(p);
|
|
|
|
asml.remove(hp2);
|
|
hp2.free;
|
|
end
|
|
{
|
|
This removes the first mov from
|
|
mov rX,...
|
|
mov rX,...
|
|
}
|
|
else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) and
|
|
{ test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
|
|
MatchInstruction(hp1,A_MOV) and
|
|
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
|
|
while MatchInstruction(hp1,A_MOV) and
|
|
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
|
|
{ don't remove the first mov if the second is a mov rX,rX }
|
|
not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
|
|
begin
|
|
DebugMsg('Peephole MovMov2Mov 1 performed', p);
|
|
|
|
RemoveCurrentP(p,hp1);
|
|
Result := True;
|
|
|
|
GetNextInstruction(hp1,hp1);
|
|
if not assigned(hp1) then
|
|
break;
|
|
end
|
|
{
|
|
This removes the second mov from
|
|
mov rX,rY
|
|
|
|
...
|
|
|
|
mov rX,rY
|
|
|
|
if rX and rY are not modified in-between
|
|
}
|
|
else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
|
|
MatchInstruction(hp1,A_MOV) and
|
|
MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
|
|
MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
|
|
not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
|
|
begin
|
|
DebugMsg('Peephole MovMov2Mov 2 performed', p);
|
|
AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
|
|
RemoveInstruction(hp1);
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
|
var
|
|
hp1,hp2: tai;
|
|
begin
|
|
result := false;
|
|
case p.typ of
|
|
ait_instruction:
|
|
begin
|
|
{
|
|
change
|
|
<op> reg,x
|
|
cp reg,r1
|
|
into
|
|
<op>s reg,x
|
|
}
|
|
{ this optimization can applied only to the currently enabled operations because
|
|
the other operations do not update all flags and FPC does not track flag usage }
|
|
if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
|
|
A_INC,A_LSL,A_LSR,
|
|
A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
|
|
GetNextInstruction(p, hp1) and
|
|
((MatchInstruction(hp1, A_CP) and
|
|
(((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
|
|
(taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
|
|
((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
|
|
(taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
|
|
(taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
|
|
A_LSL,A_LSR,
|
|
A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
|
|
(MatchInstruction(hp1, A_CPI) and
|
|
(taicpu(p).opcode = A_ANDI) and
|
|
(taicpu(p).oper[1]^.typ=top_const) and
|
|
(taicpu(hp1).oper[1]^.typ=top_const) and
|
|
(taicpu(hp1).oper[1]^.val=0))) and
|
|
GetNextInstruction(hp1, hp2) and
|
|
{ be careful here, following instructions could use other flags
|
|
however after a jump fpc never depends on the value of flags }
|
|
{ All above instructions set Z and N according to the following
|
|
Z := result = 0;
|
|
N := result[7];
|
|
EQ = Z=1; NE = Z=0;
|
|
MI = N=1; PL = N=0; }
|
|
MatchInstruction(hp2, A_BRxx) and
|
|
((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
|
|
{ sub/sbc set all flags }
|
|
(taicpu(p).opcode in [A_SUB,A_SBI])){ and
|
|
no flag allocation tracking implemented yet on avr
|
|
assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
|
|
begin
|
|
{ move flag allocation if possible }
|
|
{ no flag allocation tracking implemented yet on avr
|
|
GetLastInstruction(hp1, hp2);
|
|
hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
|
|
if assigned(hp2) then
|
|
begin
|
|
asml.Remove(hp2);
|
|
asml.insertbefore(hp2, p);
|
|
end;
|
|
}
|
|
|
|
// If we compare to the same value we are masking then invert the comparison
|
|
if (taicpu(hp1).opcode=A_CPI) or
|
|
{ sub/sbc with reverted? }
|
|
((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
|
|
taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
|
|
|
|
asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
|
|
asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
|
|
IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
|
|
|
|
DebugMsg('Peephole OpCp2Op performed', p);
|
|
|
|
asml.remove(hp1);
|
|
hp1.free;
|
|
Result:=true;
|
|
end
|
|
else
|
|
case taicpu(p).opcode of
|
|
A_LDI:
|
|
Result:=OptPass1LDI(p);
|
|
A_STS:
|
|
Result:=OptPass1STS(p);
|
|
A_LDS:
|
|
Result:=OptPass1LDS(p);
|
|
A_IN:
|
|
Result:=OptPass1IN(p);
|
|
A_SBRS,
|
|
A_SBRC:
|
|
Result:=OptPass1SBR(p);
|
|
A_ANDI:
|
|
Result:=OptPass1ANDI(p);
|
|
A_ADD:
|
|
Result:=OptPass1ADD(p);
|
|
A_SUB:
|
|
Result:=OptPass1SUB(p);
|
|
A_CLR:
|
|
Result:=OptPass1CLR(p);
|
|
A_PUSH:
|
|
Result:=OptPass1PUSH(p);
|
|
A_CALL:
|
|
Result:=OptPass1CALL(p);
|
|
A_RCALL:
|
|
Result:=OptPass1RCALL(p);
|
|
A_MOV:
|
|
Result:=OptPass1MOV(p);
|
|
A_SBIC,
|
|
A_SBIS:
|
|
Result:=OptPass1SBI(p);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
|
|
begin
|
|
result := false;
|
|
case p.typ of
|
|
ait_instruction:
|
|
begin
|
|
case taicpu(p).opcode of
|
|
A_MOV:
|
|
Result:=OptPass2MOV(p);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
begin
|
|
casmoptimizer:=TCpuAsmOptimizer;
|
|
End.
|
|
|