Add CBNZ/CBZ instructions

Create preliminary Thumb-2 PeepHoleOptPass2 code, hacked together from the ARM mode code
Added a number of simple size optimizations for common Thumb-2 instructions

git-svn-id: branches/laksen/arm-embedded@22590 -
This commit is contained in:
Jeppe Johansen 2012-10-08 12:30:00 +00:00
parent b788ba660d
commit 9ec9b44784
8 changed files with 462 additions and 22 deletions

View File

@ -175,6 +175,7 @@ uses
constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
constructor op_regset(op:tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
constructor op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
@ -415,6 +416,13 @@ implementation
loadconst(1,aint(_op2));
end;
{ Creates an instruction with opcode op whose only operand is a register set.
  regtype, subreg and _op1 are passed unchanged to loadregset for operand
  slot 0. }
constructor taicpu.op_regset(op: tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
begin
inherited create(op);
{ this instruction form carries exactly one operand }
ops:=1;
{ operand 0 is the register set }
loadregset(0,regtype,subreg,_op1);
end;
constructor taicpu.op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
begin

View File

@ -30,7 +30,7 @@ Unit aoptcpu;
Interface
uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
@ -62,6 +62,7 @@ Type
TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
{ uses the same constructor as TAopObj }
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
procedure PeepHoleOptPass2;override;
End;
@ -80,6 +81,8 @@ Implementation
(p.typ=ait_instruction) and
(taicpu(p).condition=C_None) and
((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
(taicpu(p).opcode<>A_CBZ) and
(taicpu(p).opcode<>A_CBNZ) and
(taicpu(p).opcode<>A_PLD) and
((taicpu(p).opcode<>A_BLX) or
(taicpu(p).oper[0]^.typ=top_reg));
@ -327,6 +330,9 @@ Implementation
(taicpu(movp).oper[0]^.reg<>NR_R14) and
{ the destination register of the mov must not be used between p and movp }
not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
{ cb[n]z are thumb instructions which require specific registers, with no wide forms }
(taicpu(p).opcode<>A_CBZ) and
(taicpu(p).opcode<>A_CBNZ) and
{There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
not (
(taicpu(p).opcode in [A_MLA, A_MUL]) and
@ -1152,6 +1158,85 @@ Implementation
if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
end;
A_MVN:
begin
{
change
mvn reg2,reg1
and reg3,reg4,reg2
dealloc reg2
to
bic reg3,reg4,reg1
}
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1,A_AND,[],[]) and
(((taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_reg) and
(MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
((taicpu(hp1).ops=2) and
(taicpu(hp1).oper[1]^.typ=top_reg) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole MvnAnd2Bic done', p);
taicpu(hp1).opcode:=A_BIC;
if taicpu(hp1).ops=3 then
begin
if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
end
else
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
end;
end;
A_UXTB,
A_SXTB:
begin
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole xXTBStrb2Strb done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
end;
end;
A_UXTH,
A_SXTH:
begin
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ the reference in strh must not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole xXTBStrh2Strh done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
asml.remove(p);
p.free;
p:=hp1;
end;
end;
A_CMP:
begin
{
@ -1566,9 +1651,340 @@ Implementation
end;
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
{ Shortens by one slot the IT block whose last slot is occupied by p.

  Starting at the entry before p, the list is walked backwards. Only entries
  of type ait_instruction are counted; l is the 1-based distance (in
  instructions) between p and the entry currently inspected. The walk stops
  at the first opcode in the range A_IT..A_ITTTT, or after four instructions.

  When the IT opcode found covers exactly l slots (i.e. p sits in its last
  slot) it is replaced by the variant with the final slot dropped. A plain
  A_IT directly in front of p is unlinked from list and freed. In every other
  situation the IT opcode is left untouched.

  list : assembler list that contains p
  p    : the instruction that is about to leave the IT block }
procedure DecrementPreceedingIT(list: TAsmList; p: tai);
  var
    hp : tai;
    l : longint;
  begin
    hp := tai(p.Previous);
    l := 1;
    while assigned(hp) and
          (l <= 4) do
      begin
        if hp.typ=ait_instruction then
          begin
            if (taicpu(hp).opcode>=A_IT) and
               (taicpu(hp).opcode<=A_ITTTT) then
              begin
                if (taicpu(hp).opcode=A_IT) and
                   (l=1) then
                  begin
                    { the IT guarded only p: drop it entirely. Remove only
                      unlinks the entry, so it has to be freed here as well;
                      hp is not touched again because of the break below }
                    list.Remove(hp);
                    hp.Free;
                  end
                else
                  { strip the last then/else slot, but only when p really is
                    the last slot of the block }
                  case taicpu(hp).opcode of
                    A_ITE,
                    A_ITT:
                      if l=2 then taicpu(hp).opcode := A_IT;
                    A_ITEE,
                    A_ITET:
                      if l=3 then taicpu(hp).opcode := A_ITE;
                    A_ITTE,
                    A_ITTT:
                      if l=3 then taicpu(hp).opcode := A_ITT;
                    A_ITEEE,
                    A_ITEET:
                      if l=4 then taicpu(hp).opcode := A_ITEE;
                    A_ITTEE,
                    A_ITTET:
                      if l=4 then taicpu(hp).opcode := A_ITTE;
                    A_ITETE,
                    A_ITETT:
                      if l=4 then taicpu(hp).opcode := A_ITET;
                    A_ITTTE,
                    A_ITTTT:
                      if l=4 then taicpu(hp).opcode := A_ITTT;
                  end;
                { the nearest IT opcode has been handled; never look past it }
                break;
              end;
            { disabled in the original: stop early when the condition of hp
              is unrelated to the condition of p
            else if (taicpu(hp).condition<>taicpu(p).condition) or
              (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
              break; }
            inc(l);
          end;
        hp := tai(hp.Previous);
      end;
  end;
{ Thumb-2 specific pass-1 peephole rewrites for the list entry p.

  The first matching rule is applied; if none matches, the inherited
  implementation is called. Rules (all operate on a single instruction):

    stm(fd/db) sp!,{regs}       -> push {regs}    (no r8..r13, r15 in regs)
    str  reg,[sp,#-4]!          -> push {reg}     (reg in r0..r7, r14)
    ldm(fd/ia) sp!,{regs}       -> pop  {regs}    (no r8..r14 in regs)
    ldr  reg,[sp],#4            -> pop  {reg}     (reg in r0..r7, r15)
    mov  reg,#0..255            -> movs reg,#imm
    mvn  reg,reg                -> mvns reg,reg
    add/sub reg,reg,#0..255     -> adds/subs reg,#imm   (reg <> sp)
    and  reg,#255 / #65535      -> uxtb/uxth reg,reg
    and  reg1,reg2,#255/#65535  -> uxtb/uxth reg1,reg2

  NOTE(review): the rules that add the S postfix are gated on
  "not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))"; presumably this is meant
  to make sure the flags are not live at p - confirm against FindRegDeAlloc.

  p      : entry to inspect; replaced by the new instruction when a rule
           creates one
  result : true when a rule was applied here, otherwise whatever the
           inherited method returns }
function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  var
    hp : taicpu;
    { only referenced by the disabled CMP+B -> CBZ/CBNZ rule near the end }
    hp1,hp2 : tai;
  begin
    { every rule below needs an instruction; anything else goes straight to
      the inherited optimizer }
    if p.typ<>ait_instruction then
      begin
        Result := inherited PeepHoleOptPass1Cpu(p);
        exit;
      end;

    if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
       (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
       (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
       ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
      begin
        hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
        AsmL.InsertAfter(hp, p);
        asml.Remove(p);
        { the replaced instruction is no longer referenced anywhere; free it
          like the other three push/pop rules do }
        p.Free;
        p:=hp;
        result:=true;
      end
    else if MatchInstruction(p, A_STR, [C_None], [PF_None]) and
       (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
       (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
       (taicpu(p).oper[1]^.ref^.offset=-4) and
       (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
      begin
        hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
        asml.InsertAfter(hp, p);
        asml.Remove(p);
        p.Free;
        p:=hp;
        result:=true;
      end
    else if MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
       (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
       (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
       ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
      begin
        hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
        asml.InsertBefore(hp, p);
        asml.Remove(p);
        p.Free;
        p:=hp;
        result:=true;
      end
    else if MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
       (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
       (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
       (taicpu(p).oper[1]^.ref^.offset=4) and
       (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
      begin
        hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
        asml.InsertBefore(hp, p);
        asml.Remove(p);
        p.Free;
        p:=hp;
        result:=true;
      end
    else if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
       (taicpu(p).oper[1]^.typ=top_const) and
       (taicpu(p).oper[1]^.val >= 0) and
       (taicpu(p).oper[1]^.val < 256) and
       (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
      begin
        { mov reg,#imm8 -> movs reg,#imm8 }
        taicpu(p).oppostfix:=PF_S;
        result:=true;
      end
    else if MatchInstruction(p, A_MVN, [], [PF_None]) and
       (taicpu(p).oper[1]^.typ=top_reg) and
       (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
      begin
        { mvn reg,reg -> mvns reg,reg }
        taicpu(p).oppostfix:=PF_S;
        result:=true;
      end
    else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
       (taicpu(p).ops = 3) and
       MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
       (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val >= 0) and
       (taicpu(p).oper[2]^.val < 256) and
       (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
      begin
        { add/sub reg,reg,#imm8 -> adds/subs reg,#imm8: move the constant
          into operand slot 1 and drop the third operand }
        taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
        taicpu(p).oppostfix:=PF_S;
        taicpu(p).ops := 2;
        result:=true;
      end
    { disabled: two-operand forms of the logical and shift instructions
    else if (p.typ=ait_instruction) and
      MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [], [PF_None,PF_S]) and
      (taicpu(p).ops = 3) and
      MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
      (taicpu(p).oper[2]^.typ=top_reg) and
      (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
      begin
        taicpu(p).ops := 2;
        taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
        taicpu(p).oppostfix:=PF_S;
        result:=true;
      end
    else if (p.typ=ait_instruction) and
      MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
      (taicpu(p).ops = 3) and
      MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
      (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
      begin
        taicpu(p).oppostfix:=PF_S;
        taicpu(p).ops := 2;
        result:=true;
      end}
    else if MatchInstruction(p, [A_AND], [], [PF_None]) and
       (taicpu(p).ops = 2) and
       (taicpu(p).oper[1]^.typ=top_const) and
       ((taicpu(p).oper[1]^.val=255) or
        (taicpu(p).oper[1]^.val=65535)) then
      begin
        { and reg,#255 / #65535 -> uxtb/uxth reg,reg }
        if taicpu(p).oper[1]^.val=255 then
          taicpu(p).opcode:=A_UXTB
        else
          taicpu(p).opcode:=A_UXTH;
        { source and destination are the same register in the 2-operand form }
        taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
        result := true;
      end
    else if MatchInstruction(p, [A_AND], [], [PF_None]) and
       (taicpu(p).ops = 3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       ((taicpu(p).oper[2]^.val=255) or
        (taicpu(p).oper[2]^.val=65535)) then
      begin
        { and reg1,reg2,#255 / #65535 -> uxtb/uxth reg1,reg2 }
        if taicpu(p).oper[2]^.val=255 then
          taicpu(p).opcode:=A_UXTB
        else
          taicpu(p).opcode:=A_UXTH;
        { operands 0 and 1 are already in place; just drop the mask }
        taicpu(p).ops:=2;
        result := true;
      end
    { disabled: cmp reg,#0 + beq/bne label -> cbz/cbnz reg,label
    else if (p.typ=ait_instruction) and
      MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
      (taicpu(p).oper[1]^.typ=top_const) and
      (taicpu(p).oper[1]^.val=0) and
      GetNextInstruction(p,hp1) and
      (taicpu(hp1).opcode=A_B) and
      (taicpu(hp1).condition in [C_EQ,C_NE]) then
      begin
        if taicpu(hp1).condition = C_EQ then
          hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
        else
          hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
        taicpu(hp2).is_jmp := true;
        asml.InsertAfter(hp2, hp1);
        asml.Remove(hp1);
        hp1.Free;
        asml.Remove(p);
        p.Free;
        p := hp2;
        result := true;
      end}
    else
      Result := inherited PeepHoleOptPass1Cpu(p);
  end;
{ Pass-2 peephole optimizer for Thumb-2.

  Scans the entries from BlockStart up to (not including) BlockEnd. For a
  conditional branch that jumps over a short run of instructions to a label
  placed right after that run, the branch is deleted and the skipped
  instructions are executed conditionally instead: they get the inverted
  branch condition and an IT/ITT/ITTT/ITTTT instruction is inserted in front
  of them.

  NOTE(review): CanBeCond, MustBeLast and FindLabel are defined elsewhere;
  the comments below describe them only as far as their use here shows. }
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
var
p,hp1,hp2: tai;
{ l counts the instructions between the branch and its target;
  l2, hp3 and WasLast are declared but not used in this routine }
l,l2 : longint;
condition : tasmcond;
hp3: tai;
WasLast: boolean;
{ UsedRegs, TmpUsedRegs: TRegSet; }
begin
p := BlockStart;
{ UsedRegs := []; }
while (p <> BlockEnd) Do
begin
{ UpdateUsedRegs(UsedRegs, tai(p.next)); }
case p.Typ Of
Ait_Instruction:
begin
case taicpu(p).opcode Of
A_B:
{ only conditional branches are candidates }
if taicpu(p).condition<>C_None then
begin
{ check for
Bxx xxx
<several instructions>
xxx:
}
{ step 1: count how many conditionalizable instructions follow the branch;
  the loop may count up to five so that "too many" can be told apart from
  "exactly four" in the range check further down }
l:=0;
GetNextInstruction(p, hp1);
while assigned(hp1) and
(l<=4) and
CanBeCond(hp1) and
{ stop on labels }
not(hp1.typ=ait_label) do
begin
inc(l);
{ an instruction for which MustBeLast is true ends the run; hp1 is still
  advanced so that it points at what follows the run }
if MustBeLast(hp1) then
begin
//hp1:=nil;
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
end;
if assigned(hp1) then
begin
{ step 2: the run must be followed by the branch target itself }
if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
begin
{ an IT block covers one to four instructions }
if (l<=4) and (l>0) then
begin
{ step 3: the skipped instructions run when the branch is NOT taken, hence
  the inverted condition }
condition:=inverse_cond(taicpu(p).condition);
{ hp2 keeps the branch; p moves on to the first skipped instruction so the
  outer loop resumes there after the continue below }
hp2:=p;
GetNextInstruction(p,hp1);
p:=hp1;
{ walk the same run again and set the condition on every instruction }
repeat
if hp1.typ=ait_instruction then
taicpu(hp1).condition:=condition;
if MustBeLast(hp1) then
begin
GetNextInstruction(hp1,hp1);
break;
end
else
GetNextInstruction(hp1,hp1);
until not(assigned(hp1)) or
not(CanBeCond(hp1)) or
(hp1.typ=ait_label);
{ wait with removing else GetNextInstruction could
ignore the label if it was the only usage in the
jump moved away }
{ marker comment emitted into the assembler output; it is inserted first,
  so the IT instruction inserted below ends up in front of it }
asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
{ the branch is about to disappear: shrink an IT block that contained it }
DecrementPreceedingIT(asml, hp2);
{ one "T" slot per conditionalized instruction }
case l of
1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
end;
{ step 4: drop the label reference held by the branch, then unlink and free
  the branch }
tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
asml.remove(hp2);
hp2.free;
{ p already points at the first conditionalized instruction; skip the
  advance at the bottom of the loop }
continue;
end;
end;
end;
end;
end;
end;
end;
p := tai(p.next)
end;
end;
begin

View File

@ -206,6 +206,10 @@
'sel',
'setend',
'sev',
'asr',
'lsr',
'lsl',
'ror',
'shadd16',
'shadd8',
'shasx',
@ -270,12 +274,8 @@
'wfe',
'wfi',
'yield',
'asr',
'lsr',
'lsl',
'pop',
'push',
'ror',
'sdiv',
'udiv',
'movt',
@ -295,5 +295,7 @@
'itett',
'itttt',
'tbb',
'tbh'
'tbh',
'cbz',
'cbnz'
);

View File

@ -295,5 +295,7 @@ attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE
);

View File

@ -618,6 +618,14 @@ reg32,reg32,reg32,reg32 \x16\x00\x80\x90 ARM7
[SEVcc]
[ASRcc]
[LSRcc]
[LSLcc]
[RORcc]
[SHADD16cc]
[SHADD8cc]
[SHASXcc]
@ -702,18 +710,10 @@ reg32,reg32,reg32,reg32 \x16\x00\x80\x90 ARM7
; Thumb-2
[ASRcc]
[LSRcc]
[LSLcc]
[POP]
[PUSH]
[RORcc]
[SDIVcc]
[UDIVcc]
@ -752,3 +752,6 @@ reg32,reg32,reg32,reg32 \x16\x00\x80\x90 ARM7
[TBB]
[TBH]
[CBZ]
[CBNZ]

View File

@ -206,6 +206,10 @@ A_SBFX,
A_SEL,
A_SETEND,
A_SEV,
A_ASR,
A_LSR,
A_LSL,
A_ROR,
A_SHADD16,
A_SHADD8,
A_SHASX,
@ -270,12 +274,8 @@ A_UXTH,
A_WFE,
A_WFI,
A_YIELD,
A_ASR,
A_LSR,
A_LSL,
A_POP,
A_PUSH,
A_ROR,
A_SDIV,
A_UDIV,
A_MOVT,
@ -295,5 +295,7 @@ A_ITTET,
A_ITETT,
A_ITTTT,
A_TBB,
A_TBH
A_TBH,
A_CBZ,
A_CBNZ
);

View File

@ -48,7 +48,7 @@ unit cpubase;
TAsmOp= {$i armop.inc}
{This is a bit of a hack, because there are more than 256 ARM Assembly Ops
But FPC currently can't handle more than 256 elements in a set.}
TCommonAsmOps = Set of A_None .. A_UQSADA8;
TCommonAsmOps = Set of A_None .. A_UQSAX;
{ This should define the array of instructions as string }
op2strtable=array[tasmop] of string[11];

View File

@ -70,10 +70,17 @@ unit rgcpu;
procedure trgintcputhumb2.add_cpu_interferences(p: tai);
var
r : tregister;
hr : longint;
begin
if p.typ=ait_instruction then
begin
case taicpu(p).opcode of
A_CBNZ,
A_CBZ:
begin
for hr := RS_R8 to RS_R15 do
add_edge(getsupreg(taicpu(p).oper[0]^.reg), hr);
end;
A_ADD:
begin
if taicpu(p).ops = 3 then