Added some peephole optimizations, and fixed generic unconditional jump optimizations, for AVR.

Fixed multiplication code generation for AVR controllers without mul instructions.
Added handling of the old interrupt procedure directive, so that procedures declared with it return with RETI instead of RET.

git-svn-id: trunk@31030 -
This commit is contained in:
Jeppe Johansen 2015-06-13 12:25:11 +00:00
parent c5b24c5ce3
commit 03880c2f74
4 changed files with 270 additions and 58 deletions

View File

@ -1178,7 +1178,11 @@ Unit AoptObj;
function IsJumpToLabel(hp: taicpu): boolean; function IsJumpToLabel(hp: taicpu): boolean;
begin begin
{$if defined(avr)}
result:=(hp.opcode in aopt_uncondjmp) and
{$else avr}
result:=(hp.opcode=aopt_uncondjmp) and result:=(hp.opcode=aopt_uncondjmp) and
{$endif avr}
{$if defined(arm) or defined(aarch64)} {$if defined(arm) or defined(aarch64)}
(hp.condition=c_None) and (hp.condition=c_None) and
{$endif arm or aarch64} {$endif arm or aarch64}

View File

@ -45,7 +45,7 @@ Implementation
uses uses
cutils, cutils,
cpuinfo, cpuinfo,
aasmbase,aasmcpu, aasmbase,aasmcpu,aasmdata,
globals,globtype, globals,globtype,
cgutils; cgutils;
@ -132,9 +132,10 @@ Implementation
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var var
hp1,hp2,hp3: tai; hp1,hp2,hp3,hp4,hp5: tai;
alloc, dealloc: tai_regalloc; alloc, dealloc: tai_regalloc;
i: integer; i: integer;
l: TAsmLabel;
begin begin
result := false; result := false;
case p.typ of case p.typ of
@ -265,7 +266,8 @@ Implementation
into into
sbi rX,lg(n) sbi rX,lg(n)
} }
if MatchInstruction(hp1,A_ORI) and if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ORI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
GetNextInstruction(hp1,hp2) and GetNextInstruction(hp1,hp2) and
@ -275,7 +277,7 @@ Implementation
begin begin
taicpu(p).opcode:=A_SBI; taicpu(p).opcode:=A_SBI;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val); taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)-1); taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
asml.Remove(hp1); asml.Remove(hp1);
hp1.Free; hp1.Free;
asml.Remove(hp2); asml.Remove(hp2);
@ -290,7 +292,8 @@ Implementation
into into
cbi rX,lg(n) cbi rX,lg(n)
} }
else if MatchInstruction(hp1,A_ANDI) and else if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ANDI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
GetNextInstruction(hp1,hp2) and GetNextInstruction(hp1,hp2) and
@ -300,11 +303,51 @@ Implementation
begin begin
taicpu(p).opcode:=A_CBI; taicpu(p).opcode:=A_CBI;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val); taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val))-1); taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
asml.Remove(hp1); asml.Remove(hp1);
hp1.Free; hp1.Free;
asml.Remove(hp2); asml.Remove(hp2);
hp2.Free; hp2.Free;
result:=true;
end
{
in rX,Y
andi rX,n
breq/brne L1
into
sbis/sbic Y,lg(n)
jmp L1
.Ltemp:
}
else if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ANDI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
GetNextInstruction(hp1,hp2) and
MatchInstruction(hp2,A_BRxx) and
(taicpu(hp2).condition in [C_EQ,C_NE]) then
begin
if taicpu(hp2).condition=C_EQ then
taicpu(p).opcode:=A_SBIS
else
taicpu(p).opcode:=A_SBIC;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
asml.Remove(hp1);
hp1.Free;
taicpu(hp2).condition:=C_None;
if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
taicpu(hp2).opcode:=A_JMP
else
taicpu(hp2).opcode:=A_RJMP;
current_asmdata.getjumplabel(l);
l.increfs;
asml.InsertAfter(tai_label.create(l), hp2);
result:=true; result:=true;
end; end;
end; end;
@ -528,6 +571,98 @@ Implementation
break; break;
end; end;
end; end;
A_SBIC,
A_SBIS:
  begin
    {
      Peephole patterns for a skip-on-I/O-bit instruction (sbic/sbis)
      that is directly followed by an unconditional jump. In both
      patterns the jump(s) only hop over a single instruction, so the
      skip instruction itself can do that job and the jump(s) can be
      removed. The reference count of every label that loses a jump
      is decremented to keep label bookkeeping consistent.
    }

    {
      Turn
        sbic/sbis X, y
        jmp .L1
        op
      .L1:
      into
        sbis/sbic X,y
        op
      .L1:

      The skip condition is inverted so that "op" is now skipped in
      exactly the cases in which the jump used to be taken.
    }
    if GetNextInstruction(p, hp1) and
       (hp1.typ=ait_instruction) and
       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
       (taicpu(hp1).ops>0) and
       (taicpu(hp1).oper[0]^.typ = top_ref) and
       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
       GetNextInstruction(hp1, hp2) and
       (hp2.typ=ait_instruction) and
       { "op" must not be a jump itself }
       (not taicpu(hp2).is_jmp) and
       GetNextInstruction(hp2, hp3) and
       (hp3.typ=ait_label) and
       { the jump must target the label right behind "op" }
       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
      begin
        { invert the skip condition }
        if taicpu(p).opcode=A_SBIC then
          taicpu(p).opcode:=A_SBIS
        else
          taicpu(p).opcode:=A_SBIC;

        { the jump to .L1 disappears, so .L1 loses one reference }
        tai_label(hp3).labsym.decrefs;

        AsmL.remove(hp1);
        taicpu(hp1).Free;

        result:=true;
      end
    {
      Turn
        sbiX X, y
        jmp .L1
        jmp .L2
      .L1:
        op
      .L2:
      into
        sbiX X,y
      .L1:
        op
      .L2:

      Here the skip condition stays as it is: skipping "jmp .L1" used
      to run "jmp .L2" (bypassing "op"), which is the same as skipping
      "op" directly.
    }
    else if GetNextInstruction(p, hp1) and
       (hp1.typ=ait_instruction) and
       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
       (taicpu(hp1).ops>0) and
       (taicpu(hp1).oper[0]^.typ = top_ref) and
       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
       GetNextInstruction(hp1, hp2) and
       (hp2.typ=ait_instruction) and
       (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
       (taicpu(hp2).ops>0) and
       (taicpu(hp2).oper[0]^.typ = top_ref) and
       (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
       GetNextInstruction(hp2, hp3) and
       (hp3.typ=ait_label) and
       { first jump targets the label in front of "op" }
       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
       GetNextInstruction(hp3, hp4) and
       (hp4.typ=ait_instruction) and
       GetNextInstruction(hp4, hp5) and
       { hp5 must be verified to be a label before it is typecast to
         tai_label below; the check previously tested hp3 a second time }
       (hp5.typ=ait_label) and
       { second jump targets the label right behind "op" }
       (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
      begin
        { both jumps disappear, so each target label loses one reference }
        tai_label(hp3).labsym.decrefs;
        tai_label(hp5).labsym.decrefs;

        AsmL.remove(hp1);
        taicpu(hp1).Free;

        AsmL.remove(hp2);
        taicpu(hp2).Free;

        result:=true;
      end;
  end;
end; end;
end; end;
end; end;

View File

@ -99,7 +99,7 @@ Const
StoreDst = 0; StoreDst = 0;
aopt_uncondjmp = A_JMP; aopt_uncondjmp = [A_RJMP,A_JMP];
aopt_condjmp = A_BRxx; aopt_condjmp = A_BRxx;
Implementation Implementation

View File

@ -431,7 +431,8 @@ unit cgcpu;
procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
begin begin
if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) then if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and
(CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) then
begin begin
getcpuregister(list,NR_R0); getcpuregister(list,NR_R0);
getcpuregister(list,NR_R1); getcpuregister(list,NR_R1);
@ -577,55 +578,64 @@ unit cgcpu;
begin begin
if size in [OS_8,OS_S8] then if size in [OS_8,OS_S8] then
begin begin
cg.a_reg_alloc(list,NR_R0); if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
cg.a_reg_alloc(list,NR_R1); begin
list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src)); cg.a_reg_alloc(list,NR_R0);
list.concat(taicpu.op_reg(A_CLR,NR_R1)); cg.a_reg_alloc(list,NR_R1);
cg.a_reg_dealloc(list,NR_R1); list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0)); list.concat(taicpu.op_reg(A_CLR,NR_R1));
cg.a_reg_dealloc(list,NR_R0); cg.a_reg_dealloc(list,NR_R1);
list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
cg.a_reg_dealloc(list,NR_R0);
end
else
internalerror(2015061001);
end end
else if size=OS_16 then else if size=OS_16 then
begin begin
tmpreg:=getintregister(list,OS_16); if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
emit_mov(list,tmpreg,dst); begin
emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst)); tmpreg:=getintregister(list,OS_16);
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src)); emit_mov(list,tmpreg,dst);
emit_mov(list,dst,NR_R0); emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
emit_mov(list,GetNextReg(dst),NR_R1); list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src)); emit_mov(list,dst,NR_R0);
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0)); emit_mov(list,GetNextReg(dst),NR_R1);
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src))); list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0)); list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
list.concat(taicpu.op_reg(A_CLR,NR_R1)); list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
{ keep code for muls with overflow checking list.concat(taicpu.op_reg(A_CLR,NR_R1));
pd:=search_system_proc('fpc_mul_word'); end
paraloc1.init; else
paraloc2.init; begin
paraloc3.init; { keep code for muls with overflow checking }
paramanager.getintparaloc(list,pd,1,paraloc1); pd:=search_system_proc('fpc_mul_word');
paramanager.getintparaloc(list,pd,2,paraloc2); paraloc1.init;
paramanager.getintparaloc(list,pd,3,paraloc3); paraloc2.init;
a_load_const_cgpara(list,OS_8,0,paraloc3); paraloc3.init;
a_load_reg_cgpara(list,OS_16,src,paraloc2); paramanager.getintparaloc(list,pd,1,paraloc1);
a_load_reg_cgpara(list,OS_16,dst,paraloc1); paramanager.getintparaloc(list,pd,2,paraloc2);
paramanager.freecgpara(list,paraloc3); paramanager.getintparaloc(list,pd,3,paraloc3);
paramanager.freecgpara(list,paraloc2); a_load_const_cgpara(list,OS_8,0,paraloc3);
paramanager.freecgpara(list,paraloc1); a_load_reg_cgpara(list,OS_16,src,paraloc2);
alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default)); a_load_reg_cgpara(list,OS_16,dst,paraloc1);
a_call_name(list,'FPC_MUL_WORD',false); paramanager.freecgpara(list,paraloc3);
dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default)); paramanager.freecgpara(list,paraloc2);
cg.a_reg_alloc(list,NR_R24); paramanager.freecgpara(list,paraloc1);
cg.a_reg_alloc(list,NR_R25); alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst); a_call_name(list,'FPC_MUL_WORD',false);
cg.a_reg_dealloc(list,NR_R24); dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst)); cg.a_reg_alloc(list,NR_R24);
cg.a_reg_dealloc(list,NR_R25); cg.a_reg_alloc(list,NR_R25);
paraloc3.done; cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
paraloc2.done; cg.a_reg_dealloc(list,NR_R24);
paraloc1.done; cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
} cg.a_reg_dealloc(list,NR_R25);
paraloc3.done;
paraloc2.done;
paraloc1.done;
end;
end end
else else
internalerror(2011022002); internalerror(2011022002);
@ -1691,7 +1701,46 @@ unit cgcpu;
regs : tcpuregisterset; regs : tcpuregisterset;
reg : tsuperregister; reg : tsuperregister;
begin begin
if not(nostackframe) then if po_interrupt in current_procinfo.procdef.procoptions then
begin
{ check if the framepointer is actually used, this is done here because
we have to know the size of the locals (must be 0), avr does not know
an sp based stack }
if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
(localsize=0) then
current_procinfo.framepointer:=NR_NO;
{ save int registers,
but only if the procedure returns }
if not(po_noreturn in current_procinfo.procdef.procoptions) then
regs:=rg[R_INTREGISTER].used_in_proc
else
regs:=[];
{ if the framepointer is potentially used, save it always because we need a proper stack frame,
even if the procedure never returns, the procedure could be e.g. a nested one accessing
an outer stackframe }
if current_procinfo.framepointer<>NR_NO then
regs:=regs+[RS_R28,RS_R29];
regs:=regs+[RS_R0];
for reg:=RS_R31 downto RS_R0 do
if reg in regs then
list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
{ Save SREG }
list.concat(taicpu.op_reg_const(A_IN, NR_R0, $3F));
list.concat(taicpu.op_reg(A_PUSH, NR_R0));
if current_procinfo.framepointer<>NR_NO then
begin
list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
a_adjust_sp(list,-localsize);
end;
end
else if not(nostackframe) then
begin begin
{ check if the framepointer is actually used, this is done here because { check if the framepointer is actually used, this is done here because
we have to know the size of the locals (must be 0), avr does not know we have to know the size of the locals (must be 0), avr does not know
@ -1738,7 +1787,29 @@ unit cgcpu;
} }
if po_noreturn in current_procinfo.procdef.procoptions then if po_noreturn in current_procinfo.procdef.procoptions then
exit; exit;
if not(nostackframe) then if po_interrupt in current_procinfo.procdef.procoptions then
begin
regs:=rg[R_INTREGISTER].used_in_proc;
if current_procinfo.framepointer<>NR_NO then
begin
regs:=regs+[RS_R28,RS_R29];
LocalSize:=current_procinfo.calc_stackframe_size;
a_adjust_sp(list,LocalSize);
end;
{ Reload SREG }
regs:=regs+[RS_R0];
list.concat(taicpu.op_reg(A_POP, NR_R0));
list.concat(taicpu.op_const_reg(A_OUT, $3F, NR_R0));
for reg:=RS_R0 to RS_R31 do
if reg in regs then
list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
list.concat(taicpu.op_none(A_RETI));
end
else if not(nostackframe) then
begin begin
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall); regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if current_procinfo.framepointer<>NR_NO then if current_procinfo.framepointer<>NR_NO then
@ -1750,8 +1821,10 @@ unit cgcpu;
for reg:=RS_R0 to RS_R31 do for reg:=RS_R0 to RS_R31 do
if reg in regs then if reg in regs then
list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE))); list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
end; list.concat(taicpu.op_none(A_RET));
list.concat(taicpu.op_none(A_RET)); end
else
list.concat(taicpu.op_none(A_RET));
end; end;