Added some peephole optimizations, and fixed generic unconditional jump optimizations, for AVR.

Fixed multiplication code generation for AVR controllers without mul instructions.
Added handling of the old interrupt procedure directive, so that procedures declared with it return with RETI instead of RET.

git-svn-id: trunk@31030 -
This commit is contained in:
Jeppe Johansen 2015-06-13 12:25:11 +00:00
parent c5b24c5ce3
commit 03880c2f74
4 changed files with 270 additions and 58 deletions

View File

@ -1178,7 +1178,11 @@ Unit AoptObj;
function IsJumpToLabel(hp: taicpu): boolean;
begin
{$if defined(avr)}
result:=(hp.opcode in aopt_uncondjmp) and
{$else avr}
result:=(hp.opcode=aopt_uncondjmp) and
{$endif avr}
{$if defined(arm) or defined(aarch64)}
(hp.condition=c_None) and
{$endif arm or aarch64}

View File

@ -45,7 +45,7 @@ Implementation
uses
cutils,
cpuinfo,
aasmbase,aasmcpu,
aasmbase,aasmcpu,aasmdata,
globals,globtype,
cgutils;
@ -132,9 +132,10 @@ Implementation
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
hp1,hp2,hp3: tai;
hp1,hp2,hp3,hp4,hp5: tai;
alloc, dealloc: tai_regalloc;
i: integer;
l: TAsmLabel;
begin
result := false;
case p.typ of
@ -265,7 +266,8 @@ Implementation
into
sbi rX,lg(n)
}
if MatchInstruction(hp1,A_ORI) and
if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ORI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
GetNextInstruction(hp1,hp2) and
@ -275,7 +277,7 @@ Implementation
begin
taicpu(p).opcode:=A_SBI;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)-1);
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
asml.Remove(hp1);
hp1.Free;
asml.Remove(hp2);
@ -290,7 +292,8 @@ Implementation
into
cbi rX,lg(n)
}
else if MatchInstruction(hp1,A_ANDI) and
else if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ANDI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
GetNextInstruction(hp1,hp2) and
@ -300,11 +303,51 @@ Implementation
begin
taicpu(p).opcode:=A_CBI;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val))-1);
taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
asml.Remove(hp1);
hp1.Free;
asml.Remove(hp2);
hp2.Free;
result:=true;
end
{
in rX,Y
andi rX,n
breq/brne L1
into
sbis/sbic Y,lg(n)
jmp L1
.Ltemp:
}
else if (taicpu(p).oper[1]^.val<=31) and
MatchInstruction(hp1,A_ANDI) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
(PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
GetNextInstruction(hp1,hp2) and
MatchInstruction(hp2,A_BRxx) and
(taicpu(hp2).condition in [C_EQ,C_NE]) then
begin
if taicpu(hp2).condition=C_EQ then
taicpu(p).opcode:=A_SBIS
else
taicpu(p).opcode:=A_SBIC;
taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
asml.Remove(hp1);
hp1.Free;
taicpu(hp2).condition:=C_None;
if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
taicpu(hp2).opcode:=A_JMP
else
taicpu(hp2).opcode:=A_RJMP;
current_asmdata.getjumplabel(l);
l.increfs;
asml.InsertAfter(tai_label.create(l), hp2);
result:=true;
end;
end;
@ -528,6 +571,98 @@ Implementation
break;
end;
end;
A_SBIC,
A_SBIS:
  begin
    {
      Peephole optimizations for a skip-on-I/O-bit instruction that is
      followed by one or two unconditional jumps.

      Pattern 1: turn
        sbic/sbis X, y
        jmp .L1
        op
      .L1:
      into
        sbis/sbic X,y
        op
      .L1:

      The skip condition is inverted so that the skip instruction steps
      over "op" itself, which makes the jump redundant.
    }
    if GetNextInstruction(p, hp1) and
      (hp1.typ=ait_instruction) and
      (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
      (taicpu(hp1).ops>0) and
      (taicpu(hp1).oper[0]^.typ = top_ref) and
      (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
      GetNextInstruction(hp1, hp2) and
      (hp2.typ=ait_instruction) and
      { "op" must not itself be a jump }
      (not taicpu(hp2).is_jmp) and
      GetNextInstruction(hp2, hp3) and
      (hp3.typ=ait_label) and
      { the jump must target the label directly behind "op" }
      (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
      begin
        { invert the skip condition }
        if taicpu(p).opcode=A_SBIC then
          taicpu(p).opcode:=A_SBIS
        else
          taicpu(p).opcode:=A_SBIC;

        { the removed jump no longer references .L1 }
        tai_label(hp3).labsym.decrefs;

        AsmL.remove(hp1);
        taicpu(hp1).Free;

        result:=true;
      end
    {
      Pattern 2: turn
        sbiX X, y
        jmp .L1
        jmp .L2
      .L1:
        op
      .L2:
      into
        sbiX X,y
      .L1:
        op
      .L2:

      Both jumps are dropped; the skip instruction keeps its condition
      and now skips "op" directly.
    }
    else if GetNextInstruction(p, hp1) and
      (hp1.typ=ait_instruction) and
      (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
      (taicpu(hp1).ops>0) and
      (taicpu(hp1).oper[0]^.typ = top_ref) and
      (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
      GetNextInstruction(hp1, hp2) and
      (hp2.typ=ait_instruction) and
      (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
      (taicpu(hp2).ops>0) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
      GetNextInstruction(hp2, hp3) and
      (hp3.typ=ait_label) and
      { first jump targets .L1 }
      (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
      GetNextInstruction(hp3, hp4) and
      (hp4.typ=ait_instruction) and
      GetNextInstruction(hp4, hp5) and
      { hp5 (not hp3, which was already checked above) has to be verified
        as a label before it is cast to tai_label }
      (hp5.typ=ait_label) and
      { second jump targets .L2 }
      (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
      begin
        { both removed jumps drop one reference to their target label }
        tai_label(hp3).labsym.decrefs;
        tai_label(hp5).labsym.decrefs;

        AsmL.remove(hp1);
        taicpu(hp1).Free;

        AsmL.remove(hp2);
        taicpu(hp2).Free;

        result:=true;
      end;
  end;
end;
end;
end;

View File

@ -99,7 +99,7 @@ Const
StoreDst = 0;
aopt_uncondjmp = A_JMP;
aopt_uncondjmp = [A_RJMP,A_JMP];
aopt_condjmp = A_BRxx;
Implementation

View File

@ -431,7 +431,8 @@ unit cgcpu;
procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
begin
if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) then
if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and
(CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) then
begin
getcpuregister(list,NR_R0);
getcpuregister(list,NR_R1);
@ -577,55 +578,64 @@ unit cgcpu;
begin
if size in [OS_8,OS_S8] then
begin
cg.a_reg_alloc(list,NR_R0);
cg.a_reg_alloc(list,NR_R1);
list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
list.concat(taicpu.op_reg(A_CLR,NR_R1));
cg.a_reg_dealloc(list,NR_R1);
list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
cg.a_reg_dealloc(list,NR_R0);
if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
begin
cg.a_reg_alloc(list,NR_R0);
cg.a_reg_alloc(list,NR_R1);
list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
list.concat(taicpu.op_reg(A_CLR,NR_R1));
cg.a_reg_dealloc(list,NR_R1);
list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
cg.a_reg_dealloc(list,NR_R0);
end
else
internalerror(2015061001);
end
else if size=OS_16 then
begin
tmpreg:=getintregister(list,OS_16);
emit_mov(list,tmpreg,dst);
emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
emit_mov(list,dst,NR_R0);
emit_mov(list,GetNextReg(dst),NR_R1);
list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
list.concat(taicpu.op_reg(A_CLR,NR_R1));
{ keep code for muls with overflow checking
pd:=search_system_proc('fpc_mul_word');
paraloc1.init;
paraloc2.init;
paraloc3.init;
paramanager.getintparaloc(list,pd,1,paraloc1);
paramanager.getintparaloc(list,pd,2,paraloc2);
paramanager.getintparaloc(list,pd,3,paraloc3);
a_load_const_cgpara(list,OS_8,0,paraloc3);
a_load_reg_cgpara(list,OS_16,src,paraloc2);
a_load_reg_cgpara(list,OS_16,dst,paraloc1);
paramanager.freecgpara(list,paraloc3);
paramanager.freecgpara(list,paraloc2);
paramanager.freecgpara(list,paraloc1);
alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
a_call_name(list,'FPC_MUL_WORD',false);
dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
cg.a_reg_alloc(list,NR_R24);
cg.a_reg_alloc(list,NR_R25);
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
cg.a_reg_dealloc(list,NR_R24);
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
cg.a_reg_dealloc(list,NR_R25);
paraloc3.done;
paraloc2.done;
paraloc1.done;
}
if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
begin
tmpreg:=getintregister(list,OS_16);
emit_mov(list,tmpreg,dst);
emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
emit_mov(list,dst,NR_R0);
emit_mov(list,GetNextReg(dst),NR_R1);
list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
list.concat(taicpu.op_reg(A_CLR,NR_R1));
end
else
begin
{ keep code for muls with overflow checking }
pd:=search_system_proc('fpc_mul_word');
paraloc1.init;
paraloc2.init;
paraloc3.init;
paramanager.getintparaloc(list,pd,1,paraloc1);
paramanager.getintparaloc(list,pd,2,paraloc2);
paramanager.getintparaloc(list,pd,3,paraloc3);
a_load_const_cgpara(list,OS_8,0,paraloc3);
a_load_reg_cgpara(list,OS_16,src,paraloc2);
a_load_reg_cgpara(list,OS_16,dst,paraloc1);
paramanager.freecgpara(list,paraloc3);
paramanager.freecgpara(list,paraloc2);
paramanager.freecgpara(list,paraloc1);
alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
a_call_name(list,'FPC_MUL_WORD',false);
dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
cg.a_reg_alloc(list,NR_R24);
cg.a_reg_alloc(list,NR_R25);
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
cg.a_reg_dealloc(list,NR_R24);
cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
cg.a_reg_dealloc(list,NR_R25);
paraloc3.done;
paraloc2.done;
paraloc1.done;
end;
end
else
internalerror(2011022002);
@ -1691,7 +1701,46 @@ unit cgcpu;
regs : tcpuregisterset;
reg : tsuperregister;
begin
if not(nostackframe) then
if po_interrupt in current_procinfo.procdef.procoptions then
begin
{ check if the framepointer is actually used, this is done here because
we have to know the size of the locals (must be 0), avr does not know
an sp based stack }
if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
(localsize=0) then
current_procinfo.framepointer:=NR_NO;
{ save int registers,
but only if the procedure returns }
if not(po_noreturn in current_procinfo.procdef.procoptions) then
regs:=rg[R_INTREGISTER].used_in_proc
else
regs:=[];
{ if the framepointer is potentially used, save it always because we need a proper stack frame,
even if the procedure never returns, the procedure could be e.g. a nested one accessing
an outer stackframe }
if current_procinfo.framepointer<>NR_NO then
regs:=regs+[RS_R28,RS_R29];
regs:=regs+[RS_R0];
for reg:=RS_R31 downto RS_R0 do
if reg in regs then
list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
{ Save SREG }
list.concat(taicpu.op_reg_const(A_IN, NR_R0, $3F));
list.concat(taicpu.op_reg(A_PUSH, NR_R0));
if current_procinfo.framepointer<>NR_NO then
begin
list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
a_adjust_sp(list,-localsize);
end;
end
else if not(nostackframe) then
begin
{ check if the framepointer is actually used, this is done here because
we have to know the size of the locals (must be 0), avr does not know
@ -1738,7 +1787,29 @@ unit cgcpu;
}
if po_noreturn in current_procinfo.procdef.procoptions then
exit;
if not(nostackframe) then
if po_interrupt in current_procinfo.procdef.procoptions then
begin
regs:=rg[R_INTREGISTER].used_in_proc;
if current_procinfo.framepointer<>NR_NO then
begin
regs:=regs+[RS_R28,RS_R29];
LocalSize:=current_procinfo.calc_stackframe_size;
a_adjust_sp(list,LocalSize);
end;
{ Reload SREG }
regs:=regs+[RS_R0];
list.concat(taicpu.op_reg(A_POP, NR_R0));
list.concat(taicpu.op_const_reg(A_OUT, $3F, NR_R0));
for reg:=RS_R0 to RS_R31 do
if reg in regs then
list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
list.concat(taicpu.op_none(A_RETI));
end
else if not(nostackframe) then
begin
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if current_procinfo.framepointer<>NR_NO then
@ -1750,8 +1821,10 @@ unit cgcpu;
for reg:=RS_R0 to RS_R31 do
if reg in regs then
list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
end;
list.concat(taicpu.op_none(A_RET));
list.concat(taicpu.op_none(A_RET));
end
else
list.concat(taicpu.op_none(A_RET));
end;