Added support for the X, Y, and Z register aliases, plus their low/high forms (XL/XH, YL/YH, ZL/ZH), and for post-increment addressing in the AVR assembler reader.

Cleaned up parameter and funcretloc information generation in the AVR paramanager, and made it closer to GCC's calling convention.
Fixed a number of code generator bugs that produced invalid or broken instructions: CP operands were swapped, a number of instructions had invalid immediate operands, and the stack frame epilogue wasn't complete.
Added a bunch of peephole optimizations that clean up the generated code a lot.

git-svn-id: trunk@26925 -
This commit is contained in:
Jeppe Johansen 2014-03-02 15:37:24 +00:00
parent 68f1a51164
commit e33550b67d
8 changed files with 384 additions and 51 deletions

View File

@ -246,12 +246,16 @@ implementation
begin
result:=operand_read;
case opcode of
A_CLR,
A_MOV, A_MOVW:
if opnr=0 then
result:=operand_write;
A_CP,A_CPC,A_CPI,A_PUSH :
;
else
begin
if opnr=0 then
result:=operand_write;
result:=operand_readwrite;
end;
end;
end;

View File

@ -28,10 +28,12 @@ Unit aoptcpu;
Interface
uses cpubase, aasmtai, aopt, aoptcpub;
uses cpubase, cgbase, aasmtai, aopt, aoptcpub;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
{ uses the same constructor as TAopObj }
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
procedure PeepHoleOptPass2;override;
@ -40,7 +42,8 @@ Type
Implementation
uses
aasmbase,aasmcpu,cgbase;
aasmbase,aasmcpu,
globals,globtype;
function CanBeCond(p : tai) : boolean;
begin
@ -48,40 +51,254 @@ Implementation
end;
{ Advances from Current and stores the entry the scan stopped at in Next.

  The scan stops at the first entry for which any of the following holds
  (tested in this order):
    - cs_opt_level3 is not part of the current optimizer switches, i.e. only
      a single step is taken;
    - GetNextInstruction failed;
    - the entry found is not of type ait_instruction;
    - the entry references reg (RegInInstruction);
    - the entry's opcode satisfies is_calljmp.

  The result is the value returned by the last GetNextInstruction call. It
  does NOT by itself guarantee that Next is an instruction or that it uses
  reg; callers have to verify that themselves. }
function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  var Next: tai; reg: TRegister): Boolean;
  begin
    Next:=Current;
    while true do
      begin
        Result:=GetNextInstruction(Next,Next);
        { without level 3 optimizations never look further than one step }
        if not(cs_opt_level3 in current_settings.optimizerswitches) then
          break;
        { nothing left to look at }
        if not(Result) then
          break;
        { only instructions are skipped over }
        if Next.typ<>ait_instruction then
          break;
        { found an instruction that references the register }
        if RegInInstruction(reg,Next) then
          break;
        { do not scan past instructions flagged by is_calljmp }
        if is_calljmp(taicpu(Next).opcode) then
          break;
      end;
  end;
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
next1: tai;
hp1,hp2,hp3: tai;
alloc, dealloc: tai_regalloc;
i: integer;
begin
result := false;
case p.typ of
ait_instruction:
begin
case taicpu(p).opcode of
A_LDI:
begin
{ turn
ldi reg0, imm
cp reg1, reg0
dealloc reg0
into
cpi reg1, imm
}
if (taicpu(p).ops=2) and
(taicpu(p).oper[0]^.typ=top_reg) and
(taicpu(p).oper[1]^.typ=top_const) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
(taicpu(hp1).opcode=A_CP) and
(taicpu(hp1).ops=2) and
(taicpu(hp1).oper[1]^.typ=top_reg) and
(getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
(taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
begin
taicpu(hp1).opcode:=A_CPI;
taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
if assigned(alloc) and assigned(dealloc) then
begin
asml.Remove(alloc);
alloc.Free;
asml.Remove(dealloc);
dealloc.Free;
end;
GetNextInstruction(p,hp1);
asml.Remove(p);
p.Free;
p:=hp1;
result:=true;
end;
end;
A_CLR:
begin
{ turn the common
clr rX
mov/ld rX, rY
into
mov/ld rX, rY
}
if (taicpu(p).ops=1) and
(taicpu(p).oper[0]^.typ=top_reg) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
(hp1.typ=ait_instruction) and
(taicpu(hp1).opcode in [A_MOV,A_LD]) and
(taicpu(hp1).ops>0) and
(taicpu(hp1).oper[0]^.typ=top_reg) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
begin
asml.Remove(p);
p.Free;
p:=hp1;
result:=true;
end
{ turn
clr rX
...
adc rY, rX
into
...
adc rY, r1
}
else if (taicpu(p).ops=1) and
(taicpu(p).oper[0]^.typ=top_reg) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
(hp1.typ=ait_instruction) and
(taicpu(hp1).opcode in [A_ADC,A_SBC]) and
(taicpu(hp1).ops=2) and
(taicpu(hp1).oper[1]^.typ=top_reg) and
(taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
(taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
begin
taicpu(hp1).oper[1]^.reg:=NR_R1;
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
if assigned(alloc) and assigned(dealloc) then
begin
asml.Remove(alloc);
alloc.Free;
asml.Remove(dealloc);
dealloc.Free;
end;
GetNextInstruction(p,hp1);
asml.Remove(p);
p.free;
p:=hp1;
result:=true;
end;
end;
A_PUSH:
begin
{ turn
push reg0
push reg1
pop reg3
pop reg2
into
movw reg2,reg0
}
if (taicpu(p).ops=1) and
(taicpu(p).oper[0]^.typ=top_reg) and
GetNextInstruction(p,hp1) and
(hp1.typ=ait_instruction) and
(taicpu(hp1).opcode=A_PUSH) and
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
GetNextInstruction(hp1,hp2) and
(hp2.typ=ait_instruction) and
(taicpu(hp2).opcode=A_POP) and
GetNextInstruction(hp2,hp3) and
(hp3.typ=ait_instruction) and
(taicpu(hp3).opcode=A_POP) and
(getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
begin
taicpu(p).ops:=2;
taicpu(p).opcode:=A_MOVW;
taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
asml.Remove(hp1);
hp1.Free;
asml.Remove(hp2);
hp2.Free;
asml.Remove(hp3);
hp3.Free;
result:=true;
end;
end;
A_MOV:
begin
{ turn
mov reg0, reg1
push reg0
dealloc reg0
into
push reg1
}
if (taicpu(p).ops=2) and
(taicpu(p).oper[0]^.typ = top_reg) and
(taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
(not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
(hp1.typ = ait_instruction) and
(taicpu(hp1).opcode in [A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_EOR,A_AND,A_OR]) and
RegInInstruction(taicpu(p).oper[0]^.reg, hp1) and
(not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
{(taicpu(hp1).ops=1) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
begin
for i := 0 to taicpu(hp1).ops-1 do
if taicpu(hp1).oper[i]^.typ=top_reg then
if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
if assigned(alloc) and assigned(dealloc) then
begin
asml.Remove(alloc);
alloc.Free;
asml.Remove(dealloc);
dealloc.Free;
end;
GetNextInstruction(p,hp1);
asml.Remove(p);
p.free;
p:=hp1;
result:=true;
end
{ fold
mov reg2,reg0
mov reg3,reg1
to
movw reg2,reg0
}
if (taicpu(p).ops=2) and
else if (taicpu(p).ops=2) and
(taicpu(p).oper[0]^.typ = top_reg) and
(taicpu(p).oper[1]^.typ = top_reg) and
getnextinstruction(p,next1) and
(next1.typ = ait_instruction) and
(taicpu(next1).opcode = A_MOV) and
(taicpu(next1).ops=2) and
(taicpu(next1).oper[0]^.typ = top_reg) and
(taicpu(next1).oper[1]^.typ = top_reg) and
(getsupreg(taicpu(next1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
getnextinstruction(p,hp1) and
(hp1.typ = ait_instruction) and
(taicpu(hp1).opcode = A_MOV) and
(taicpu(hp1).ops=2) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
(getsupreg(taicpu(next1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
(getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
begin
alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
if assigned(alloc) then
begin
asml.Remove(alloc);
asml.InsertBefore(alloc,p);
end;
taicpu(p).opcode:=A_MOVW;
asml.remove(next1);
next1.free;
asml.remove(hp1);
hp1.free;
result := true;
end;
end;

View File

@ -36,7 +36,10 @@ Unit aoptcpub; { Assembler OPTimizer CPU specific Base }
Interface
Uses
cpubase,aasmcpu,AOptBase;
cpubase,
cgbase,
aasmcpu,aasmtai,
AOptBase;
Type
@ -58,6 +61,7 @@ Type
{ ************************************************************************* }
TAoptBaseCpu = class(TAoptBase)
function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
End;
@ -103,12 +107,26 @@ Implementation
{ ************************************************************************* }
{ **************************** TCondRegs ********************************** }
{ ************************************************************************* }
Constructor TCondRegs.init;
Begin
End;
Constructor TCondRegs.init;
Begin
End;
Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
Begin
End;
Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
Begin
End;
{ Returns true if one of the operands of p1 is the register Reg and
  spilling_get_operation_type reports that operand as written or
  read-written; returns false otherwise.

  p1 is cast to taicpu without a type check, so the caller is expected to
  pass an instruction. }
function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  var
    opidx : Longint;
    instr : taicpu;
  begin
    instr:=taicpu(p1);
    for opidx:=0 to instr.ops-1 do
      begin
        { only register operands naming Reg are of interest }
        if instr.oper[opidx]^.typ<>top_reg then
          continue;
        if instr.oper[opidx]^.reg<>Reg then
          continue;
        if instr.spilling_get_operation_type(opidx) in [operand_write,operand_readwrite] then
          exit(true);
      end;
    result:=false;
  end;
End.

View File

@ -559,7 +559,7 @@ unit cgcpu;
current_asmdata.getjumplabel(l2);
countreg:=getintregister(list,OS_8);
a_load_reg_reg(list,size,OS_8,src,countreg);
list.concat(taicpu.op_reg_const(A_CP,countreg,0));
list.concat(taicpu.op_reg_const(A_CPI,countreg,0));
a_jmp_flags(list,F_EQ,l2);
cg.a_label(list,l1);
case op of
@ -677,7 +677,7 @@ unit cgcpu;
end;
OP_SUB:
begin
list.concat(taicpu.op_reg_const(A_SUBI,reg,a));
list.concat(taicpu.op_reg_const(A_SUBI,reg,a and mask));
if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
begin
for i:=2 to tcgsize2size[size] do
@ -689,6 +689,20 @@ unit cgcpu;
end;
end;
end;
{OP_ADD:
begin
list.concat(taicpu.op_reg_const(A_SUBI,reg,(-a) and mask));
if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
begin
for i:=2 to tcgsize2size[size] do
begin
NextReg;
mask:=mask shl 8;
inc(shift,8);
list.concat(taicpu.op_reg_const(A_ADC,reg,(a and mask) shr shift));
end;
end;
end; }
else
begin
if size in [OS_64,OS_S64] then
@ -787,11 +801,11 @@ unit cgcpu;
else if (ref.base<>NR_NO) and (ref.index<>NR_NO) then
begin
maybegetcpuregister(list,tmpreg);
emit_mov(list,tmpreg,ref.index);
emit_mov(list,tmpreg,ref.base);
maybegetcpuregister(list,GetNextReg(tmpreg));
emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.index));
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.base));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.base)));
emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
ref.base:=tmpreg;
ref.index:=NR_NO;
end
@ -1329,13 +1343,13 @@ unit cgcpu;
reg1:=reg2;
reg2:=tmpreg;
end;
list.concat(taicpu.op_reg_reg(A_CP,reg1,reg2));
list.concat(taicpu.op_reg_reg(A_CP,reg2,reg1));
for i:=2 to tcgsize2size[size] do
begin
reg1:=GetNextReg(reg1);
reg2:=GetNextReg(reg2);
list.concat(taicpu.op_reg_reg(A_CPC,reg1,reg2));
list.concat(taicpu.op_reg_reg(A_CPC,reg2,reg1));
end;
a_jmp_cond(list,cmp_op,l);
@ -1513,6 +1527,8 @@ unit cgcpu;
LocalSize:=current_procinfo.calc_stackframe_size;
a_adjust_sp(list,LocalSize);
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
regs:=regs+[RS_R28,RS_R29];
for reg:=RS_R0 to RS_R31 do
if reg in regs then

View File

@ -235,11 +235,11 @@ unit cpubase;
}
NR_PIC_OFFSET_REG = NR_R9;
{ Results are returned in this register (32-bit values) }
NR_FUNCTION_RETURN_REG = NR_R0;
RS_FUNCTION_RETURN_REG = RS_R0;
NR_FUNCTION_RETURN_REG = NR_R24;
RS_FUNCTION_RETURN_REG = RS_R24;
{ Low part of 64bit return value }
NR_FUNCTION_RETURN64_LOW_REG = NR_R0;
RS_FUNCTION_RETURN64_LOW_REG = RS_R0;
NR_FUNCTION_RETURN64_LOW_REG = NR_R22;
RS_FUNCTION_RETURN64_LOW_REG = RS_R22;
{ High part of 64bit return value }
NR_FUNCTION_RETURN64_HIGH_REG = NR_R1;
RS_FUNCTION_RETURN64_HIGH_REG = RS_R1;
@ -323,6 +323,8 @@ unit cpubase;
{ returns the register with the offset of ofs of a continuous set of register starting with r and being continued with rhi }
function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
implementation
uses
@ -476,4 +478,10 @@ unit cpubase;
end;
{ Returns true when opcode o is a member of the jmp_instructions set.
  NOTE(review): the name suggests call opcodes are covered as well; that
  depends on the contents of jmp_instructions, which is declared elsewhere -
  confirm. }
function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
begin
is_calljmp:= o in jmp_instructions;
end;
end.

View File

@ -162,7 +162,10 @@ unit cpupara;
result:=not(def.size in [1,2,4]);
}
else
result:=inherited ret_in_param(def,pd);
if (def.size > 4) then
result:=true
else
result:=inherited ret_in_param(def,pd);
end;
end;
@ -441,7 +444,57 @@ unit cpupara;
{ Return in register }
else
begin
if retcgsize in [OS_64,OS_S64] then
case retcgsize of
OS_32,OS_S32:
begin
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R22;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
paraloc:=result.add_location;
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R23;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
paraloc:=result.add_location;
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R24;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
paraloc:=result.add_location;
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R25;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
end;
OS_16,OS_S16:
begin
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R24;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
paraloc:=result.add_location;
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R25;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
end;
OS_8,OS_S8:
begin
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_R24;
paraloc^.size:=OS_8;
paraloc^.def:=u8inttype;
end;
else
internalerror(2014030101);
end;
{if retcgsize in [OS_64,OS_S64] then
begin
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
@ -457,9 +510,9 @@ unit cpupara;
begin
paraloc^.loc:=LOC_REGISTER;
paraloc^.register:=NR_FUNCTION_RETURN_REG;
paraloc^.size:=OS_32;
paraloc^.def:=u32inttype;
end;
paraloc^.size:=OS_INT;
paraloc^.def:=u16inttype;
end;}
end;
end;

View File

@ -69,14 +69,20 @@ Unit raavrgas;
name : string[2];
reg : tregister;
end;
{
const
extraregs : array[0..19] of treg2str = (
(name: 'X'; reg : NR_Z),
(name: 'Y'; reg : NR_R1),
(name: 'Z'; reg : NR_R2),
extraregs : array[0..8] of treg2str = (
(name: 'X'; reg : NR_R26),
(name: 'XL'; reg : NR_R26),
(name: 'XH'; reg : NR_R27),
(name: 'Y'; reg : NR_R28),
(name: 'YL'; reg : NR_R28),
(name: 'YH'; reg : NR_R29),
(name: 'Z'; reg : NR_R30),
(name: 'ZL'; reg : NR_R30),
(name: 'ZH'; reg : NR_R31)
);
}
var
i : longint;
@ -85,9 +91,9 @@ Unit raavrgas;
{ reg found?
possible aliases are 1 or 2 chars long
}
if result or (length(s)<>2) then
if result or (not (length(s) in [1,2])) then
exit;
{
for i:=low(extraregs) to high(extraregs) do
begin
if s=extraregs[i].name then
@ -98,7 +104,6 @@ Unit raavrgas;
exit;
end;
end;
}
end;
@ -480,7 +485,16 @@ Unit raavrgas;
{ save the type of register used. }
tempreg:=actasmregister;
Consume(AS_REGISTER);
if (actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA]) then
if (actasmtoken=AS_PLUS) then
begin
oper.opr.typ:=OPR_REFERENCE;
reference_reset_base(oper.opr.ref,tempreg,0,1);
oper.opr.ref.addressmode:=AM_POSTINCREMENT;
consume(AS_PLUS);
end
else if (actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA]) then
Begin
if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
Message(asmr_e_invalid_operand_type);

View File

@ -635,8 +635,7 @@ begin
Add(' /* Internal text space or external memory. */');
Add(' .text :');
Add(' {');
Add(' *(.vectors)');
Add(' KEEP(*(.vectors))');
Add(' KEEP(*(.init, .init.*))');
Add(' /* For data that needs to reside in the lower 64k of progmem. */');
Add(' *(.progmem.gcc*)');
Add(' *(.progmem*)');
@ -862,6 +861,10 @@ begin
success:=DoExec(FindUtil(utilsprefix+'objcopy'),'-O ihex '+
ChangeFileExt(current_module.exefilename,'.elf')+' '+
ChangeFileExt(current_module.exefilename,'.hex'),true,false);
if success then
success:=DoExec(FindUtil(utilsprefix+'objcopy'),'-O binary '+
ChangeFileExt(current_module.exefilename,'.elf')+' '+
ChangeFileExt(current_module.exefilename,'.bin'),true,false);
end;
MakeExecutable:=success; { otherwise a recursive call to link method }