+ AVR: track flag usage

+ AVR: take care of allocated flags when spilling
  * AVR: trgcpu.do_spill_read might also use ADIW
This commit is contained in:
florian 2022-05-01 22:42:30 +02:00
parent 6d6774bcc4
commit 0ae45bd2cf
6 changed files with 146 additions and 7 deletions

View File

@ -388,6 +388,16 @@ interface
mark_AsmBlockStart,mark_AsmBlockEnd,
mark_NoLineInfoStart,mark_NoLineInfoEnd,mark_BlockStart,
mark_Position
{$ifdef avr}
{ spilling on avr destroys the flags as it might use adiw/add/adc, so in case
the flags are allocated during spilling, this marker must be translated into
a push of the flags when assembler post processing is carried out }
,mark_may_store_flags_with_r26
{ spilling on avr destroys the flags as it might use adiw/add/adc, so in case
the flags are allocated during spilling, this marker must be translated into
a pop of the flags when assembler post processing is carried out }
,mark_may_restore_flags_with_r26
{$endif avr}
);
TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize,ra_markused);

View File

@ -399,10 +399,11 @@ implementation
function finalizeavrcode(list : TAsmList) : Boolean;
var
CurrOffset : longint;
curtai, firstinstruction: tai;
curtai, firstinstruction, hp: tai;
again : boolean;
l : tasmlabel;
inasmblock : Boolean;
inasmblock, flagsallocated: Boolean;
href: treference;
procedure remove_instruction;
var
@ -467,6 +468,7 @@ implementation
curtai:=tai(list.first);
inasmblock:=false;
firstinstruction:=nil;
flagsallocated:=false;
while assigned(curtai) do
begin
case curtai.typ of
@ -557,12 +559,59 @@ implementation
end;
end;
end;
ait_regalloc:
case tai_regalloc(curtai).ratype of
ra_alloc:
if (tai_regalloc(curtai).reg=NR_DEFAULTFLAGS) then
begin
{ there are still double allocations/deallocations in the cg, so
this internal error cannot be enabled yet
if flagsallocated then
Internalerror(2022050101);
}
flagsallocated:=true;
end;
ra_dealloc:
if (tai_regalloc(curtai).reg=NR_DEFAULTFLAGS) then
begin
{ there are still double allocations/deallocations in the cg, so
this internal error cannot be enabled yet
if not(flagsallocated) then
Internalerror(2022050102);
}
flagsallocated:=false;
end;
end;
ait_marker:
case tai_marker(curtai).Kind of
mark_AsmBlockStart:
inasmblock:=true;
mark_AsmBlockEnd:
inasmblock:=false;
mark_may_store_flags_with_r26:
begin
if flagsallocated then
begin
hp:=taicpu.op_reg_const(A_IN,NR_R26,63);
list.insertafter(hp,curtai);
list.insertafter(taicpu.op_reg(A_PUSH,NR_R26),hp);
list.Remove(curtai);
curtai.Free;
curtai:=hp;
end;
end;
mark_may_restore_flags_with_r26:
begin
if flagsallocated then
begin
hp:=taicpu.op_reg(A_POP,NR_R26);
list.insertafter(hp,curtai);
list.insertafter(taicpu.op_const_reg(A_OUT,63,NR_R26),hp);
list.Remove(curtai);
curtai.Free;
curtai:=hp;
end;
end;
end;
end;
curtai:=tai(curtai.next);

View File

@ -442,6 +442,7 @@ unit cgcpu;
else
list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
end;
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_DEC,countreg));
a_jmp_flags(list,F_NE,l1);
executionweight:=oldexecutionweight;
@ -806,22 +807,34 @@ unit cgcpu;
case op of
OP_ADD:
begin
if tcgsize2size[size]>1 then
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,dst,src));
for i:=2 to tcgsize2size[size] do
begin
NextSrcDstPreInc;
list.concat(taicpu.op_reg_reg(A_ADC,dst,src));
end;
if tcgsize2size[size]>1 then
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
OP_SUB:
begin
if tcgsize2size[size]>1 then
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_SUB,dst,src));
for i:=2 to tcgsize2size[size] do
begin
NextSrcDstPreInc;
list.concat(taicpu.op_reg_reg(A_SBC,dst,src));
end;
if tcgsize2size[size]>1 then
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
OP_NEG:
@ -847,6 +860,9 @@ unit cgcpu;
if i<tcgsize2size[size] then
NextTmp;
end;
if tcgsize2size[size]>1 then
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_NEG,dst));
tmpreg:=GetNextReg(dst);
for i:=2 to tcgsize2size[size] do
@ -856,6 +872,9 @@ unit cgcpu;
if i<tcgsize2size[size] then
NextTmp;
end;
if tcgsize2size[size]>1 then
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end
else if size in [OS_S8,OS_8] then
list.concat(taicpu.op_reg(A_NEG,dst))
@ -898,6 +917,7 @@ unit cgcpu;
current_asmdata.getjumplabel(l2);
countreg:=getintregister(list,OS_8);
a_load_reg_reg(list,size,OS_8,src,countreg);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_TST,countreg));
a_jmp_flags(list,F_EQ,l2);
cg.a_label(list,l1);
@ -951,7 +971,7 @@ unit cgcpu;
end;
end;
end;
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_DEC,countreg));
a_jmp_flags(list,F_NE,l1);
{ keep registers alive }
@ -1086,6 +1106,7 @@ unit cgcpu;
if (op=OP_SAR) and (a>=(tcgsize2size[size]*8-1)) then
begin
current_asmdata.getjumplabel(l1);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1)));
a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1));
a_jmp_flags(list,F_PL,l1);
@ -1097,6 +1118,7 @@ unit cgcpu;
else if (op=OP_SHR) and (a=(tcgsize2size[size]*8-1)) then
begin
current_asmdata.getjumplabel(l1);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1)));
a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,0));
a_jmp_flags(list,F_PL,l1);
@ -1171,6 +1193,10 @@ unit cgcpu;
OP_ADD:
begin
curvalue:=a and mask;
if tcgsize2size[size]>1 then
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
if curvalue=0 then
list.concat(taicpu.op_reg_reg(A_ADD,reg,GetDefaultZeroReg))
else if (curvalue=1) and (tcgsize2size[size]=1) then
@ -1201,6 +1227,8 @@ unit cgcpu;
end;
end;
end;
if tcgsize2size[size]>1 then
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
else
begin
@ -1308,8 +1336,10 @@ unit cgcpu;
emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
if ref.index<>NR_NO then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
if ref.offset>0 then
list.concat(taicpu.op_reg_const(A_ADIW,tmpreg,ref.offset))
@ -1340,13 +1370,17 @@ unit cgcpu;
if (ref.base<>NR_NO) then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.base));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.base)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
if (ref.index<>NR_NO) then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
ref.symbol:=nil;
ref.offset:=0;
@ -1359,8 +1393,10 @@ unit cgcpu;
emit_mov(list,tmpreg,ref.base);
maybegetcpuregister(list,GetNextReg(tmpreg));
emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
ref.base:=tmpreg;
ref.index:=NR_NO;
end
@ -1921,6 +1957,8 @@ unit cgcpu;
end;
end;
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
{ If doing a signed test for x<0, we can simply test the sign bit
of the most significant byte }
if (cmp_op in [OC_LT,OC_GTE]) and
@ -1949,6 +1987,8 @@ unit cgcpu;
end;
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end
else
inherited a_cmp_const_reg_label(list,size,cmp_op,a,reg,l);
@ -1992,6 +2032,9 @@ unit cgcpu;
reg1:=reg2;
reg2:=tmpreg;
end;
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_CP,reg2,reg1));
for i:=2 to tcgsize2size[size] do
@ -2002,6 +2045,7 @@ unit cgcpu;
end;
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end;
@ -2574,13 +2618,17 @@ unit cgcpu;
if (ref.base<>NR_NO) then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,r,ref.base));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.base)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
if (ref.index<>NR_NO) then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
end
else if (ref.base<>NR_NO)then
@ -2589,8 +2637,10 @@ unit cgcpu;
emit_mov(list,GetNextReg(r),GetNextReg(ref.base));
if (ref.index<>NR_NO) then
begin
cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
end;
end
else if (ref.index<>NR_NO) then
@ -2698,6 +2748,7 @@ unit cgcpu;
list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
cg.ungetcpuregister(list,GetDefaultTmpReg);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
if tcgsize2size[countregsize] = 1 then
list.concat(taicpu.op_reg(A_DEC,countreg))
else

View File

@ -135,6 +135,8 @@ interface
var
i : byte;
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,tmpreg2));
for i:=2 to tcgsize2size[left.location.size] do
begin
@ -203,6 +205,8 @@ interface
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
end;
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if (not unsigned) and
(right.location.loc=LOC_CONSTANT) and
(right.location.value=0) and

View File

@ -103,12 +103,14 @@ implementation
case left.location.loc of
LOC_FLAGS :
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
location_copy(location,left.location);
inverse_flags(location.resflags);
end;
LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF,
LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE :
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,left.location.register));

View File

@ -95,19 +95,40 @@ unit rgcpu;
helpins : tai;
tmpref : treference;
helplist : TAsmList;
ofs : asizeint;
begin
if (abs(spilltemp.offset)>63) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
begin
helplist:=TAsmList.create;
helplist.concat(taicpu.op_reg_const(A_LDI,NR_R26,lo(word(spilltemp.offset))));
helplist.concat(taicpu.op_reg_const(A_LDI,NR_R27,hi(word(spilltemp.offset))));
helplist.concat(taicpu.op_reg_reg(A_ADD,NR_R26,spilltemp.base));
helplist.concat(taicpu.op_reg_reg(A_ADC,NR_R27,cg.GetNextReg(spilltemp.base)));
helplist.concat(tai_regalloc.alloc(NR_R26,nil));
helplist.concat(tai_regalloc.alloc(NR_R27,nil));
helplist.concat(tai_marker.Create(mark_may_store_flags_with_r26));
if (CPUAVR_HAS_ADIW in cpu_capabilities[current_settings.cputype]) and (ofs>0) and (ofs<=126) then
begin
{ this might be converted into movw }
helplist.concat(taicpu.op_reg_reg(A_MOV,NR_R26,spilltemp.base));
helplist.concat(taicpu.op_reg_reg(A_MOV,NR_R27,cg.GetNextReg(spilltemp.base)));
while ofs>0 do
begin
helplist.concat(taicpu.op_reg_const(A_ADIW,NR_R26,min(63,ofs)));
dec(ofs,min(63,ofs));
end;
end
else
begin
helplist.concat(taicpu.op_reg_const(A_LDI,NR_R26,lo(word(spilltemp.offset))));
helplist.concat(taicpu.op_reg_const(A_LDI,NR_R27,hi(word(spilltemp.offset))));
helplist.concat(taicpu.op_reg_reg(A_ADD,NR_R26,spilltemp.base));
helplist.concat(taicpu.op_reg_reg(A_ADC,NR_R27,cg.GetNextReg(spilltemp.base)));
end;
reference_reset_base(tmpref,NR_R26,0,spilltemp.temppos,1,[]);
helpins:=spilling_create_load(tmpref,tempreg);
helplist.concat(helpins);
helplist.concat(tai_marker.Create(mark_may_restore_flags_with_r26));
helplist.concat(tai_regalloc.dealloc(NR_R26,nil));
helplist.concat(tai_regalloc.dealloc(NR_R27,nil));
list.insertlistafter(pos,helplist);
helplist.free;
end
@ -129,6 +150,7 @@ unit rgcpu;
helplist.concat(tai_regalloc.alloc(NR_R26,nil));
helplist.concat(tai_regalloc.alloc(NR_R27,nil));
helplist.concat(tai_marker.Create(mark_may_store_flags_with_r26));
if (CPUAVR_HAS_ADIW in cpu_capabilities[current_settings.cputype]) and (ofs>0) and (ofs<=126) then
begin
{ this might be converted into movw }
@ -150,6 +172,7 @@ unit rgcpu;
reference_reset_base(tmpref,NR_R26,0,spilltemp.temppos,1,[]);
helplist.concat(spilling_create_store(tempreg,tmpref));
helplist.concat(tai_marker.Create(mark_may_restore_flags_with_r26));
helplist.concat(tai_regalloc.dealloc(NR_R26,nil));
helplist.concat(tai_regalloc.dealloc(NR_R27,nil));
list.insertlistafter(pos,helplist);