diff --git a/compiler/aasmtai.pas b/compiler/aasmtai.pas
index 87c8d0812d..fc5aa93bdc 100644
--- a/compiler/aasmtai.pas
+++ b/compiler/aasmtai.pas
@@ -388,6 +388,16 @@ interface
       mark_AsmBlockStart,mark_AsmBlockEnd,
       mark_NoLineInfoStart,mark_NoLineInfoEnd,mark_BlockStart,
       mark_Position
+{$ifdef avr}
+      { spilling on avr destroys the flags as it might use adiw/add/adc, so in case
+        the flags are allocated during spilling, this marker must be translated into
+        a push of the flags when assembler post processing is carried out }
+      ,mark_may_store_flags_with_r26
+      { spilling on avr destroys the flags as it might use adiw/add/adc, so in case
+        the flags are allocated during spilling, this marker must be translated into
+        a pop of the flags when assembler post processing is carried out }
+      ,mark_may_restore_flags_with_r26
+{$endif avr}
     );
 
     TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize,ra_markused);
diff --git a/compiler/avr/aasmcpu.pas b/compiler/avr/aasmcpu.pas
index 89805552cd..2420cef520 100644
--- a/compiler/avr/aasmcpu.pas
+++ b/compiler/avr/aasmcpu.pas
@@ -399,10 +399,11 @@ implementation
     function finalizeavrcode(list : TAsmList) : Boolean;
       var
         CurrOffset : longint;
-        curtai, firstinstruction: tai;
+        curtai, firstinstruction, hp: tai;
         again : boolean;
         l : tasmlabel;
-        inasmblock : Boolean;
+        inasmblock, flagsallocated: Boolean;
+        href: treference;
 
       procedure remove_instruction;
         var
@@ -467,6 +468,7 @@ implementation
         curtai:=tai(list.first);
         inasmblock:=false;
         firstinstruction:=nil;
+        flagsallocated:=false;
         while assigned(curtai) do
          begin
            case curtai.typ of
@@ -557,12 +559,59 @@ implementation
                        end;
                    end;
                end;
+             ait_regalloc:
+               case tai_regalloc(curtai).ratype of
+                 ra_alloc:
+                   if (tai_regalloc(curtai).reg=NR_DEFAULTFLAGS) then
+                     begin
+                       { there are still double allocations/deallocations in the cg, so
+                         this internalerror cannot be enabled
+                       if flagsallocated then
+                         Internalerror(2022050101);
+                       }
+                       flagsallocated:=true;
+                     end;
+                 ra_dealloc:
+                   if (tai_regalloc(curtai).reg=NR_DEFAULTFLAGS) then
+                     begin
+                       { there are still double allocations/deallocations in the cg, so
+                         this internalerror cannot be enabled
+                       if not(flagsallocated) then
+                         Internalerror(2022050102);
+                       }
+                       flagsallocated:=false;
+                     end;
+               end;
              ait_marker:
                case tai_marker(curtai).Kind of
                  mark_AsmBlockStart:
                    inasmblock:=true;
                  mark_AsmBlockEnd:
                    inasmblock:=false;
+                 mark_may_store_flags_with_r26:
+                   begin
+                     if flagsallocated then
+                       begin
+                         hp:=taicpu.op_reg_const(A_IN,NR_R26,63);
+                         list.insertafter(hp,curtai);
+                         list.insertafter(taicpu.op_reg(A_PUSH,NR_R26),hp);
+                         list.Remove(curtai);
+                         curtai.Free;
+                         curtai:=hp;
+                       end;
+                   end;
+                 mark_may_restore_flags_with_r26:
+                   begin
+                     if flagsallocated then
+                       begin
+                         hp:=taicpu.op_reg(A_POP,NR_R26);
+                         list.insertafter(hp,curtai);
+                         list.insertafter(taicpu.op_const_reg(A_OUT,63,NR_R26),hp);
+                         list.Remove(curtai);
+                         curtai.Free;
+                         curtai:=hp;
+                       end;
+                   end;
               end;
          end;
        curtai:=tai(curtai.next);
diff --git a/compiler/avr/cgcpu.pas b/compiler/avr/cgcpu.pas
index dcfd4824c2..58b051eacd 100644
--- a/compiler/avr/cgcpu.pas
+++ b/compiler/avr/cgcpu.pas
@@ -442,6 +442,7 @@ unit cgcpu;
               else
                 list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
             end;
+          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
           list.concat(taicpu.op_reg(A_DEC,countreg));
           a_jmp_flags(list,F_NE,l1);
           executionweight:=oldexecutionweight;
@@ -806,22 +807,34 @@ unit cgcpu;
         case op of
           OP_ADD:
             begin
+              if tcgsize2size[size]>1 then
+                cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
+
               list.concat(taicpu.op_reg_reg(A_ADD,dst,src));
               for i:=2 to tcgsize2size[size] do
                 begin
                   NextSrcDstPreInc;
                   list.concat(taicpu.op_reg_reg(A_ADC,dst,src));
                 end;
+
+              if tcgsize2size[size]>1 then
+                cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
             end;
 
           OP_SUB:
             begin
+              if tcgsize2size[size]>1 then
+                cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
+
               list.concat(taicpu.op_reg_reg(A_SUB,dst,src));
               for i:=2 to tcgsize2size[size] do
                 begin
                   NextSrcDstPreInc;
                   list.concat(taicpu.op_reg_reg(A_SBC,dst,src));
                 end;
+
+              if tcgsize2size[size]>1 then
+                cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
             end;
 
           OP_NEG:
@@ -847,6 +860,9 @@ unit cgcpu;
                     if i<tcgsize2size[size] then
                       tmpreg:=GetNextReg(tmpreg);
                   end;
+                if tcgsize2size[size]>1 then
+                  cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
+
                 list.concat(taicpu.op_reg(A_NEG,dst));
                 tmpreg:=GetNextReg(dst);
                 for i:=2 to tcgsize2size[size] do
@@ -856,6 +872,9 @@ unit cgcpu;
                     if i<tcgsize2size[size] then
                       tmpreg:=GetNextReg(tmpreg);
                   end;
+
+                if tcgsize2size[size]>1 then
+                  cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
               end
             else if size in [OS_S8,OS_8] then
               list.concat(taicpu.op_reg(A_NEG,dst))
@@ -898,6 +917,7 @@ unit cgcpu;
           current_asmdata.getjumplabel(l2);
           countreg:=getintregister(list,OS_8);
           a_load_reg_reg(list,size,OS_8,src,countreg);
+          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
           list.concat(taicpu.op_reg(A_TST,countreg));
           a_jmp_flags(list,F_EQ,l2);
           cg.a_label(list,l1);
@@ -951,7 +971,7 @@ unit cgcpu;
                 end;
             end;
           end;
-
+          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
           list.concat(taicpu.op_reg(A_DEC,countreg));
           a_jmp_flags(list,F_NE,l1);
           { keep registers alive }
@@ -1086,6 +1106,7 @@ unit cgcpu;
           if (op=OP_SAR) and (a>=(tcgsize2size[size]*8-1)) then
             begin
               current_asmdata.getjumplabel(l1);
+              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
               list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1)));
               a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1));
               a_jmp_flags(list,F_PL,l1);
@@ -1097,6 +1118,7 @@ unit cgcpu;
           else if (op=OP_SHR) and (a=(tcgsize2size[size]*8-1)) then
             begin
               current_asmdata.getjumplabel(l1);
+              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
               list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1)));
               a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,0));
               a_jmp_flags(list,F_PL,l1);
@@ -1171,6 +1193,10 @@ unit cgcpu;
             OP_ADD:
               begin
                 curvalue:=a and mask;
+
+                if tcgsize2size[size]>1 then
+                  cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
+
                 if curvalue=0 then
                   list.concat(taicpu.op_reg_reg(A_ADD,reg,GetDefaultZeroReg))
                 else if (curvalue=1) and (tcgsize2size[size]=1) then
@@ -1201,6 +1227,8 @@ unit cgcpu;
                   end;
               end;
           end;
+          if tcgsize2size[size]>1 then
+            cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
         end;
       else
         begin
@@ -1308,8 +1336,10 @@ unit cgcpu;
             emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
             if ref.index<>NR_NO then
               begin
+                cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
                 list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
                 list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
+                cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
               end;
             if ref.offset>0 then
               list.concat(taicpu.op_reg_const(A_ADIW,tmpreg,ref.offset))
@@ -1340,13 +1370,17 @@ unit cgcpu;
             if (ref.base<>NR_NO) then
               begin
+                cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
                 list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.base));
                 list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.base)));
+                cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
               end;
             if (ref.index<>NR_NO) then
               begin
+                cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
                 list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
                 list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
+                cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
               end;
 
             ref.symbol:=nil;
             ref.offset:=0;
@@ -1359,8 +1393,10 @@ unit cgcpu;
             emit_mov(list,tmpreg,ref.base);
             maybegetcpuregister(list,GetNextReg(tmpreg));
             emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
+            cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
             list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
             list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
+            cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
             ref.base:=tmpreg;
             ref.index:=NR_NO;
           end
@@ -1921,6 +1957,8 @@ unit cgcpu;
             end;
         end;
 
+      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+
       { If doing a signed test for x<0, we can simply test the sign bit
         of the most significant byte }
       if (cmp_op in [OC_LT,OC_GTE]) and
@@ -1949,6 +1987,8 @@ unit cgcpu;
             end;
 
           a_jmp_cond(list,cmp_op,l);
+
+          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
         end
       else
         inherited a_cmp_const_reg_label(list,size,cmp_op,a,reg,l);
@@ -1992,6 +2032,9 @@ unit cgcpu;
             reg1:=reg2;
             reg2:=tmpreg;
           end;
+
+        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+
         list.concat(taicpu.op_reg_reg(A_CP,reg2,reg1));
 
         for i:=2 to tcgsize2size[size] do
@@ -2002,6 +2045,7 @@ unit cgcpu;
           end;
 
         a_jmp_cond(list,cmp_op,l);
+        cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
       end;
 
 
@@ -2574,13 +2618,17 @@ unit cgcpu;
 
           if (ref.base<>NR_NO) then
             begin
+              cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
              list.concat(taicpu.op_reg_reg(A_ADD,r,ref.base));
              list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.base)));
+              cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
            end;
          if (ref.index<>NR_NO) then
            begin
+              cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
              list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
              list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
+              cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
            end;
        end
      else if (ref.base<>NR_NO)then
@@ -2589,8 +2637,10 @@ unit cgcpu;
          emit_mov(list,GetNextReg(r),GetNextReg(ref.base));
          if (ref.index<>NR_NO) then
            begin
+              cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
              list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
              list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
+              cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
            end;
        end
      else if (ref.index<>NR_NO) then
@@ -2698,6 +2748,7 @@ unit cgcpu;
        list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
        list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
        cg.ungetcpuregister(list,GetDefaultTmpReg);
+       cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
        if tcgsize2size[countregsize] = 1 then
          list.concat(taicpu.op_reg(A_DEC,countreg))
        else
diff --git a/compiler/avr/navradd.pas b/compiler/avr/navradd.pas
index e704557887..6082056e45 100644
--- a/compiler/avr/navradd.pas
+++ b/compiler/avr/navradd.pas
@@ -135,6 +135,8 @@ interface
       var
         i : byte;
       begin
+        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,tmpreg2));
         for i:=2 to tcgsize2size[left.location.size] do
           begin
@@ -203,6 +205,8 @@ interface
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
           end;
 
+        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
         if (not unsigned) and
            (right.location.loc=LOC_CONSTANT) and
            (right.location.value=0) and
diff --git a/compiler/avr/navrmat.pas b/compiler/avr/navrmat.pas
index 751dfc8cef..ce2ba7c78a 100644
--- a/compiler/avr/navrmat.pas
+++ b/compiler/avr/navrmat.pas
@@ -103,12 +103,14 @@ implementation
         case left.location.loc of
           LOC_FLAGS :
             begin
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
              location_copy(location,left.location);
              inverse_flags(location.resflags);
            end;
          LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF,
          LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE :
            begin
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,left.location.register));
 
diff --git a/compiler/avr/rgcpu.pas b/compiler/avr/rgcpu.pas
index 33d86e84d3..5b2d1d0191 100644
--- a/compiler/avr/rgcpu.pas
+++ b/compiler/avr/rgcpu.pas
@@ -95,19 +95,40 @@ unit rgcpu;
        helpins : tai;
        tmpref : treference;
        helplist : TAsmList;
+       ofs : asizeint;
      begin
        if (abs(spilltemp.offset)>63) or
          (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
          begin
            helplist:=TAsmList.create;
-           helplist.concat(taicpu.op_reg_const(A_LDI,NR_R26,lo(word(spilltemp.offset))));
-           helplist.concat(taicpu.op_reg_const(A_LDI,NR_R27,hi(word(spilltemp.offset))));
-           helplist.concat(taicpu.op_reg_reg(A_ADD,NR_R26,spilltemp.base));
-           helplist.concat(taicpu.op_reg_reg(A_ADC,NR_R27,cg.GetNextReg(spilltemp.base)));
+           helplist.concat(tai_regalloc.alloc(NR_R26,nil));
+           helplist.concat(tai_regalloc.alloc(NR_R27,nil));
+           helplist.concat(tai_marker.Create(mark_may_store_flags_with_r26));
+           if (CPUAVR_HAS_ADIW in cpu_capabilities[current_settings.cputype]) and (ofs>0) and (ofs<=126) then
+             begin
+               { this might be converted into movw }
+               helplist.concat(taicpu.op_reg_reg(A_MOV,NR_R26,spilltemp.base));
+               helplist.concat(taicpu.op_reg_reg(A_MOV,NR_R27,cg.GetNextReg(spilltemp.base)));
+               while ofs>0 do
+                 begin
+                   helplist.concat(taicpu.op_reg_const(A_ADIW,NR_R26,min(63,ofs)));
+                   dec(ofs,min(63,ofs));
+                 end;
+             end
+           else
+             begin
+               helplist.concat(taicpu.op_reg_const(A_LDI,NR_R26,lo(word(spilltemp.offset))));
+               helplist.concat(taicpu.op_reg_const(A_LDI,NR_R27,hi(word(spilltemp.offset))));
+               helplist.concat(taicpu.op_reg_reg(A_ADD,NR_R26,spilltemp.base));
+               helplist.concat(taicpu.op_reg_reg(A_ADC,NR_R27,cg.GetNextReg(spilltemp.base)));
+             end;
 
            reference_reset_base(tmpref,NR_R26,0,spilltemp.temppos,1,[]);
            helpins:=spilling_create_load(tmpref,tempreg);
            helplist.concat(helpins);
+           helplist.concat(tai_marker.Create(mark_may_restore_flags_with_r26));
+           helplist.concat(tai_regalloc.dealloc(NR_R26,nil));
+           helplist.concat(tai_regalloc.dealloc(NR_R27,nil));
            list.insertlistafter(pos,helplist);
            helplist.free;
          end
@@ -129,6 +150,7 @@ unit rgcpu;
 
            helplist.concat(tai_regalloc.alloc(NR_R26,nil));
            helplist.concat(tai_regalloc.alloc(NR_R27,nil));
+           helplist.concat(tai_marker.Create(mark_may_store_flags_with_r26));
            if (CPUAVR_HAS_ADIW in cpu_capabilities[current_settings.cputype]) and (ofs>0) and (ofs<=126) then
              begin
               { this might be converted into movw }
@@ -150,6 +172,7 @@ unit rgcpu;
 
            reference_reset_base(tmpref,NR_R26,0,spilltemp.temppos,1,[]);
            helplist.concat(spilling_create_store(tempreg,tmpref));
+           helplist.concat(tai_marker.Create(mark_may_restore_flags_with_r26));
            helplist.concat(tai_regalloc.dealloc(NR_R26,nil));
            helplist.concat(tai_regalloc.dealloc(NR_R27,nil));
            list.insertlistafter(pos,helplist);
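
Illustrative note (not part of the patch itself): when a spill temp falls outside the 6-bit displacement range, the spill helper computes the address in X (r26:r27) and brackets that code with the two new markers. If finalizeavrcode finds NR_DEFAULTFLAGS allocated at that point, the markers are rewritten into an SREG save/restore through r26 (SREG is at I/O address 63, i.e. 0x3F, matching the A_IN/A_OUT operands above); if the flags are not allocated there, the markers simply stay in the list and emit no code. For a spill store on a device without ADIW, the post-processed sequence would look roughly like the sketch below, where the base pair r28:r29, the spilled register r18 and the offset 70 are placeholders only:

    in   r26, 0x3f    ; mark_may_store_flags_with_r26 -> flags are live, save SREG
    push r26
    ldi  r26, 70      ; low byte of the spill offset (example value)
    ldi  r27, 0       ; high byte of the spill offset
    add  r26, r28     ; ADD/ADC clobber SREG, hence the save above
    adc  r27, r29
    st   X, r18       ; the spill store itself (emitted by spilling_create_store)
    pop  r26          ; mark_may_restore_flags_with_r26 -> restore SREG
    out  0x3f, r26    ; the surrounding code sees the flags unchanged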