From d24cbbf9f5c9a76a45288846a31e9f3fc20eb49a Mon Sep 17 00:00:00 2001 From: Jeppe Johansen Date: Sun, 8 Dec 2013 16:50:15 +0000 Subject: [PATCH] Changed debug information to dwarf for ARM_embedded, and set local minimum alignment to 4. Fixed a bug where ARMv7M targets would not use the DIV instructions. Moved many size-optimizing Thumb2 peephole optimizations to PostPeepHoleOptsCpu. Previously those optimizations could make it impossible to reuse the shared arm peephole optimizations. Reenabled a fixed MLA/MLS peephole optimization. Refactored some FindRegDealloc+regLoadedWithNewValue into RegEndOfLife calls. Fixed some broken UXTB/UXTH optimizations. Previously they would also match UXT* instructions with ROR shifter ops. git-svn-id: trunk@26198 - --- compiler/arm/aoptcpu.pas | 438 ++++++++++++++++++++++------------- compiler/arm/narmmat.pas | 6 +- compiler/systems/i_embed.pas | 4 +- 3 files changed, 281 insertions(+), 167 deletions(-) diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index a7dc722080..9556676f27 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -68,6 +68,7 @@ Type { uses the same constructor as TAopObj } function PeepHoleOptPass1Cpu(var p: tai): boolean; override; procedure PeepHoleOptPass2;override; + function PostPeepHoleOptsCpu(var p: tai): boolean; override; End; function MustBeLast(p : tai) : boolean; @@ -202,6 +203,9 @@ Implementation {Loads to all register in the registerset} A_LDM: regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^); + A_POP: + regLoadedWithNewValue := (getsupreg(reg) in p.oper[0]^.regset^) or + (reg=NR_STACK_POINTER_REG); end; if regLoadedWithNewValue then @@ -940,8 +944,7 @@ Implementation (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and - (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or - regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then begin if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and @@ -974,7 +977,87 @@ Implementation result:=true; end; end; + { Change + mov rx, ry, lsr/ror #xxx + uxtb/uxth rz,rx/and rz,rx,0xFF + dealloc rx + to + + uxtb/uxth rz,ry,ror #xxx + } + if (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ = top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and + (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and + (GenerateThumb2Code) and + GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and + (taicpu(hp1).ops = 2) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + taicpu(hp1).ops := 3; + + GetNextInstruction(p,hp1); + + asml.Remove(p); + p.Free; + + p:=hp1; + + result:=true; + exit; + end + else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and + (taicpu(hp1).ops=2) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + taicpu(hp1).ops := 3; + + GetNextInstruction(p,hp1); + + asml.Remove(p); + p.Free; + + p:=hp1; + + result:=true; + exit; + end + else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and + (taicpu(hp1).ops = 3) and + (taicpu(hp1).oper[2]^.typ = top_const) and + (taicpu(hp1).oper[2]^.val = $FF) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hp1).ops := 3; + taicpu(hp1).opcode := A_UXTB; + taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + + GetNextInstruction(p,hp1); + + asml.Remove(p); + p.Free; + + p:=hp1; + + result:=true; + exit; + end; + end; { optimize mov rX, yyyy @@ -1107,8 +1190,7 @@ Implementation MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or ((taicpu(hp1).ops=2) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and - (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or - regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop} (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and @@ -1241,8 +1323,7 @@ Implementation { Only fold if there isn't another shifterop already. } (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and - (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or - regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then begin { If the register we want to do the shift for resides in base, we need to swap that} if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then @@ -1359,6 +1440,7 @@ Implementation MatchInstruction(p, A_AND, [C_None], [PF_None]) and GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and + (taicpu(hp1).ops = 2) and RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and { reg1 might not be modified inbetween } @@ -1607,18 +1689,16 @@ Implementation else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and + (taicpu(hp1).ops = 2) and MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and { reg1 might not be modified inbetween } not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then begin DebugMsg('Peephole UxtbUxth2Uxtb done', p); - taicpu(hp1).opcode:=A_UXTB; - taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); - GetNextInstruction(p,hp2); - asml.remove(p); - p.free; - p:=hp2; + taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg); + asml.remove(hp1); + hp1.free; result:=true; end { @@ -1632,18 +1712,16 @@ Implementation else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and + (taicpu(hp1).ops = 2) and MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and { reg1 might not be modified inbetween } not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then begin DebugMsg('Peephole UxtbUxtb2Uxtb done', p); - taicpu(hp1).opcode:=A_UXTB; - taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); - GetNextInstruction(p,hp2); - asml.remove(p); - p.free; - p:=hp2; + taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg); + asml.remove(hp1); + hp1.free; result:=true; end { @@ -1656,6 +1734,7 @@ Implementation } else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + (taicpu(p).ops=2) and MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and (taicpu(hp1).ops=3) and (taicpu(hp1).oper[2]^.typ=top_const) and @@ -1718,6 +1797,7 @@ Implementation else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and + (taicpu(hp1).ops=2) and MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and { reg1 might not be modified inbetween } @@ -2319,6 +2399,7 @@ Implementation var hp : taicpu; hp1,hp2 : tai; + oldreg : TRegister; begin result:=false; if inherited PeepHoleOptPass1Cpu(p) then @@ -2336,7 +2417,7 @@ Implementation p:=hp; result:=true; end - else if (p.typ=ait_instruction) and + {else if (p.typ=ait_instruction) and MatchInstruction(p, A_STR, [C_None], [PF_None]) and (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and @@ -2350,7 +2431,7 @@ Implementation p.Free; p:=hp; result:=true; - end + end} else if (p.typ=ait_instruction) and MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and @@ -2365,7 +2446,7 @@ Implementation p:=hp; result:=true; end - else if (p.typ=ait_instruction) and + {else if (p.typ=ait_instruction) and MatchInstruction(p, A_LDR, [C_None], [PF_None]) and (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and @@ -2379,132 +2460,7 @@ Implementation p.Free; p:=hp; result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, A_MOV, [C_None], [PF_None]) and - (taicpu(p).oper[1]^.typ=top_const) and - (taicpu(p).oper[1]^.val >= 0) and - (taicpu(p).oper[1]^.val < 256) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - DebugMsg('Peephole Mov2Movs done', p); - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).oppostfix:=PF_S; - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, A_MVN, [C_None], [PF_None]) and - (taicpu(p).oper[1]^.typ=top_reg) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - DebugMsg('Peephole Mvn2Mvns done', p); - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).oppostfix:=PF_S; - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and - (taicpu(p).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and - (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and - (taicpu(p).oper[2]^.typ=top_const) and - (taicpu(p).oper[2]^.val >= 0) and - (taicpu(p).oper[2]^.val < 256) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - DebugMsg('Peephole AddSub2*s done', p); - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).loadconst(1,taicpu(p).oper[2]^.val); - taicpu(p).oppostfix:=PF_S; - taicpu(p).ops := 2; - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and - (taicpu(p).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and - (taicpu(p).oper[2]^.typ=top_reg) then - begin - DebugMsg('Peephole AddRRR2AddRR done', p); - taicpu(p).ops := 2; - taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg); - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and - (taicpu(p).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and - (taicpu(p).oper[2]^.typ=top_reg) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).ops := 2; - taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg); - taicpu(p).oppostfix:=PF_S; - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and - (taicpu(p).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and - (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then - begin - taicpu(p).ops := 2; - if taicpu(p).oper[2]^.typ=top_reg then - taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg) - else - taicpu(p).loadconst(1,taicpu(p).oper[2]^.val); - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and - (taicpu(p).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).oppostfix:=PF_S; - taicpu(p).ops := 2; - result:=true; - end - else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and - (taicpu(p).ops=3) and - (taicpu(p).oper[2]^.typ=top_shifterop) and - (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and - //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and - (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then - begin - DebugMsg('Peephole Mov2Shift done', p); - asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); - asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); - IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); - taicpu(p).oppostfix:=PF_S; - //taicpu(p).ops := 2; - - case taicpu(p).oper[2]^.shifterop^.shiftmode of - SM_LSL: taicpu(p).opcode:=A_LSL; - SM_LSR: taicpu(p).opcode:=A_LSR; - SM_ASR: taicpu(p).opcode:=A_ASR; - SM_ROR: taicpu(p).opcode:=A_ROR; - end; - - if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then - taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs) - else - taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm); - result:=true; - end + end} else if (p.typ=ait_instruction) and MatchInstruction(p, [A_AND], [], [PF_None]) and (taicpu(p).ops = 2) and @@ -2547,11 +2503,8 @@ Implementation into - mls/mla x,y,z,w + mls/mla x,z,w,y } - { - According to Jeppe Johansen this currently uses operands in the wrong order. - else if (p.typ=ait_instruction) and MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and (taicpu(p).ops=3) and @@ -2562,26 +2515,33 @@ Implementation MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and (((taicpu(hp1).ops=3) and (taicpu(hp1).oper[2]^.typ=top_reg) and - (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or - (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and - (taicpu(hp1).opcode=A_ADD)))) or + ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and + (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or + ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (taicpu(hp1).opcode=A_ADD) and + (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or ((taicpu(hp1).ops=2) and (taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and - assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and - not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and - not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then + (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then begin if taicpu(hp1).opcode=A_ADD then begin taicpu(hp1).opcode:=A_MLA; if taicpu(hp1).ops=3 then - if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then - taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg); + begin + if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then + oldreg:=taicpu(hp1).oper[2]^.reg + else + oldreg:=taicpu(hp1).oper[1]^.reg; + end + else + oldreg:=taicpu(hp1).oper[0]^.reg; - taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg); - taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg); + taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg); + taicpu(hp1).loadreg(3,oldreg); DebugMsg('MulAdd2MLA done', p); @@ -2612,7 +2572,6 @@ Implementation result:=true; end - } {else if (p.typ=ait_instruction) and MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and (taicpu(p).oper[1]^.typ=top_const) and @@ -2739,6 +2698,161 @@ Implementation end; end; + + function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean; + begin + result:=false; + + if p.typ = ait_instruction then + begin + if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and + (taicpu(p).oper[1]^.typ=top_const) and + (taicpu(p).oper[1]^.val >= 0) and + (taicpu(p).oper[1]^.val < 256) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole Mov2Movs done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + result:=true; + end + else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and + (taicpu(p).oper[1]^.typ=top_reg) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole Mvn2Mvns done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + result:=true; + end + else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and + (taicpu(p).ops = 3) and + (taicpu(p).oper[2]^.typ=top_const) and + (taicpu(p).oper[2]^.val=0) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole Rsb2Rsbs done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + result:=true; + end + else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and + (taicpu(p).oper[2]^.typ=top_const) and + (taicpu(p).oper[2]^.val >= 0) and + (taicpu(p).oper[2]^.val < 256) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole AddSub2*s done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).loadconst(1,taicpu(p).oper[2]^.val); + taicpu(p).oppostfix:=PF_S; + taicpu(p).ops := 2; + result:=true; + end + else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and + (taicpu(p).ops = 2) and + (taicpu(p).oper[1]^.typ=top_reg) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole AddSub2*s done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + result:=true; + end + else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (taicpu(p).oper[2]^.typ=top_reg) then + begin + DebugMsg('Peephole AddRRR2AddRR done', p); + taicpu(p).ops := 2; + taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg); + result:=true; + end + else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (taicpu(p).oper[2]^.typ=top_reg) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole opXXY2opsXY done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).ops := 2; + taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg); + taicpu(p).oppostfix:=PF_S; + result:=true; + end + else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then + begin + DebugMsg('Peephole opXXY2opXY done', p); + taicpu(p).ops := 2; + if taicpu(p).oper[2]^.typ=top_reg then + taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg) + else + taicpu(p).loadconst(1,taicpu(p).oper[2]^.val); + result:=true; + end + else if MatchInstruction(p, [A_ADD,A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole opXYX2opsXY done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + taicpu(p).ops := 2; + result:=true; + end + else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and + (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ=top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and + //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + DebugMsg('Peephole Mov2Shift done', p); + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; + + case taicpu(p).oper[2]^.shifterop^.shiftmode of + SM_LSL: taicpu(p).opcode:=A_LSL; + SM_LSR: taicpu(p).opcode:=A_LSR; + SM_ASR: taicpu(p).opcode:=A_ASR; + SM_ROR: taicpu(p).opcode:=A_ROR; + end; + + if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then + taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs) + else + taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm); + result:=true; + end + end; + end; + + begin casmoptimizer:=TCpuAsmOptimizer; cpreregallocscheduler:=TCpuPreRegallocScheduler; diff --git a/compiler/arm/narmmat.pas b/compiler/arm/narmmat.pas index 065c1c2aad..cbca6149e6 100644 --- a/compiler/arm/narmmat.pas +++ b/compiler/arm/narmmat.pas @@ -80,11 +80,11 @@ implementation ) and not(is_64bitint(resultdef)) then result:=nil - else if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and + else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and (nodetype=divn) and not(is_64bitint(resultdef)) then result:=nil - else if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and + else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and (nodetype=modn) and not(is_64bitint(resultdef)) then begin @@ -221,7 +221,7 @@ implementation secondpass(left); secondpass(right); - if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and + if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and (nodetype=divn) and not(is_64bitint(resultdef)) then begin diff --git a/compiler/systems/i_embed.pas b/compiler/systems/i_embed.pas index b39eaa3e80..a11e6e3161 100644 --- a/compiler/systems/i_embed.pas +++ b/compiler/systems/i_embed.pas @@ -67,7 +67,7 @@ unit i_embed; linkextern : ld_embedded; ar : ar_gnu_ar; res : res_none; - dbg : dbg_stabs; + dbg : dbg_dwarf2; script : script_unix; endian : endian_little; alignment : @@ -79,7 +79,7 @@ unit i_embed; constalignmax : 4; varalignmin : 0; varalignmax : 4; - localalignmin : 0; + localalignmin : 4; localalignmax : 4; recordalignmin : 0; recordalignmax : 4;