diff --git a/.gitattributes b/.gitattributes
index a8f0540a67..32068c7bed 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -18515,9 +18515,11 @@ tests/webtbs/tw3777.pp svneol=native#text/plain
 tests/webtbs/tw37779.pp svneol=native#text/pascal
 tests/webtbs/tw3778.pp svneol=native#text/plain
 tests/webtbs/tw37780.pp svneol=native#text/plain
+tests/webtbs/tw37796.pp svneol=native#text/pascal
 tests/webtbs/tw3780.pp svneol=native#text/plain
 tests/webtbs/tw37806.pp svneol=native#text/pascal
 tests/webtbs/tw3782.pp svneol=native#text/plain
+tests/webtbs/tw37823.pp svneol=native#text/pascal
 tests/webtbs/tw3796.pp svneol=native#text/plain
 tests/webtbs/tw3805.pp svneol=native#text/plain
 tests/webtbs/tw3814.pp svneol=native#text/plain
diff --git a/compiler/aarch64/aoptcpu.pas b/compiler/aarch64/aoptcpu.pas
index d9f65b181f..31b5e969cd 100644
--- a/compiler/aarch64/aoptcpu.pas
+++ b/compiler/aarch64/aoptcpu.pas
@@ -43,12 +43,13 @@ Interface
     function PostPeepHoleOptsCpu(var p: tai): boolean; override;
     function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
     function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
-    function LookForPostindexedPattern(p : taicpu) : boolean;
+    function LookForPostindexedPattern(var p : tai) : boolean;
   private
+    function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
     function OptPass1Shift(var p: tai): boolean;
     function OptPostCMP(var p: tai): boolean;
     function OptPass1Data(var p: tai): boolean;
-    function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
+    function OptPass1FData(var p: tai): Boolean;
     function OptPass1STP(var p: tai): boolean;
     function OptPass1Mov(var p: tai): boolean;
     function OptPass1FMov(var p: tai): Boolean;
@@ -172,20 +173,20 @@ Implementation
       ldr/str regX,[reg1], regY/const
     }
-    function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
+    function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
       var
        hp1 : tai;
      begin
        Result:=false;
-       if (p.oper[1]^.typ = top_ref) and
-         (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
-         (p.oper[1]^.ref^.index=NR_NO) and
-         (p.oper[1]^.ref^.offset=0) and
-         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
+       if (taicpu(p).oper[1]^.typ = top_ref) and
+         (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+         (taicpu(p).oper[1]^.ref^.index=NR_NO) and
+         (taicpu(p).oper[1]^.ref^.offset=0) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
-         (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
-         (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
+         (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
+         (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
         ( { valid offset?
        }
          (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -193,16 +194,20 @@ Implementation
          (abs(taicpu(hp1).oper[2]^.val)<256)
        ) and
        { don't apply the optimization if the base register is loaded }
-        (getsupreg(p.oper[0]^.reg)<>getsupreg(p.oper[1]^.ref^.base)) and
+        (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
        begin
-          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
-          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
-          if taicpu(hp1).opcode=A_ADD then
-            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+          if taicpu(p).opcode = A_LDR then
+            DebugMsg('Peephole LdrAdd/Sub2Ldr Postindex done', p)
           else
-            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
+            DebugMsg('Peephole StrAdd/Sub2Str Postindex done', p);
+
+          taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
+          if taicpu(hp1).opcode=A_ADD then
+            taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+          else
+            taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
@@ -398,10 +403,17 @@ Implementation
      var
        hp1: tai;
      begin
-        result:=false;
-        if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-          RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
-          Result:=true;
+        Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+          RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
      end;
+
+
+    function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
+      var
+        hp1: tai;
+      begin
+        Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+          RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
      end;
@@ -431,21 +443,20 @@ Implementation
        (taicpu(p).oper[2]^.ref^.index=NR_NO) and
        (taicpu(p).oper[2]^.ref^.offset=-16) and
        (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
-        GetNextInstruction(p, hp1) and
-        GetNextInstruction(hp1, hp2) and
-        SkipEntryExitMarker(hp2, hp2) and
-        GetNextInstruction(hp2, hp3) and
-        SkipEntryExitMarker(hp3, hp3) and
-        GetNextInstruction(hp3, hp4) and
+        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
+        GetNextInstruction(hp1, hp2) and
+        SkipEntryExitMarker(hp2, hp2) and
        MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
        (taicpu(hp2).oper[0]^.typ = top_ref) and
+        GetNextInstruction(hp2, hp3) and
+        SkipEntryExitMarker(hp3, hp3) and
        MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
        MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
        (taicpu(hp3).oper[0]^.reg = NR_X29) and
@@ -455,6 +466,7 @@ Implementation
        (taicpu(hp3).oper[2]^.ref^.offset=16) and
        (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
+        GetNextInstruction(hp3, hp4) and
        MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
        (taicpu(hp4).ops = 0) then
        begin
@@ -728,14 +740,9 @@ Implementation
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
-            A_LDR:
-              begin
-                Result:=LookForPostindexedPattern(taicpu(p));
-              end;
+            A_LDR,
            A_STR:
-              begin
-                Result:=LookForPostindexedPattern(taicpu(p));
-              end;
+              Result:=LookForPostindexedPattern(p);
            A_MOV:
              Result:=OptPass1Mov(p);
            A_STP:
@@ -773,11 +780,7 @@ Implementation
            A_FNEG,
            A_FCVT,
            A_FABS:
-              begin
-                if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                  RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp') then
-                  Result:=true;
-              end;
+              Result:=OptPass1FData(p);
            A_FMOV:
Result:=OptPass1FMov(p); else diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index 48116f6709..9cf8c50eb6 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -62,6 +62,21 @@ Type protected function LookForPreindexedPattern(p: taicpu): boolean; function LookForPostindexedPattern(p: taicpu): boolean; + + + { Individual optimisation routines } + function OptPass1DataCheckMov(var p: tai): Boolean; + function OptPass1ADDSUB(var p: tai): Boolean; + function OptPass1And(var p: tai): Boolean; override; { There's optimisation code that's general for all ARM platforms } + function OptPass1CMP(var p: tai): Boolean; + function OptPass1LDR(var p: tai): Boolean; + function OptPass1STM(var p: tai): Boolean; + function OptPass1STR(var p: tai): Boolean; + function OptPass1MOV(var p: tai): Boolean; + function OptPass1MUL(var p: tai): Boolean; + function OptPass1MVN(var p: tai): Boolean; + function OptPass1VMov(var p: tai): Boolean; + function OptPass1VOp(var p: tai): Boolean; End; TCpuPreRegallocScheduler = class(TAsmScheduler) @@ -117,7 +132,7 @@ Implementation (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then begin - asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp); + asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp); asml.remove(movp); movp.free; Result:=true; @@ -355,7 +370,7 @@ Implementation dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next)); if assigned(dealloc) then begin - DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp); + DebugMsg('Peephole Optimization: '+optimizer+' removed superfluous vmov', movp); result:=true; { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation @@ -498,7 +513,7 @@ Implementation not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and GenerateARMCode then begin - DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p); + DebugMsg('Peephole Optimization: Str/LdrAdd/Sub2Str/Ldr Postindex done', p); p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED; if taicpu(hp1).oper[2]^.typ=top_const then begin @@ -522,1297 +537,1408 @@ Implementation end; - function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; + function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean; var - hp1,hp2,hp3,hp4: tai; - i, i2: longint; - tempop: tasmop; + hp1,hp2: tai; oldreg: tregister; - dealloc: tai_regalloc; - - function IsPowerOf2(const value: DWord): boolean; inline; - begin - Result:=(value and (value - 1)) = 0; - end; - begin - result := false; - case p.typ of - ait_instruction: - begin - { - change - reg,x,y - cmp reg,#0 - into - s reg,x,y - } - { this optimization can applied only to the currently enabled operations because - the other operations do not update all flags and FPC does not track flag usage } - if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND, - A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and - GetNextInstruction(p, hp1) and - { mlas is only allowed in arm mode } - ((taicpu(p).opcode<>A_MLA) or - (current_settings.instructionset<>is_thumb)) and - MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and - (taicpu(hp1).oper[1]^.typ = top_const) and - (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and - (taicpu(hp1).oper[1]^.val = 0) and - GetNextInstruction(hp1, hp2) and - { be careful here, following instructions could use other flags - however after a jump fpc 
never depends on the value of flags } - { All above instructions set Z and N according to the following - Z := result = 0; - N := result[31]; - EQ = Z=1; NE = Z=0; - MI = N=1; PL = N=0; } - (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or - { mov is also possible, but only if there is no shifter operand, it could be an rxx, - we are too lazy to check if it is rxx or something else } - (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and - assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then - begin - DebugMsg('Peephole OpCmp2OpS done', p); + Result := OptPass1DataCheckMov(p); - taicpu(p).oppostfix:=PF_S; + { + change + add/sub reg2,reg1,const1 + str/ldr reg3,[reg2,const2] + dealloc reg2 + to + str/ldr reg3,[reg1,const2+/-const1] + } + if (not GenerateThumbCode) and + (taicpu(p).ops>2) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oper[2]^.typ = top_const) then + begin + hp1:=p; + while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and + { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition } + MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and + (taicpu(hp1).oper[1]^.typ = top_ref) and + (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and + { don't optimize if the register is stored/overwritten } + (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and + (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and + (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and + { new offset must be valid: either in the range of 8 or 12 bit, depend on the + ldr postfix } + (((taicpu(p).opcode=A_ADD) and + isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix) + ) or + ((taicpu(p).opcode=A_SUB) and + isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix) + ) + ) do + begin + { neither reg1 nor reg2 might be changed inbetween } + if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or + RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then + break; + { reg2 must be either overwritten by the ldr or it is deallocated afterwards } + if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or + assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then + begin + { remember last instruction } + hp2:=hp1; + DebugMsg('Peephole Optimization: Add/SubLdr2Ldr done', p); + hp1:=p; + { fix all ldr/str } + while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do + begin + taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + if taicpu(p).opcode=A_ADD then + inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val) + else + dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val); + if hp1=hp2 then + break; + end; + RemoveCurrentP(p); + result:=true; + Exit; + end; + end; + end; - { move flag allocation if possible } - GetLastInstruction(hp1, hp2); - hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next)); - if assigned(hp2) then - begin - asml.Remove(hp2); - asml.insertbefore(hp2, p); - end; + if (taicpu(p).condition = C_None) and + (taicpu(p).oppostfix = PF_None) and + LookForPreindexedPattern(taicpu(p)) then + begin + DebugMsg('Peephole Optimization: Add/Sub to Preindexed done', p); + RemoveCurrentP(p); + Result:=true; + Exit; + end; + end; - asml.remove(hp1); - hp1.free; - Result:=true; - end - else - case taicpu(p).opcode of - A_STR: + + function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean; + var + hp1,hp2: tai; + oldreg: tregister; + begin + 
Result := OptPass1DataCheckMov(p); + { + Turn + mul reg0, z,w + sub/add x, y, reg0 + dealloc reg0 + + into + + mls/mla x,z,w,y + } + if (taicpu(p).condition = C_None) and + (taicpu(p).oppostfix = PF_None) and + (taicpu(p).ops=3) and + (taicpu(p).oper[0]^.typ = top_reg) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oper[2]^.typ = top_reg) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and + (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and + + (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or + ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and + + // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA. + // TODO: A workaround would be to swap Rm and Rs + (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and + + (((taicpu(hp1).ops=3) and + (taicpu(hp1).oper[2]^.typ=top_reg) and + ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and + (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or + ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (taicpu(hp1).opcode=A_ADD) and + (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or + ((taicpu(hp1).ops=2) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and + (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then + begin + if taicpu(hp1).opcode=A_ADD then + begin + taicpu(hp1).opcode:=A_MLA; + + if taicpu(hp1).ops=3 then + begin + if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then + oldreg:=taicpu(hp1).oper[2]^.reg + else + oldreg:=taicpu(hp1).oper[1]^.reg; + end + else + oldreg:=taicpu(hp1).oper[0]^.reg; + + taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg); + taicpu(hp1).loadreg(3,oldreg); + + DebugMsg('Peephole Optimization: MulAdd2MLA done', p); + end + else + begin + taicpu(hp1).opcode:=A_MLS; + + taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg); + + if taicpu(hp1).ops=2 then + taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg) + else + taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg); + + taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg); + + DebugMsg('Peephole Optimization: MulSub2MLS done', p); + AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs); + AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs); + AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs); + + end; + + taicpu(hp1).ops:=4; + RemoveCurrentP(p); + Result := True; + Exit; + end + end; + + + function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean; + begin + Result := OptPass1DataCheckMov(p); + Result := inherited OptPass1And(p) or Result; + end; + + + function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean; + var + hp1: tai; + begin + { + change + op reg1, ... + mov reg2, reg1 + to + op reg2, ... 
+ } + Result := (taicpu(p).ops >= 3) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + RemoveSuperfluousMove(p, hp1, 'DataMov2Data'); + end; + + + function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean; + var + hp1, hp2, hp_last: tai; + MovRem1, MovRem2: Boolean; + begin + Result := False; + + { These optimizations can be applied only to the currently enabled operations because + the other operations do not update all flags and FPC does not track flag usage } + if (taicpu(p).condition = C_None) and + (taicpu(p).oper[1]^.typ = top_const) and + GetNextInstruction(p, hp1) then + begin + { + change + cmp reg,const1 + moveq reg,const1 + movne reg,const2 + to + cmp reg,const1 + movne reg,const2 + } + if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and + (taicpu(hp1).oper[1]^.typ = top_const) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and + (taicpu(hp2).oper[1]^.typ = top_const) then + begin + MovRem1 := RemoveRedundantMove(p, hp1, asml); + MovRem2 := RemoveRedundantMove(p, hp2, asml); + + Result:= MovRem1 or MovRem2; + + { Make sure that hp1 is still the next instruction after p } + if MovRem1 then + if MovRem2 then begin - { change - str reg1,ref - ldr reg2,ref - into - str reg1,ref - mov reg2,reg1 - } - if (taicpu(p).oper[1]^.typ = top_ref) and - (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and - (taicpu(p).oppostfix=PF_None) and - (taicpu(p).condition=C_None) and - GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and - MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and - (taicpu(hp1).oper[1]^.typ=top_ref) and - (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and - not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and - ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and - ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then - begin - if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then - begin - DebugMsg('Peephole StrLdr2StrMov 1 done', hp1); - asml.remove(hp1); - hp1.free; - end - else - begin - taicpu(hp1).opcode:=A_MOV; - taicpu(hp1).oppostfix:=PF_None; - taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); - DebugMsg('Peephole StrLdr2StrMov 2 done', hp1); - end; - result := true; - end - { change - str reg1,ref - str reg2,ref - into - strd reg1,reg2,ref - } - else if (GenerateARMCode or GenerateThumb2Code) and - (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and - (taicpu(p).oppostfix=PF_None) and - (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and - GetNextInstruction(p,hp1) and - MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and - not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and - (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and - { str ensures that either base or index contain no register, else ldr wouldn't - use an offset either - } - (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and - (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and - (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and - (abs(taicpu(p).oper[1]^.ref^.offset)<256) and - AlignedToQWord(taicpu(p).oper[1]^.ref^) then - begin - DebugMsg('Peephole StrStr2Strd done', p); - taicpu(p).oppostfix:=PF_D; - taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^); - taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg); - taicpu(p).ops:=3; - asml.remove(hp1); - hp1.free; - result:=true; 
- end; - Result:=LookForPostindexedPattern(taicpu(p)) or Result; - end; - A_LDR: - begin - { change - ldr reg1,ref - ldr reg2,ref - into ... - } - if (taicpu(p).oper[1]^.typ = top_ref) and - (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and - GetNextInstruction(p,hp1) and - { ldrd is not allowed here } - MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then - begin - { - ... - ldr reg1,ref - mov reg2,reg1 - } - if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and - RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and - (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and - (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and - (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then - begin - if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then - begin - DebugMsg('Peephole LdrLdr2Ldr done', hp1); - asml.remove(hp1); - hp1.free; - end - else - begin - DebugMsg('Peephole LdrLdr2LdrMov done', hp1); - taicpu(hp1).opcode:=A_MOV; - taicpu(hp1).oppostfix:=PF_None; - taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); - end; - result := true; - end - { - ... - ldrd reg1,reg1+1,ref - } - else if (GenerateARMCode or GenerateThumb2Code) and - (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and - { ldrd does not allow any postfixes ... } - (taicpu(p).oppostfix=PF_None) and - not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and - (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and - { ldr ensures that either base or index contain no register, else ldr wouldn't - use an offset either - } - (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and - (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and - (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and - (abs(taicpu(p).oper[1]^.ref^.offset)<256) and - AlignedToQWord(taicpu(p).oper[1]^.ref^) then - begin - DebugMsg('Peephole LdrLdr2Ldrd done', p); - taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^); - taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg); - taicpu(p).ops:=3; - taicpu(p).oppostfix:=PF_D; - asml.remove(hp1); - hp1.free; - result:=true; - end; - end; - - { - Change - - ldrb dst1, [REF] - and dst2, dst1, #255 - - into - - ldrb dst2, [ref] - } - if not(GenerateThumbCode) and - (taicpu(p).oppostfix=PF_B) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and - (taicpu(hp1).oper[2]^.typ = top_const) and - (taicpu(hp1).oper[2]^.val = $FF) and - not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then - begin - DebugMsg('Peephole LdrbAnd2Ldrb done', p); - taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg; - asml.remove(hp1); - hp1.free; - result:=true; - end; - Result:=LookForPostindexedPattern(taicpu(p)) or Result; - { Remove superfluous mov after ldr - changes - ldr reg1, ref - mov reg2, reg1 - to - ldr reg2, ref - - conditions are: - * no ldrd usage - * reg1 must be released after mov - * mov can not contain shifterops - * ldr+mov have the same conditions - * mov does not set flags - } - if (taicpu(p).oppostfix<>PF_D) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then - Result:=true; - end; - A_MOV: - begin - { fold - mov reg1,reg0, shift imm1 - mov reg1,reg1, shift imm2 - } - if (taicpu(p).ops=3) and - (taicpu(p).oper[2]^.typ = 
top_shifterop) and - (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and - getnextinstruction(p,hp1) and - MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and - (taicpu(hp1).ops=3) and - MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and - (taicpu(hp1).oper[2]^.typ = top_shifterop) and - (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then - begin - { fold - mov reg1,reg0, lsl 16 - mov reg1,reg1, lsr 16 - strh reg1, ... - dealloc reg1 - to - strh reg1, ... - dealloc reg1 - } - if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and - (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and - (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and - (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and - getnextinstruction(hp1,hp2) and - MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and - MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); - if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then - begin - DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1); - taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg); - asml.remove(p); - asml.remove(hp1); - p.free; - hp1.free; - p:=hp2; - Result:=true; - end; - end - { fold - mov reg1,reg0, shift imm1 - mov reg1,reg1, shift imm2 - to - mov reg1,reg0, shift imm1+imm2 - } - else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or - { asr makes no use after a lsr, the asr can be foled into the lsr } - ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then - begin - inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm); - { avoid overflows } - if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then - case taicpu(p).oper[2]^.shifterop^.shiftmode of - SM_ROR: - taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31; - SM_ASR: - taicpu(p).oper[2]^.shifterop^.shiftimm:=31; - SM_LSR, - SM_LSL: - begin - hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0); - InsertLLItem(p.previous, p.next, hp2); - p.free; - p:=hp2; - end; - else - internalerror(2008072803); - end; - DebugMsg('Peephole ShiftShift2Shift 1 done', p); - asml.remove(hp1); - hp1.free; - result := true; - end - { fold - mov reg1,reg0, shift imm1 - mov reg1,reg1, shift imm2 - mov reg1,reg1, shift imm3 ... - mov reg2,reg1, shift imm3 ... - } - else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and - MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and - (taicpu(hp2).ops=3) and - MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and - RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and - (taicpu(hp2).oper[2]^.typ = top_shifterop) and - (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then - begin - { mov reg1,reg0, lsl imm1 - mov reg1,reg1, lsr/asr imm2 - mov reg2,reg1, lsl imm3 ... 
- to - mov reg1,reg0, lsl imm1 - mov reg2,reg1, lsr/asr imm2-imm3 - if - imm1>=imm2 - } - if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and - (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and - (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then - begin - if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then - begin - if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and - not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then - begin - DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p); - inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm); - taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg; - asml.remove(hp1); - asml.remove(hp2); - hp1.free; - hp2.free; - - if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then - begin - taicpu(p).freeop(1); - taicpu(p).freeop(2); - taicpu(p).loadconst(1,0); - end; - result := true; - end; - end - else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then - begin - DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p); - - dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm); - taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg; - asml.remove(hp2); - hp2.free; - result := true; - end; - end - { mov reg1,reg0, lsr/asr imm1 - mov reg1,reg1, lsl imm2 - mov reg1,reg1, lsr/asr imm3 ... - - if imm3>=imm1 and imm2>=imm1 - to - mov reg1,reg0, lsl imm2-imm1 - mov reg1,reg1, lsr/asr imm3 ... - } - else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and - (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and - (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and - (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then - begin - dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm); - taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg; - DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p); - asml.remove(p); - p.free; - p:=hp2; - if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then - begin - taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg; - asml.remove(hp1); - hp1.free; - p:=hp2; - end; - result := true; - end; - end; - end; - { Change the common - mov r0, r0, lsr #xxx - and r0, r0, #yyy/bic r0, r0, #xxx - - and remove the superfluous and/bic if possible - - This could be extended to handle more cases. 
- } - if (taicpu(p).ops=3) and - (taicpu(p).oper[2]^.typ = top_shifterop) and - (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and - (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and - GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and - (hp1.typ=ait_instruction) and - (taicpu(hp1).ops>=1) and - (taicpu(hp1).oper[0]^.typ=top_reg) and - (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then - begin - if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and - MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and - (taicpu(hp1).ops=3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and - (taicpu(hp1).oper[2]^.typ = top_const) and - { Check if the AND actually would only mask out bits being already zero because of the shift - } - ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) = - ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then - begin - DebugMsg('Peephole LsrAnd2Lsr done', hp1); - taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg; - asml.remove(hp1); - hp1.free; - result:=true; - end - else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and - (taicpu(hp1).ops=3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and - (taicpu(hp1).oper[2]^.typ = top_const) and - { Check if the BIC actually would only mask out bits beeing already zero because of the shift } - (taicpu(hp1).oper[2]^.val<>0) and - (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then - begin - DebugMsg('Peephole LsrBic2Lsr done', hp1); - taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg; - asml.remove(hp1); - hp1.free; - result:=true; - end; - end; - { Change - mov rx, ry, lsr/ror #xxx - uxtb/uxth rz,rx/and rz,rx,0xFF - dealloc rx - - to - - uxtb/uxth rz,ry,ror #xxx - } - if (taicpu(p).ops=3) and - (taicpu(p).oper[2]^.typ = top_shifterop) and - (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and - (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and - (GenerateThumb2Code) and - GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then - begin - if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and - (taicpu(hp1).ops = 2) and - (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then - begin - taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; - taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); - taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; - taicpu(hp1).ops := 3; - - GetNextInstruction(p,hp1); - - asml.Remove(p); - p.Free; - - p:=hp1; - - result:=true; - exit; - end - else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and - (taicpu(hp1).ops=2) and - (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then - begin - taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; - taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); - taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; - taicpu(hp1).ops := 3; - - GetNextInstruction(p,hp1); - - asml.Remove(p); - p.Free; - - p:=hp1; - - result:=true; - exit; - end - else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and - (taicpu(hp1).ops = 3) and - (taicpu(hp1).oper[2]^.typ = top_const) and - (taicpu(hp1).oper[2]^.val = $FF) and - (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and - MatchOperand(taicpu(hp1).oper[1]^, 
taicpu(p).oper[0]^.reg) then - begin - taicpu(hp1).ops := 3; - taicpu(hp1).opcode := A_UXTB; - taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg; - taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); - taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR; - - GetNextInstruction(p,hp1); - - asml.Remove(p); - p.Free; - - p:=hp1; - - result:=true; - exit; - end; - end; - { - optimize - mov rX, yyyy - .... - } - if (taicpu(p).ops = 2) and - GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and - (tai(hp1).typ = ait_instruction) then - begin - { - This removes the mul from - mov rX,0 - ... - mul ...,rX,... - } - if false and (taicpu(p).oper[1]^.typ = top_const) and - (taicpu(p).oper[1]^.val=0) and - MatchInstruction(hp1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and - (((taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^)) or - ((taicpu(hp1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^))) then - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); - DebugMsg('Peephole MovMUL/MLA2Mov0 done', p); - if taicpu(hp1).opcode=A_MUL then - taicpu(hp1).loadconst(1,0) - else - taicpu(hp1).loadreg(1,taicpu(hp1).oper[3]^.reg); - taicpu(hp1).ops:=2; - taicpu(hp1).opcode:=A_MOV; - if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then - RemoveCurrentP(p); - Result:=true; - exit; - end - else if (taicpu(p).oper[1]^.typ = top_const) and - (taicpu(p).oper[1]^.val=0) and - MatchInstruction(hp1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[3]^) then - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); - DebugMsg('Peephole MovMLA2MUL 1 done', p); - taicpu(hp1).ops:=3; - taicpu(hp1).opcode:=A_MUL; - if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then - RemoveCurrentP(p); - Result:=true; - exit; - end - { - This changes the very common - mov r0, #0 - str r0, [...] - mov r0, #0 - str r0, [...] - - and removes all superfluous mov instructions - } - else if (taicpu(p).oper[1]^.typ = top_const) and - (taicpu(hp1).opcode=A_STR) then - while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and - GetNextInstruction(hp1, hp2) and - MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and - (taicpu(hp2).ops = 2) and - MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and - MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do - begin - DebugMsg('Peephole MovStrMov done', hp2); - GetNextInstruction(hp2,hp1); - asml.remove(hp2); - hp2.free; - result:=true; - if not assigned(hp1) then break; - end - { - This removes the first mov from - mov rX,... - mov rX,... 
- } - else if taicpu(hp1).opcode=A_MOV then - while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and - (taicpu(hp1).ops = 2) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and - { don't remove the first mov if the second is a mov rX,rX } - not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do - begin - DebugMsg('Peephole MovMov done', p); - asml.remove(p); - p.free; - p:=hp1; - GetNextInstruction(hp1,hp1); - result:=true; - if not assigned(hp1) then - break; - end; - if RedundantMovProcess(p,hp1) then - begin - Result:=true; - { p might not point at a mov anymore } - exit; - end; - end; - - { Fold the very common sequence - mov regA, regB - ldr* regA, [regA] - to - ldr* regA, [regB] - CAUTION! If this one is successful p might not be a mov instruction anymore! - } - if (taicpu(p).opcode = A_MOV) and - (taicpu(p).ops = 2) and - (taicpu(p).oper[1]^.typ = top_reg) and - (taicpu(p).oppostfix = PF_NONE) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and - (taicpu(hp1).oper[1]^.typ = top_ref) and - { We can change the base register only when the instruction uses AM_OFFSET } - ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or - ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and - (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)) - ) and - not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and - - // Make sure that Thumb code doesn't propagate a high register into a reference - ((GenerateThumbCode and - (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or - (not GenerateThumbCode)) and - - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then - begin - DebugMsg('Peephole MovLdr2Ldr done', hp1); - if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and - (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then - taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg; - - if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then - taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg; - - dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next)); - if Assigned(dealloc) then - begin - asml.remove(dealloc); - asml.InsertAfter(dealloc,hp1); - end; - - GetNextInstruction(p, hp1); - asml.remove(p); - p.free; - p:=hp1; - result:=true; - end; - - { This folds shifterops into following instructions - mov r0, r1, lsl #8 - add r2, r3, r0 - - to - - add r2, r3, r1, lsl #8 - CAUTION! If this one is successful p might not be a mov instruction anymore! 
- } - if (taicpu(p).opcode = A_MOV) and - (taicpu(p).ops = 3) and - (taicpu(p).oper[1]^.typ = top_reg) and - (taicpu(p).oper[2]^.typ = top_shifterop) and - (taicpu(p).oppostfix = PF_NONE) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC, - A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST, - A_CMP, A_CMN], - [taicpu(p).condition], [PF_None]) and - (not ((GenerateThumb2Code) and - (taicpu(hp1).opcode in [A_SBC]) and - (((taicpu(hp1).ops=3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or - ((taicpu(hp1).ops=2) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and - (taicpu(hp1).ops >= 2) and - {Currently we can't fold into another shifterop} - (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and - {Folding rrx is problematic because of the C-Flag, as we currently can't check - NR_DEFAULTFLAGS for modification} - ( - {Everything is fine if we don't use RRX} - (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or - ( - {If it is RRX, then check if we're just accessing the next instruction} - GetNextInstruction(p, hp2) and - (hp1 = hp2) - ) - ) and - { reg1 might not be modified inbetween } - not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and - { The shifterop can contain a register, might not be modified} - ( - (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or - not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1)) - ) and - ( - {Only ONE of the two src operands is allowed to match} - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^) - ) then - begin - if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then - I2:=0 - else - I2:=1; - for I:=I2 to taicpu(hp1).ops-1 do - if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then - begin - { If the parameter matched on the second op from the RIGHT - we have to switch the parameters, this will not happen for CMP - were we're only evaluating the most right parameter - } - if I <> taicpu(hp1).ops-1 then - begin - {The SUB operators need to be changed when we swap parameters} - case taicpu(hp1).opcode of - A_SUB: tempop:=A_RSB; - A_SBC: tempop:=A_RSC; - A_RSB: tempop:=A_SUB; - A_RSC: tempop:=A_SBC; - else tempop:=taicpu(hp1).opcode; - end; - if taicpu(hp1).ops = 3 then - hp2:=taicpu.op_reg_reg_reg_shifterop(tempop, - taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg, - taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^) - else - hp2:=taicpu.op_reg_reg_shifterop(tempop, - taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg, - taicpu(p).oper[2]^.shifterop^); - end - else - if taicpu(hp1).ops = 3 then - hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode, - taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg, - taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^) - else - hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode, - taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg, - taicpu(p).oper[2]^.shifterop^); - if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then - AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hp1,UsedRegs); - AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs); - asml.insertbefore(hp2, hp1); - GetNextInstruction(p, hp2); - asml.remove(p); - asml.remove(hp1); - p.free; - hp1.free; - p:=hp2; - DebugMsg('Peephole FoldShiftProcess done', p); - Result:=true; - break; - end; - end; - { - Fold - mov r1, r1, lsl #2 - ldr/ldrb r0, [r0, r1] - to - 
ldr/ldrb r0, [r0, r1, lsl #2] - - XXX: This still needs some work, as we quite often encounter something like - mov r1, r2, lsl #2 - add r2, r3, #imm - ldr r0, [r2, r1] - which can't be folded because r2 is overwritten between the shift and the ldr. - We could try to shuffle the registers around and fold it into. - add r1, r3, #imm - ldr r0, [r1, r2, lsl #2] - } - if (not(GenerateThumbCode)) and - (taicpu(p).opcode = A_MOV) and - (taicpu(p).ops = 3) and - (taicpu(p).oper[1]^.typ = top_reg) and - (taicpu(p).oper[2]^.typ = top_shifterop) and - { RRX is tough to handle, because it requires tracking the C-Flag, - it is also extremly unlikely to be emitted this way} - (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and - (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and - { thumb2 allows only lsl #0..#3 } - (not(GenerateThumb2Code) or - ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and - (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) - ) - ) and - (taicpu(p).oppostfix = PF_NONE) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - {Only LDR, LDRB, STR, STRB can handle scaled register indexing} - (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or - (GenerateThumb2Code and - MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH])) - ) and - ( - {If this is address by offset, one of the two registers can be used} - ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and - ( - (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor - (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) - ) - ) or - {For post and preindexed only the index register can be used} - ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and - ( - (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and - (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg) - ) and - (not GenerateThumb2Code) - ) - ) and - { Only fold if both registers are used. Otherwise we are folding p with itself } - (taicpu(hp1).oper[1]^.ref^.index<>NR_NO) and - (taicpu(hp1).oper[1]^.ref^.base<>NR_NO) and - { Only fold if there isn't another shifterop already, and offset is zero. 
} - (taicpu(hp1).oper[1]^.ref^.offset = 0) and - (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and - not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and - RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then - begin - { If the register we want to do the shift for resides in base, we need to swap that} - if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then - taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index; - taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg; - taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode; - taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm; - DebugMsg('Peephole FoldShiftLdrStr done', hp1); - GetNextInstruction(p, hp1); - asml.remove(p); - p.free; - p:=hp1; - Result:=true; - end; - { - Often we see shifts and then a superfluous mov to another register - In the future this might be handled in RedundantMovProcess when it uses RegisterTracking - } - if (taicpu(p).opcode = A_MOV) and - GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then - Result:=true; - end; - A_ADD, - A_ADC, - A_RSB, - A_RSC, - A_SUB, - A_SBC, - A_BIC, - A_EOR, - A_ORR, - A_MLA, - A_MLS, - A_MUL, - A_QADD,A_QADD16,A_QADD8, - A_QSUB,A_QSUB16,A_QSUB8, - A_QDADD,A_QDSUB,A_QASX,A_QSAX, - A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8, - A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8, - A_PKHTB,A_PKHBT, - A_SMUAD,A_SMUSD: - begin - { - change - add/sub reg2,reg1,const1 - str/ldr reg3,[reg2,const2] - dealloc reg2 - to - str/ldr reg3,[reg1,const2+/-const1] - } - if (not GenerateThumbCode) and - (taicpu(p).opcode in [A_ADD,A_SUB]) and - (taicpu(p).ops>2) and - (taicpu(p).oper[1]^.typ = top_reg) and - (taicpu(p).oper[2]^.typ = top_const) then - begin - hp1:=p; - while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and - { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition } - MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and - (taicpu(hp1).oper[1]^.typ = top_ref) and - (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and - { don't optimize if the register is stored/overwritten } - (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and - (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and - (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and - { new offset must be valid: either in the range of 8 or 12 bit, depend on the - ldr postfix } - (((taicpu(p).opcode=A_ADD) and - isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix) - ) or - ((taicpu(p).opcode=A_SUB) and - isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix) - ) - ) do - begin - { neither reg1 nor reg2 might be changed inbetween } - if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or - RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then - break; - { reg2 must be either overwritten by the ldr or it is deallocated afterwards } - if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or - assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then - begin - { remember last instruction } - hp2:=hp1; - DebugMsg('Peephole Add/SubLdr2Ldr done', p); - hp1:=p; - { fix all ldr/str } - while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do - begin - taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; - if taicpu(p).opcode=A_ADD then - inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val) - else - 
dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val); - if hp1=hp2 then - break; - end; - GetNextInstruction(p,hp1); - asml.remove(p); - p.free; - p:=hp1; - result:=true; - break; - end; - end; - end; - { - change - add reg1, ... - mov reg2, reg1 - to - add reg2, ... - } - if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - (taicpu(p).ops>=3) and - RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then - Result:=true; - - if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and - LookForPreindexedPattern(taicpu(p)) then - begin - GetNextInstruction(p,hp1); - DebugMsg('Peephole Add/Sub to Preindexed done', p); - asml.remove(p); - p.free; - p:=hp1; - Result:=true; - end; - { - Turn - mul reg0, z,w - sub/add x, y, reg0 - dealloc reg0 - - into - - mls/mla x,z,w,y - } - if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and - (taicpu(p).ops=3) and - (taicpu(p).oper[0]^.typ = top_reg) and - (taicpu(p).oper[1]^.typ = top_reg) and - (taicpu(p).oper[2]^.typ = top_reg) and - GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and - MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and - (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and - (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and - - (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or - ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and - - // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA. - // TODO: A workaround would be to swap Rm and Rs - (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and - - (((taicpu(hp1).ops=3) and - (taicpu(hp1).oper[2]^.typ=top_reg) and - ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and - (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or - ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and - (taicpu(hp1).opcode=A_ADD) and - (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or - ((taicpu(hp1).ops=2) and - (taicpu(hp1).oper[1]^.typ=top_reg) and - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and - (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then - begin - if taicpu(hp1).opcode=A_ADD then - begin - taicpu(hp1).opcode:=A_MLA; - - if taicpu(hp1).ops=3 then - begin - if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then - oldreg:=taicpu(hp1).oper[2]^.reg - else - oldreg:=taicpu(hp1).oper[1]^.reg; - end - else - oldreg:=taicpu(hp1).oper[0]^.reg; - - taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg); - taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg); - taicpu(hp1).loadreg(3,oldreg); - - DebugMsg('MulAdd2MLA done', p); - - taicpu(hp1).ops:=4; - - asml.remove(p); - p.free; - p:=hp1; - end - else - begin - taicpu(hp1).opcode:=A_MLS; - - - taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg); - - if taicpu(hp1).ops=2 then - taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg) - else - taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg); - - taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg); - - DebugMsg('MulSub2MLS done', p); - AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs); - AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs); - AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs); - - taicpu(hp1).ops:=4; - RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. 
[Kit] - end; - - result:=true; - end - end; -{$ifdef dummy} - A_MVN: - begin - { - change - mvn reg2,reg1 - and reg3,reg4,reg2 - dealloc reg2 - to - bic reg3,reg4,reg1 - } - if (taicpu(p).oper[1]^.typ = top_reg) and - GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and - MatchInstruction(hp1,A_AND,[],[]) and - (((taicpu(hp1).ops=3) and - (taicpu(hp1).oper[2]^.typ=top_reg) and - (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or - ((taicpu(hp1).ops=2) and - (taicpu(hp1).oper[1]^.typ=top_reg) and - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and - assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and - { reg1 might not be modified inbetween } - not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then - begin - DebugMsg('Peephole MvnAnd2Bic done', p); - taicpu(hp1).opcode:=A_BIC; - - if taicpu(hp1).ops=3 then - begin - if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then - taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands - - taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg); - end - else - taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); - GetNextInstruction(p, hp1); - asml.remove(p); - p.free; - p:=hp1; - end; - end; -{$endif dummy} - A_UXTB: - Result:=OptPass1UXTB(p); - A_UXTH: - Result:=OptPass1UXTH(p); - A_SXTB: - Result:=OptPass1SXTB(p); - A_SXTH: - Result:=OptPass1SXTH(p); - A_CMP: - begin - { - change - cmp reg,const1 - moveq reg,const1 - movne reg,const2 - to - cmp reg,const1 - movne reg,const2 - } - if (taicpu(p).oper[1]^.typ = top_const) and - GetNextInstruction(p, hp1) and - MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and - (taicpu(hp1).oper[1]^.typ = top_const) and - GetNextInstruction(hp1, hp2) and - MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and - (taicpu(hp1).oper[1]^.typ = top_const) then - begin - Result:=RemoveRedundantMove(p, hp1, asml) or Result; - Result:=RemoveRedundantMove(p, hp2, asml) or Result; - end; - end; - A_STM: - begin - { - change - stmfd r13!,[r14] - sub r13,r13,#4 - bl abc - add r13,r13,#4 - ldmfd r13!,[r15] - into - b abc - } - if not(ts_thumb_interworking in current_settings.targetswitches) and - MatchInstruction(p, A_STM, [C_None], [PF_FD]) and - GetNextInstruction(p, hp1) and - GetNextInstruction(hp1, hp2) and - SkipEntryExitMarker(hp2, hp2) and - GetNextInstruction(hp2, hp3) and - SkipEntryExitMarker(hp3, hp3) and - GetNextInstruction(hp3, hp4) and - (taicpu(p).oper[0]^.typ = top_ref) and - (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and - (taicpu(p).oper[0]^.ref^.base=NR_NO) and - (taicpu(p).oper[0]^.ref^.offset=0) and - (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and - (taicpu(p).oper[1]^.typ = top_regset) and - (taicpu(p).oper[1]^.regset^ = [RS_R14]) and - - MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and - (taicpu(hp1).oper[0]^.typ = top_reg) and - (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and - MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and - (taicpu(hp1).oper[2]^.typ = top_const) and - - MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and - MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and - MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and - MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and - - MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and - (taicpu(hp2).oper[0]^.typ = top_ref) and - - MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and - MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and 
- (taicpu(hp4).oper[1]^.typ = top_regset) and - (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then - begin - asml.Remove(p); - asml.Remove(hp1); - asml.Remove(hp3); - asml.Remove(hp4); - taicpu(hp2).opcode:=A_B; - p.free; - hp1.free; - hp3.free; - hp4.free; - p:=hp2; - DebugMsg('Peephole Bl2B done', p); - end; - end; - A_VMOV: - begin - { - change - vmov reg0,reg1,reg2 - vmov reg1,reg2,reg0 - into - vmov reg0,reg1,reg2 - - can be applied regardless if reg0 or reg2 is the vfp register - } - if (taicpu(p).ops = 3) and - GetNextInstruction(p, hp1) and - MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and - (taicpu(hp1).ops = 3) and - MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and - MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and - MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) then - begin - asml.Remove(hp1); - hp1.free; - DebugMsg('Peephole VMovVMov2VMov done', p); - end; - end; - A_AND: - Result:=OptPass1And(p); - A_VLDR, - A_VADD, - A_VMUL, - A_VDIV, - A_VSUB, - A_VSQRT, - A_VNEG, - A_VCVT, - A_VABS: - begin - if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and - RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then - Result:=true; + if not GetNextInstruction(p, hp1) then + Exit; end else - ; + hp1 := hp2; + end; + + { + change + reg,x,y + cmp reg,#0 + into + s reg,x,y + } + if (taicpu(p).oppostfix = PF_None) and + (taicpu(p).oper[1]^.val = 0) and + { be careful here, following instructions could use other flags + however after a jump fpc never depends on the value of flags } + { All above instructions set Z and N according to the following + Z := result = 0; + N := result[31]; + EQ = Z=1; NE = Z=0; + MI = N=1; PL = N=0; } + (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or + { mov is also possible, but only if there is no shifter operand, it could be an rxx, + we are too lazy to check if it is rxx or something else } + (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and + GetLastInstruction(p, hp_last) and + MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR, + A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and + ( + { mlas is only allowed in arm mode } + (taicpu(hp_last).opcode<>A_MLA) or + (current_settings.instructionset<>is_thumb) + ) and + (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and + assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then + begin + DebugMsg('Peephole Optimization: OpCmp2OpS done', hp_last); + + taicpu(hp_last).oppostfix:=PF_S; + + { move flag allocation if possible } + hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next)); + if assigned(hp1) then + begin + asml.Remove(hp1); + asml.insertbefore(hp1, hp_last); + end; + + RemoveCurrentP(p); + Result:=true; + end; + end; + end; + + + function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean; + var + hp1: tai; + begin + Result := False; + + { change + ldr reg1,ref + ldr reg2,ref + into ... + } + if (taicpu(p).oper[1]^.typ = top_ref) and + (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and + GetNextInstruction(p,hp1) and + { ldrd is not allowed here } + MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then + begin + { + ... 
+ ldr reg1,ref + mov reg2,reg1 + } + if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and + RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and + (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and + (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and + (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then + begin + if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then + begin + DebugMsg('Peephole Optimization: LdrLdr2Ldr done', hp1); + asml.remove(hp1); + hp1.free; + end + else + begin + DebugMsg('Peephole Optimization: LdrLdr2LdrMov done', hp1); + taicpu(hp1).opcode:=A_MOV; + taicpu(hp1).oppostfix:=PF_None; + taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); + end; + result := true; + end + { + ... + ldrd reg1,reg1+1,ref + } + else if (GenerateARMCode or GenerateThumb2Code) and + (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and + { ldrd does not allow any postfixes ... } + (taicpu(p).oppostfix=PF_None) and + not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and + (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and + { ldr ensures that either base or index contain no register, else ldr wouldn't + use an offset either + } + (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and + (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and + (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and + (abs(taicpu(p).oper[1]^.ref^.offset)<256) and + AlignedToQWord(taicpu(p).oper[1]^.ref^) then + begin + DebugMsg('Peephole Optimization: LdrLdr2Ldrd done', p); + taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^); + taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg); + taicpu(p).ops:=3; + taicpu(p).oppostfix:=PF_D; + asml.remove(hp1); + hp1.free; + result:=true; + end; + end; + + { + Change + + ldrb dst1, [REF] + and dst2, dst1, #255 + + into + + ldrb dst2, [ref] + } + if not(GenerateThumbCode) and + (taicpu(p).oppostfix=PF_B) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and + (taicpu(hp1).oper[2]^.typ = top_const) and + (taicpu(hp1).oper[2]^.val = $FF) and + not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + DebugMsg('Peephole Optimization: LdrbAnd2Ldrb done', p); + taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg; + asml.remove(hp1); + hp1.free; + result:=true; + end; + Result:=LookForPostindexedPattern(taicpu(p)) or Result; + { Remove superfluous mov after ldr + changes + ldr reg1, ref + mov reg2, reg1 + to + ldr reg2, ref + + conditions are: + * no ldrd usage + * reg1 must be released after mov + * mov can not contain shifterops + * ldr+mov have the same conditions + * mov does not set flags + } + if (taicpu(p).oppostfix<>PF_D) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then + Result:=true; + end; + + + function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean; + var + hp1, hp2, hp3, hp4: tai; + begin + Result := False; + + { + change + stmfd r13!,[r14] + sub r13,r13,#4 + bl abc + add r13,r13,#4 + ldmfd r13!,[r15] + into + b abc + } + if not(ts_thumb_interworking in current_settings.targetswitches) and + (taicpu(p).condition = C_None) and + (taicpu(p).oppostfix = PF_FD) and + (taicpu(p).oper[0]^.typ = top_ref) and + (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and + (taicpu(p).oper[0]^.ref^.base=NR_NO) 
and + (taicpu(p).oper[0]^.ref^.offset=0) and + (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and + (taicpu(p).oper[1]^.typ = top_regset) and + (taicpu(p).oper[1]^.regset^ = [RS_R14]) and + GetNextInstruction(p, hp1) and + MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and + (taicpu(hp1).oper[0]^.typ = top_reg) and + (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and + MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and + (taicpu(hp1).oper[2]^.typ = top_const) and + + GetNextInstruction(hp1, hp2) and + SkipEntryExitMarker(hp2, hp2) and + + MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and + (taicpu(hp2).oper[0]^.typ = top_ref) and + + GetNextInstruction(hp2, hp3) and + SkipEntryExitMarker(hp3, hp3) and + MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and + MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and + MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and + MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and + + GetNextInstruction(hp3, hp4) and + MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and + MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and + (taicpu(hp4).oper[1]^.typ = top_regset) and + (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then + begin + asml.Remove(hp1); + asml.Remove(hp3); + asml.Remove(hp4); + taicpu(hp2).opcode:=A_B; + hp1.free; + hp3.free; + hp4.free; + RemoveCurrentp(p, hp2); + DebugMsg('Peephole Optimization: Bl2B done', p); + Result := True; + end; + end; + + + + function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean; + var + hp1: tai; + begin + Result := False; + + { Common conditions } + if (taicpu(p).oper[1]^.typ = top_ref) and + (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and + (taicpu(p).oppostfix=PF_None) then + begin + { change + str reg1,ref + ldr reg2,ref + into + str reg1,ref + mov reg2,reg1 + } + if (taicpu(p).condition=C_None) and + GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and + MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and + (taicpu(hp1).oper[1]^.typ=top_ref) and + (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and + not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and + ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and + ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then + begin + if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then + begin + DebugMsg('Peephole Optimization: StrLdr2StrMov 1 done', hp1); + asml.remove(hp1); + hp1.free; + end + else + begin + taicpu(hp1).opcode:=A_MOV; + taicpu(hp1).oppostfix:=PF_None; + taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); + DebugMsg('Peephole Optimization: StrLdr2StrMov 2 done', hp1); + end; + result := True; + end + { change + str reg1,ref + str reg2,ref + into + strd reg1,reg2,ref + } + else if (GenerateARMCode or GenerateThumb2Code) and + (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and + not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and + (abs(taicpu(p).oper[1]^.ref^.offset)<256) and + AlignedToQWord(taicpu(p).oper[1]^.ref^) and + GetNextInstruction(p,hp1) and + MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and + (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and + { str ensures that either base or index contain no register, else ldr wouldn't + use an offset either + } + (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and + 
(taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and + (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then + begin + DebugMsg('Peephole Optimization: StrStr2Strd done', p); + taicpu(p).oppostfix:=PF_D; + taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^); + taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg); + taicpu(p).ops:=3; + asml.remove(hp1); + hp1.free; + result:=true; + end; + end; + + Result:=LookForPostindexedPattern(taicpu(p)) or Result; + end; + + + function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean; + var + hp1, hpfar1, hp2, hp3: tai; + i, i2: longint; + tempop: tasmop; + dealloc: tai_regalloc; + begin + Result := False; + hp1 := nil; + + { fold + mov reg1,reg0, shift imm1 + mov reg1,reg1, shift imm2 + } + if (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ = top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and + getnextinstruction(p,hp1) and + MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and + (taicpu(hp1).ops=3) and + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (taicpu(hp1).oper[2]^.typ = top_shifterop) and + (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then + begin + { fold + mov reg1,reg0, lsl 16 + mov reg1,reg1, lsr 16 + strh reg1, ... + dealloc reg1 + to + strh reg1, ... + dealloc reg1 + } + if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and + (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and + (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and + (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and + getnextinstruction(hp1,hp2) and + MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and + MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.next)); + UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); + if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then + begin + DebugMsg('Peephole Optimization: removed superfluous 16 Bit zero extension', hp1); + taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(hp1); + hp1.free; + + RemoveCurrentP(p, hp2); + Result:=true; + Exit; + end; + end + { fold + mov reg1,reg0, shift imm1 + mov reg1,reg1, shift imm2 + to + mov reg1,reg0, shift imm1+imm2 + } + else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or + { asr makes no use after a lsr, the asr can be foled into the lsr } + ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then + begin + inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm); + { avoid overflows } + if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then + case taicpu(p).oper[2]^.shifterop^.shiftmode of + SM_ROR: + taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31; + SM_ASR: + taicpu(p).oper[2]^.shifterop^.shiftimm:=31; + SM_LSR, + SM_LSL: + begin + hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0); + InsertLLItem(p.previous, p.next, hp2); + p.free; + p:=hp2; + end; + else + internalerror(2008072803); + end; + DebugMsg('Peephole Optimization: ShiftShift2Shift 1 done', p); + asml.remove(hp1); + hp1.free; + hp1 := nil; + result := true; + end + { fold + mov reg1,reg0, shift imm1 + mov reg1,reg1, shift imm2 + mov reg1,reg1, shift imm3 ... + mov reg2,reg1, shift imm3 ... 
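+
+            an illustrative instance (registers and shift amounts chosen
+            arbitrarily): provided reg1 dies afterwards and reg2 is not
+            used in between,
+              mov r1,r0, lsl #16
+              mov r1,r1, lsr #8
+              mov r2,r1, lsl #8
+            is rewritten by case 1a below into
+              mov r2,r0, lsl #16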
+ } + else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and + MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and + (taicpu(hp2).ops=3) and + MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and + RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and + (taicpu(hp2).oper[2]^.typ = top_shifterop) and + (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then + begin + { mov reg1,reg0, lsl imm1 + mov reg1,reg1, lsr/asr imm2 + mov reg2,reg1, lsl imm3 ... + to + mov reg1,reg0, lsl imm1 + mov reg2,reg1, lsr/asr imm2-imm3 + if + imm1>=imm2 + } + if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and + (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and + (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then + begin + if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then + begin + if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and + not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then + begin + DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 1a done', p); + inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm); + taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg; + asml.remove(hp1); + asml.remove(hp2); + hp1.free; + hp2.free; + + if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then + begin + taicpu(p).freeop(1); + taicpu(p).freeop(2); + taicpu(p).loadconst(1,0); + end; + result := true; + Exit; + end; + end + else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then + begin + DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 1b done', p); + + dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm); + taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg; + asml.remove(hp2); + hp2.free; + result := true; + Exit; + end; + end + { mov reg1,reg0, lsr/asr imm1 + mov reg1,reg1, lsl imm2 + mov reg1,reg1, lsr/asr imm3 ... + + if imm3>=imm1 and imm2>=imm1 + to + mov reg1,reg0, lsl imm2-imm1 + mov reg1,reg1, lsr/asr imm3 ... + } + else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and + (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and + (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and + (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then + begin + dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm); + taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg; + DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 2 done', p); + if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then + begin + taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg; + asml.remove(hp1); + hp1.free; + end; + + RemoveCurrentp(p); + result := true; + Exit; + end; + end; + end; + + { All the optimisations from this point on require GetNextInstructionUsingReg + to return True } + if not ( + GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and + (hpfar1.typ = ait_instruction) + ) then + Exit; + + { Change the common + mov r0, r0, lsr #xxx + and r0, r0, #yyy/bic r0, r0, #xxx + + and remove the superfluous and/bic if possible + + This could be extended to handle more cases. 
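+
+        As an illustrative instance (shift amount and mask chosen arbitrarily):
+        after
+          mov r0, r0, lsr #24
+        only the low 8 bits of r0 can be non-zero, so a following
+          and r0, r0, #255
+        masks nothing and is dropped by the LsrAnd2Lsr branch below; the
+        LsrBic2Lsr branch handles the corresponding bic form.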
+ } + + { Change + mov rx, ry, lsr/ror #xxx + uxtb/uxth rz,rx/and rz,rx,0xFF + dealloc rx + + to + + uxtb/uxth rz,ry,ror #xxx + } + if (GenerateThumb2Code) and + (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ = top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and + (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then + begin + if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and + (taicpu(hpfar1).ops = 2) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and + MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + taicpu(hpfar1).ops := 3; + + if not Assigned(hp1) then + GetNextInstruction(p,hp1); + + RemoveCurrentP(p, hp1); + + result:=true; + exit; + end + else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and + (taicpu(hpfar1).ops=2) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and + MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + taicpu(hpfar1).ops := 3; + + if not Assigned(hp1) then + GetNextInstruction(p,hp1); + + RemoveCurrentP(p, hp1); + + result:=true; + exit; + end + else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and + (taicpu(hpfar1).ops = 3) and + (taicpu(hpfar1).oper[2]^.typ = top_const) and + (taicpu(hpfar1).oper[2]^.val = $FF) and + (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and + MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then + begin + taicpu(hpfar1).ops := 3; + taicpu(hpfar1).opcode := A_UXTB; + taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg; + taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^); + taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR; + + if not Assigned(hp1) then + GetNextInstruction(p,hp1); + + RemoveCurrentP(p, hp1); + + result:=true; + exit; + end; + end; + + { 2-operald mov optimisations } + if (taicpu(p).ops = 2) then + begin + { + This removes the mul from + mov rX,0 + ... + mul ...,rX,... 
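+
+          e.g. (illustrative registers) with r0 known to be zero,
+            mov r0,#0
+            ...
+            mla r1,r2,r3,r0
+          becomes
+            mul r1,r2,r3
+          and the mov itself is dropped when r0 is not used afterwards
+          (MovMLA2MUL below)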
+ } + if (taicpu(p).oper[1]^.typ = top_const) then + begin +(* if false and + (taicpu(p).oper[1]^.val=0) and + MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and + (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or + ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.next)); + UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next)); + DebugMsg('Peephole Optimization: MovMUL/MLA2Mov0 done', p); + if taicpu(hpfar1).opcode=A_MUL then + taicpu(hpfar1).loadconst(1,0) + else + taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg); + taicpu(hpfar1).ops:=2; + taicpu(hpfar1).opcode:=A_MOV; + if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then + RemoveCurrentP(p); + Result:=true; + exit; + end + else*) if (taicpu(p).oper[1]^.val=0) and + MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.next)); + UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next)); + DebugMsg('Peephole Optimization: MovMLA2MUL 1 done', p); + taicpu(hpfar1).ops:=3; + taicpu(hpfar1).opcode:=A_MUL; + if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then + begin + RemoveCurrentP(p); + Result:=true; + end; + exit; + end + { + This changes the very common + mov r0, #0 + str r0, [...] + mov r0, #0 + str r0, [...] + + and removes all superfluous mov instructions + } + else if (taicpu(hpfar1).opcode=A_STR) then + begin + hp1 := hpfar1; + while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and + (taicpu(hp2).ops = 2) and + MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and + MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do + begin + DebugMsg('Peephole Optimization: MovStrMov done', hp2); + GetNextInstruction(hp2,hp1); + asml.remove(hp2); + hp2.free; + result:=true; + if not assigned(hp1) then break; + end; + + if Result then + Exit; end; + end; + { + This removes the first mov from + mov rX,... + mov rX,... + } + if taicpu(hpfar1).opcode=A_MOV then + begin + hp1 := p; + while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and + (taicpu(hpfar1).ops = 2) and + MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and + { don't remove the first mov if the second is a mov rX,rX } + not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do + begin + { Defer removing the first p until after the while loop } + if p <> hp1 then + begin + DebugMsg('Peephole Optimization: MovMov done', hp1); + asml.remove(hp1); + hp1.free; + end; + hp1:=hpfar1; + GetNextInstruction(hpfar1,hpfar1); + result:=true; + if not assigned(hpfar1) then + Break; + end; + + if Result then + begin + DebugMsg('Peephole Optimization: MovMov done', p); + RemoveCurrentp(p); + Exit; + end; + end; + + if RedundantMovProcess(p,hpfar1) then + begin + Result:=true; + { p might not point at a mov anymore } + exit; + end; + + { Fold the very common sequence + mov regA, regB + ldr* regA, [regA] + to + ldr* regA, [regB] + CAUTION! If this one is successful p might not be a mov instruction anymore! 
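+
+          (also applied when regA is the index register of the reference;
+          regA must not be live after the ldr/str, regB must be unchanged
+          in between and, for Thumb, regB must be a low register)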
+ } + if + // Make sure that Thumb code doesn't propagate a high register into a reference + ( + ( + GenerateThumbCode and + (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8) + ) or (not GenerateThumbCode) + ) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oppostfix = PF_NONE) and + MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and + (taicpu(hpfar1).oper[1]^.typ = top_ref) and + { We can change the base register only when the instruction uses AM_OFFSET } + ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or + ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and + (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)) + ) and + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then + begin + DebugMsg('Peephole Optimization: MovLdr2Ldr done', hpfar1); + if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and + (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then + taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg; + + if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then + taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg; + + dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next)); + if Assigned(dealloc) then + begin + asml.remove(dealloc); + asml.InsertAfter(dealloc,hpfar1); + end; + + if not Assigned(hp1) then + GetNextInstruction(p, hp1); + + RemoveCurrentP(p, hp1); + + result:=true; + Exit; + end + end + + { 3-operald mov optimisations } + else if (taicpu(p).ops = 3) then + begin + + if (taicpu(p).oper[2]^.typ = top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and + (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and + (taicpu(hpfar1).ops>=1) and + (taicpu(hpfar1).oper[0]^.typ=top_reg) and + (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then + begin + if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and + MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and + (taicpu(hpfar1).ops=3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and + (taicpu(hpfar1).oper[2]^.typ = top_const) and + { Check if the AND actually would only mask out bits being already zero because of the shift + } + ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) = + ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then + begin + DebugMsg('Peephole Optimization: LsrAnd2Lsr done', hpfar1); + taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg; + asml.remove(hpfar1); + hpfar1.free; + result:=true; + Exit; + end + else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and + (taicpu(hpfar1).ops=3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and + (taicpu(hpfar1).oper[2]^.typ = top_const) and + { Check if the BIC actually would only mask out bits beeing already zero because of the shift } + (taicpu(hpfar1).oper[2]^.val<>0) and + (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then + begin + DebugMsg('Peephole Optimization: LsrBic2Lsr done', hpfar1); + taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg; + asml.remove(hpfar1); + hpfar1.free; + result:=true; + Exit; + end; + end; + { This folds shifterops into following instructions + mov r0, r1, lsl #8 + add r2, r3, r0 + + to + + add r2, r3, r1, lsl #8 + CAUTION! If this one is successful p might not be a mov instruction anymore! 
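+
+          For the non-commutative sub/sbc/rsb/rsc the opcode is swapped to its
+          counterpart when the folded operand is the first source operand,
+          e.g. (illustrative registers)
+            mov r0, r1, lsl #8
+            sub r2, r0, r3
+          becomes
+            rsb r2, r3, r1, lsl #8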
+ } + if (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oper[2]^.typ = top_shifterop) and + (taicpu(p).oppostfix = PF_NONE) and + MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC, + A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST, + A_CMP, A_CMN], + [taicpu(p).condition], [PF_None]) and + (not ((GenerateThumb2Code) and + (taicpu(hpfar1).opcode in [A_SBC]) and + (((taicpu(hpfar1).ops=3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or + ((taicpu(hpfar1).ops=2) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and + (taicpu(hpfar1).ops >= 2) and + {Currently we can't fold into another shifterop} + (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and + {Folding rrx is problematic because of the C-Flag, as we currently can't check + NR_DEFAULTFLAGS for modification} + ( + {Everything is fine if we don't use RRX} + (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or + ( + {If it is RRX, then check if we're just accessing the next instruction} + Assigned(hp1) and + (hpfar1 = hp1) + ) + ) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and + { The shifterop can contain a register, might not be modified} + ( + (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or + not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1)) + ) and + ( + {Only ONE of the two src operands is allowed to match} + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor + MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^) + ) then + begin + if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then + I2:=0 + else + I2:=1; + for I:=I2 to taicpu(hpfar1).ops-1 do + if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then + begin + { If the parameter matched on the second op from the RIGHT + we have to switch the parameters, this will not happen for CMP + were we're only evaluating the most right parameter + } + if I <> taicpu(hpfar1).ops-1 then + begin + {The SUB operators need to be changed when we swap parameters} + case taicpu(hpfar1).opcode of + A_SUB: tempop:=A_RSB; + A_SBC: tempop:=A_RSC; + A_RSB: tempop:=A_SUB; + A_RSC: tempop:=A_SBC; + else tempop:=taicpu(hpfar1).opcode; + end; + if taicpu(hpfar1).ops = 3 then + hp2:=taicpu.op_reg_reg_reg_shifterop(tempop, + taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg, + taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^) + else + hp2:=taicpu.op_reg_reg_shifterop(tempop, + taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg, + taicpu(p).oper[2]^.shifterop^); + end + else + if taicpu(hpfar1).ops = 3 then + hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode, + taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg, + taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^) + else + hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode, + taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg, + taicpu(p).oper[2]^.shifterop^); + if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then + AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs); + AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs); + asml.insertbefore(hp2, hpfar1); + asml.remove(hpfar1); + hpfar1.free; + DebugMsg('Peephole Optimization: FoldShiftProcess done', hp2); + + if not Assigned(hp1) then + GetNextInstruction(p, hp1) + else if hp1 = hpfar1 then + { If hp1 = hpfar1, then it's a dangling pointer } + hp1 := hp2; + + RemoveCurrentP(p, 
hp1); + Result:=true; + Exit; + end; end; - else - ; - end; + { + Fold + mov r1, r1, lsl #2 + ldr/ldrb r0, [r0, r1] + to + ldr/ldrb r0, [r0, r1, lsl #2] + + XXX: This still needs some work, as we quite often encounter something like + mov r1, r2, lsl #2 + add r2, r3, #imm + ldr r0, [r2, r1] + which can't be folded because r2 is overwritten between the shift and the ldr. + We could try to shuffle the registers around and fold it into. + add r1, r3, #imm + ldr r0, [r1, r2, lsl #2] + } + if (not(GenerateThumbCode)) and + { thumb2 allows only lsl #0..#3 } + (not(GenerateThumb2Code) or + ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and + (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) + ) + ) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oper[2]^.typ = top_shifterop) and + { RRX is tough to handle, because it requires tracking the C-Flag, + it is also extremly unlikely to be emitted this way} + (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and + (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and + (taicpu(p).oppostfix = PF_NONE) and + {Only LDR, LDRB, STR, STRB can handle scaled register indexing} + (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or + (GenerateThumb2Code and + MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH])) + ) and + ( + {If this is address by offset, one of the two registers can be used} + ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and + ( + (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor + (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) + ) + ) or + {For post and preindexed only the index register can be used} + ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and + ( + (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and + (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg) + ) and + (not GenerateThumb2Code) + ) + ) and + { Only fold if both registers are used. Otherwise we are folding p with itself } + (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and + (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and + { Only fold if there isn't another shifterop already, and offset is zero. 
} + (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and + (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then + begin + { If the register we want to do the shift for resides in base, we need to swap that} + if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then + taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index; + taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg; + taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode; + taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm; + DebugMsg('Peephole Optimization: FoldShiftLdrStr done', hpfar1); + RemoveCurrentP(p); + Result:=true; + Exit; + end; + end; + { + Often we see shifts and then a superfluous mov to another register + In the future this might be handled in RedundantMovProcess when it uses RegisterTracking + } + if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then + Result:=true; + end; + + + function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean; + var + hp1: tai; + begin + { + change + mvn reg2,reg1 + and reg3,reg4,reg2 + dealloc reg2 + to + bic reg3,reg4,reg1 + } + Result := False; + if (taicpu(p).oper[1]^.typ = top_reg) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1,A_AND,[],[]) and + (((taicpu(hp1).ops=3) and + (taicpu(hp1).oper[2]^.typ=top_reg) and + (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or + ((taicpu(hp1).ops=2) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and + assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then + begin + DebugMsg('Peephole Optimization: MvnAnd2Bic done', p); + taicpu(hp1).opcode:=A_BIC; + + if taicpu(hp1).ops=3 then + begin + if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then + taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands + + taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg); + end + else + taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); + + RemoveCurrentp(p); + Result := True; + Exit; + end; + end; + + + function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean; + var + hp1: tai; + begin + { + change + vmov reg0,reg1,reg2 + vmov reg1,reg2,reg0 + into + vmov reg0,reg1,reg2 + + can be applied regardless if reg0 or reg2 is the vfp register + } + Result := False; + if (taicpu(p).ops = 3) then + while GetNextInstruction(p, hp1) and + MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and + (taicpu(hp1).ops = 3) and + MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and + MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and + MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do + begin + asml.Remove(hp1); + hp1.free; + DebugMsg('Peephole Optimization: VMovVMov2VMov done', p); + { Can we do it again? 
} + end; + end; + + + function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean; + var + hp1: tai; + begin + Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp'); + end; + + + function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; + begin + result := false; + if p.typ = ait_instruction then + begin + case taicpu(p).opcode of + A_CMP: + Result := OptPass1CMP(p); + A_STR: + Result := OptPass1STR(p); + A_LDR: + Result := OptPass1LDR(p); + A_MOV: + Result := OptPass1MOV(p); + A_AND: + Result := OptPass1And(p); + A_ADD, + A_SUB: + Result := OptPass1ADDSUB(p); + A_MUL: + REsult := OptPass1MUL(p); + A_ADC, + A_RSB, + A_RSC, + A_SBC, + A_BIC, + A_EOR, + A_ORR, + A_MLA, + A_MLS, + A_QADD,A_QADD16,A_QADD8, + A_QSUB,A_QSUB16,A_QSUB8, + A_QDADD,A_QDSUB,A_QASX,A_QSAX, + A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8, + A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8, + A_PKHTB,A_PKHBT, + A_SMUAD,A_SMUSD: + Result := OptPass1DataCheckMov(p); +{$ifdef dummy} + A_MVN: + Result := OPtPass1MVN(p); +{$endif dummy} + A_UXTB: + Result := OptPass1UXTB(p); + A_UXTH: + Result := OptPass1UXTH(p); + A_SXTB: + Result := OptPass1SXTB(p); + A_SXTH: + Result := OptPass1SXTH(p); + A_STM: + Result := OptPass1STM(p); + A_VMOV: + Result := OptPass1VMov(p); + A_VLDR, + A_VADD, + A_VMUL, + A_VDIV, + A_VSUB, + A_VSQRT, + A_VNEG, + A_VCVT, + A_VABS: + Result := OptPass1VOp(p); + else + ; + end; + end; end; diff --git a/compiler/armgen/aoptarm.pas b/compiler/armgen/aoptarm.pas index 9e919445d7..ecb58fd8b4 100644 --- a/compiler/armgen/aoptarm.pas +++ b/compiler/armgen/aoptarm.pas @@ -47,7 +47,7 @@ Type function OptPass1UXTH(var p: tai): Boolean; function OptPass1SXTB(var p: tai): Boolean; function OptPass1SXTH(var p: tai): Boolean; - function OptPass1And(var p: tai): Boolean; + function OptPass1And(var p: tai): Boolean; virtual; End; function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean; @@ -170,18 +170,26 @@ Implementation function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean; + var + gniResult: Boolean; begin Next:=Current; + Result := False; repeat - Result:=GetNextInstruction(Next,Next); - until not (Result) or - not(cs_opt_level3 in current_settings.optimizerswitches) or - (Next.typ<>ait_instruction) or - RegInInstruction(reg,Next) or - is_calljmp(taicpu(Next).opcode) + + gniResult:=GetNextInstruction(Next,Next); + if gniResult and RegInInstruction(reg,Next) then + { Found something } + Exit(True); + + until not gniResult or + not(cs_opt_level3 in current_settings.optimizerswitches) or + (Next.typ<>ait_instruction) or + is_calljmp(taicpu(Next).opcode) {$ifdef ARM} - or RegModifiedByInstruction(NR_PC,Next); + or RegModifiedByInstruction(NR_PC,Next) {$endif ARM} + ; end; diff --git a/compiler/htypechk.pas b/compiler/htypechk.pas index b989fb2259..425dec407c 100644 --- a/compiler/htypechk.pas +++ b/compiler/htypechk.pas @@ -1803,6 +1803,7 @@ implementation mayberesettypeconvs; exit; end; + arrayconstructorn, setconstn, stringconstn, guidconstn : @@ -2106,6 +2107,7 @@ implementation (tstringdef(def_to).encoding=tstringdef(p.resultdef).encoding) then eq:=te_equal end; + formaldef, setdef : begin { set can also be a not yet converted array constructor } diff --git a/compiler/ncal.pas b/compiler/ncal.pas index 7dfe331509..ab9001d972 100644 --- a/compiler/ncal.pas +++ b/compiler/ncal.pas @@ -1192,6 +1192,13 @@ implementation 
(parasym.vardef.typ=setdef) then inserttypeconv(left,parasym.vardef); + { if an array constructor can be a set and it is passed to + a formaldef, a set must be passed, see also issue #37796 } + if (left.nodetype=arrayconstructorn) and + (parasym.vardef.typ=formaldef) and + (arrayconstructor_can_be_set(left)) then + left:=arrayconstructor_to_set(left,false); + { set some settings needed for arrayconstructor } if is_array_constructor(left.resultdef) then begin diff --git a/compiler/nflw.pas b/compiler/nflw.pas index ad8ea8f1f9..210de18b5f 100644 --- a/compiler/nflw.pas +++ b/compiler/nflw.pas @@ -2193,7 +2193,8 @@ implementation p2:=current_procinfo; while true do begin - if (p2.flags*[pi_needs_implicit_finally,pi_uses_exceptions,pi_has_implicit_finally])<>[] then + if ((cs_implicit_exceptions in current_settings.moduleswitches) and ((p2.flags*[pi_needs_implicit_finally,pi_has_implicit_finally])<>[])) or + ((p2.flags*[pi_uses_exceptions])<>[]) then Message(cg_e_goto_across_procedures_with_exceptions_not_allowed); if labelsym.owner=p2.procdef.localst then break; diff --git a/packages/fcl-passrc/src/pasresolver.pp b/packages/fcl-passrc/src/pasresolver.pp index a140573ccf..f693ed30be 100644 --- a/packages/fcl-passrc/src/pasresolver.pp +++ b/packages/fcl-passrc/src/pasresolver.pp @@ -16315,7 +16315,7 @@ begin ParamType,ConstraintClass,ErrorPos); exit(cIncompatible); end; - if TPasClassType(ParamType).ObjKind<>okClass then + if not (TPasClassType(ParamType).ObjKind in [okClass,okInterface]) then begin if ErrorPos<>nil then RaiseMsg(20190904175144,nXExpectedButYFound,sXExpectedButYFound, @@ -29830,7 +29830,7 @@ begin Result:=nil; while ClassEl<>nil do begin - if IndexOfImplementedInterface(ClassEl,Intf)>=0 then + if (ClassEl=Intf) or (IndexOfImplementedInterface(ClassEl,Intf)>=0) then exit(ClassEl); ClassEl:=GetPasClassAncestor(ClassEl,true) as TPasClassType; end; diff --git a/packages/pastojs/src/fppas2js.pp b/packages/pastojs/src/fppas2js.pp index dfdfbafd21..77447d848e 100644 --- a/packages/pastojs/src/fppas2js.pp +++ b/packages/pastojs/src/fppas2js.pp @@ -5658,12 +5658,18 @@ begin else if not (ConEl is TPasType) then RaiseNotYetImplemented(20191018180031,ConEl,GetObjPath(Param)); - if ConEl is TPasClassType then - begin - if TPasClassType(ConEl).IsExternal then - TIName:=Pas2JSBuiltInNames[pbitnTIExtClass] + TypeEl:=ResolveAliasType(TPasType(ConEl)); + if TypeEl is TPasClassType then + case TPasClassType(TypeEl).ObjKind of + okClass: + if TPasClassType(TypeEl).IsExternal then + TIName:=Pas2JSBuiltInNames[pbitnTIExtClass] + else + TIName:=Pas2JSBuiltInNames[pbitnTIClass]; + okInterface: + TIName:=Pas2JSBuiltInNames[pbitnTIInterface]; else - TIName:=Pas2JSBuiltInNames[pbitnTIClass]; + RaiseNotYetImplemented(20200927100825,ConEl,GetObjPath(Param)); end else RaiseNotYetImplemented(20191018180131,ConEl,GetObjPath(Param)); diff --git a/packages/pastojs/tests/tcgenerics.pas b/packages/pastojs/tests/tcgenerics.pas index 7e299330b5..d0fa2ffeb8 100644 --- a/packages/pastojs/tests/tcgenerics.pas +++ b/packages/pastojs/tests/tcgenerics.pas @@ -52,6 +52,7 @@ type // class interfaces procedure TestGen_ClassInterface_Corba; procedure TestGen_ClassInterface_InterfacedObject; + procedure TestGen_ClassInterface_COM_RTTI; // statements Procedure TestGen_InlineSpec_Constructor; @@ -1478,6 +1479,46 @@ begin ''])); end; +procedure TTestGenerics.TestGen_ClassInterface_COM_RTTI; +begin + StartProgram(true,[supTInterfacedObject]); + Add([ + '{$mode delphi}', + 'type', + ' TBird = class', + ' function Fly: T;', + ' 
end;', + ' IAnt = interface', + ' procedure InterfaceProc;', + ' end;', + 'function TBird.Fly: T;', + 'begin', + ' if TypeInfo(T)=nil then ;', + 'end;', + 'var Bird: TBird;', + ' Ant: IAnt;', + 'begin', + ' Ant := Bird.Fly;', + '']); + ConvertProgram; + CheckSource('TestGen_ClassInterface_COM_RTTI', + LinesToStr([ // statements + 'rtl.createClass(this, "TBird", pas.system.TObject, function () {', + ' this.Fly$G1 = function () {', + ' var Result = null;', + ' if ($mod.$rtti["IAnt"] === null) ;', + ' return Result;', + ' };', + '});', + 'rtl.createInterface(this, "IAnt", "{B9D0FF27-A446-3A1B-AA85-F167837AA297}", ["InterfaceProc"], pas.system.IUnknown);', + 'this.Bird = null;', + 'this.Ant = null;', + '']), + LinesToStr([ // $mod.$main + 'rtl.setIntfP($mod, "Ant", $mod.Bird.Fly$G1(), true);', + ''])); +end; + procedure TTestGenerics.TestGen_InlineSpec_Constructor; begin StartProgram(false); diff --git a/rtl/objpas/math.pp b/rtl/objpas/math.pp index f87c811e0a..e6f3b511aa 100644 --- a/rtl/objpas/math.pp +++ b/rtl/objpas/math.pp @@ -509,6 +509,7 @@ function MaxValue(const data : PInteger; Const N : Integer) : Integer; { returns random values with gaussian distribution } function RandG(mean,stddev : float) : float; + function RandomRange(const aFrom, aTo: Integer): Integer; function RandomRange(const aFrom, aTo: Int64): Int64; diff --git a/tests/webtbs/tw37796.pp b/tests/webtbs/tw37796.pp new file mode 100644 index 0000000000..98aa6f93a9 --- /dev/null +++ b/tests/webtbs/tw37796.pp @@ -0,0 +1,38 @@ +program tformal; +{$mode objfpc} + +uses + sysutils; + +type + TFontStyle = ( + fsItalic, + fsBold, + fsUnderlined, + fsStrikeOut + ); + TFontStyles = set of TFontStyle; + +var aFS: TFontStyles; + +procedure Any(const Anything); +begin + aFS:=aFS+TFontStyles(Anything); + Writeln(IntToHex(PLongInt(@Anything)^, 8)); +end; + +procedure DoIt; +begin + Any([fsItalic, fsBold]); //unit1.pas(31,25) Error: Variable identifier expected + if aFS<>[fsItalic, fsBold] then + halt(1); + Any(Cardinal([fsItalic, fsBold])); //ok +end; + +begin + aFS:=[]; + writeln(Cardinal(aFS)); + DoIt; + writeln(Cardinal(aFS)); + writeln('ok'); +end. diff --git a/tests/webtbs/tw37823.pp b/tests/webtbs/tw37823.pp new file mode 100644 index 0000000000..180f9dcd12 --- /dev/null +++ b/tests/webtbs/tw37823.pp @@ -0,0 +1,21 @@ +{$MODE ISO} +{$implicitExceptions off} +{$Q+} +{$R+} +program gt; + label 1; + procedure jump; + var + a: integer; + b: rawbytestring; + begin + b := 'nanu'; + writeln('nanu'); + goto 1; + end; +begin + jump; + writeln('not jumped!'); +1: +writeln('jumped!'); +end.