diff --git a/compiler/aarch64/aoptcpu.pas b/compiler/aarch64/aoptcpu.pas index 42d54dd898..199a5a6eb1 100644 --- a/compiler/aarch64/aoptcpu.pas +++ b/compiler/aarch64/aoptcpu.pas @@ -62,6 +62,7 @@ Interface function OptPass1Mov(var p: tai): boolean; function OptPass1MOVZ(var p: tai): boolean; function OptPass1FMov(var p: tai): Boolean; + function OptPass1SXTW(var p: tai): Boolean; function OptPass2LDRSTR(var p: tai): boolean; End; @@ -715,6 +716,72 @@ Implementation end; + function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean; + var + hp1: tai; + begin + Result:=false; + { + change + sxtw reg2,reg1 + str reg2,[...] + dealloc reg2 + to + str reg1,[...] + } + if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and + (taicpu(p).ops=2) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and + (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and + RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and + { the reference in strb might not use reg2 } + not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then + begin + DebugMsg('Peephole SXTHStr2Str done', p); + taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg); + GetNextInstruction(p, hp1); + asml.remove(p); + p.free; + p:=hp1; + result:=true; + end + { + change + sxtw reg2,reg1 + sxtw reg3,reg2 + dealloc reg2 + to + sxtw reg3,reg1 + } + else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and + (taicpu(p).ops=2) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and + (taicpu(hp1).ops=2) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then + begin + DebugMsg('Peephole SxtwSxtw2Sxtw done', p); + AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs); + taicpu(hp1).opcode:=A_SXTW; + taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); + GetNextInstruction(p, hp1); + asml.remove(p); + p.free; + p:=hp1; + result:=true; + end + else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then + Result:=true; + end; + + function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean; var hp1, hp1_last: tai; @@ -1018,6 +1085,8 @@ Implementation Result:=OptPass1SXTB(p); A_SXTH: Result:=OptPass1SXTH(p); + A_SXTW: + Result:=OptPass1SXTW(p); // A_VLDR, A_FMADD, A_FMSUB,