diff --git a/compiler/aarch64/aoptcpu.pas b/compiler/aarch64/aoptcpu.pas index 5594c2550c..734de5a56a 100644 --- a/compiler/aarch64/aoptcpu.pas +++ b/compiler/aarch64/aoptcpu.pas @@ -40,12 +40,15 @@ Interface function PeepHoleOptPass1Cpu(var p: tai): boolean; override; function PostPeepHoleOptsCpu(var p: tai): boolean; override; function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override; + function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override; { always false when hp is unassigned or is not an ait_instruction } function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean; function LookForPostindexedPattern(p : taicpu) : boolean; procedure DebugMsg(const s : string; p : tai); private function OptPass1Shift(var p: tai): boolean; function OptPostCMP(var p: tai): boolean; + function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean; { rewrites operand 0 of p to the target register of the mov movp and deletes movp; result is set to true on the path that removes movp; optimizer is only used in the debug message } + function OptPass1Data(var p: tai): boolean; { called from the opcode case for A_ADD, A_ADC, A_SUB, A_SBC, A_AND, A_BIC, A_EOR, A_ORR and A_MUL; passes the instruction returned by GetNextInstructionUsingReg for the register in operand 0 of p to RemoveSuperfluousMove with the tag 'DataMov2Data' } End; Implementation @@ -195,6 +198,119 @@ Implementation end; + function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; { Result starts as false and the routine exits at once unless hp is an assigned ait_instruction. The operand index i starts at 1, or at 0 when operand 0 is read by the instruction. } + var + p: taicpu; + i: longint; + begin + instructionLoadsFromReg := false; + if not (assigned(hp) and (hp.typ = ait_instruction)) then + exit; + p:=taicpu(hp); + + i:=1; + + { Start on oper[0]? Only when spilling_get_operation_type(0) reports operand_read or operand_readwrite; otherwise i stays at 1. } + if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then + i:=0; + { NOTE(review): the text appears to be truncated right after "while(i" - the condition below is not a complete statement, and the code that follows uses movp, dealloc, alloc and hp1, none of which are declared in InstructionLoadsFromReg. The rest of this function and the header and var section of RemoveSuperfluousMove seem to be missing; restore them from the upstream patch before applying. } + while(i=3) and + { We can't optimize if there is a shiftop } + (taicpu(movp).ops=2) and + MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and + { don't mess with moves to fp } + (taicpu(movp).oper[0]^.reg<>NR_FP) and + { the destination register of the mov must not be used between p and movp } + not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and + { Take care to only do this for instructions which REALLY load to the first register. 
+ Otherwise + str reg0, [reg1] + mov reg2, reg0 + will be optimized to + str reg2, [reg1] + } + RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then + begin + dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next)); + if assigned(dealloc) then + begin + DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp); + result:=true; + + { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation + and remove it if possible; when no allocation is found the deallocation is re-inserted directly after p } + asml.Remove(dealloc); + alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous)); + if assigned(alloc) then + begin + asml.Remove(alloc); + alloc.free; + dealloc.free; + end + else + asml.InsertAfter(dealloc,p); + + { try to move the allocation of the target register in front of p. NOTE(review): the result of GetLastInstruction is ignored and hp1.Next is read unconditionally - confirm hp1 is always assigned here } + GetLastInstruction(movp,hp1); + alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next)); + if assigned(alloc) then + begin + asml.Remove(alloc); + asml.InsertBefore(alloc,p); + { adjust used regs } + IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs); + end; + + { finally get rid of the mov: p now writes the target register of the mov directly } + taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg); + { Remove preindexing and postindexing for LDR in some cases. + For example: + ldr reg2,[reg1, xxx]! + mov reg1,reg2 + must be translated to: + ldr reg1,[reg1, xxx] + + Preindexing must be removed there, since the same register is used as the base and as the target. + Such a case is not allowed on ARM CPUs and causes a crash. 
+ } + if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref) + and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base) + then + taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET; + asml.remove(movp); + movp.free; + end; + end; + end; + + { optimize ldr/str regX,[reg1] @@ -348,11 +464,22 @@ Implementation end; + function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean; + var + hp1: tai; + begin + result:=false; + if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then + Result:=true; + end; + + + function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean; var hp1,hp2: tai; begin - result:=false; + Result:=false; if MatchOpType(taicpu(p),top_reg,top_const) and (taicpu(p).oper[1]^.val=0) and GetNextInstruction(p,hp1) and @@ -400,6 +527,16 @@ Implementation A_ASR, A_LSL: Result:=OptPass1Shift(p); + A_ADD, + A_ADC, + A_SUB, + A_SBC, + A_AND, + A_BIC, + A_EOR, + A_ORR, + A_MUL: + Result:=OptPass1Data(p); else ; end;