diff --git a/compiler/aarch64/aasmcpu.pas b/compiler/aarch64/aasmcpu.pas
index fe9af332b4..213d808cc4 100644
--- a/compiler/aarch64/aasmcpu.pas
+++ b/compiler/aarch64/aasmcpu.pas
@@ -187,6 +187,7 @@ uses
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+         constructor op_reg_const_ref(op: tasmop; _op1: tregister; _op2: aint; _op3: treference);
 
          constructor op_const_ref(op:tasmop; _op1: aint; _op2: treference);
 
@@ -607,6 +608,18 @@ implementation
       end;
 
 
+    { Creates a three-operand instruction: register, constant, reference
+      (the operand shape used for TBZ/TBNZ by the peephole optimizer). }
+    constructor taicpu.op_reg_const_ref(op : tasmop;_op1 : tregister;_op2 : aint;_op3 : treference);
+      begin
+        inherited create(op);
+        ops:=3;
+        loadreg(0,_op1);
+        loadconst(1,_op2);
+        loadref(2,_op3);
+      end;
+
+
     function taicpu.is_same_reg_move(regtype: Tregistertype):boolean;
       begin
         { allow the register allocator to remove unnecessary moves }
diff --git a/compiler/aarch64/aoptcpu.pas b/compiler/aarch64/aoptcpu.pas
index f8570794f9..5cb055d8e0 100644
--- a/compiler/aarch64/aoptcpu.pas
+++ b/compiler/aarch64/aoptcpu.pas
@@ -52,6 +52,7 @@ Interface
         function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
         function OptPass1Shift(var p: tai): boolean;
         function OptPostCMP(var p: tai): boolean;
+        function OptPostAnd(var p: tai): Boolean;
         function OptPass1Data(var p: tai): boolean;
         function OptPass1FData(var p: tai): Boolean;
         function OptPass1STP(var p: tai): boolean;
@@ -787,6 +788,65 @@ Implementation
     end;
 
 
+  { Post-peephole optimization: folds a single-bit test followed by a
+    conditional branch into one test-bit-and-branch instruction:
+
+      and   reg1,reg0,#mask      (mask has exactly one bit set)
+      cmp   reg1,#0
+      b.eq/b.ne label
+
+    into
+
+      tbz/tbnz reg0,#bitindex,label      (b.eq -> tbz, b.ne -> tbnz)
+
+    RegEndOfLife must hold for reg1 at the cmp.  Returns true when the
+    sequence was replaced; p is then updated through RemoveCurrentP.
+    NOTE(review): the flags set by the removed cmp are presumably not
+    needed after the branch - confirm. }
+  function TCpuAsmOptimizer.OptPostAnd(var p: tai): Boolean;
+    var
+      hp1, hp2: tai;
+      hp3: taicpu;
+      bitindex: cardinal;
+    begin
+      Result:=false;
+      if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+        (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
+        GetNextInstruction(p,hp1) and
+        MatchInstruction(hp1,A_CMP,[PF_None]) and
+        MatchOpType(taicpu(hp1),top_reg,top_const) and
+        (taicpu(hp1).oper[1]^.val=0) and
+        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
+        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+        GetNextInstruction(hp1,hp2) and
+        MatchInstruction(hp2,A_B,[PF_None]) and
+        (taicpu(hp2).condition in [C_EQ,C_NE]) then
+        begin
+          { Exactly one bit is set (PopCnt test above), so the lowest set
+            bit is the bit to test.  val is cast to QWord exactly as in the
+            PopCnt test, so a mask with the top bit set does not go through
+            an implicit (range-checked) signed-to-unsigned conversion. }
+          bitindex:=BsfQWord(QWord(taicpu(p).oper[2]^.val));
+          case taicpu(hp2).condition of
+            C_NE:
+              hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitindex,taicpu(hp2).oper[0]^.ref^);
+            C_EQ:
+              hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitindex,taicpu(hp2).oper[0]^.ref^);
+            else
+              Internalerror(2021100201);
+          end;
+          taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
+          asml.insertbefore(hp3, hp1);
+
+          RemoveInstruction(hp1);
+          RemoveInstruction(hp2);
+          RemoveCurrentP(p);
+          DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
+          Result:=true;
+        end;
+    end;
+
+
   function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
     var
       hp1,hp2: tai;
@@ -907,6 +967,8 @@ Implementation
           case taicpu(p).opcode of
             A_CMP:
               Result:=OptPostCMP(p);
+            A_AND:
+              Result:=OptPostAnd(p);
             else
               ;
           end;