From 52d3756c26a78ca17b6ed0d282444c389a9e1e29 Mon Sep 17 00:00:00 2001 From: florian Date: Mon, 8 May 2017 20:44:27 +0000 Subject: [PATCH] * factored out OptPass1Movx and merged i386 and x86-64 version git-svn-id: trunk@36159 - --- compiler/i386/aoptcpu.pas | 194 +--------------------- compiler/x86/aoptx86.pas | 259 ++++++++++++++++++++++++++++- compiler/x86_64/aoptcpu.pas | 318 +++--------------------------------- 3 files changed, 286 insertions(+), 485 deletions(-) diff --git a/compiler/i386/aoptcpu.pas b/compiler/i386/aoptcpu.pas index 92e1a14994..e5df97eb13 100644 --- a/compiler/i386/aoptcpu.pas +++ b/compiler/i386/aoptcpu.pas @@ -1182,198 +1182,8 @@ begin A_MOVSX, A_MOVZX : begin - if (taicpu(p).oper[1]^.typ = top_reg) and - GetNextInstruction(p,hp1) and - (hp1.typ = ait_instruction) and - IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and - (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and - GetNextInstruction(hp1,hp2) and - MatchInstruction(hp2,A_MOV,[]) and - (taicpu(hp2).oper[0]^.typ = top_reg) and - OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and - (((taicpu(hp1).ops=2) and - (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or - ((taicpu(hp1).ops=1) and - (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and - not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then - { change movsX/movzX reg/ref, reg2 } - { add/sub/or/... reg3/$const, reg2 } - { mov reg2 reg/ref } - { to add/sub/or/... 
reg3/$const, reg/ref } - begin - { by example: - movswl %si,%eax movswl %si,%eax p - decl %eax addl %edx,%eax hp1 - movw %ax,%si movw %ax,%si hp2 - -> - movswl %si,%eax movswl %si,%eax p - decw %eax addw %edx,%eax hp1 - movw %ax,%si movw %ax,%si hp2 - } - taicpu(hp1).changeopsize(taicpu(hp2).opsize); - { - -> - movswl %si,%eax movswl %si,%eax p - decw %si addw %dx,%si hp1 - movw %ax,%si movw %ax,%si hp2 - } - case taicpu(hp1).ops of - 1: - taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^); - 2: - begin - taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^); - if (taicpu(hp1).oper[0]^.typ = top_reg) then - setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg)); - end; - else - internalerror(2008042701); - end; - { - -> - decw %si addw %dx,%si p - } - asml.remove(p); - asml.remove(hp2); - p.free; - hp2.free; - p := hp1 - end - { removes superfluous And's after movzx's } - else if taicpu(p).opcode=A_MOVZX then - begin - if (taicpu(p).oper[1]^.typ = top_reg) and - GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - case taicpu(p).opsize Of - S_BL, S_BW: - if (taicpu(hp1).oper[0]^.val = $ff) then - begin - asml.remove(hp1); - hp1.free; - end; - S_WL: - if (taicpu(hp1).oper[0]^.val = $ffff) then - begin - asml.remove(hp1); - hp1.free; - end; - end; - {changes some movzx constructs to faster synonims (all examples - are given with eax/ax, but are also valid for other registers)} - if (taicpu(p).oper[1]^.typ = top_reg) then - if (taicpu(p).oper[0]^.typ = top_reg) then - case taicpu(p).opsize of - S_BW: - begin - if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and - not(cs_opt_size in current_settings.optimizerswitches) then - {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"} - begin - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_W); - 
taicpu(p).loadConst(0,$ff); - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - {Change "movzbw %reg1, %reg2; andw $const, %reg2" - to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"} - begin - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_W); - setsubreg(taicpu(p).oper[0]^.reg,R_SUBW); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); - end; - end; - S_BL: - begin - if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and - not(cs_opt_size in current_settings.optimizerswitches) then - {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"} - begin - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_L); - taicpu(p).loadConst(0,$ff) - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - {Change "movzbl %reg1, %reg2; andl $const, %reg2" - to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"} - begin - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_L); - setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); - end - end; - S_WL: - begin - if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and - not(cs_opt_size in current_settings.optimizerswitches) then - {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"} - begin - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_L); - taicpu(p).loadConst(0,$ffff); - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - 
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - {Change "movzwl %reg1, %reg2; andl $const, %reg2" - to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"} - begin - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_L); - setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); - end; - end; - end - else if (taicpu(p).oper[0]^.typ = top_ref) then - begin - if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = Top_Const) and - (taicpu(hp1).oper[1]^.typ = Top_Reg) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - begin - taicpu(p).opcode := A_MOV; - case taicpu(p).opsize Of - S_BL: - begin - taicpu(p).changeopsize(S_L); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); - end; - S_WL: - begin - taicpu(p).changeopsize(S_L); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); - end; - S_BW: - begin - taicpu(p).changeopsize(S_W); - taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); - end; - end; - end; - end; - end; + If OptPass1Movx(p) then + Continue end; (* should not be generated anymore by the current code generator diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index c983723031..984db44e87 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -59,6 +59,7 @@ unit aoptx86; function OptPass1VMOVAP(var p : tai) : boolean; function OptPass1VOP(const p : tai) : boolean; function OptPass1MOV(var p : tai) : boolean; + function OptPass1Movx(var p : tai) : boolean; function OptPass2MOV(var p : tai) : boolean; function OptPass2Imul(var p : tai) : boolean; @@ -1701,6 +1702,267 @@ unit aoptx86; end; + { Pass 1 peephole optimizations for MOVSX/MOVZX, shared by i386 and x86-64. + Folds a movx / arithmetic op / mov sequence into one arithmetic op on the movx source, + removes an AND that only repeats the zero extension of a preceding MOVZX, and + rewrites other MOVZX forms as AND or as MOV plus a narrowed AND mask. + p is replaced by the following instruction when the first pattern matches; result is always false. } + function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean; + var + hp1,hp2: tai; + begin + result:=false; + if (taicpu(p).oper[1]^.typ = 
top_reg) and + GetNextInstruction(p,hp1) and + (hp1.typ = ait_instruction) and + IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and + GetNextInstruction(hp1,hp2) and + MatchInstruction(hp2,A_MOV,[]) and + (taicpu(hp2).oper[0]^.typ = top_reg) and + OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and +{$ifdef i386} + { not all registers have byte size sub registers on i386; only inspect oper[0] as a register when it is one } + ((taicpu(hp2).opsize<>S_B) or (taicpu(hp1).oper[0]^.typ<>top_reg) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and +{$endif i386} + (((taicpu(hp1).ops=2) and + (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or + ((taicpu(hp1).ops=1) and + (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and + not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then + begin + { change movsX/movzX reg/ref, reg2 + add/sub/or/... reg3/$const, reg2 + mov reg2 reg/ref + to add/sub/or/... reg3/$const, reg/ref } + + { by example: + movswl %si,%eax movswl %si,%eax p + decl %eax addl %edx,%eax hp1 + movw %ax,%si movw %ax,%si hp2 + -> + movswl %si,%eax movswl %si,%eax p + decw %eax addw %edx,%eax hp1 + movw %ax,%si movw %ax,%si hp2 + } + taicpu(hp1).changeopsize(taicpu(hp2).opsize); + { + -> + movswl %si,%eax movswl %si,%eax p + decw %si addw %dx,%si hp1 + movw %ax,%si movw %ax,%si hp2 + } + case taicpu(hp1).ops of + 1: + taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^); + 2: + begin + taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^); + if (taicpu(hp1).oper[0]^.typ = top_reg) then + setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg)); + end; + else + internalerror(2008042701); + end; + { + -> + decw %si addw %dx,%si p + } + DebugMsg('PeepHole Optimization,var3',p); + asml.remove(p); + asml.remove(hp2); + p.free; + hp2.free; + p:=hp1; + end + { removes superfluous And's after movzx's } + else if taicpu(p).opcode=A_MOVZX then + begin + if (taicpu(p).oper[1]^.typ = top_reg) and + GetNextInstruction(p, hp1) and + (tai(hp1).typ = 
ait_instruction) and + (taicpu(hp1).opcode = A_AND) and + (taicpu(hp1).oper[0]^.typ = top_const) and + (taicpu(hp1).oper[1]^.typ = top_reg) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then + begin + case taicpu(p).opsize Of + S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}: + if (taicpu(hp1).oper[0]^.val = $ff) then + begin + DebugMsg('PeepHole Optimization,var4',p); + asml.remove(hp1); + hp1.free; + end; + S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}: + if (taicpu(hp1).oper[0]^.val = $ffff) then + begin + DebugMsg('PeepHole Optimization,var5',p); + asml.remove(hp1); + hp1.free; + end; +{$ifdef x86_64} + S_LQ: + if (taicpu(hp1).oper[0]^.val = $ffffffff) then + begin + DebugMsg('PeepHole Optimization,var6',p); + asml.remove(hp1); + hp1.Free; + end; +{$endif x86_64} + end; + end; + { changes some movzx constructs to faster synonyms (all examples + are given with eax/ax, but are also valid for other registers)} + if (taicpu(p).oper[1]^.typ = top_reg) then + if (taicpu(p).oper[0]^.typ = top_reg) then + case taicpu(p).opsize of + S_BW: + begin + if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and + not(cs_opt_size in current_settings.optimizerswitches) then + {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"} + begin + taicpu(p).opcode := A_AND; + taicpu(p).changeopsize(S_W); + taicpu(p).loadConst(0,$ff); + DebugMsg('PeepHole Optimization,var7',p); + end + else if GetNextInstruction(p, hp1) and + (tai(hp1).typ = ait_instruction) and + (taicpu(hp1).opcode = A_AND) and + (taicpu(hp1).oper[0]^.typ = top_const) and + (taicpu(hp1).oper[1]^.typ = top_reg) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then + { Change "movzbw %reg1, %reg2; andw $const, %reg2" + to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"} + begin + DebugMsg('PeepHole Optimization,var8',p); + taicpu(p).opcode := A_MOV; + taicpu(p).changeopsize(S_W); + 
setsubreg(taicpu(p).oper[0]^.reg,R_SUBW); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); + end; + end; + S_BL: + begin + if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and + not(cs_opt_size in current_settings.optimizerswitches) then + { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" } + begin + DebugMsg('PeepHole Optimization,var9',p); + taicpu(p).opcode := A_AND; + taicpu(p).changeopsize(S_L); + taicpu(p).loadConst(0,$ff) + end + else if GetNextInstruction(p, hp1) and + (tai(hp1).typ = ait_instruction) and + (taicpu(hp1).opcode = A_AND) and + (taicpu(hp1).oper[0]^.typ = top_const) and + (taicpu(hp1).oper[1]^.typ = top_reg) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then + { Change "movzbl %reg1, %reg2; andl $const, %reg2" + to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"} + begin + DebugMsg('PeepHole Optimization,var10',p); + taicpu(p).opcode := A_MOV; + taicpu(p).changeopsize(S_L); + { do not use R_SUBWHOLE + as movl %rdx,%eax + is invalid in assembler PM } + setsubreg(taicpu(p).oper[0]^.reg, R_SUBD); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); + end + end; + S_WL: + begin + if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and + not(cs_opt_size in current_settings.optimizerswitches) then + { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" } + begin + DebugMsg('PeepHole Optimization,var11',p); + taicpu(p).opcode := A_AND; + taicpu(p).changeopsize(S_L); + taicpu(p).loadConst(0,$ffff); + end + else if GetNextInstruction(p, hp1) and + (tai(hp1).typ = ait_instruction) and + (taicpu(hp1).opcode = A_AND) and + (taicpu(hp1).oper[0]^.typ = top_const) and + (taicpu(hp1).oper[1]^.typ = top_reg) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then + { Change "movzwl %reg1, %reg2; andl $const, %reg2" + to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"} + begin + DebugMsg('PeepHole Optimization,var12',p); + taicpu(p).opcode := A_MOV; + taicpu(p).changeopsize(S_L); + { do not use R_SUBWHOLE + as 
movl %rdx,%eax + is invalid in assembler PM } + setsubreg(taicpu(p).oper[0]^.reg, R_SUBD); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); + end; + end; + end + else if (taicpu(p).oper[0]^.typ = top_ref) then + begin + if GetNextInstruction(p, hp1) and + (tai(hp1).typ = ait_instruction) and + (taicpu(hp1).opcode = A_AND) and + MatchOpType(taicpu(hp1),top_const,top_reg) and + (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then + begin + taicpu(p).opcode := A_MOV; + case taicpu(p).opsize Of + S_BL: + begin + DebugMsg('PeepHole Optimization,var13',p); + taicpu(p).changeopsize(S_L); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); + end; + S_WL: + begin + DebugMsg('PeepHole Optimization,var14',p); + taicpu(p).changeopsize(S_L); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); + end; + S_BW: + begin + DebugMsg('PeepHole Optimization,var15',p); + taicpu(p).changeopsize(S_W); + taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); + end; +{$ifdef x86_64} + S_BQ: + begin + DebugMsg('PeepHole Optimization,var16',p); + taicpu(p).changeopsize(S_Q); + taicpu(hp1).loadConst( + 0, taicpu(hp1).oper[0]^.val and $ff); + end; + S_WQ: + begin + DebugMsg('PeepHole Optimization,var17',p); + taicpu(p).changeopsize(S_Q); + taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff); + end; + S_LQ: + begin + DebugMsg('PeepHole Optimization,var18',p); + taicpu(p).changeopsize(S_Q); + taicpu(hp1).loadConst( + 0, taicpu(hp1).oper[0]^.val and $ffffffff); + end; +{$endif x86_64} + else + Internalerror(2017050704) + end; + end; + end; + end; + end; + + function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean; var hp1 : tai; diff --git a/compiler/x86_64/aoptcpu.pas b/compiler/x86_64/aoptcpu.pas index 34479fc607..42be321e3b 100644 --- a/compiler/x86_64/aoptcpu.pas +++ b/compiler/x86_64/aoptcpu.pas @@ -62,303 +62,37 @@ uses end; end; -function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; -var - hp1, hp2: tai; -begin - Result := 
False; - case p.typ of - ait_instruction: + + function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; begin - case taicpu(p).opcode of - A_AND: - Result:=OptPass1AND(p); - A_MOV: - Result:=OptPass1MOV(p); - A_MOVSX, - A_MOVZX: - begin - if (taicpu(p).oper[1]^.typ = top_reg) and - GetNextInstruction(p, hp1) and - (hp1.typ = ait_instruction) and - IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and - GetNextInstruction(hp1, hp2) and - (hp2.typ = ait_instruction) and - (taicpu(hp2).opcode = A_MOV) and - (taicpu(hp2).oper[0]^.typ = top_reg) and - OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then - { change movsX/movzX reg/ref, reg2 } - { add/sub/or/... reg3/$const, reg2 } - { mov reg2 reg/ref } - { to add/sub/or/... reg3/$const, reg/ref } + result:=False; + case p.typ of + ait_instruction: begin - { by example: - movswl %si,%eax movswl %si,%eax p - decl %eax addl %edx,%eax hp1 - movw %ax,%si movw %ax,%si hp2 - -> - movswl %si,%eax movswl %si,%eax p - decw %eax addw %edx,%eax hp1 - movw %ax,%si movw %ax,%si hp2 - } - taicpu(hp1).changeopsize(taicpu(hp2).opsize); - { - -> - movswl %si,%eax movswl %si,%eax p - decw %si addw %dx,%si hp1 - movw %ax,%si movw %ax,%si hp2 - } - case taicpu(hp1).ops of - 1: - taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^); - 2: - begin - taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^); - if (taicpu(hp1).oper[0]^.typ = top_reg) then - setsubreg(taicpu(hp1).oper[0]^.reg, - getsubreg(taicpu(hp2).oper[0]^.reg)); - end; - else - internalerror(2008042701); - end; - { - -> - decw %si addw %dx,%si p - } - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p); - asml.remove(p); - asml.remove(hp2); - p.Free; - hp2.Free; - p := hp1; - end - { removes superfluous And's after movzx's } - else if taicpu(p).opcode = A_MOVZX then - begin - if (taicpu(p).oper[1]^.typ = top_reg) and - GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - 
(taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then - begin - case taicpu(p).opsize of - S_BL, S_BW, S_BQ: - if (taicpu(hp1).oper[0]^.val = $ff) then - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p); - asml.remove(hp1); - hp1.Free; - end; - S_WL, S_WQ: - if (taicpu(hp1).oper[0]^.val = $ffff) then - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p); - asml.remove(hp1); - hp1.Free; - end; - S_LQ: - if (taicpu(hp1).oper[0]^.val = $ffffffff) then - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p); - asml.remove(hp1); - hp1.Free; - end; - end; - end; - { changes some movzx constructs to faster synonims (all examples - are given with eax/ax, but are also valid for other registers)} - if (taicpu(p).oper[1]^.typ = top_reg) then - if (taicpu(p).oper[0]^.typ = top_reg) then - case taicpu(p).opsize of - S_BW: - begin - if (getsupreg(taicpu(p).oper[0]^.reg) = - getsupreg(taicpu(p).oper[1]^.reg)) and not - (cs_opt_size in current_settings.optimizerswitches) then - {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"} - begin - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_W); - taicpu(p).loadConst(0, $ff); - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p); - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = - taicpu(p).oper[1]^.reg) then - { Change "movzbw %reg1, %reg2; andw $const, 
%reg2" - to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"} - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p); - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_W); - setsubreg(taicpu(p).oper[0]^.reg, R_SUBW); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ff); - end; - end; - S_BL: - begin - if (getsupreg(taicpu(p).oper[0]^.reg) = - getsupreg(taicpu(p).oper[1]^.reg)) and not - (cs_opt_size in current_settings.optimizerswitches) then - { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"} - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p); - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_L); - taicpu(p).loadConst(0, $ff); - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = - taicpu(p).oper[1]^.reg) then - { Change "movzbl %reg1, %reg2; andl $const, %reg2" - to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"} - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p); - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_L); - { do not use R_SUBWHOLE - as movl %rdx,%eax - is invalid in assembler PM } - setsubreg(taicpu(p).oper[0]^.reg, R_SUBD); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ff); - end; - end; - S_WL: - begin - if (getsupreg(taicpu(p).oper[0]^.reg) = - getsupreg(taicpu(p).oper[1]^.reg)) and not - (cs_opt_size in current_settings.optimizerswitches) then - { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" } - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole 
Optimization,var11')),p); - taicpu(p).opcode := A_AND; - taicpu(p).changeopsize(S_L); - taicpu(p).loadConst(0, $ffff); - end - else if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - (taicpu(hp1).oper[0]^.typ = top_const) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(hp1).oper[1]^.reg = - taicpu(p).oper[1]^.reg) then - { Change "movzwl %reg1, %reg2; andl $const, %reg2" - to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"} - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p); - taicpu(p).opcode := A_MOV; - taicpu(p).changeopsize(S_L); - { do not use R_SUBWHOLE - as movl %rdx,%eax - is invalid in assembler PM } - setsubreg(taicpu(p).oper[0]^.reg, R_SUBD); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ffff); - end; - end; - end - else if (taicpu(p).oper[0]^.typ = top_ref) then - begin - if GetNextInstruction(p, hp1) and - (tai(hp1).typ = ait_instruction) and - (taicpu(hp1).opcode = A_AND) and - MatchOpType(taicpu(hp1),top_const,top_reg) and - (taicpu(hp1).oper[1]^.reg = - taicpu(p).oper[1]^.reg) then - begin - taicpu(p).opcode := A_MOV; - case taicpu(p).opsize of - S_BL: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p); - taicpu(p).changeopsize(S_L); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ff); - end; - S_WL: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p); - taicpu(p).changeopsize(S_L); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ffff); - end; - S_BW: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p); - taicpu(p).changeopsize(S_W); - 
taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ff); - end; - S_BQ: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p); - taicpu(p).changeopsize(S_Q); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ff); - end; - S_WQ: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p); - taicpu(p).changeopsize(S_Q); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ffff); - end; - S_LQ: - begin - if (cs_asm_source in current_settings.globalswitches) then - asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p); - taicpu(p).changeopsize(S_Q); - taicpu(hp1).loadConst( - 0, taicpu(hp1).oper[0]^.val and $ffffffff); - end; - end; - end; - end; + case taicpu(p).opcode of + A_AND: + Result:=OptPass1AND(p); + A_MOV: + Result:=OptPass1MOV(p); + A_MOVSX, + A_MOVZX: + Result:=OptPass1Movx(p); + A_VMOVAPS, + A_VMOVAPD: + result:=OptPass1VMOVAP(p); + A_VDIVSD, + A_VDIVSS, + A_VSUBSD, + A_VSUBSS, + A_VMULSD, + A_VMULSS, + A_VADDSD, + A_VADDSS: + result:=OptPass1VOP(p); end; end; - A_VMOVAPS, - A_VMOVAPD: - result:=OptPass1VMOVAP(p); - A_VDIVSD, - A_VDIVSS, - A_VSUBSD, - A_VSUBSS, - A_VMULSD, - A_VMULSS, - A_VADDSD, - A_VADDSS: - result:=OptPass1VOP(p); + end; end; - end; - end; -end; function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p : tai) : boolean;