From 3487c34ed9a2c87128c39bab5a5dcd336e5ef66f Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 2 Nov 2019 18:48:23 +0000 Subject: [PATCH] * unified TX86AsmOptimizer.OptPass1MOVAP and TX86AsmOptimizer.OptPass1VMOVAP git-svn-id: trunk@43370 - --- compiler/i386/aoptcpu.pas | 14 +-- compiler/x86/aoptx86.pas | 167 +++++++++++++++++++++++------------- compiler/x86_64/aoptcpu.pas | 11 ++- 3 files changed, 121 insertions(+), 71 deletions(-) diff --git a/compiler/i386/aoptcpu.pas b/compiler/i386/aoptcpu.pas index 596dc67a6d..46081fb78f 100644 --- a/compiler/i386/aoptcpu.pas +++ b/compiler/i386/aoptcpu.pas @@ -547,9 +547,15 @@ begin A_SUB: if OptPass1Sub(p) then continue; + A_MOVAPD, + A_MOVAPS, + A_MOVUPD, + A_MOVUPS, A_VMOVAPS, - A_VMOVAPD: - if OptPass1VMOVAP(p) then + A_VMOVAPD, + A_VMOVUPS, + A_VMOVUPD: + if OptPass1_V_MOVAP(p) then continue; A_VDIVSD, A_VDIVSS, @@ -573,10 +579,6 @@ begin A_ADDSS: if OptPass1OP(p) then continue; - A_MOVAPD, - A_MOVAPS: - if OptPass1MOVAP(p) then - continue; A_VMOVSD, A_VMOVSS, A_MOVSD, diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index 9b40b1239b..6fb42e4ba2 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -62,11 +62,10 @@ unit aoptx86; function PrePeepholeOptIMUL(var p : tai) : boolean; function OptPass1AND(var p : tai) : boolean; - function OptPass1VMOVAP(var p : tai) : boolean; + function OptPass1_V_MOVAP(var p : tai) : boolean; function OptPass1VOP(var p : tai) : boolean; function OptPass1MOV(var p : tai) : boolean; function OptPass1Movx(var p : tai) : boolean; - function OptPass1MOVAP(var p : tai) : boolean; function OptPass1MOVXX(var p : tai) : boolean; function OptPass1OP(var p : tai) : boolean; function OptPass1LEA(var p : tai) : boolean; @@ -1130,61 +1129,7 @@ unit aoptx86; end; - function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean; - var - hp1,hp2 : tai; - begin - result:=false; - if MatchOpType(taicpu(p),top_reg,top_reg) and - GetNextInstruction(p, hp1) and - (hp1.typ = ait_instruction) and - GetNextInstruction(hp1, hp2) and - MatchInstruction(hp2,taicpu(p).opcode,[]) and - OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and - MatchOpType(taicpu(hp2),top_reg,top_reg) and - MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and - (((taicpu(p).opcode=A_MOVAPS) and - ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or - (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or - ((taicpu(p).opcode=A_MOVAPD) and - ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or - (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD))) - ) then - { change - movapX reg,reg2 - addsX/subsX/... reg3, reg2 - movapX reg2,reg - to - addsX/subsX/... reg3,reg - } - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); - If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then - begin - DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+ - debug_op2str(taicpu(p).opcode)+' '+ - debug_op2str(taicpu(hp1).opcode)+' '+ - debug_op2str(taicpu(hp2).opcode)+') done',p); - { we cannot eliminate the first move if - the operations uses the same register for source and dest } - if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then - begin - asml.remove(p); - p.Free; - end; - taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^); - asml.remove(hp2); - hp2.Free; - p:=hp1; - result:=true; - end; - end - end; - - - function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean; + function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean; var hp1,hp2 : tai; begin @@ -1234,12 +1179,72 @@ unit aoptx86; result:=true; end end - else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and + else if MatchInstruction(hp1,[A_VFMADDPD, + A_VFMADD132PD, + A_VFMADD132PS, + A_VFMADD132SD, + A_VFMADD132SS, + A_VFMADD213PD, + A_VFMADD213PS, + A_VFMADD213SD, + A_VFMADD213SS, + A_VFMADD231PD, + A_VFMADD231PS, + A_VFMADD231SD, + A_VFMADD231SS, + A_VFMADDSUB132PD, + A_VFMADDSUB132PS, + A_VFMADDSUB213PD, + A_VFMADDSUB213PS, + A_VFMADDSUB231PD, + A_VFMADDSUB231PS, + A_VFMSUB132PD, + A_VFMSUB132PS, + A_VFMSUB132SD, + A_VFMSUB132SS, + A_VFMSUB213PD, + A_VFMSUB213PS, + A_VFMSUB213SD, + A_VFMSUB213SS, + A_VFMSUB231PD, + A_VFMSUB231PS, + A_VFMSUB231SD, + A_VFMSUB231SS, + A_VFMSUBADD132PD, + A_VFMSUBADD132PS, + A_VFMSUBADD213PD, + A_VFMSUBADD213PS, + A_VFMSUBADD231PD, + A_VFMSUBADD231PS, + A_VFNMADD132PD, + A_VFNMADD132PS, + A_VFNMADD132SD, + A_VFNMADD132SS, + A_VFNMADD213PD, + A_VFNMADD213PS, + A_VFNMADD213SD, + A_VFNMADD213SS, + A_VFNMADD231PD, + A_VFNMADD231PS, + A_VFNMADD231SD, + A_VFNMADD231SS, + A_VFNMSUB132PD, + A_VFNMSUB132PS, + A_VFNMSUB132SD, + A_VFNMSUB132SS, + A_VFNMSUB213PD, + A_VFNMSUB213PS, + A_VFNMSUB213SD, + A_VFNMSUB213SS, + A_VFNMSUB231PD, + A_VFNMSUB231PS, + A_VFNMSUB231SD, + A_VFNMSUB231SS],[S_NO]) and { we mix single and double opperations here because we assume that the compiler generates vmovapd only after double operations and vmovaps only after single operations } MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and GetNextInstruction(hp1,hp2) and - MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and + MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then begin TransferUsedRegs(TmpUsedRegs); @@ -1255,6 +1260,50 @@ unit aoptx86; hp2.Free; p:=hp1; end; + end + else if (hp1.typ = ait_instruction) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2,taicpu(p).opcode,[]) and + OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and + MatchOpType(taicpu(hp2),top_reg,top_reg) and + MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and + (((taicpu(p).opcode=A_MOVAPS) and + ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or + (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or + ((taicpu(p).opcode=A_MOVAPD) and + ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or + (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD))) + ) then + { change + movapX reg,reg2 + addsX/subsX/... reg3, reg2 + movapX reg2,reg + to + addsX/subsX/... reg3,reg + } + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.next)); + UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); + If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then + begin + DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+ + debug_op2str(taicpu(p).opcode)+' '+ + debug_op2str(taicpu(hp1).opcode)+' '+ + debug_op2str(taicpu(hp2).opcode)+') done',p); + { we cannot eliminate the first move if + the operations uses the same register for source and dest } + if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then + begin + asml.remove(p); + p.Free; + end; + taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^); + asml.remove(hp2); + hp2.Free; + p:=hp1; + result:=true; + end; end; end; end; diff --git a/compiler/x86_64/aoptcpu.pas b/compiler/x86_64/aoptcpu.pas index dbe6bf2081..98c655b707 100644 --- a/compiler/x86_64/aoptcpu.pas +++ b/compiler/x86_64/aoptcpu.pas @@ -79,16 +79,15 @@ uses A_MOVSX, A_MOVZX: Result:=OptPass1Movx(p); + A_MOVAPD, + A_MOVAPS, + A_MOVUPD, + A_MOVUPS, A_VMOVAPS, A_VMOVAPD, A_VMOVUPS, A_VMOVUPD: - result:=OptPass1VMOVAP(p); - A_MOVAPD, - A_MOVAPS, - A_MOVUPD, - A_MOVUPS: - result:=OptPass1MOVAP(p); + result:=OptPass1_V_MOVAP(p); A_VDIVSD, A_VDIVSS, A_VSUBSD,