diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index 8cb21652af..2044554590 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -73,6 +73,8 @@ unit aoptx86; } function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean; function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override; + private + function SkipSimpleInstructions(var hp1: tai): Boolean; { advances hp1 until an entry that references the stack pointer is reached; result tells whether hp1 is still assigned } protected class function IsMOVZXAcceptable: Boolean; static; inline; @@ -149,6 +151,7 @@ unit aoptx86; function PostPeepholeOptTestOr(var p : tai) : Boolean; function PostPeepholeOptCall(var p : tai) : Boolean; function PostPeepholeOptLea(var p : tai) : Boolean; + function PostPeepholeOptPush(var p: tai): Boolean; { post-peephole handler for PUSH: turns push rax / call / pop rcx / ret into a jmp } procedure ConvertJumpToRET(const p: tai; const ret_p: tai); @@ -5739,25 +5742,26 @@ unit aoptx86; end; - function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean; + function TX86AsmOptimizer.SkipSimpleInstructions(var hp1 : tai) : Boolean; { Steps hp1 forward with GetNextInstruction for as long as hp1 is assigned and RegInInstruction(NR_STACK_POINTER_REG,hp1) is false. The opcode list, the operand-type checks and the frame-pointer check below are commented out and take no part in the loop condition. Returns True when hp1 is still assigned after the loop. NOTE(review): this was a routine nested in PostPeepholeOptLea and is made a method here so that PostPeepholeOptPush can call it as well. } + begin + { we can skip all instructions not messing with the stack pointer } + while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX, + A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc, + A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS, + A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and} + ({(taicpu(hp1).ops=0) or } + ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or + (MatchOpType(taicpu(hp1),top_ref,top_reg)) + ) and } + not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))} + ) + ) do + GetNextInstruction(hp1,hp1); + Result:=assigned(hp1); { hp1 now rests on the first entry for which the stack-pointer test succeeded, or is unassigned } + end; - function SkipSimpleInstructions(var hp1 : tai) : Boolean; - begin - { we can skip all instructions not messing with the stack pointer } - while assigned(hp1) and 
{MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX, - A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc, - A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS, - A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and} - ({(taicpu(hp1).ops=0) or } - ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or - (MatchOpType(taicpu(hp1),top_ref,top_reg)) - ) and } - not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))} - ) - ) do - GetNextInstruction(hp1,hp1); - Result:=assigned(hp1); - end; + + function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean; var hp1, hp2, hp3, hp4: tai; @@ -5821,6 +5825,58 @@ unit aoptx86; end; + function TX86AsmOptimizer.PostPeepholeOptPush(var p : tai) : Boolean; { Post-peephole handler invoked for a PUSH instruction p. Returns True only when the whole pattern described in the body matched and the instruction list was rewritten; otherwise the result is False. Locals: hp1 = candidate CALL, hp2 = candidate POP, hp3 = candidate RET, hp4 = copy of the entry that directly follows the push. } + + var + hp1, hp2, hp3, hp4: tai; + begin + Result:=false; + { replace + push %rax + call procname + pop %rcx + ret + by + jmp procname + + but do it only on level 4 because it destroys stack back traces + + It depends on the fact, that the sequence push rax/pop rcx is used for stack alignment as rcx is volatile + for all supported calling conventions + } + if (cs_opt_level4 in current_settings.optimizerswitches) and + MatchOpType(taicpu(p),top_reg) and + (taicpu(p).oper[0]^.reg=NR_RAX) and + GetNextInstruction(p, hp1) and + { Take a copy of hp1 } + SetAndTest(hp1, hp4) and + { trick to skip label } + ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and + SkipSimpleInstructions(hp1) and + MatchInstruction(hp1,A_CALL,[S_NO]) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2,A_POP,[taicpu(p).opsize]) and + MatchOpType(taicpu(hp2),top_reg) and + (taicpu(hp2).oper[0]^.reg=NR_RCX) and + GetNextInstruction(hp2, hp3) and + { trick to skip label } + ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and + MatchInstruction(hp3,A_RET,[S_NO]) and + (taicpu(hp3).ops=0) then + begin + 
taicpu(hp1).opcode := A_JMP; { hp1 is the matched CALL; it is rewritten in place, keeping its operand } + taicpu(hp1).is_jmp := true; + DebugMsg(SPeepholeOptimization + 'PushCallPushRet2Jmp done',p); + RemoveCurrentP(p, hp4); { hp4 was captured before the label skip; NOTE(review): presumably this drops the push and leaves p at hp4 - confirm against RemoveCurrentP } + AsmL.Remove(hp2); + hp2.free; { hp2 is the matched pop into RCX } + AsmL.Remove(hp3); + hp3.free; { hp3 is the matched operand-less RET } + Result:=true; + end; + end; + + function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean; var Value, RegName: string; diff --git a/compiler/x86_64/aoptcpu.pas b/compiler/x86_64/aoptcpu.pas index 26403a7a6d..a8d0819a71 100644 --- a/compiler/x86_64/aoptcpu.pas +++ b/compiler/x86_64/aoptcpu.pas @@ -194,6 +194,8 @@ uses Result:=PostPeepholeOptCall(p); A_LEA: Result:=PostPeepholeOptLea(p); + A_PUSH: + Result:=PostPeepholeOptPush(p); { route PUSH instructions to the push/call/pop/ret handler } else ; end; diff --git a/tests/test/tasm23b.pp b/tests/test/tasm23b.pp index 75a9502a6a..f4a65d758a 100644 --- a/tests/test/tasm23b.pp +++ b/tests/test/tasm23b.pp @@ -6,7 +6,7 @@ program tasm23b; {$S-} const - t_size = 33; + t_size = 25; procedure t; assembler; asm mov eax, [rbx[5]][rdi][54][-17][45][4] { mov eax, [rbx+rdi+5Bh] }