mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-24 19:29:07 +02:00
* x86: Additional optimisation in OptPass2MOV to help with sub-optimal MOV/CMP/MOV triplets (and a minor refactor).
This commit is contained in:
parent
f4e955d04f
commit
755d221230
@ -10003,13 +10003,64 @@ unit aoptx86;
|
|||||||
if not GetNextInstruction(p, hp1) then
|
if not GetNextInstruction(p, hp1) then
|
||||||
Exit;
|
Exit;
|
||||||
|
|
||||||
if MatchInstruction(hp1, A_CMP, A_TEST, [taicpu(p).opsize])
|
if MatchInstruction(hp1, A_CMP, A_TEST, []) then
|
||||||
and DoMovCmpMemOpt(p, hp1) then
|
begin
|
||||||
|
if (taicpu(hp1).opsize = taicpu(p).opsize) and DoMovCmpMemOpt(p, hp1) then
|
||||||
begin
|
begin
|
||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end
|
end;
|
||||||
else if MatchInstruction(hp1, A_JMP, [S_NO]) then
|
|
||||||
|
{ This optimisation is only effective on a second run of Pass 2,
|
||||||
|
hence -O3 or above.
|
||||||
|
|
||||||
|
Change:
|
||||||
|
mov %reg1,%reg2
|
||||||
|
cmp/test (contains %reg1)
|
||||||
|
mov x, %reg1
|
||||||
|
(another mov or a j(c))
|
||||||
|
|
||||||
|
To:
|
||||||
|
mov %reg1,%reg2
|
||||||
|
mov x, %reg1
|
||||||
|
cmp/test (%reg1 replaced with %reg2)
|
||||||
|
(another mov or a j(c))
|
||||||
|
|
||||||
|
The requirement of an additional MOV or a jump ensures there
|
||||||
|
isn't performance loss, since a j(c) will permit macro-fusion
|
||||||
|
with the cmp instruction, while another MOV likely means it's
|
||||||
|
not all being executed in a single cycle due to parallelisation.
|
||||||
|
}
|
||||||
|
if (cs_opt_level3 in current_settings.optimizerswitches) and
|
||||||
|
MatchOpType(taicpu(p), top_reg, top_reg) and
|
||||||
|
RegInInstruction(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
|
||||||
|
GetNextInstruction(hp1, hp2) and
|
||||||
|
MatchInstruction(hp2, A_MOV, []) and
|
||||||
|
(taicpu(hp2).oper[1]^.typ = top_reg) and
|
||||||
|
{ Registers don't have to be the same size in this case }
|
||||||
|
SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
||||||
|
GetNextInstruction(hp2, hp3) and
|
||||||
|
MatchInstruction(hp3, A_MOV, A_Jcc, []) and
|
||||||
|
{ Make sure the operands in the comparison can be safely replaced }
|
||||||
|
(
|
||||||
|
not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^) or
|
||||||
|
ReplaceRegisterInOper(taicpu(hp1), 0, taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg)
|
||||||
|
) and
|
||||||
|
(
|
||||||
|
not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[1]^) or
|
||||||
|
ReplaceRegisterInOper(taicpu(hp1), 1, taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg)
|
||||||
|
) then
|
||||||
|
begin
|
||||||
|
DebugMsg(SPeepholeOptimization + 'MOV/CMP/MOV -> MOV/MOV/CMP', p);
|
||||||
|
AsmL.Remove(hp2);
|
||||||
|
AsmL.InsertAfter(hp2, p);
|
||||||
|
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
if MatchInstruction(hp1, A_JMP, [S_NO]) then
|
||||||
begin
|
begin
|
||||||
{ Sometimes the MOVs that OptPass2JMP produces can be improved
|
{ Sometimes the MOVs that OptPass2JMP produces can be improved
|
||||||
further, but we can't just put this jump optimisation in pass 1
|
further, but we can't just put this jump optimisation in pass 1
|
||||||
@ -10019,21 +10070,30 @@ unit aoptx86;
|
|||||||
UpdateUsedRegs(tai(p.Next));
|
UpdateUsedRegs(tai(p.Next));
|
||||||
|
|
||||||
if OptPass2JMP(hp1) then
|
if OptPass2JMP(hp1) then
|
||||||
|
begin
|
||||||
|
{ Restore register state }
|
||||||
|
RestoreUsedRegs(TempTracking);
|
||||||
|
ReleaseUsedRegs(TempTracking);
|
||||||
|
|
||||||
{ call OptPass1MOV once to potentially merge any MOVs that were created }
|
{ call OptPass1MOV once to potentially merge any MOVs that were created }
|
||||||
Result := OptPass1MOV(p);
|
OptPass1MOV(p);
|
||||||
{ OptPass2MOV will now exit but will be called again if OptPass1MOV
|
Result := True;
|
||||||
returned True and the instruction is still a MOV, thus checking
|
Exit;
|
||||||
the optimisations below }
|
end;
|
||||||
|
|
||||||
{ If OptPass2JMP returned False, no optimisations were done to
|
{ If OptPass2JMP returned False, no optimisations were done to
|
||||||
the jump and there are no further optimisations that can be done
|
the jump and there are no further optimisations that can be done
|
||||||
to the MOV instruction on this pass }
|
to the MOV instruction on this pass other than FuncMov2Func }
|
||||||
|
|
||||||
{ Restore register state }
|
{ Restore register state }
|
||||||
RestoreUsedRegs(TempTracking);
|
RestoreUsedRegs(TempTracking);
|
||||||
ReleaseUsedRegs(TempTracking);
|
ReleaseUsedRegs(TempTracking);
|
||||||
end
|
|
||||||
else if MatchOpType(taicpu(p),top_reg,top_reg) and
|
Result := FuncMov2Func(p, hp1);
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
|
||||||
|
if MatchOpType(taicpu(p),top_reg,top_reg) and
|
||||||
(taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
|
(taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
|
||||||
MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
|
MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
|
||||||
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
||||||
@ -10076,8 +10136,9 @@ unit aoptx86;
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end
|
end;
|
||||||
else if MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
||||||
|
if MatchOpType(taicpu(p),top_reg,top_reg) and
|
||||||
{$ifdef x86_64}
|
{$ifdef x86_64}
|
||||||
MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
|
MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
|
||||||
{$else x86_64}
|
{$else x86_64}
|
||||||
@ -10105,11 +10166,12 @@ unit aoptx86;
|
|||||||
Result:=true;
|
Result:=true;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
exit;
|
Exit;
|
||||||
end
|
end;
|
||||||
else if MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
||||||
|
if MatchOpType(taicpu(p),top_reg,top_reg) and
|
||||||
IsXCHGAcceptable and
|
IsXCHGAcceptable and
|
||||||
{ XCHG doesn't support 8-byte registers }
|
{ XCHG doesn't support 8-bit registers }
|
||||||
(taicpu(p).opsize <> S_B) and
|
(taicpu(p).opsize <> S_B) and
|
||||||
MatchInstruction(hp1, A_MOV, []) and
|
MatchInstruction(hp1, A_MOV, []) and
|
||||||
MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
||||||
@ -10146,8 +10208,9 @@ unit aoptx86;
|
|||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
end
|
end;
|
||||||
else if MatchOpType(taicpu(p),top_reg,top_reg) and
|
|
||||||
|
if MatchOpType(taicpu(p),top_reg,top_reg) and
|
||||||
MatchInstruction(hp1, A_SAR, []) then
|
MatchInstruction(hp1, A_SAR, []) then
|
||||||
begin
|
begin
|
||||||
if MatchOperand(taicpu(hp1).oper[0]^, 31) then
|
if MatchOperand(taicpu(hp1).oper[0]^, 31) then
|
||||||
@ -10172,7 +10235,9 @@ unit aoptx86;
|
|||||||
taicpu(p).clearop(1);
|
taicpu(p).clearop(1);
|
||||||
taicpu(p).clearop(0);
|
taicpu(p).clearop(0);
|
||||||
taicpu(p).ops:=0;
|
taicpu(p).ops:=0;
|
||||||
|
|
||||||
Result := True;
|
Result := True;
|
||||||
|
Exit;
|
||||||
end
|
end
|
||||||
else if (cs_opt_size in current_settings.optimizerswitches) and
|
else if (cs_opt_size in current_settings.optimizerswitches) and
|
||||||
(taicpu(p).oper[0]^.reg = NR_EDX) and
|
(taicpu(p).oper[0]^.reg = NR_EDX) and
|
||||||
@ -10194,6 +10259,9 @@ unit aoptx86;
|
|||||||
taicpu(hp1).clearop(1);
|
taicpu(hp1).clearop(1);
|
||||||
taicpu(hp1).clearop(0);
|
taicpu(hp1).clearop(0);
|
||||||
taicpu(hp1).ops:=0;
|
taicpu(hp1).ops:=0;
|
||||||
|
|
||||||
|
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||||
|
Exit;
|
||||||
end;
|
end;
|
||||||
{$ifndef x86_64}
|
{$ifndef x86_64}
|
||||||
end
|
end
|
||||||
@ -10273,6 +10341,9 @@ unit aoptx86;
|
|||||||
else
|
else
|
||||||
;
|
;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
{$else x86_64}
|
{$else x86_64}
|
||||||
@ -10299,7 +10370,9 @@ unit aoptx86;
|
|||||||
taicpu(p).clearop(1);
|
taicpu(p).clearop(1);
|
||||||
taicpu(p).clearop(0);
|
taicpu(p).clearop(0);
|
||||||
taicpu(p).ops:=0;
|
taicpu(p).ops:=0;
|
||||||
|
|
||||||
Result := True;
|
Result := True;
|
||||||
|
Exit;
|
||||||
end
|
end
|
||||||
else if (cs_opt_size in current_settings.optimizerswitches) and
|
else if (cs_opt_size in current_settings.optimizerswitches) and
|
||||||
(taicpu(p).oper[0]^.reg = NR_RDX) and
|
(taicpu(p).oper[0]^.reg = NR_RDX) and
|
||||||
@ -10321,11 +10394,15 @@ unit aoptx86;
|
|||||||
taicpu(hp1).clearop(1);
|
taicpu(hp1).clearop(1);
|
||||||
taicpu(hp1).clearop(0);
|
taicpu(hp1).clearop(0);
|
||||||
taicpu(hp1).ops:=0;
|
taicpu(hp1).ops:=0;
|
||||||
|
|
||||||
|
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||||
|
Exit;
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end
|
end;
|
||||||
else if MatchInstruction(hp1, A_MOV, []) and
|
|
||||||
|
if MatchInstruction(hp1, A_MOV, []) and
|
||||||
(taicpu(hp1).oper[1]^.typ = top_reg) then
|
(taicpu(hp1).oper[1]^.typ = top_reg) then
|
||||||
{ Though "GetNextInstruction" could be factored out, along with
|
{ Though "GetNextInstruction" could be factored out, along with
|
||||||
the instructions that depend on hp2, it is an expensive call that
|
the instructions that depend on hp2, it is an expensive call that
|
||||||
@ -10376,6 +10453,8 @@ unit aoptx86;
|
|||||||
taicpu(hp1).ops:=0;
|
taicpu(hp1).ops:=0;
|
||||||
|
|
||||||
RemoveInstruction(hp2);
|
RemoveInstruction(hp2);
|
||||||
|
|
||||||
|
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||||
(*
|
(*
|
||||||
{$ifdef x86_64}
|
{$ifdef x86_64}
|
||||||
end
|
end
|
||||||
@ -10423,13 +10502,16 @@ unit aoptx86;
|
|||||||
taicpu(hp1).ops:=0;
|
taicpu(hp1).ops:=0;
|
||||||
|
|
||||||
RemoveInstruction(hp2);
|
RemoveInstruction(hp2);
|
||||||
|
|
||||||
|
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
*)
|
*)
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
{$ifdef x86_64}
|
{$ifdef x86_64}
|
||||||
end
|
end;
|
||||||
else if (taicpu(p).opsize = S_L) and
|
|
||||||
|
if (taicpu(p).opsize = S_L) and
|
||||||
(taicpu(p).oper[1]^.typ = top_reg) and
|
(taicpu(p).oper[1]^.typ = top_reg) and
|
||||||
(
|
(
|
||||||
MatchInstruction(hp1, A_MOV,[]) and
|
MatchInstruction(hp1, A_MOV,[]) and
|
||||||
@ -10502,10 +10584,17 @@ unit aoptx86;
|
|||||||
DebugMsg(SPeepholeOptimization + 'MovMov*Shr2MovMov*Rcr', p);
|
DebugMsg(SPeepholeOptimization + 'MovMov*Shr2MovMov*Rcr', p);
|
||||||
|
|
||||||
if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
||||||
|
begin
|
||||||
{ Change first MOV command to have the same register as the final output }
|
{ Change first MOV command to have the same register as the final output }
|
||||||
taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
|
taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg;
|
||||||
|
AllocRegBetween(taicpu(hp1).oper[1]^.reg, p, hp1, UsedRegs);
|
||||||
|
Result := True;
|
||||||
|
end
|
||||||
else
|
else
|
||||||
|
begin
|
||||||
taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
||||||
|
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||||
|
end;
|
||||||
|
|
||||||
{ Change second MOV command to an ADD command. This is easier than
|
{ Change second MOV command to an ADD command. This is easier than
|
||||||
converting the existing command because it means we don't have to
|
converting the existing command because it means we don't have to
|
||||||
@ -10520,6 +10609,8 @@ unit aoptx86;
|
|||||||
taicpu(hp3).opcode := A_RCR;
|
taicpu(hp3).opcode := A_RCR;
|
||||||
taicpu(hp3).changeopsize(S_L);
|
taicpu(hp3).changeopsize(S_L);
|
||||||
setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
|
setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
|
||||||
|
{ Don't need to Exit yet as p is still a MOV and hp1 hasn't been
|
||||||
|
called, so FuncMov2Func below is safe to call }
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user