mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-22 14:09:59 +02:00
Merge branch 'cmp-jne-mov' into 'main'
[x86] Deterministic MOV optimisations See merge request freepascal.org/fpc/source!596
This commit is contained in:
commit
c16c2ecd33
@ -3115,9 +3115,7 @@ unit aoptx86;
|
||||
var
|
||||
hp1, hp2, hp3, hp4: tai;
|
||||
DoOptimisation, TempBool: Boolean;
|
||||
{$ifdef x86_64}
|
||||
NewConst: TCGInt;
|
||||
{$endif x86_64}
|
||||
|
||||
procedure convert_mov_value(signed_movop: tasmop; max_value: tcgint); inline;
|
||||
begin
|
||||
@ -3217,12 +3215,13 @@ unit aoptx86;
|
||||
var
|
||||
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
|
||||
PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
|
||||
NewSize: topsize; NewOffset: asizeint;
|
||||
NewSize: topsize; NewOffset: TCGInt;
|
||||
p_SourceReg, p_TargetReg, NewMMReg: TRegister;
|
||||
SourceRef, TargetRef: TReference;
|
||||
MovAligned, MovUnaligned: TAsmOp;
|
||||
ThisRef: TReference;
|
||||
JumpTracking: TLinkedList;
|
||||
X: Integer;
|
||||
begin
|
||||
Result:=false;
|
||||
|
||||
@ -5149,6 +5148,71 @@ unit aoptx86;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
if (taicpu(p).opsize = S_B) and
|
||||
(taicpu(p).oper[0]^.typ = top_const) and
|
||||
{ Make sure it doesn't affect the comparison instruction }
|
||||
not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^) and
|
||||
not MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) and
|
||||
(
|
||||
(taicpu(p).oper[1]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
|
||||
) and
|
||||
{ Test the value to see if it's 0 or 1 now to delay the expensive
|
||||
GetNextInstruction calls for as long as possible }
|
||||
(taicpu(p).oper[0]^.val in [0, 1]) and
|
||||
GetNextInstruction(hp1, hp2) and
|
||||
MatchInstruction(hp2, A_Jcc, []) and
|
||||
GetNextInstruction(hp2, hp3) and
|
||||
MatchInstruction(hp3, A_MOV, A_OR, [S_B]) and
|
||||
(taicpu(hp3).oper[0]^.typ = top_const) and
|
||||
(
|
||||
(
|
||||
(taicpu(p).oper[0]^.val = 0) and
|
||||
(taicpu(hp3).oper[0]^.val = 1)
|
||||
) or
|
||||
(
|
||||
(taicpu(hp3).opcode = A_MOV) and
|
||||
(taicpu(p).oper[0]^.val = 1) and
|
||||
(taicpu(hp3).oper[0]^.val = 0)
|
||||
)
|
||||
) and
|
||||
MatchOperand(taicpu(hp3).oper[1]^, taicpu(p).oper[1]^) and
|
||||
GetNextInstruction(hp3, hp4) and (hp4.typ = ait_label) and
|
||||
(GetLabelWithSym(TAsmLabel(JumpTargetOp(taicpu(hp2))^.ref^.symbol)) = hp4) then
|
||||
begin
|
||||
{
|
||||
Change:
|
||||
movb 0,reg/ref
|
||||
cmpb $1,45(%rcx)
|
||||
j(c) .Lbl
|
||||
movb 1,reg/ref
|
||||
.Lbl:
|
||||
|
||||
To:
|
||||
cmpb $1,45(%rcx)
|
||||
set(~c) reg/ref (or set(c) if 0 and 1 are switched around)
|
||||
}
|
||||
taicpu(hp3).opcode := A_SETcc;
|
||||
taicpu(hp3).loadoper(0, taicpu(hp3).oper[1]^);
|
||||
taicpu(hp3).ops := 1;
|
||||
if (taicpu(p).oper[0]^.val = 0) then
|
||||
begin
|
||||
taicpu(hp3).condition := inverse_cond(taicpu(hp2).condition);
|
||||
DebugMsg(SPeepholeOptimization + 'Mov0J(c)Mov1LbL -> Set(~c)',p);
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp3).condition := taicpu(hp2).condition;
|
||||
DebugMsg(SPeepholeOptimization + 'Mov1J(c)Mov1LbL -> Set(c)',p);
|
||||
end;
|
||||
|
||||
tai_label(hp4).labsym.decrefs;
|
||||
RemoveInstruction(hp2);
|
||||
RemoveCurrentP(p, hp1);
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
if DoMovCmpMemOpt(p, hp1) then
|
||||
begin
|
||||
Result := True;
|
||||
@ -5536,6 +5600,359 @@ unit aoptx86;
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
{ Arithmetic shortcutting optimisations }
|
||||
if MatchOpType(taicpu(p), top_const, top_reg) then
|
||||
begin
|
||||
hp2 := p;
|
||||
CrossJump := False;
|
||||
while GetNextInstructionUsingReg(hp2, hp1, p_TargetReg) and
|
||||
(hp1.typ = ait_instruction) and
|
||||
(
|
||||
is_calljmp(taicpu(hp1).opcode) or
|
||||
RegReadByInstruction(p_TargetReg, hp1)
|
||||
) do
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
|
||||
|
||||
{ mov x,%reg
|
||||
...
|
||||
op with a ref that contains %reg
|
||||
|
||||
Remove %reg if possible and add to offset
|
||||
}
|
||||
for X := 0 to taicpu(hp1).ops - 1 do
|
||||
if taicpu(hp1).oper[X]^.typ = top_ref then
|
||||
begin
|
||||
NewOffset := taicpu(hp1).oper[X]^.ref^.offset; { Initialise }
|
||||
if (taicpu(hp1).opcode = A_LEA) or
|
||||
(
|
||||
{ We can't have a reference that becomes completely
|
||||
empty (save for an offset) should the register get
|
||||
stripped out }
|
||||
((taicpu(hp1).oper[X]^.ref^.base <> NR_NO) and (taicpu(hp1).oper[X]^.ref^.index <> NR_NO)) or
|
||||
Assigned(taicpu(hp1).oper[X]^.ref^.symbol) or
|
||||
Assigned(taicpu(hp1).oper[X]^.ref^.relsymbol)
|
||||
) then
|
||||
begin
|
||||
TempRegUsed := False; { Reusing the variable to detect if a change was made }
|
||||
|
||||
{ We can only make this optimisation if both base and
|
||||
index are set or a symbol is used }
|
||||
if (taicpu(hp1).oper[X]^.ref^.index = p_TargetReg) then
|
||||
begin
|
||||
Inc(NewOffset, TCGInt(taicpu(p).oper[0]^.val) * max(taicpu(hp1).oper[X]^.ref^.scalefactor, 1));
|
||||
if (NewOffset >= -2147483648) and (NewOffset <= $7FFFFFFF) then
|
||||
begin
|
||||
taicpu(hp1).oper[X]^.ref^.index := NR_NO;
|
||||
taicpu(hp1).oper[X]^.ref^.offset := NewOffset;
|
||||
taicpu(hp1).oper[X]^.ref^.scalefactor := 0;
|
||||
TempRegUsed := True;
|
||||
end;
|
||||
end;
|
||||
|
||||
if (taicpu(hp1).oper[X]^.ref^.base = p_TargetReg) and
|
||||
(
|
||||
(taicpu(hp1).opcode = A_LEA) or
|
||||
(taicpu(hp1).oper[X]^.ref^.index <> p_TargetReg)
|
||||
) then
|
||||
begin
|
||||
Inc(NewOffset, taicpu(p).oper[0]^.val);
|
||||
if (NewOffset >= -2147483648) and (NewOffset <= $7FFFFFFF) then
|
||||
begin
|
||||
taicpu(hp1).oper[X]^.ref^.base := NR_NO;
|
||||
taicpu(hp1).oper[X]^.ref^.offset := NewOffset;
|
||||
TempRegUsed := True;
|
||||
end;
|
||||
end;
|
||||
|
||||
if TempRegUsed then
|
||||
begin
|
||||
{ Handle the case if the reference becomes empty
|
||||
in a LEA instruction, as in this case it can be
|
||||
translated to a MOV or a null operation }
|
||||
if (taicpu(hp1).oper[X]^.ref^.base = NR_NO) and
|
||||
(taicpu(hp1).oper[X]^.ref^.index = NR_NO) and
|
||||
not Assigned(taicpu(hp1).oper[X]^.ref^.symbol) and
|
||||
not Assigned(taicpu(hp1).oper[X]^.ref^.relsymbol) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + debug_regname(p_TargetReg) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; removed register from reference. amended offset and converted LEA to MOV (Lea2Mov 2)', hp1);
|
||||
if (NewOffset = taicpu(p).oper[0]^.val) and Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, p_TargetReg) then
|
||||
begin
|
||||
{ Since this would set the register to a value that it's already equal to, just remove it }
|
||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5c done', hp1);
|
||||
RemoveInstruction(hp1);
|
||||
hp1 := nil;
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp1).opcode := A_MOV;
|
||||
taicpu(hp1).loadconst(0, NewOffset);
|
||||
end;
|
||||
end
|
||||
else
|
||||
DebugMsg(SPeepholeOptimization + debug_regname(p_TargetReg) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; removed register from reference and amended offset', hp1);
|
||||
|
||||
if Assigned(hp1) and not RegUsedBetween(p_TargetReg, p, hp1) and
|
||||
not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
{ If the original register is no longer used, we can remove the initial MOV }
|
||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 9a', p);
|
||||
RemoveCurrentP(p);
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Only one reference per instruction }
|
||||
Break;
|
||||
end;
|
||||
|
||||
if not Assigned(hp1) then
|
||||
{ hp1 got deleted by the Mov2Nop 5c optimisation above }
|
||||
Continue;
|
||||
|
||||
case taicpu(hp1).opcode of
|
||||
A_Jcc:
|
||||
begin
|
||||
{ Don't continue on lower optimisation settings }
|
||||
if not (cs_opt_level2 in current_settings.optimizerswitches) then
|
||||
Break;
|
||||
|
||||
CrossJump := True;
|
||||
{ Go to the next instruction }
|
||||
hp2 := hp1;
|
||||
Continue;
|
||||
end;
|
||||
|
||||
A_ADD, A_SUB:
|
||||
{ mov 0,%reg1
|
||||
...
|
||||
add/sub %reg1,%reg2
|
||||
|
||||
Remove add/sub
|
||||
|
||||
And:
|
||||
mov x,%reg1
|
||||
...
|
||||
add/sub %reg1,%reg2
|
||||
|
||||
Change to:
|
||||
mov x,%reg1
|
||||
...
|
||||
add/sub x,%reg2
|
||||
}
|
||||
if MatchOperand(taicpu(hp1).oper[0]^, p_TargetReg) and
|
||||
(
|
||||
{ "add/sub %reg,%reg" is fine to remove since %reg = 0
|
||||
still makes it an identity operation }
|
||||
MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) or
|
||||
not RegModifiedByInstruction(p_TargetReg, hp1)
|
||||
)
|
||||
then
|
||||
begin
|
||||
TempRegUsed := True; { Reusing to identify if the optimisation is invalid }
|
||||
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
|
||||
|
||||
if not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
if (taicpu(p).oper[0]^.val <> 0) then
|
||||
begin
|
||||
if MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) then
|
||||
begin
|
||||
{$ifndef x86_64}
|
||||
NewConst := 2 * taicpu(p).oper[0]^.val;
|
||||
{$else x86_64}
|
||||
{ Protect against overflows }
|
||||
NewConst := (taicpu(p).oper[0]^.val and $7FFFFFFFFFFFFFFF) shl 1;
|
||||
if (NewConst > $7FFFFFFF) or (NewConst < -2147483648) then
|
||||
{ Constant out of range }
|
||||
TempRegUsed := False
|
||||
else
|
||||
{$endif x86_64}
|
||||
begin
|
||||
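{ add %reg,%reg doubles the value. NOTE(review): this branch is also reached for "sub %reg,%reg", which yields 0 rather than 2x - confirm SUB is handled correctly here }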
|
||||
taicpu(hp1).opcode := A_MOV;
|
||||
|
||||
case taicpu(hp1).opsize of
|
||||
S_B: taicpu(hp1).loadconst(0, NewConst and $FF);
|
||||
S_W: taicpu(hp1).loadconst(0, NewConst and $FFFF);
|
||||
S_L: taicpu(hp1).loadconst(0, NewConst and $FFFFFFFF);
|
||||
{$ifdef x86_64}
|
||||
S_Q: taicpu(hp1).loadconst(0, NewConst);
|
||||
{$endif x86_64}
|
||||
else
|
||||
InternalError(2024022001);
|
||||
end;
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
{$ifdef x86_64}
|
||||
if (taicpu(p).oper[0]^.val > $7FFFFFFF) or (taicpu(p).oper[0]^.val < -2147483648) then
|
||||
{ Constant out of range }
|
||||
TempRegUsed := False
|
||||
else
|
||||
{$endif x86_64}
|
||||
taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
|
||||
end;
|
||||
|
||||
if TempRegUsed then
|
||||
DebugMsg(SPeepholeOptimization + 'Adapting ADD/SUB since ' + debug_regname(p_TargetReg) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val), hp1);
|
||||
end;
|
||||
|
||||
if TempRegUsed then
|
||||
begin
|
||||
if (taicpu(p).oper[0]^.val = 0) then
|
||||
DebugMsg(SPeepholeOptimization + 'Removing ADD/SUB since ' + debug_regname(p_TargetReg) + ' = $0', hp1);
|
||||
|
||||
if not CrossJump and
|
||||
not RegUsedBetween(p_TargetReg, p, hp1) and
|
||||
not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
{ If the original register is no longer used, we can remove the initial MOV }
|
||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 9', p);
|
||||
if (taicpu(p).oper[0]^.val = 0) then
|
||||
RemoveInstruction(hp1);
|
||||
RemoveCurrentP(p);
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
if (taicpu(p).oper[0]^.val = 0) then
|
||||
RemoveInstruction(hp1);
|
||||
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
Continue;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
A_NOT:
|
||||
if MatchOperand(taicpu(hp1).oper[0]^, p_TargetReg) then
|
||||
begin
|
||||
TempRegUsed := False; { Reusing to identify if the optimisation is invalid }
|
||||
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
if not (cs_opt_level3 in current_settings.optimizerswitches) then
|
||||
{ hp1 is adjacent }
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next))
|
||||
else
|
||||
UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
|
||||
|
||||
if not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
NewConst := not taicpu(p).oper[0]^.val;
|
||||
{$ifdef x86_64}
|
||||
if (NewConst <= $7FFFFFFF) and (NewConst >= -2147483648) then
|
||||
{$endif x86_64}
|
||||
begin
|
||||
TempRegUsed := True;
|
||||
DebugMsg(SPeepholeOptimization + 'Adapting NOT since ' + debug_regname(p_TargetReg) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + ' (And2Nop 2)', hp1);
|
||||
taicpu(hp1).opcode := A_MOV;
|
||||
taicpu(hp1).ops := 2;
|
||||
taicpu(hp1).loadconst(0, NewConst);
|
||||
taicpu(hp1).loadreg(1, p_TargetReg);
|
||||
end;
|
||||
end;
|
||||
|
||||
if TempRegUsed then
|
||||
begin
|
||||
if not CrossJump and
|
||||
not RegUsedBetween(p_TargetReg, p, hp1) and
|
||||
not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
{ If the original register is no longer used, we can remove the initial MOV }
|
||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 9b', p);
|
||||
RemoveCurrentP(p);
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
Exit;
|
||||
end;
|
||||
|
||||
end;
|
||||
|
||||
A_AND, A_OR, A_XOR:
|
||||
begin
|
||||
TempRegUsed := False; { Reusing to identify if the optimisation is invalid }
|
||||
|
||||
if MatchOperand(taicpu(hp1).oper[0]^, p_TargetReg) then
|
||||
begin
|
||||
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
if not (cs_opt_level3 in current_settings.optimizerswitches) then
|
||||
{ hp1 is adjacent }
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next))
|
||||
else
|
||||
UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
|
||||
|
||||
if not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
|
||||
{$ifdef x86_64}
|
||||
and (taicpu(p).oper[0]^.val <= $7FFFFFFF) and (taicpu(p).oper[0]^.val >= -2147483648)
|
||||
{$endif x86_64}
|
||||
then
|
||||
begin
|
||||
TempRegUsed := True;
|
||||
taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
|
||||
end;
|
||||
|
||||
end
|
||||
else if (taicpu(hp1).oper[0]^.typ = top_const) and
|
||||
MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) then
|
||||
begin
|
||||
TempRegUsed := True;
|
||||
DebugMsg(SPeepholeOptimization + 'Adapting ' + upcase(debug_op2str(taicpu(hp1).opcode)) + ' since ' + debug_regname(p_TargetReg) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + ' (And2Nop 2)', hp1);
|
||||
case taicpu(hp1).opcode of
|
||||
A_AND:
|
||||
taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val and taicpu(p).oper[0]^.val;
|
||||
A_OR:
|
||||
taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or taicpu(p).oper[0]^.val;
|
||||
A_XOR:
|
||||
taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val xor taicpu(p).oper[0]^.val;
|
||||
else
|
||||
InternalError(2024022101)
|
||||
end;
|
||||
taicpu(hp1).opcode := A_MOV;
|
||||
end;
|
||||
|
||||
if TempRegUsed then
|
||||
begin
|
||||
if not CrossJump and
|
||||
not RegUsedBetween(p_TargetReg, p, hp1) and
|
||||
not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
{ If the original register is no longer used, we can remove the initial MOV }
|
||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 9b', p);
|
||||
RemoveCurrentP(p);
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
Exit;
|
||||
end;
|
||||
end
|
||||
else
|
||||
;
|
||||
end;
|
||||
|
||||
if (cs_opt_level2 in current_settings.optimizerswitches) and
|
||||
not RegModifiedByInstruction(p_TargetReg, hp1) then
|
||||
begin
|
||||
{ Try the next instruction }
|
||||
hp2 := hp1;
|
||||
Continue;
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
@ -6269,8 +6686,17 @@ unit aoptx86;
|
||||
internalerror(2022022001);
|
||||
|
||||
{ changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
|
||||
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) and
|
||||
if (
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base <> NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO)
|
||||
) or
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.index <> NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1)
|
||||
)
|
||||
) and
|
||||
(
|
||||
{ do not mess with leas accessing the stack pointer
|
||||
unless it's a null operation }
|
||||
@ -6284,6 +6710,13 @@ unit aoptx86;
|
||||
begin
|
||||
if (taicpu(p).oper[0]^.ref^.offset = 0) then
|
||||
begin
|
||||
{ If index contains a register, base is definitely NR_NO }
|
||||
if (taicpu(p).oper[0]^.ref^.index <> NR_NO) then
|
||||
begin
|
||||
taicpu(p).oper[0]^.ref^.base := taicpu(p).oper[0]^.ref^.index;
|
||||
taicpu(p).oper[0]^.ref^.index := NR_NO;
|
||||
end;
|
||||
|
||||
if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
|
||||
begin
|
||||
taicpu(p).opcode := A_MOV;
|
||||
@ -6382,14 +6815,20 @@ unit aoptx86;
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
{ reg1 is not updated so it might not be used afterwards }
|
||||
{ reg1 is not updated so it must not be used afterwards }
|
||||
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
|
||||
if taicpu(p).oper[0]^.ref^.base<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
begin
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
AllocRegBetween(taicpu(p).oper[0]^.ref^.base, p, hp1, UsedRegs);
|
||||
end;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
begin
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
AllocRegBetween(taicpu(p).oper[0]^.ref^.index, p, hp1, UsedRegs);
|
||||
end;
|
||||
if taicpu(p).oper[0]^.ref^.symbol<>nil then
|
||||
taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
|
||||
if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
|
||||
@ -6544,7 +6983,9 @@ unit aoptx86;
|
||||
|
||||
if (taicpu(p).oper[0]^.ref^.offset <> 0) then
|
||||
Inc(taicpu(hp1).oper[0]^.ref^.offset, taicpu(p).oper[0]^.ref^.offset * max(taicpu(hp1).oper[0]^.ref^.scalefactor, 1));
|
||||
|
||||
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
|
||||
AllocRegBetween(taicpu(p).oper[0]^.ref^.index, p, hp1, UsedRegs);
|
||||
|
||||
{ Just to prevent miscalculations }
|
||||
if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
|
||||
@ -8180,7 +8621,7 @@ unit aoptx86;
|
||||
var
|
||||
v: TCGInt;
|
||||
true_hp1, hp1, hp2, p_dist, p_jump, hp1_dist, p_label, hp1_label: tai;
|
||||
FirstMatch, TempBool: Boolean;
|
||||
FirstMatch, TempBool, FoundJNE: Boolean;
|
||||
NewReg: TRegister;
|
||||
JumpLabel, JumpLabel_dist, JumpLabel_far: TAsmLabel;
|
||||
begin
|
||||
@ -8208,9 +8649,11 @@ unit aoptx86;
|
||||
|
||||
{ Also handle cases where there are multiple jumps in a row }
|
||||
p_jump := hp1;
|
||||
FoundJNE := False;
|
||||
while Assigned(p_jump) and MatchInstruction(p_jump, A_JCC, []) do
|
||||
begin
|
||||
Prefetch(p_jump.Next);
|
||||
|
||||
if IsJumpToLabel(taicpu(p_jump)) then
|
||||
begin
|
||||
{ Do jump optimisations first in case the condition becomes
|
||||
@ -8332,6 +8775,11 @@ unit aoptx86;
|
||||
end;
|
||||
end;
|
||||
|
||||
{ We can't do this at the start of the while loop because of
|
||||
constructs such as "jne @lbl1; jmp @lbl2; @lbl1" that are
|
||||
optimised by DoJumpOptimizations into "je @lbl2" }
|
||||
FoundJNE := FoundJNE or (taicpu(p_jump).condition in [C_NE, C_NZ]);
|
||||
|
||||
{ Search for:
|
||||
cmp ###,###
|
||||
j(c1) @lbl1
|
||||
@ -8383,6 +8831,66 @@ unit aoptx86;
|
||||
GetNextInstruction(p_jump, p_jump);
|
||||
end;
|
||||
|
||||
{ Search for:
|
||||
cmp x,(reg/ref)
|
||||
(maybe other jumps)
|
||||
jne @lbl1
|
||||
(Operation with (reg/ref))
|
||||
|
||||
(reg/ref) is deterministic and equal to x
|
||||
}
|
||||
if FoundJNE and (taicpu(p).oper[1]^.typ = top_ref) then
|
||||
{ p_jump is one instruction after the last jump }
|
||||
while MatchInstruction(p_jump, A_MOV, [taicpu(p).opsize]) do
|
||||
begin
|
||||
if MatchOperand(taicpu(p).oper[1]^, taicpu(p_jump).oper[0]^) then
|
||||
begin
|
||||
|
||||
{ Make sure the symbol's reference count is decremented when the memory operand is replaced or removed }
|
||||
if Assigned(taicpu(p_jump).oper[0]^.ref^.symbol) then
|
||||
taicpu(p_jump).oper[0]^.ref^.symbol.decrefs
|
||||
else if Assigned(taicpu(p_jump).oper[0]^.ref^.relsymbol) then
|
||||
taicpu(p_jump).oper[0]^.ref^.relsymbol.decrefs;
|
||||
|
||||
if MatchOperand(taicpu(p).oper[0]^, taicpu(p_jump).oper[1]^) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'CMP/JNE/MOV -> reference value was deterministic (Mov2Nop 7)', p_jump);
|
||||
RemoveInstruction(p_jump);
|
||||
end
|
||||
else
|
||||
begin
|
||||
if taicpu(p).oper[0]^.typ = top_reg then
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
AllocRegBetween(taicpu(p).oper[0]^.reg, p, p_jump, TmpUsedRegs);
|
||||
end;
|
||||
|
||||
DebugMsg(SPeepholeOptimization + 'CMP/JNE/MOV -> reference value was deterministic (Mov2Mov 7) - changed from "mov ' +
|
||||
debug_operstr(taicpu(p_jump).oper[0]^) + ',' + debug_operstr(taicpu(p_jump).oper[1]^) + '" to "mov ' +
|
||||
debug_operstr(taicpu(p).oper[0]^) + ',' + debug_operstr(taicpu(p_jump).oper[1]^) + '"', p_jump);
|
||||
|
||||
taicpu(p_jump).loadoper(0, taicpu(p).oper[0]^);
|
||||
end;
|
||||
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
end
|
||||
else if MatchOperand(taicpu(p).oper[1]^, taicpu(p_jump).oper[1]^) then
|
||||
{ Reference is written to }
|
||||
Break;
|
||||
|
||||
{ Keep scanning later MOVs unless this one wrote to a register that the compared reference uses for its address }
|
||||
if (
|
||||
(taicpu(p_jump).oper[1]^.typ <> top_reg) or
|
||||
not RegInRef(taicpu(p_jump).oper[1]^.reg, taicpu(p).oper[1]^.ref^)
|
||||
) then
|
||||
begin
|
||||
if not GetNextInstruction(p_jump, p_jump) then
|
||||
Break;
|
||||
end
|
||||
else
|
||||
Break;
|
||||
end;
|
||||
|
||||
if (
|
||||
{ Don't call GetNextInstruction again if we already have it }
|
||||
(true_hp1 = p_jump) or
|
||||
|
Loading…
Reference in New Issue
Block a user