mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-12 16:49:20 +02:00
* x86: Renamed the temporary register variables in OptPass1MOV for clarity
(CurrentReg/ActiveReg became p_SourceReg/p_TargetReg/NewMMReg), along with other minor optimisations.
This commit is contained in:
parent
627fb9a25b
commit
f62ffa74dc
@ -2752,7 +2752,7 @@ unit aoptx86;
|
|||||||
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
|
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
|
||||||
PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
|
PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
|
||||||
NewSize: topsize;
|
NewSize: topsize;
|
||||||
CurrentReg, ActiveReg: TRegister;
|
p_SourceReg, p_TargetReg, NewMMReg: TRegister;
|
||||||
SourceRef, TargetRef: TReference;
|
SourceRef, TargetRef: TReference;
|
||||||
MovAligned, MovUnaligned: TAsmOp;
|
MovAligned, MovUnaligned: TAsmOp;
|
||||||
ThisRef: TReference;
|
ThisRef: TReference;
|
||||||
@ -2777,110 +2777,115 @@ unit aoptx86;
|
|||||||
if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
|
if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
|
||||||
Exit;
|
Exit;
|
||||||
|
|
||||||
{ Look for:
|
{ Prevent compiler warnings }
|
||||||
mov %reg1,%reg2
|
p_TargetReg := NR_NO;
|
||||||
??? %reg2,r/m
|
|
||||||
Change to:
|
if taicpu(p).oper[1]^.typ = top_reg then
|
||||||
mov %reg1,%reg2
|
|
||||||
??? %reg1,r/m
|
|
||||||
}
|
|
||||||
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(p).oper[1]^.reg;
|
{ Saves on a large number of dereferences }
|
||||||
|
p_TargetReg := taicpu(p).oper[1]^.reg;
|
||||||
|
|
||||||
if RegReadByInstruction(CurrentReg, hp1) and
|
{ Look for:
|
||||||
DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
mov %reg1,%reg2
|
||||||
|
??? %reg2,r/m
|
||||||
|
Change to:
|
||||||
|
mov %reg1,%reg2
|
||||||
|
??? %reg1,r/m
|
||||||
|
}
|
||||||
|
if taicpu(p).oper[0]^.typ = top_reg then
|
||||||
begin
|
begin
|
||||||
{ A change has occurred, just not in p }
|
if RegReadByInstruction(p_TargetReg, hp1) and
|
||||||
Result := True;
|
DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
||||||
|
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
||||||
|
|
||||||
if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
|
|
||||||
{ Just in case something didn't get modified (e.g. an
|
|
||||||
implicit register) }
|
|
||||||
not RegReadByInstruction(CurrentReg, hp1) then
|
|
||||||
begin
|
begin
|
||||||
{ We can remove the original MOV }
|
{ A change has occurred, just not in p }
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
|
||||||
RemoveCurrentp(p, hp1);
|
|
||||||
|
|
||||||
{ UsedRegs got updated by RemoveCurrentp }
|
|
||||||
Result := True;
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
{ If we know a MOV instruction has become a null operation, we might as well
|
|
||||||
get rid of it now to save time. }
|
|
||||||
if (taicpu(hp1).opcode = A_MOV) and
|
|
||||||
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
||||||
SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
|
||||||
{ Just being a register is enough to confirm it's a null operation }
|
|
||||||
(taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
||||||
begin
|
|
||||||
|
|
||||||
Result := True;
|
Result := True;
|
||||||
|
|
||||||
{ Speed-up to reduce a pipeline stall... if we had something like...
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
|
|
||||||
movl %eax,%edx
|
if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
|
||||||
movw %dx,%ax
|
{ Just in case something didn't get modified (e.g. an
|
||||||
|
implicit register) }
|
||||||
... the second instruction would change to movw %ax,%ax, but
|
not RegReadByInstruction(p_TargetReg, hp1) then
|
||||||
given that it is now %ax that's active rather than %eax,
|
|
||||||
penalties might occur due to a partial register write, so instead,
|
|
||||||
change it to a MOVZX instruction when optimising for speed.
|
|
||||||
}
|
|
||||||
if not (cs_opt_size in current_settings.optimizerswitches) and
|
|
||||||
IsMOVZXAcceptable and
|
|
||||||
(taicpu(hp1).opsize < taicpu(p).opsize)
|
|
||||||
{$ifdef x86_64}
|
|
||||||
{ 32-bit operations already implicitly set the upper 32 bits of the 64-bit register to zero }
|
|
||||||
and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
|
||||||
{$endif x86_64}
|
|
||||||
then
|
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(hp1).oper[1]^.reg;
|
{ We can remove the original MOV }
|
||||||
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
||||||
|
RemoveCurrentp(p, hp1);
|
||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
{ UsedRegs got updated by RemoveCurrentp }
|
||||||
case taicpu(p).opsize of
|
Result := True;
|
||||||
S_W:
|
Exit;
|
||||||
if taicpu(hp1).opsize = S_B then
|
|
||||||
taicpu(hp1).opsize := S_BL
|
|
||||||
else
|
|
||||||
InternalError(2020012911);
|
|
||||||
S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
|
||||||
case taicpu(hp1).opsize of
|
|
||||||
S_B:
|
|
||||||
taicpu(hp1).opsize := S_BL;
|
|
||||||
S_W:
|
|
||||||
taicpu(hp1).opsize := S_WL;
|
|
||||||
else
|
|
||||||
InternalError(2020012912);
|
|
||||||
end;
|
|
||||||
else
|
|
||||||
InternalError(2020012910);
|
|
||||||
end;
|
|
||||||
|
|
||||||
taicpu(hp1).opcode := A_MOVZX;
|
|
||||||
taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
begin
|
|
||||||
GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
|
||||||
RemoveInstruction(hp1);
|
|
||||||
|
|
||||||
{ The instruction after what was hp1 is now the immediate next instruction,
|
|
||||||
so we can continue to make optimisations if it's present }
|
|
||||||
if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
|
||||||
Exit;
|
|
||||||
|
|
||||||
hp1 := hp2;
|
|
||||||
end;
|
end;
|
||||||
end;
|
|
||||||
|
|
||||||
|
{ If we know a MOV instruction has become a null operation, we might as well
|
||||||
|
get rid of it now to save time. }
|
||||||
|
if (taicpu(hp1).opcode = A_MOV) and
|
||||||
|
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
||||||
|
SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
||||||
|
{ Just being a register is enough to confirm it's a null operation }
|
||||||
|
(taicpu(hp1).oper[0]^.typ = top_reg) then
|
||||||
|
begin
|
||||||
|
|
||||||
|
Result := True;
|
||||||
|
|
||||||
|
{ Speed-up to reduce a pipeline stall... if we had something like...
|
||||||
|
|
||||||
|
movl %eax,%edx
|
||||||
|
movw %dx,%ax
|
||||||
|
|
||||||
|
... the second instruction would change to movw %ax,%ax, but
|
||||||
|
given that it is now %ax that's active rather than %eax,
|
||||||
|
penalties might occur due to a partial register write, so instead,
|
||||||
|
change it to a MOVZX instruction when optimising for speed.
|
||||||
|
}
|
||||||
|
if not (cs_opt_size in current_settings.optimizerswitches) and
|
||||||
|
IsMOVZXAcceptable and
|
||||||
|
(taicpu(hp1).opsize < taicpu(p).opsize)
|
||||||
|
{$ifdef x86_64}
|
||||||
|
{ 32-bit operations already implicitly set the upper 32 bits of the 64-bit register to zero }
|
||||||
|
and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
||||||
|
{$endif x86_64}
|
||||||
|
then
|
||||||
|
begin
|
||||||
|
DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
||||||
|
case taicpu(p).opsize of
|
||||||
|
S_W:
|
||||||
|
if taicpu(hp1).opsize = S_B then
|
||||||
|
taicpu(hp1).opsize := S_BL
|
||||||
|
else
|
||||||
|
InternalError(2020012911);
|
||||||
|
S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
||||||
|
case taicpu(hp1).opsize of
|
||||||
|
S_B:
|
||||||
|
taicpu(hp1).opsize := S_BL;
|
||||||
|
S_W:
|
||||||
|
taicpu(hp1).opsize := S_WL;
|
||||||
|
else
|
||||||
|
InternalError(2020012912);
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
InternalError(2020012910);
|
||||||
|
end;
|
||||||
|
|
||||||
|
taicpu(hp1).opcode := A_MOVZX;
|
||||||
|
setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
||||||
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
||||||
|
RemoveInstruction(hp1);
|
||||||
|
|
||||||
|
{ The instruction after what was hp1 is now the immediate next instruction,
|
||||||
|
so we can continue to make optimisations if it's present }
|
||||||
|
if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
||||||
|
Exit;
|
||||||
|
|
||||||
|
hp1 := hp2;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -3375,15 +3380,15 @@ unit aoptx86;
|
|||||||
if (taicpu(p).oper[1]^.typ = top_reg) and
|
if (taicpu(p).oper[1]^.typ = top_reg) and
|
||||||
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(p).oper[1]^.reg;
|
{ Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
{ we have
|
{ we have
|
||||||
mov x, %treg
|
mov x, %treg
|
||||||
mov %treg, y
|
mov %treg, y
|
||||||
}
|
}
|
||||||
if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
|
if not(RegInOp(p_TargetReg, taicpu(hp1).oper[1]^)) then
|
||||||
if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
|
if not(RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs)) then
|
||||||
{ we've got
|
{ we've got
|
||||||
|
|
||||||
mov x, %treg
|
mov x, %treg
|
||||||
@ -3485,8 +3490,8 @@ unit aoptx86;
|
|||||||
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
||||||
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
||||||
begin
|
begin
|
||||||
CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
||||||
if CurrentReg <> NR_NO then
|
if NewMMReg <> NR_NO then
|
||||||
begin
|
begin
|
||||||
{ Remember that the offsets are 8 ahead }
|
{ Remember that the offsets are 8 ahead }
|
||||||
if ((SourceRef.offset mod 16) = 8) and
|
if ((SourceRef.offset mod 16) = 8) and
|
||||||
@ -3500,7 +3505,7 @@ unit aoptx86;
|
|||||||
taicpu(p).opcode := MovUnaligned;
|
taicpu(p).opcode := MovUnaligned;
|
||||||
|
|
||||||
taicpu(p).opsize := S_XMM;
|
taicpu(p).opsize := S_XMM;
|
||||||
taicpu(p).oper[1]^.reg := CurrentReg;
|
taicpu(p).oper[1]^.reg := NewMMReg;
|
||||||
|
|
||||||
if ((TargetRef.offset mod 16) = 8) and
|
if ((TargetRef.offset mod 16) = 8) and
|
||||||
(
|
(
|
||||||
@ -3513,9 +3518,9 @@ unit aoptx86;
|
|||||||
taicpu(hp1).opcode := MovUnaligned;
|
taicpu(hp1).opcode := MovUnaligned;
|
||||||
|
|
||||||
taicpu(hp1).opsize := S_XMM;
|
taicpu(hp1).opsize := S_XMM;
|
||||||
taicpu(hp1).oper[0]^.reg := CurrentReg;
|
taicpu(hp1).oper[0]^.reg := NewMMReg;
|
||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
|
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
|
||||||
|
|
||||||
RemoveInstruction(hp2);
|
RemoveInstruction(hp2);
|
||||||
RemoveInstruction(hp3);
|
RemoveInstruction(hp3);
|
||||||
@ -3541,8 +3546,8 @@ unit aoptx86;
|
|||||||
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
||||||
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
||||||
begin
|
begin
|
||||||
CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
||||||
if CurrentReg <> NR_NO then
|
if NewMMReg <> NR_NO then
|
||||||
begin
|
begin
|
||||||
{ hp2 and hp3 are the starting offsets, so mod = 0 this time }
|
{ hp2 and hp3 are the starting offsets, so mod = 0 this time }
|
||||||
if ((SourceRef.offset mod 16) = 0) and
|
if ((SourceRef.offset mod 16) = 0) and
|
||||||
@ -3556,7 +3561,7 @@ unit aoptx86;
|
|||||||
taicpu(hp2).opcode := MovUnaligned;
|
taicpu(hp2).opcode := MovUnaligned;
|
||||||
|
|
||||||
taicpu(hp2).opsize := S_XMM;
|
taicpu(hp2).opsize := S_XMM;
|
||||||
taicpu(hp2).oper[1]^.reg := CurrentReg;
|
taicpu(hp2).oper[1]^.reg := NewMMReg;
|
||||||
|
|
||||||
if ((TargetRef.offset mod 16) = 0) and
|
if ((TargetRef.offset mod 16) = 0) and
|
||||||
(
|
(
|
||||||
@ -3569,9 +3574,9 @@ unit aoptx86;
|
|||||||
taicpu(hp3).opcode := MovUnaligned;
|
taicpu(hp3).opcode := MovUnaligned;
|
||||||
|
|
||||||
taicpu(hp3).opsize := S_XMM;
|
taicpu(hp3).opsize := S_XMM;
|
||||||
taicpu(hp3).oper[0]^.reg := CurrentReg;
|
taicpu(hp3).oper[0]^.reg := NewMMReg;
|
||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
|
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
|
||||||
|
|
||||||
RemoveInstruction(hp1);
|
RemoveInstruction(hp1);
|
||||||
RemoveCurrentP(p, hp2);
|
RemoveCurrentP(p, hp2);
|
||||||
@ -3799,37 +3804,37 @@ unit aoptx86;
|
|||||||
}
|
}
|
||||||
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(p).oper[0]^.reg;
|
p_SourceReg := taicpu(p).oper[0]^.reg;
|
||||||
ActiveReg := taicpu(p).oper[1]^.reg;
|
{ Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
||||||
|
|
||||||
if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and
|
if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and
|
||||||
(taicpu(hp1).oper[1]^.reg = CurrentReg) and
|
(taicpu(hp1).oper[1]^.reg = p_SourceReg) and
|
||||||
RegInRef(CurrentReg, taicpu(hp1).oper[0]^.ref^) and
|
RegInRef(p_SourceReg, taicpu(hp1).oper[0]^.ref^) and
|
||||||
GetNextInstruction(hp1, hp2) and
|
GetNextInstruction(hp1, hp2) and
|
||||||
MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
|
MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
|
||||||
(taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then
|
(taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then
|
||||||
begin
|
begin
|
||||||
SourceRef := taicpu(hp2).oper[0]^.ref^;
|
SourceRef := taicpu(hp2).oper[0]^.ref^;
|
||||||
if RegInRef(ActiveReg, SourceRef) and
|
if RegInRef(p_TargetReg, SourceRef) and
|
||||||
{ If %reg1 also appears in the second reference, then it will
|
{ If %reg1 also appears in the second reference, then it will
|
||||||
not refer to the same memory block as the first reference }
|
not refer to the same memory block as the first reference }
|
||||||
not RegInRef(CurrentReg, SourceRef) then
|
not RegInRef(p_SourceReg, SourceRef) then
|
||||||
begin
|
begin
|
||||||
{ Check to see if the references match if %reg2 is changed to %reg1 }
|
{ Check to see if the references match if %reg2 is changed to %reg1 }
|
||||||
if SourceRef.base = ActiveReg then
|
if SourceRef.base = p_TargetReg then
|
||||||
SourceRef.base := CurrentReg;
|
SourceRef.base := p_SourceReg;
|
||||||
|
|
||||||
if SourceRef.index = ActiveReg then
|
if SourceRef.index = p_TargetReg then
|
||||||
SourceRef.index := CurrentReg;
|
SourceRef.index := p_SourceReg;
|
||||||
|
|
||||||
{ RefsEqual also checks to ensure both references are non-volatile }
|
{ RefsEqual also checks to ensure both references are non-volatile }
|
||||||
if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then
|
if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then
|
||||||
begin
|
begin
|
||||||
taicpu(hp2).loadreg(0, CurrentReg);
|
taicpu(hp2).loadreg(0, p_SourceReg);
|
||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p);
|
DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p);
|
||||||
Result := True;
|
Result := True;
|
||||||
if taicpu(hp2).oper[1]^.reg = ActiveReg then
|
if taicpu(hp2).oper[1]^.reg = p_TargetReg then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p);
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p);
|
||||||
RemoveCurrentP(p, hp1);
|
RemoveCurrentP(p, hp1);
|
||||||
@ -3842,7 +3847,7 @@ unit aoptx86;
|
|||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
||||||
|
|
||||||
if not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
|
if not RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p);
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p);
|
||||||
RemoveCurrentP(p, hp1);
|
RemoveCurrentP(p, hp1);
|
||||||
@ -3871,8 +3876,7 @@ unit aoptx86;
|
|||||||
{ Initialise CrossJump (if it becomes True at any point, it will remain True) }
|
{ Initialise CrossJump (if it becomes True at any point, it will remain True) }
|
||||||
CrossJump := (taicpu(hp1).opcode = A_Jcc);
|
CrossJump := (taicpu(hp1).opcode = A_Jcc);
|
||||||
|
|
||||||
{ Saves on a large number of dereferences }
|
{ Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
||||||
ActiveReg := taicpu(p).oper[1]^.reg;
|
|
||||||
|
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
@ -3882,16 +3886,16 @@ unit aoptx86;
|
|||||||
else
|
else
|
||||||
JumpTracking := nil;
|
JumpTracking := nil;
|
||||||
|
|
||||||
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,JumpTracking,CrossJump) and
|
while GetNextInstructionUsingRegCond(hp3,hp2,p_TargetReg,JumpTracking,CrossJump) and
|
||||||
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
||||||
(hp2.typ=ait_instruction) do
|
(hp2.typ=ait_instruction) do
|
||||||
begin
|
begin
|
||||||
case taicpu(hp2).opcode of
|
case taicpu(hp2).opcode of
|
||||||
A_POP:
|
A_POP:
|
||||||
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then
|
if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) then
|
||||||
begin
|
begin
|
||||||
if not CrossJump and
|
if not CrossJump and
|
||||||
not RegUsedBetween(ActiveReg, p, hp2) then
|
not RegUsedBetween(p_TargetReg, p, hp2) then
|
||||||
begin
|
begin
|
||||||
{ We can remove the original MOV since the register
|
{ We can remove the original MOV since the register
|
||||||
wasn't used between it and its popping from the stack }
|
wasn't used between it and its popping from the stack }
|
||||||
@ -3905,7 +3909,7 @@ unit aoptx86;
|
|||||||
Break;
|
Break;
|
||||||
end;
|
end;
|
||||||
A_MOV:
|
A_MOV:
|
||||||
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
|
if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) and
|
||||||
((taicpu(p).oper[0]^.typ=top_const) or
|
((taicpu(p).oper[0]^.typ=top_const) or
|
||||||
((taicpu(p).oper[0]^.typ=top_reg) and
|
((taicpu(p).oper[0]^.typ=top_reg) and
|
||||||
not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
|
not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
|
||||||
@ -3924,8 +3928,8 @@ unit aoptx86;
|
|||||||
|
|
||||||
TempRegUsed :=
|
TempRegUsed :=
|
||||||
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
||||||
RegReadByInstruction(ActiveReg, hp3) or
|
RegReadByInstruction(p_TargetReg, hp3) or
|
||||||
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
|
RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs);
|
||||||
|
|
||||||
case taicpu(p).oper[0]^.typ Of
|
case taicpu(p).oper[0]^.typ Of
|
||||||
top_reg:
|
top_reg:
|
||||||
@ -3938,17 +3942,17 @@ unit aoptx86;
|
|||||||
|
|
||||||
mov %reg, y
|
mov %reg, y
|
||||||
}
|
}
|
||||||
CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
|
p_SourceReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
|
||||||
RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
|
RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
|
||||||
if MatchOperand(taicpu(hp2).oper[1]^, CurrentReg) then
|
if MatchOperand(taicpu(hp2).oper[1]^, p_SourceReg) then
|
||||||
begin
|
begin
|
||||||
{ %reg = y - remove hp2 completely (doing it here instead of relying on
|
{ %reg = y - remove hp2 completely (doing it here instead of relying on
|
||||||
the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
|
the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
|
||||||
|
|
||||||
if TempRegUsed then
|
if TempRegUsed then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
|
DebugMsg(SPeepholeOptimization + debug_regname(p_SourceReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
|
||||||
AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
|
AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
|
||||||
{ Set the start of the next GetNextInstructionUsingRegCond search
|
{ Set the start of the next GetNextInstructionUsingRegCond search
|
||||||
to start at the entry right before hp2 (which is about to be removed) }
|
to start at the entry right before hp2 (which is about to be removed) }
|
||||||
hp3 := tai(hp2.Previous);
|
hp3 := tai(hp2.Previous);
|
||||||
@ -3971,19 +3975,19 @@ unit aoptx86;
|
|||||||
end
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
|
AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
|
||||||
taicpu(hp2).loadReg(0, CurrentReg);
|
taicpu(hp2).loadReg(0, p_SourceReg);
|
||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
|
DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(p_SourceReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
|
||||||
|
|
||||||
{ Check to see if the register also appears in the reference }
|
{ Check to see if the register also appears in the reference }
|
||||||
if (taicpu(hp2).oper[1]^.typ = top_ref) then
|
if (taicpu(hp2).oper[1]^.typ = top_ref) then
|
||||||
ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, ActiveReg, CurrentReg);
|
ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, p_TargetReg, p_SourceReg);
|
||||||
|
|
||||||
{ Don't remove the first instruction if the temporary register is in use }
|
{ Don't remove the first instruction if the temporary register is in use }
|
||||||
if not TempRegUsed and
|
if not TempRegUsed and
|
||||||
{ ReplaceRegisterInRef won't actually replace the register if it's a different size }
|
{ ReplaceRegisterInRef won't actually replace the register if it's a different size }
|
||||||
not RegInOp(ActiveReg, taicpu(hp2).oper[1]^) then
|
not RegInOp(p_TargetReg, taicpu(hp2).oper[1]^) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
|
DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
|
||||||
RemoveCurrentP(p, hp1);
|
RemoveCurrentP(p, hp1);
|
||||||
@ -4037,11 +4041,11 @@ unit aoptx86;
|
|||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then
|
if MatchOperand(taicpu(hp2).oper[1]^, p_TargetReg) then
|
||||||
begin
|
begin
|
||||||
if not CrossJump and
|
if not CrossJump and
|
||||||
not RegUsedBetween(ActiveReg, p, hp2) and
|
not RegUsedBetween(p_TargetReg, p, hp2) and
|
||||||
not RegReadByInstruction(ActiveReg, hp2) then
|
not RegReadByInstruction(p_TargetReg, hp2) then
|
||||||
begin
|
begin
|
||||||
{ Register is not used before it is overwritten }
|
{ Register is not used before it is overwritten }
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
|
||||||
@ -4067,8 +4071,8 @@ unit aoptx86;
|
|||||||
end;
|
end;
|
||||||
A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
||||||
if MatchOpType(taicpu(hp2), top_reg, top_reg) and
|
if MatchOpType(taicpu(hp2), top_reg, top_reg) and
|
||||||
MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and
|
MatchOperand(taicpu(hp2).oper[0]^, p_TargetReg) and
|
||||||
SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then
|
SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, p_TargetReg) then
|
||||||
begin
|
begin
|
||||||
{
|
{
|
||||||
Change from:
|
Change from:
|
||||||
@ -4108,10 +4112,10 @@ unit aoptx86;
|
|||||||
{ Also catches MOV/S/Z instructions that aren't modified }
|
{ Also catches MOV/S/Z instructions that aren't modified }
|
||||||
if taicpu(p).oper[0]^.typ = top_reg then
|
if taicpu(p).oper[0]^.typ = top_reg then
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(p).oper[0]^.reg;
|
p_SourceReg := taicpu(p).oper[0]^.reg;
|
||||||
if
|
if
|
||||||
not RegModifiedByInstruction(CurrentReg, hp3) and
|
not RegModifiedByInstruction(p_SourceReg, hp3) and
|
||||||
not RegModifiedBetween(CurrentReg, hp3, hp2) and
|
not RegModifiedBetween(p_SourceReg, hp3, hp2) and
|
||||||
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
|
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
|
||||||
begin
|
begin
|
||||||
Result := True;
|
Result := True;
|
||||||
@ -4120,7 +4124,7 @@ unit aoptx86;
|
|||||||
implicit register). Also, if it does read from this
|
implicit register). Also, if it does read from this
|
||||||
register, then there's no longer an advantage to
|
register, then there's no longer an advantage to
|
||||||
changing the register on subsequent instructions.}
|
changing the register on subsequent instructions.}
|
||||||
if not RegReadByInstruction(ActiveReg, hp2) then
|
if not RegReadByInstruction(p_TargetReg, hp2) then
|
||||||
begin
|
begin
|
||||||
{ If a conditional jump was crossed, do not delete
|
{ If a conditional jump was crossed, do not delete
|
||||||
the original MOV no matter what }
|
the original MOV no matter what }
|
||||||
@ -4128,7 +4132,7 @@ unit aoptx86;
|
|||||||
{ RegEndOfLife returns True if the register is
|
{ RegEndOfLife returns True if the register is
|
||||||
deallocated before the next instruction or has
|
deallocated before the next instruction or has
|
||||||
been loaded with a new value }
|
been loaded with a new value }
|
||||||
RegEndOfLife(ActiveReg, taicpu(hp2)) then
|
RegEndOfLife(p_TargetReg, taicpu(hp2)) then
|
||||||
begin
|
begin
|
||||||
{ We can remove the original MOV }
|
{ We can remove the original MOV }
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
||||||
@ -4138,7 +4142,7 @@ unit aoptx86;
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if not RegModifiedByInstruction(ActiveReg, hp2) then
|
if not RegModifiedByInstruction(p_TargetReg, hp2) then
|
||||||
begin
|
begin
|
||||||
{ See if there's more we can optimise }
|
{ See if there's more we can optimise }
|
||||||
hp3 := hp2;
|
hp3 := hp2;
|
||||||
@ -4566,10 +4570,10 @@ unit aoptx86;
|
|||||||
}
|
}
|
||||||
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
||||||
begin
|
begin
|
||||||
CurrentReg := taicpu(p).oper[0]^.reg;
|
p_SourceReg := taicpu(p).oper[0]^.reg;
|
||||||
ActiveReg := taicpu(p).oper[1]^.reg;
|
{ Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
if not RegUsedAfterInstruction(CurrentReg, p, TmpUsedRegs) and
|
if not RegUsedAfterInstruction(p_SourceReg, p, TmpUsedRegs) and
|
||||||
GetLastInstruction(p, hp2) and
|
GetLastInstruction(p, hp2) and
|
||||||
(hp2.typ = ait_instruction) and
|
(hp2.typ = ait_instruction) and
|
||||||
{ Have to make sure it's an instruction that only reads from
|
{ Have to make sure it's an instruction that only reads from
|
||||||
@ -4578,25 +4582,21 @@ unit aoptx86;
|
|||||||
(taicpu(hp2).ops = 2) and
|
(taicpu(hp2).ops = 2) and
|
||||||
(insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
|
(insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
|
||||||
(taicpu(hp2).oper[1]^.typ = top_reg) and
|
(taicpu(hp2).oper[1]^.typ = top_reg) and
|
||||||
(taicpu(hp2).oper[1]^.reg = CurrentReg) then
|
(taicpu(hp2).oper[1]^.reg = p_SourceReg) then
|
||||||
begin
|
begin
|
||||||
case taicpu(hp2).opcode of
|
case taicpu(hp2).opcode of
|
||||||
A_FSTSW, A_FNSTSW,
|
A_FSTSW, A_FNSTSW,
|
||||||
A_IN, A_INS, A_OUT, A_OUTS,
|
A_IN, A_INS, A_OUT, A_OUTS,
|
||||||
A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS,
|
A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
|
||||||
{ These routines have explicit operands, but they are restricted in
|
{ These routines have explicit operands, but they are restricted in
|
||||||
what they can be (e.g. IN and OUT can only read from AL, AX or
|
what they can be (e.g. IN and OUT can only read from AL, AX or
|
||||||
EAX). }
|
EAX). }
|
||||||
A_CMOVcc:
|
|
||||||
{ CMOV is not valid either because then CurrentReg will depend
|
|
||||||
on an unknown value if the condition is False and hence is
|
|
||||||
not a pure write }
|
|
||||||
;
|
;
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
|
DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
|
||||||
taicpu(hp2).oper[1]^.reg := ActiveReg;
|
taicpu(hp2).oper[1]^.reg := p_TargetReg;
|
||||||
AllocRegBetween(ActiveReg, hp2, p, TmpUsedRegs);
|
AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
|
||||||
RemoveCurrentp(p, hp1);
|
RemoveCurrentp(p, hp1);
|
||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
|
Loading…
Reference in New Issue
Block a user