* x86: Changed the names of temporary register stores in OptPass1MOV for reasons of clarity, and other minor optimisations.
This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-04-19 14:20:45 +01:00 committed by FPK
parent 627fb9a25b
commit f62ffa74dc

View File

@ -2752,7 +2752,7 @@ unit aoptx86;
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean; GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
PreMessage, RegName1, RegName2, InputVal, MaskNum: string; PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
NewSize: topsize; NewSize: topsize;
CurrentReg, ActiveReg: TRegister; p_SourceReg, p_TargetReg, NewMMReg: TRegister;
SourceRef, TargetRef: TReference; SourceRef, TargetRef: TReference;
MovAligned, MovUnaligned: TAsmOp; MovAligned, MovUnaligned: TAsmOp;
ThisRef: TReference; ThisRef: TReference;
@ -2777,110 +2777,115 @@ unit aoptx86;
if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
Exit; Exit;
{ Look for: { Prevent compiler warnings }
mov %reg1,%reg2 p_TargetReg := NR_NO;
??? %reg2,r/m
Change to: if taicpu(p).oper[1]^.typ = top_reg then
mov %reg1,%reg2
??? %reg1,r/m
}
if MatchOpType(taicpu(p), top_reg, top_reg) then
begin begin
CurrentReg := taicpu(p).oper[1]^.reg; { Saves on a large number of dereferences }
p_TargetReg := taicpu(p).oper[1]^.reg;
if RegReadByInstruction(CurrentReg, hp1) and { Look for:
DeepMOVOpt(taicpu(p), taicpu(hp1)) then mov %reg1,%reg2
??? %reg2,r/m
Change to:
mov %reg1,%reg2
??? %reg1,r/m
}
if taicpu(p).oper[0]^.typ = top_reg then
begin begin
{ A change has occurred, just not in p } if RegReadByInstruction(p_TargetReg, hp1) and
Result := True; DeepMOVOpt(taicpu(p), taicpu(hp1)) then
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
{ Just in case something didn't get modified (e.g. an
implicit register) }
not RegReadByInstruction(CurrentReg, hp1) then
begin begin
{ We can remove the original MOV } { A change has occurred, just not in p }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
RemoveCurrentp(p, hp1);
{ UsedRegs got updated by RemoveCurrentp }
Result := True;
Exit;
end;
{ If we know a MOV instruction has become a null operation, we might as well
get rid of it now to save time. }
if (taicpu(hp1).opcode = A_MOV) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
{ Just being a register is enough to confirm it's a null operation }
(taicpu(hp1).oper[0]^.typ = top_reg) then
begin
Result := True; Result := True;
{ Speed-up to reduce a pipeline stall... if we had something like... TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
movl %eax,%edx if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
movw %dx,%ax { Just in case something didn't get modified (e.g. an
implicit register) }
... the second instruction would change to movw %ax,%ax, but not RegReadByInstruction(p_TargetReg, hp1) then
given that it is now %ax that's active rather than %eax,
penalties might occur due to a partial register write, so instead,
change it to a MOVZX instruction when optimising for speed.
}
if not (cs_opt_size in current_settings.optimizerswitches) and
IsMOVZXAcceptable and
(taicpu(hp1).opsize < taicpu(p).opsize)
{$ifdef x86_64}
{ operations already implicitly set the upper 64 bits to zero }
and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
{$endif x86_64}
then
begin begin
CurrentReg := taicpu(hp1).oper[1]^.reg; { We can remove the original MOV }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
RemoveCurrentp(p, hp1);
DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1); { UsedRegs got updated by RemoveCurrentp }
case taicpu(p).opsize of Result := True;
S_W: Exit;
if taicpu(hp1).opsize = S_B then
taicpu(hp1).opsize := S_BL
else
InternalError(2020012911);
S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
case taicpu(hp1).opsize of
S_B:
taicpu(hp1).opsize := S_BL;
S_W:
taicpu(hp1).opsize := S_WL;
else
InternalError(2020012912);
end;
else
InternalError(2020012910);
end;
taicpu(hp1).opcode := A_MOVZX;
taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
end
else
begin
GetNextInstruction_p := GetNextInstruction(hp1, hp2);
DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
RemoveInstruction(hp1);
{ The instruction after what was hp1 is now the immediate next instruction,
so we can continue to make optimisations if it's present }
if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
Exit;
hp1 := hp2;
end; end;
end;
{ If we know a MOV instruction has become a null operation, we might as well
get rid of it now to save time. }
if (taicpu(hp1).opcode = A_MOV) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
{ Just being a register is enough to confirm it's a null operation }
(taicpu(hp1).oper[0]^.typ = top_reg) then
begin
Result := True;
{ Speed-up to reduce a pipeline stall... if we had something like...
movl %eax,%edx
movw %dx,%ax
... the second instruction would change to movw %ax,%ax, but
given that it is now %ax that's active rather than %eax,
penalties might occur due to a partial register write, so instead,
change it to a MOVZX instruction when optimising for speed.
}
if not (cs_opt_size in current_settings.optimizerswitches) and
IsMOVZXAcceptable and
(taicpu(hp1).opsize < taicpu(p).opsize)
{$ifdef x86_64}
{ operations already implicitly set the upper 64 bits to zero }
and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
{$endif x86_64}
then
begin
DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
case taicpu(p).opsize of
S_W:
if taicpu(hp1).opsize = S_B then
taicpu(hp1).opsize := S_BL
else
InternalError(2020012911);
S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
case taicpu(hp1).opsize of
S_B:
taicpu(hp1).opsize := S_BL;
S_W:
taicpu(hp1).opsize := S_WL;
else
InternalError(2020012912);
end;
else
InternalError(2020012910);
end;
taicpu(hp1).opcode := A_MOVZX;
setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
end
else
begin
GetNextInstruction_p := GetNextInstruction(hp1, hp2);
DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
RemoveInstruction(hp1);
{ The instruction after what was hp1 is now the immediate next instruction,
so we can continue to make optimisations if it's present }
if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
Exit;
hp1 := hp2;
end;
end;
end;
end; end;
end; end;
@ -3375,15 +3380,15 @@ unit aoptx86;
if (taicpu(p).oper[1]^.typ = top_reg) and if (taicpu(p).oper[1]^.typ = top_reg) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
begin begin
CurrentReg := taicpu(p).oper[1]^.reg; { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
TransferUsedRegs(TmpUsedRegs); TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
{ we have { we have
mov x, %treg mov x, %treg
mov %treg, y mov %treg, y
} }
if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then if not(RegInOp(p_TargetReg, taicpu(hp1).oper[1]^)) then
if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then if not(RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs)) then
{ we've got { we've got
mov x, %treg mov x, %treg
@ -3485,8 +3490,8 @@ unit aoptx86;
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
begin begin
CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3); NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
if CurrentReg <> NR_NO then if NewMMReg <> NR_NO then
begin begin
{ Remember that the offsets are 8 ahead } { Remember that the offsets are 8 ahead }
if ((SourceRef.offset mod 16) = 8) and if ((SourceRef.offset mod 16) = 8) and
@ -3500,7 +3505,7 @@ unit aoptx86;
taicpu(p).opcode := MovUnaligned; taicpu(p).opcode := MovUnaligned;
taicpu(p).opsize := S_XMM; taicpu(p).opsize := S_XMM;
taicpu(p).oper[1]^.reg := CurrentReg; taicpu(p).oper[1]^.reg := NewMMReg;
if ((TargetRef.offset mod 16) = 8) and if ((TargetRef.offset mod 16) = 8) and
( (
@ -3513,9 +3518,9 @@ unit aoptx86;
taicpu(hp1).opcode := MovUnaligned; taicpu(hp1).opcode := MovUnaligned;
taicpu(hp1).opsize := S_XMM; taicpu(hp1).opsize := S_XMM;
taicpu(hp1).oper[0]^.reg := CurrentReg; taicpu(hp1).oper[0]^.reg := NewMMReg;
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p); DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
RemoveInstruction(hp2); RemoveInstruction(hp2);
RemoveInstruction(hp3); RemoveInstruction(hp3);
@ -3541,8 +3546,8 @@ unit aoptx86;
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
begin begin
CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3); NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
if CurrentReg <> NR_NO then if NewMMReg <> NR_NO then
begin begin
{ hp2 and hp3 are the starting offsets, so mod = 0 this time } { hp2 and hp3 are the starting offsets, so mod = 0 this time }
if ((SourceRef.offset mod 16) = 0) and if ((SourceRef.offset mod 16) = 0) and
@ -3556,7 +3561,7 @@ unit aoptx86;
taicpu(hp2).opcode := MovUnaligned; taicpu(hp2).opcode := MovUnaligned;
taicpu(hp2).opsize := S_XMM; taicpu(hp2).opsize := S_XMM;
taicpu(hp2).oper[1]^.reg := CurrentReg; taicpu(hp2).oper[1]^.reg := NewMMReg;
if ((TargetRef.offset mod 16) = 0) and if ((TargetRef.offset mod 16) = 0) and
( (
@ -3569,9 +3574,9 @@ unit aoptx86;
taicpu(hp3).opcode := MovUnaligned; taicpu(hp3).opcode := MovUnaligned;
taicpu(hp3).opsize := S_XMM; taicpu(hp3).opsize := S_XMM;
taicpu(hp3).oper[0]^.reg := CurrentReg; taicpu(hp3).oper[0]^.reg := NewMMReg;
DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p); DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
RemoveInstruction(hp1); RemoveInstruction(hp1);
RemoveCurrentP(p, hp2); RemoveCurrentP(p, hp2);
@ -3799,37 +3804,37 @@ unit aoptx86;
} }
if MatchOpType(taicpu(p), top_reg, top_reg) then if MatchOpType(taicpu(p), top_reg, top_reg) then
begin begin
CurrentReg := taicpu(p).oper[0]^.reg; p_SourceReg := taicpu(p).oper[0]^.reg;
ActiveReg := taicpu(p).oper[1]^.reg; { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and
(taicpu(hp1).oper[1]^.reg = CurrentReg) and (taicpu(hp1).oper[1]^.reg = p_SourceReg) and
RegInRef(CurrentReg, taicpu(hp1).oper[0]^.ref^) and RegInRef(p_SourceReg, taicpu(hp1).oper[0]^.ref^) and
GetNextInstruction(hp1, hp2) and GetNextInstruction(hp1, hp2) and
MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
(taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then (taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then
begin begin
SourceRef := taicpu(hp2).oper[0]^.ref^; SourceRef := taicpu(hp2).oper[0]^.ref^;
if RegInRef(ActiveReg, SourceRef) and if RegInRef(p_TargetReg, SourceRef) and
{ If %reg1 also appears in the second reference, then it will { If %reg1 also appears in the second reference, then it will
not refer to the same memory block as the first reference } not refer to the same memory block as the first reference }
not RegInRef(CurrentReg, SourceRef) then not RegInRef(p_SourceReg, SourceRef) then
begin begin
{ Check to see if the references match if %reg2 is changed to %reg1 } { Check to see if the references match if %reg2 is changed to %reg1 }
if SourceRef.base = ActiveReg then if SourceRef.base = p_TargetReg then
SourceRef.base := CurrentReg; SourceRef.base := p_SourceReg;
if SourceRef.index = ActiveReg then if SourceRef.index = p_TargetReg then
SourceRef.index := CurrentReg; SourceRef.index := p_SourceReg;
{ RefsEqual also checks to ensure both references are non-volatile } { RefsEqual also checks to ensure both references are non-volatile }
if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then
begin begin
taicpu(hp2).loadreg(0, CurrentReg); taicpu(hp2).loadreg(0, p_SourceReg);
DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p); DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p);
Result := True; Result := True;
if taicpu(hp2).oper[1]^.reg = ActiveReg then if taicpu(hp2).oper[1]^.reg = p_TargetReg then
begin begin
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p); DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p);
RemoveCurrentP(p, hp1); RemoveCurrentP(p, hp1);
@ -3842,7 +3847,7 @@ unit aoptx86;
UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
if not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then if not RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs) then
begin begin
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p); DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p);
RemoveCurrentP(p, hp1); RemoveCurrentP(p, hp1);
@ -3871,8 +3876,7 @@ unit aoptx86;
{ Initialise CrossJump (if it becomes True at any point, it will remain True) } { Initialise CrossJump (if it becomes True at any point, it will remain True) }
CrossJump := (taicpu(hp1).opcode = A_Jcc); CrossJump := (taicpu(hp1).opcode = A_Jcc);
{ Saves on a large number of dereferences } { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
ActiveReg := taicpu(p).oper[1]^.reg;
TransferUsedRegs(TmpUsedRegs); TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
@ -3882,16 +3886,16 @@ unit aoptx86;
else else
JumpTracking := nil; JumpTracking := nil;
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,JumpTracking,CrossJump) and while GetNextInstructionUsingRegCond(hp3,hp2,p_TargetReg,JumpTracking,CrossJump) and
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified } { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
(hp2.typ=ait_instruction) do (hp2.typ=ait_instruction) do
begin begin
case taicpu(hp2).opcode of case taicpu(hp2).opcode of
A_POP: A_POP:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) then
begin begin
if not CrossJump and if not CrossJump and
not RegUsedBetween(ActiveReg, p, hp2) then not RegUsedBetween(p_TargetReg, p, hp2) then
begin begin
{ We can remove the original MOV since the register { We can remove the original MOV since the register
wasn't used between it and its popping from the stack } wasn't used between it and its popping from the stack }
@ -3905,7 +3909,7 @@ unit aoptx86;
Break; Break;
end; end;
A_MOV: A_MOV:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) and
((taicpu(p).oper[0]^.typ=top_const) or ((taicpu(p).oper[0]^.typ=top_const) or
((taicpu(p).oper[0]^.typ=top_reg) and ((taicpu(p).oper[0]^.typ=top_reg) and
not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2)) not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
@ -3924,8 +3928,8 @@ unit aoptx86;
TempRegUsed := TempRegUsed :=
CrossJump { Assume the register is in use if it crossed a conditional jump } or CrossJump { Assume the register is in use if it crossed a conditional jump } or
RegReadByInstruction(ActiveReg, hp3) or RegReadByInstruction(p_TargetReg, hp3) or
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs); RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs);
case taicpu(p).oper[0]^.typ Of case taicpu(p).oper[0]^.typ Of
top_reg: top_reg:
@ -3938,17 +3942,17 @@ unit aoptx86;
mov %reg, y mov %reg, y
} }
CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences } p_SourceReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg); RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
if MatchOperand(taicpu(hp2).oper[1]^, CurrentReg) then if MatchOperand(taicpu(hp2).oper[1]^, p_SourceReg) then
begin begin
{ %reg = y - remove hp2 completely (doing it here instead of relying on { %reg = y - remove hp2 completely (doing it here instead of relying on
the "mov %reg,%reg" optimisation might cut down on a pass iteration) } the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
if TempRegUsed then if TempRegUsed then
begin begin
DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2); DebugMsg(SPeepholeOptimization + debug_regname(p_SourceReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
AllocRegBetween(CurrentReg, p, hp2, UsedRegs); AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
{ Set the start of the next GetNextInstructionUsingRegCond search { Set the start of the next GetNextInstructionUsingRegCond search
to start at the entry right before hp2 (which is about to be removed) } to start at the entry right before hp2 (which is about to be removed) }
hp3 := tai(hp2.Previous); hp3 := tai(hp2.Previous);
@ -3971,19 +3975,19 @@ unit aoptx86;
end end
else else
begin begin
AllocRegBetween(CurrentReg, p, hp2, UsedRegs); AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
taicpu(hp2).loadReg(0, CurrentReg); taicpu(hp2).loadReg(0, p_SourceReg);
DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2); DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(p_SourceReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
{ Check to see if the register also appears in the reference } { Check to see if the register also appears in the reference }
if (taicpu(hp2).oper[1]^.typ = top_ref) then if (taicpu(hp2).oper[1]^.typ = top_ref) then
ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, ActiveReg, CurrentReg); ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, p_TargetReg, p_SourceReg);
{ Don't remove the first instruction if the temporary register is in use } { Don't remove the first instruction if the temporary register is in use }
if not TempRegUsed and if not TempRegUsed and
{ ReplaceRegisterInRef won't actually replace the register if it's a different size } { ReplaceRegisterInRef won't actually replace the register if it's a different size }
not RegInOp(ActiveReg, taicpu(hp2).oper[1]^) then not RegInOp(p_TargetReg, taicpu(hp2).oper[1]^) then
begin begin
DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p); DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
RemoveCurrentP(p, hp1); RemoveCurrentP(p, hp1);
@ -4037,11 +4041,11 @@ unit aoptx86;
end; end;
end end
else else
if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then if MatchOperand(taicpu(hp2).oper[1]^, p_TargetReg) then
begin begin
if not CrossJump and if not CrossJump and
not RegUsedBetween(ActiveReg, p, hp2) and not RegUsedBetween(p_TargetReg, p, hp2) and
not RegReadByInstruction(ActiveReg, hp2) then not RegReadByInstruction(p_TargetReg, hp2) then
begin begin
{ Register is not used before it is overwritten } { Register is not used before it is overwritten }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p); DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
@ -4067,8 +4071,8 @@ unit aoptx86;
end; end;
A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}: A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
if MatchOpType(taicpu(hp2), top_reg, top_reg) and if MatchOpType(taicpu(hp2), top_reg, top_reg) and
MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and MatchOperand(taicpu(hp2).oper[0]^, p_TargetReg) and
SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, p_TargetReg) then
begin begin
{ {
Change from: Change from:
@ -4108,10 +4112,10 @@ unit aoptx86;
{ Also catches MOV/S/Z instructions that aren't modified } { Also catches MOV/S/Z instructions that aren't modified }
if taicpu(p).oper[0]^.typ = top_reg then if taicpu(p).oper[0]^.typ = top_reg then
begin begin
CurrentReg := taicpu(p).oper[0]^.reg; p_SourceReg := taicpu(p).oper[0]^.reg;
if if
not RegModifiedByInstruction(CurrentReg, hp3) and not RegModifiedByInstruction(p_SourceReg, hp3) and
not RegModifiedBetween(CurrentReg, hp3, hp2) and not RegModifiedBetween(p_SourceReg, hp3, hp2) and
DeepMOVOpt(taicpu(p), taicpu(hp2)) then DeepMOVOpt(taicpu(p), taicpu(hp2)) then
begin begin
Result := True; Result := True;
@ -4120,7 +4124,7 @@ unit aoptx86;
implicit register). Also, if it does read from this implicit register). Also, if it does read from this
register, then there's no longer an advantage to register, then there's no longer an advantage to
changing the register on subsequent instructions.} changing the register on subsequent instructions.}
if not RegReadByInstruction(ActiveReg, hp2) then if not RegReadByInstruction(p_TargetReg, hp2) then
begin begin
{ If a conditional jump was crossed, do not delete { If a conditional jump was crossed, do not delete
the original MOV no matter what } the original MOV no matter what }
@ -4128,7 +4132,7 @@ unit aoptx86;
{ RegEndOfLife returns True if the register is { RegEndOfLife returns True if the register is
deallocated before the next instruction or has deallocated before the next instruction or has
been loaded with a new value } been loaded with a new value }
RegEndOfLife(ActiveReg, taicpu(hp2)) then RegEndOfLife(p_TargetReg, taicpu(hp2)) then
begin begin
{ We can remove the original MOV } { We can remove the original MOV }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p); DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
@ -4138,7 +4142,7 @@ unit aoptx86;
Exit; Exit;
end; end;
if not RegModifiedByInstruction(ActiveReg, hp2) then if not RegModifiedByInstruction(p_TargetReg, hp2) then
begin begin
{ See if there's more we can optimise } { See if there's more we can optimise }
hp3 := hp2; hp3 := hp2;
@ -4566,10 +4570,10 @@ unit aoptx86;
} }
if MatchOpType(taicpu(p), top_reg, top_reg) then if MatchOpType(taicpu(p), top_reg, top_reg) then
begin begin
CurrentReg := taicpu(p).oper[0]^.reg; p_SourceReg := taicpu(p).oper[0]^.reg;
ActiveReg := taicpu(p).oper[1]^.reg; { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
TransferUsedRegs(TmpUsedRegs); TransferUsedRegs(TmpUsedRegs);
if not RegUsedAfterInstruction(CurrentReg, p, TmpUsedRegs) and if not RegUsedAfterInstruction(p_SourceReg, p, TmpUsedRegs) and
GetLastInstruction(p, hp2) and GetLastInstruction(p, hp2) and
(hp2.typ = ait_instruction) and (hp2.typ = ait_instruction) and
{ Have to make sure it's an instruction that only reads from { Have to make sure it's an instruction that only reads from
@ -4578,25 +4582,21 @@ unit aoptx86;
(taicpu(hp2).ops = 2) and (taicpu(hp2).ops = 2) and
(insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
(taicpu(hp2).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
(taicpu(hp2).oper[1]^.reg = CurrentReg) then (taicpu(hp2).oper[1]^.reg = p_SourceReg) then
begin begin
case taicpu(hp2).opcode of case taicpu(hp2).opcode of
A_FSTSW, A_FNSTSW, A_FSTSW, A_FNSTSW,
A_IN, A_INS, A_OUT, A_OUTS, A_IN, A_INS, A_OUT, A_OUTS,
A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS, A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
{ These routines have explicit operands, but they are restricted in { These routines have explicit operands, but they are restricted in
what they can be (e.g. IN and OUT can only read from AL, AX or what they can be (e.g. IN and OUT can only read from AL, AX or
EAX. } EAX. }
A_CMOVcc:
{ CMOV is not valid either because then CurrentReg will depend
on an unknown value if the condition is False and hence is
not a pure write }
; ;
else else
begin begin
DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p); DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
taicpu(hp2).oper[1]^.reg := ActiveReg; taicpu(hp2).oper[1]^.reg := p_TargetReg;
AllocRegBetween(ActiveReg, hp2, p, TmpUsedRegs); AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
RemoveCurrentp(p, hp1); RemoveCurrentp(p, hp1);
Result := True; Result := True;
Exit; Exit;