* use the source register as the second register in VCVTSD2SS and VCVTSS2SD; this should break

dependency chains better and partially resolves #39360
This commit is contained in:
florian 2021-10-07 23:09:35 +02:00
parent ec40db3da7
commit 4752230c8f
2 changed files with 15 additions and 5 deletions

View File

@ -5874,14 +5874,24 @@ unit aoptx86;
MatchOpType(taicpu(hp1),top_reg,top_reg,top_reg) and
(getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
(getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
(getsupreg(taicpu(p).oper[2]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg))
)
) then
begin
DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Nop done',p);
RemoveCurrentP(p);
RemoveInstruction(hp1);
if getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg) then
begin
DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Nop done',p);
RemoveCurrentP(p);
RemoveInstruction(hp1);
end
else
begin
DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Vmovss done',p);
taicpu(p).loadreg(1,taicpu(hp1).oper[2]^.reg);
taicpu(p).ops:=2;
taicpu(p).opcode:=A_VMOVSS;
RemoveInstruction(hp1);
end;
Result:=true;
Exit;
end;

View File

@ -1501,7 +1501,7 @@ unit cgx86;
{ A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
if (op=A_VCVTSD2SS) or (op=A_VCVTSS2SD) then
instr:=taicpu.op_reg_reg_reg(op,S_NO,reg1,reg2,reg2)
instr:=taicpu.op_reg_reg_reg(op,S_NO,reg1,reg1,reg2)
else
instr:=taicpu.op_reg_reg(op,S_NO,reg1,reg2);