mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-09 10:48:30 +02:00
+ factored out TX86AsmOptimizer.PrePeepholeOptIMUL, used now by x86-64 and i386
* generalized and simplified the code git-svn-id: trunk@40162 -
This commit is contained in:
parent
47c9bca7f2
commit
59d5d6ec95
@ -31,6 +31,7 @@ unit aoptutils;
|
||||
|
||||
function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
|
||||
function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
|
||||
function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
|
||||
|
||||
{ skips all labels and returns the next "real" instruction }
|
||||
function SkipLabels(hp: tai; var hp2: tai): boolean;
|
||||
@ -49,6 +50,12 @@ unit aoptutils;
|
||||
end;
|
||||
|
||||
|
||||
{ Returns True when instruction p has exactly three operands and their
  operand types are type0, type1 and type2, in operand order. }
function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
  begin
    { The third operand has to be compared against type2. It used to be
      compared against type1, which meant type2 was ignored altogether. }
    Result:=(p.ops=3) and
      (p.oper[0]^.typ=type0) and
      (p.oper[1]^.typ=type1) and
      (p.oper[2]^.typ=type2);
  end;
|
||||
|
||||
|
||||
{ skips all labels and returns the next "real" instruction }
|
||||
function SkipLabels(hp: tai; var hp2: tai): boolean;
|
||||
begin
|
||||
|
@ -169,205 +169,8 @@ begin
|
||||
end;
|
||||
case taicpu(p).opcode Of
|
||||
A_IMUL:
|
||||
{changes certain "imul const, %reg"'s to lea sequences}
|
||||
begin
|
||||
if (taicpu(p).oper[0]^.typ = Top_Const) and
|
||||
(taicpu(p).oper[1]^.typ = Top_Reg) and
|
||||
(taicpu(p).opsize = S_L) then
|
||||
if (taicpu(p).oper[0]^.val = 1) then
|
||||
if (taicpu(p).ops = 2) then
|
||||
{remove "imul $1, reg"}
|
||||
begin
|
||||
hp1 := tai(p.Next);
|
||||
asml.remove(p);
|
||||
p.free;
|
||||
p := hp1;
|
||||
continue;
|
||||
end
|
||||
else
|
||||
{change "imul $1, reg1, reg2" to "mov reg1, reg2"}
|
||||
begin
|
||||
hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := hp1;
|
||||
end
|
||||
else if
|
||||
((taicpu(p).ops <= 2) or
|
||||
(taicpu(p).oper[2]^.typ = Top_Reg)) and
|
||||
(taicpu(p).oper[0]^.val <= 12) and
|
||||
not(cs_opt_size in current_settings.optimizerswitches) and
|
||||
(not(GetNextInstruction(p, hp1)) or
|
||||
{GetNextInstruction(p, hp1) and}
|
||||
not((tai(hp1).typ = ait_instruction) and
|
||||
((taicpu(hp1).opcode=A_Jcc) and
|
||||
(taicpu(hp1).condition in [C_O,C_NO])))) then
|
||||
begin
|
||||
reference_reset(tmpref,1,[]);
|
||||
case taicpu(p).oper[0]^.val Of
|
||||
3: begin
|
||||
{imul 3, reg1, reg2 to
|
||||
lea (reg1,reg1,2), reg2
|
||||
imul 3, reg1 to
|
||||
lea (reg1,reg1,2), reg1}
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 2;
|
||||
if (taicpu(p).ops = 2) then
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
|
||||
else
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := hp1;
|
||||
end;
|
||||
5: begin
|
||||
{imul 5, reg1, reg2 to
|
||||
lea (reg1,reg1,4), reg2
|
||||
imul 5, reg1 to
|
||||
lea (reg1,reg1,4), reg1}
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 4;
|
||||
if (taicpu(p).ops = 2) then
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
|
||||
else
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := hp1;
|
||||
end;
|
||||
6: begin
|
||||
{imul 6, reg1, reg2 to
|
||||
lea (,reg1,2), reg2
|
||||
lea (reg2,reg1,4), reg2
|
||||
imul 6, reg1 to
|
||||
lea (reg1,reg1,2), reg1
|
||||
add reg1, reg1}
|
||||
if (current_settings.optimizecputype <= cpu_386) then
|
||||
begin
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
if (taicpu(p).ops = 3) then
|
||||
begin
|
||||
TmpRef.base := taicpu(p).oper[2]^.reg;
|
||||
TmpRef.ScaleFactor := 4;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
|
||||
end
|
||||
else
|
||||
begin
|
||||
hp1 := taicpu.op_reg_reg(A_ADD, S_L,
|
||||
taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
|
||||
end;
|
||||
InsertLLItem(p, p.next, hp1);
|
||||
reference_reset(tmpref,2,[]);
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 2;
|
||||
if (taicpu(p).ops = 3) then
|
||||
begin
|
||||
TmpRef.base := NR_NO;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
|
||||
taicpu(p).oper[2]^.reg);
|
||||
end
|
||||
else
|
||||
begin
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
|
||||
end;
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := tai(hp1.next);
|
||||
end
|
||||
end;
|
||||
9: begin
|
||||
{imul 9, reg1, reg2 to
|
||||
lea (reg1,reg1,8), reg2
|
||||
imul 9, reg1 to
|
||||
lea (reg1,reg1,8), reg1}
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 8;
|
||||
if (taicpu(p).ops = 2) then
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
|
||||
else
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := hp1;
|
||||
end;
|
||||
10: begin
|
||||
{imul 10, reg1, reg2 to
|
||||
lea (reg1,reg1,4), reg2
|
||||
add reg2, reg2
|
||||
imul 10, reg1 to
|
||||
lea (reg1,reg1,4), reg1
|
||||
add reg1, reg1}
|
||||
if (current_settings.optimizecputype <= cpu_386) then
|
||||
begin
|
||||
if (taicpu(p).ops = 3) then
|
||||
hp1 := taicpu.op_reg_reg(A_ADD, S_L,
|
||||
taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
|
||||
else
|
||||
hp1 := taicpu.op_reg_reg(A_ADD, S_L,
|
||||
taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
|
||||
InsertLLItem(p, p.next, hp1);
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 4;
|
||||
if (taicpu(p).ops = 3) then
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
|
||||
else
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := tai(hp1.next);
|
||||
end
|
||||
end;
|
||||
12: begin
|
||||
{imul 12, reg1, reg2 to
|
||||
lea (,reg1,4), reg2
|
||||
lea (reg2,reg1,8), reg2
|
||||
imul 12, reg1 to
|
||||
lea (reg1,reg1,2), reg1
|
||||
lea (,reg1,4), reg1}
|
||||
if (current_settings.optimizecputype <= cpu_386)
|
||||
then
|
||||
begin
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
if (taicpu(p).ops = 3) then
|
||||
begin
|
||||
TmpRef.base := taicpu(p).oper[2]^.reg;
|
||||
TmpRef.ScaleFactor := 8;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
|
||||
end
|
||||
else
|
||||
begin
|
||||
TmpRef.base := NR_NO;
|
||||
TmpRef.ScaleFactor := 4;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
|
||||
end;
|
||||
InsertLLItem(p, p.next, hp1);
|
||||
reference_reset(tmpref,2,[]);
|
||||
TmpRef.index := taicpu(p).oper[1]^.reg;
|
||||
if (taicpu(p).ops = 3) then
|
||||
begin
|
||||
TmpRef.base := NR_NO;
|
||||
TmpRef.ScaleFactor := 4;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
|
||||
end
|
||||
else
|
||||
begin
|
||||
TmpRef.base := taicpu(p).oper[1]^.reg;
|
||||
TmpRef.ScaleFactor := 2;
|
||||
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
|
||||
end;
|
||||
InsertLLItem(p.previous, p.next, hp1);
|
||||
p.free;
|
||||
p := tai(hp1.next);
|
||||
end
|
||||
end
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
if PrePeepholeOptIMUL(p) then
|
||||
Continue;
|
||||
A_SAR,A_SHR:
|
||||
if PrePeepholeOptSxx(p) then
|
||||
continue;
|
||||
|
@ -57,6 +57,7 @@ unit aoptx86;
|
||||
function DoSubAddOpt(var p : tai) : Boolean;
|
||||
|
||||
function PrePeepholeOptSxx(var p : tai) : boolean;
|
||||
function PrePeepholeOptIMUL(var p : tai) : boolean;
|
||||
|
||||
function OptPass1AND(var p : tai) : boolean;
|
||||
function OptPass1VMOVAP(var p : tai) : boolean;
|
||||
@ -718,6 +719,91 @@ unit aoptx86;
|
||||
end;
|
||||
|
||||
|
||||
{ Pre-peephole rewrite of "imul const,reg" and "imul const,reg1,reg2".

  p is expected to be a taicpu (it is cast without a check). Only operand
  size S_L (and S_Q when x86_64 is defined) is handled.

  Rewrites performed:
    - "imul $1,reg"        is removed; p is set to the following list item and
                           the function returns true.
    - "imul $1,reg1,reg2"  is replaced by "mov reg1,reg2"; p is set to the mov.
    - "imul $c,..." where c has exactly two bits set that are at most three
      bit positions apart is replaced by a lea, followed by a shl when c has
      trailing zero bits; p is set to the lea.

  In every case except the first the result stays false. }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
       (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        begin
          if (taicpu(p).ops = 2) then
            { remove "imul $1, reg" }
            begin
              hp1 := tai(p.Next);
              asml.remove(p);
              DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
              p.free;
              p := hp1;
              result:=true;
            end
          else
            { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
            begin
              hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
              InsertLLItem(p.previous, p.next, hp1);
              DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
              p.free;
              p := hp1;
            end
        end
      else if
        ((taicpu(p).ops <= 2) or
         (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { do not touch the imul when the next instruction is a jo/jno }
        (not(GetNextInstruction(p, hp1)) or
          not((tai(hp1).typ = ait_instruction) and
              ((taicpu(hp1).opcode=A_Jcc) and
               (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg1

            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.

            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { split the constant into BaseValue shl ShiftValue }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              { base + index*(BaseValue-1) = reg1*BaseValue }
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              AsmL.Remove(p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              p.free;
              p := hp1;
              { the shl works on the destination register of the lea }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
|
||||
var
|
||||
p: taicpu;
|
||||
|
@ -51,6 +51,8 @@ uses
|
||||
ait_instruction:
|
||||
begin
|
||||
case taicpu(p).opcode of
|
||||
A_IMUL:
|
||||
result:=PrePeepholeOptIMUL(p);
|
||||
A_SAR,A_SHR:
|
||||
result:=PrePeepholeOptSxx(p);
|
||||
end;
|
||||
|
Loading…
Reference in New Issue
Block a user