mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-14 10:29:24 +02:00
Merge branch 'main' of gitlab.com:freepascal.org/fpc/source into main
This commit is contained in:
commit
5022d0e772
@ -197,7 +197,8 @@ implementation
|
|||||||
cpuinfo,
|
cpuinfo,
|
||||||
htypechk,pass_1,procinfo,paramgr,
|
htypechk,pass_1,procinfo,paramgr,
|
||||||
nbas,ncon,nflw,ninl,ncnv,nmem,ncal,nutils,
|
nbas,ncon,nflw,ninl,ncnv,nmem,ncal,nutils,
|
||||||
cgbase
|
cgbase,
|
||||||
|
optloadmodifystore
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
@ -625,6 +626,13 @@ implementation
|
|||||||
is_constrealnode(right) and
|
is_constrealnode(right) and
|
||||||
not equal_defs(right.resultdef,left.resultdef) then
|
not equal_defs(right.resultdef,left.resultdef) then
|
||||||
inserttypeconv(right,left.resultdef);
|
inserttypeconv(right,left.resultdef);
|
||||||
|
{$if (cs_opt_use_load_modify_store in supported_optimizerswitches)}
|
||||||
|
{ Perform simple optimizations when -O2 and the dedicated
|
||||||
|
cs_opt_use_load_modify_store optimization pass is not enabled. }
|
||||||
|
if (cs_opt_level2 in current_settings.optimizerswitches) and
|
||||||
|
not (cs_opt_use_load_modify_store in current_settings.optimizerswitches) then
|
||||||
|
result:=try_opt_assignmentnode(self);
|
||||||
|
{$endif}
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
@ -38,16 +38,17 @@ unit optloadmodifystore;
|
|||||||
interface
|
interface
|
||||||
|
|
||||||
uses
|
uses
|
||||||
node;
|
node,nld;
|
||||||
|
|
||||||
procedure do_optloadmodifystore(var rootnode : tnode);
|
procedure do_optloadmodifystore(var rootnode : tnode);
|
||||||
|
function try_opt_assignmentnode(assignmentnode : tassignmentnode): tnode;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses
|
uses
|
||||||
globtype,verbose,nutils,compinnr,
|
globtype,globals,verbose,nutils,compinnr,
|
||||||
defutil,defcmp,htypechk,pass_1,constexp,
|
defutil,defcmp,htypechk,pass_1,constexp,
|
||||||
nadd,ncal,ncon,ncnv,ninl,nld,nmat,
|
nadd,ncal,ncon,ncnv,ninl,nmat,
|
||||||
symdef;
|
symdef;
|
||||||
|
|
||||||
function try_opt_assignmentnode(assignmentnode: tassignmentnode): tnode;
|
function try_opt_assignmentnode(assignmentnode: tassignmentnode): tnode;
|
||||||
@ -57,6 +58,10 @@ unit optloadmodifystore;
|
|||||||
result:=nil;
|
result:=nil;
|
||||||
with assignmentnode do
|
with assignmentnode do
|
||||||
begin
|
begin
|
||||||
|
{ *** Here are simple optimizations which are performed
|
||||||
|
when -O2 (via a call from tassignmentnode.simplify) or
|
||||||
|
when cs_opt_use_load_modify_store is enabled (in a separate pass).
|
||||||
|
}
|
||||||
{ replace i:=succ/pred(i) by inc/dec(i)? }
|
{ replace i:=succ/pred(i) by inc/dec(i)? }
|
||||||
if (right.nodetype=inlinen) and
|
if (right.nodetype=inlinen) and
|
||||||
((tinlinenode(right).inlinenumber=in_succ_x) or (tinlinenode(right).inlinenumber=in_pred_x)) and
|
((tinlinenode(right).inlinenumber=in_succ_x) or (tinlinenode(right).inlinenumber=in_pred_x)) and
|
||||||
@ -273,6 +278,71 @@ unit optloadmodifystore;
|
|||||||
taddnode(ttypeconvnode(right).left).left:=nil;
|
taddnode(ttypeconvnode(right).left).left:=nil;
|
||||||
exit;
|
exit;
|
||||||
end;
|
end;
|
||||||
|
{ replace i:=not i by in_not_assign_x(i)
|
||||||
|
i:=-i by in_neg_assign_x(i)
|
||||||
|
|
||||||
|
this handles the case, where there are no implicit type conversions }
|
||||||
|
if (right.nodetype in [notn,unaryminusn]) and
|
||||||
|
(tunarynode(right).left.isequal(left)) and
|
||||||
|
is_integer(tunarynode(right).left.resultdef) and
|
||||||
|
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
||||||
|
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
||||||
|
valid_for_var(tunarynode(right).left,false) and
|
||||||
|
not(might_have_sideeffects(tunarynode(right).left)) then
|
||||||
|
begin
|
||||||
|
if right.nodetype=notn then
|
||||||
|
newinlinenodetype:=in_not_assign_x
|
||||||
|
else
|
||||||
|
newinlinenodetype:=in_neg_assign_x;
|
||||||
|
result:=cinlinenode.createintern(
|
||||||
|
newinlinenodetype,false,tunarynode(right).left);
|
||||||
|
result.localswitches:=localswitches;
|
||||||
|
result.fileinfo:=fileinfo;
|
||||||
|
result.verbosity:=verbosity;
|
||||||
|
tunarynode(right).left:=nil;
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
{ replace i:=not i by in_not_assign_x(i)
|
||||||
|
i:=-i by in_neg_assign_x(i)
|
||||||
|
|
||||||
|
this handles the case with type conversions:
|
||||||
|
outer typeconv: right
|
||||||
|
neg/not: ttypeconvnode(right).left
|
||||||
|
inner typeconv: tunarynode(ttypeconvnode(right).left).left
|
||||||
|
right side 'i': ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left }
|
||||||
|
if (right.nodetype=typeconvn) and
|
||||||
|
(ttypeconvnode(right).convtype=tc_int_2_int) and
|
||||||
|
(ttypeconvnode(right).left.nodetype in [notn,unaryminusn]) and
|
||||||
|
is_integer(ttypeconvnode(right).left.resultdef) and
|
||||||
|
(right.resultdef.size<=ttypeconvnode(right).left.resultdef.size) and
|
||||||
|
(tunarynode(ttypeconvnode(right).left).left.nodetype=typeconvn) and
|
||||||
|
(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).convtype=tc_int_2_int) and
|
||||||
|
are_equal_ints(right.resultdef,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
|
||||||
|
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.isequal(left) and
|
||||||
|
is_integer(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
|
||||||
|
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
||||||
|
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
||||||
|
valid_for_var(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left,false) and
|
||||||
|
not(might_have_sideeffects(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left)) then
|
||||||
|
begin
|
||||||
|
if ttypeconvnode(right).left.nodetype=notn then
|
||||||
|
newinlinenodetype:=in_not_assign_x
|
||||||
|
else
|
||||||
|
newinlinenodetype:=in_neg_assign_x;
|
||||||
|
result:=cinlinenode.createintern(
|
||||||
|
newinlinenodetype,false,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left);
|
||||||
|
result.localswitches:=localswitches;
|
||||||
|
result.fileinfo:=fileinfo;
|
||||||
|
result.verbosity:=verbosity;
|
||||||
|
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left:=nil;
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
|
||||||
|
if not (cs_opt_use_load_modify_store in current_settings.optimizerswitches) then
|
||||||
|
exit;
|
||||||
|
{ *** Here are more complex optimizations which are performed only
|
||||||
|
when cs_opt_use_load_modify_store is enabled.
|
||||||
|
}
|
||||||
{$ifdef enable_shl_shr_assign_x_y}
|
{$ifdef enable_shl_shr_assign_x_y}
|
||||||
{ replace i:=i shl k by in_shl_assign_x_y(i,k)
|
{ replace i:=i shl k by in_shl_assign_x_y(i,k)
|
||||||
i:=i shr k by in_shr_assign_x_y(i,k)
|
i:=i shr k by in_shr_assign_x_y(i,k)
|
||||||
@ -555,65 +625,6 @@ unit optloadmodifystore;
|
|||||||
exit;
|
exit;
|
||||||
end;
|
end;
|
||||||
{$endif enable_sar_assign_x_y or enable_rox_assign_x_y}
|
{$endif enable_sar_assign_x_y or enable_rox_assign_x_y}
|
||||||
{ replace i:=not i by in_not_assign_x(i)
|
|
||||||
i:=-i by in_neg_assign_x(i)
|
|
||||||
|
|
||||||
this handles the case, where there are no implicit type conversions }
|
|
||||||
if (right.nodetype in [notn,unaryminusn]) and
|
|
||||||
(tunarynode(right).left.isequal(left)) and
|
|
||||||
is_integer(tunarynode(right).left.resultdef) and
|
|
||||||
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
|
||||||
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
|
||||||
valid_for_var(tunarynode(right).left,false) and
|
|
||||||
not(might_have_sideeffects(tunarynode(right).left)) then
|
|
||||||
begin
|
|
||||||
if right.nodetype=notn then
|
|
||||||
newinlinenodetype:=in_not_assign_x
|
|
||||||
else
|
|
||||||
newinlinenodetype:=in_neg_assign_x;
|
|
||||||
result:=cinlinenode.createintern(
|
|
||||||
newinlinenodetype,false,tunarynode(right).left);
|
|
||||||
result.localswitches:=localswitches;
|
|
||||||
result.fileinfo:=fileinfo;
|
|
||||||
result.verbosity:=verbosity;
|
|
||||||
tunarynode(right).left:=nil;
|
|
||||||
exit;
|
|
||||||
end;
|
|
||||||
{ replace i:=not i by in_not_assign_x(i)
|
|
||||||
i:=-i by in_neg_assign_x(i)
|
|
||||||
|
|
||||||
this handles the case with type conversions:
|
|
||||||
outer typeconv: right
|
|
||||||
neg/not: ttypeconvnode(right).left
|
|
||||||
inner typeconv: tunarynode(ttypeconvnode(right).left).left
|
|
||||||
right side 'i': ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left }
|
|
||||||
if (right.nodetype=typeconvn) and
|
|
||||||
(ttypeconvnode(right).convtype=tc_int_2_int) and
|
|
||||||
(ttypeconvnode(right).left.nodetype in [notn,unaryminusn]) and
|
|
||||||
is_integer(ttypeconvnode(right).left.resultdef) and
|
|
||||||
(right.resultdef.size<=ttypeconvnode(right).left.resultdef.size) and
|
|
||||||
(tunarynode(ttypeconvnode(right).left).left.nodetype=typeconvn) and
|
|
||||||
(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).convtype=tc_int_2_int) and
|
|
||||||
are_equal_ints(right.resultdef,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
|
|
||||||
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.isequal(left) and
|
|
||||||
is_integer(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
|
|
||||||
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
|
||||||
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
|
|
||||||
valid_for_var(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left,false) and
|
|
||||||
not(might_have_sideeffects(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left)) then
|
|
||||||
begin
|
|
||||||
if ttypeconvnode(right).left.nodetype=notn then
|
|
||||||
newinlinenodetype:=in_not_assign_x
|
|
||||||
else
|
|
||||||
newinlinenodetype:=in_neg_assign_x;
|
|
||||||
result:=cinlinenode.createintern(
|
|
||||||
newinlinenodetype,false,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left);
|
|
||||||
result.localswitches:=localswitches;
|
|
||||||
result.fileinfo:=fileinfo;
|
|
||||||
result.verbosity:=verbosity;
|
|
||||||
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left:=nil;
|
|
||||||
exit;
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
@ -8618,39 +8618,77 @@ unit aoptx86;
|
|||||||
begin
|
begin
|
||||||
if (taicpu(p).oper[0]^.typ = top_const) then
|
if (taicpu(p).oper[0]^.typ = top_const) then
|
||||||
begin
|
begin
|
||||||
if (taicpu(hp1).opcode = A_AND) and
|
case taicpu(hp1).opcode of
|
||||||
MatchOpType(taicpu(hp1),top_const,top_reg) and
|
A_AND:
|
||||||
(getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
||||||
{ the second register must contain the first one, so compare their subreg types }
|
(getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
||||||
(getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
|
{ the second register must contain the first one, so compare their subreg types }
|
||||||
(abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
|
(getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
|
||||||
{ change
|
(abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
|
||||||
and const1, reg
|
{ change
|
||||||
and const2, reg
|
and const1, reg
|
||||||
to
|
and const2, reg
|
||||||
and (const1 and const2), reg
|
to
|
||||||
}
|
and (const1 and const2), reg
|
||||||
begin
|
}
|
||||||
taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
|
taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
|
||||||
RemoveCurrentP(p, hp1);
|
DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
|
||||||
Result:=true;
|
RemoveCurrentP(p, hp1);
|
||||||
exit;
|
Result:=true;
|
||||||
end
|
exit;
|
||||||
else if (taicpu(hp1).opcode = A_MOVZX) and
|
end;
|
||||||
MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
||||||
SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
|
A_CMP:
|
||||||
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
|
||||||
(((taicpu(p).opsize=S_W) and
|
MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
|
||||||
(taicpu(hp1).opsize=S_BW)) or
|
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
|
||||||
((taicpu(p).opsize=S_L) and
|
{ Just check that the condition on the next instruction is compatible }
|
||||||
(taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}]))
|
GetNextInstruction(hp1, hp2) and
|
||||||
|
(hp2.typ = ait_instruction) and
|
||||||
|
(taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
|
||||||
|
then
|
||||||
|
{ change
|
||||||
|
and 2^n, reg
|
||||||
|
cmp 2^n, reg
|
||||||
|
j(c) / set(c) / cmov(c) (c is equal or not equal)
|
||||||
|
to
|
||||||
|
and 2^n, reg
|
||||||
|
test reg, reg
|
||||||
|
j(~c) / set(~c) / cmov(~c)
|
||||||
|
}
|
||||||
|
begin
|
||||||
|
{ Keep TEST instruction in, rather than remove it, because
|
||||||
|
it may trigger other optimisations such as MovAndTest2Test }
|
||||||
|
taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
|
||||||
|
taicpu(hp1).opcode := A_TEST;
|
||||||
|
DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
|
||||||
|
taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
|
||||||
|
A_MOVZX:
|
||||||
|
if MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
||||||
|
SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
|
||||||
|
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
||||||
|
(
|
||||||
|
(
|
||||||
|
(taicpu(p).opsize=S_W) and
|
||||||
|
(taicpu(hp1).opsize=S_BW)
|
||||||
|
) or
|
||||||
|
(
|
||||||
|
(taicpu(p).opsize=S_L) and
|
||||||
|
(taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
|
||||||
|
)
|
||||||
{$ifdef x86_64}
|
{$ifdef x86_64}
|
||||||
or
|
or
|
||||||
((taicpu(p).opsize=S_Q) and
|
(
|
||||||
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL]))
|
(taicpu(p).opsize=S_Q) and
|
||||||
|
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
|
||||||
|
)
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
) then
|
) then
|
||||||
begin
|
begin
|
||||||
if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
||||||
((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
|
((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
|
||||||
@ -8673,108 +8711,114 @@ unit aoptx86;
|
|||||||
{ See if there are other optimisations possible }
|
{ See if there are other optimisations possible }
|
||||||
Continue;
|
Continue;
|
||||||
end;
|
end;
|
||||||
end
|
end;
|
||||||
else if (taicpu(hp1).opcode = A_SHL) and
|
|
||||||
MatchOpType(taicpu(hp1),top_const,top_reg) and
|
A_SHL:
|
||||||
(getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
||||||
begin
|
(getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
||||||
|
begin
|
||||||
{$ifopt R+}
|
{$ifopt R+}
|
||||||
{$define RANGE_WAS_ON}
|
{$define RANGE_WAS_ON}
|
||||||
{$R-}
|
{$R-}
|
||||||
{$endif}
|
{$endif}
|
||||||
{ get length of potential and mask }
|
{ get length of potential and mask }
|
||||||
MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
|
MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
|
||||||
|
|
||||||
{ really a mask? }
|
{ really a mask? }
|
||||||
{$ifdef RANGE_WAS_ON}
|
{$ifdef RANGE_WAS_ON}
|
||||||
{$R+}
|
{$R+}
|
||||||
{$endif}
|
{$endif}
|
||||||
if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
|
if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
|
||||||
{ unmasked part shifted out? }
|
{ unmasked part shifted out? }
|
||||||
((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
|
((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
|
DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
|
||||||
RemoveCurrentP(p, hp1);
|
RemoveCurrentP(p, hp1);
|
||||||
Result:=true;
|
Result:=true;
|
||||||
exit;
|
exit;
|
||||||
end;
|
|
||||||
end
|
|
||||||
else if (taicpu(hp1).opcode = A_SHR) and
|
|
||||||
MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
||||||
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
|
||||||
(taicpu(hp1).oper[0]^.val <= 63) then
|
|
||||||
begin
|
|
||||||
{ Does SHR combined with the AND cover all the bits?
|
|
||||||
|
|
||||||
e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
|
|
||||||
|
|
||||||
MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
|
|
||||||
|
|
||||||
if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
|
|
||||||
((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
|
|
||||||
((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
|
|
||||||
begin
|
|
||||||
DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
|
|
||||||
RemoveCurrentP(p, hp1);
|
|
||||||
Result := True;
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
end
|
|
||||||
else if ((taicpu(hp1).opcode = A_MOVSX){$ifdef x86_64} or (taicpu(hp1).opcode = A_MOVSXD){$endif x86_64}) and
|
|
||||||
(taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
||||||
SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
|
|
||||||
begin
|
|
||||||
if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
|
|
||||||
(
|
|
||||||
(
|
|
||||||
(taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
|
||||||
((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
|
|
||||||
) or (
|
|
||||||
(taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
|
|
||||||
((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
|
|
||||||
{$ifdef x86_64}
|
|
||||||
) or (
|
|
||||||
(taicpu(hp1).opsize = S_LQ) and
|
|
||||||
((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
|
|
||||||
{$endif x86_64}
|
|
||||||
)
|
|
||||||
) then
|
|
||||||
begin
|
|
||||||
if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
|
|
||||||
begin
|
|
||||||
DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
|
|
||||||
RemoveInstruction(hp1);
|
|
||||||
{ See if there are other optimisations possible }
|
|
||||||
Continue;
|
|
||||||
end;
|
|
||||||
|
|
||||||
{ The super-registers are the same though.
|
|
||||||
|
|
||||||
Note that this change by itself doesn't improve
|
|
||||||
code speed, but it opens up other optimisations. }
|
|
||||||
{$ifdef x86_64}
|
|
||||||
{ Convert 64-bit register to 32-bit }
|
|
||||||
case taicpu(hp1).opsize of
|
|
||||||
S_BQ:
|
|
||||||
begin
|
|
||||||
taicpu(hp1).opsize := S_BL;
|
|
||||||
taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
||||||
end;
|
|
||||||
S_WQ:
|
|
||||||
begin
|
|
||||||
taicpu(hp1).opsize := S_WL;
|
|
||||||
taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
||||||
end
|
|
||||||
else
|
|
||||||
;
|
|
||||||
end;
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
A_SHR:
|
||||||
|
if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
||||||
|
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
||||||
|
(taicpu(hp1).oper[0]^.val <= 63) then
|
||||||
|
begin
|
||||||
|
{ Does SHR combined with the AND cover all the bits?
|
||||||
|
|
||||||
|
e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
|
||||||
|
|
||||||
|
MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
|
||||||
|
|
||||||
|
if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
|
||||||
|
((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
|
||||||
|
((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
|
||||||
|
begin
|
||||||
|
DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
|
||||||
|
RemoveCurrentP(p, hp1);
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
||||||
|
if (taicpu(hp1).oper[0]^.typ = top_reg) and
|
||||||
|
SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
|
||||||
|
begin
|
||||||
|
if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
|
||||||
|
(
|
||||||
|
(
|
||||||
|
(taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
||||||
|
((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
|
||||||
|
) or (
|
||||||
|
(taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
|
||||||
|
((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
|
||||||
|
{$ifdef x86_64}
|
||||||
|
) or (
|
||||||
|
(taicpu(hp1).opsize = S_LQ) and
|
||||||
|
((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
|
)
|
||||||
taicpu(hp1).opcode := A_MOVZX;
|
) then
|
||||||
{ See if there are other optimisations possible }
|
begin
|
||||||
Continue;
|
if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
|
||||||
end;
|
begin
|
||||||
end;
|
DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
|
||||||
|
RemoveInstruction(hp1);
|
||||||
|
{ See if there are other optimisations possible }
|
||||||
|
Continue;
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ The super-registers are the same though.
|
||||||
|
|
||||||
|
Note that this change by itself doesn't improve
|
||||||
|
code speed, but it opens up other optimisations. }
|
||||||
|
{$ifdef x86_64}
|
||||||
|
{ Convert 64-bit register to 32-bit }
|
||||||
|
case taicpu(hp1).opsize of
|
||||||
|
S_BQ:
|
||||||
|
begin
|
||||||
|
taicpu(hp1).opsize := S_BL;
|
||||||
|
taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
||||||
|
end;
|
||||||
|
S_WQ:
|
||||||
|
begin
|
||||||
|
taicpu(hp1).opsize := S_WL;
|
||||||
|
taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
;
|
||||||
|
end;
|
||||||
|
{$endif x86_64}
|
||||||
|
DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
|
||||||
|
taicpu(hp1).opcode := A_MOVZX;
|
||||||
|
{ See if there are other optimisations possible }
|
||||||
|
Continue;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
;
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if (taicpu(hp1).is_jmp) and
|
if (taicpu(hp1).is_jmp) and
|
||||||
|
@ -295,7 +295,7 @@ const
|
|||||||
fnstcw oldcw
|
fnstcw oldcw
|
||||||
fldt d
|
fldt d
|
||||||
movw oldcw,%cx
|
movw oldcw,%cx
|
||||||
orw $0x0c3f,%cx
|
orw $0x0c00,%cx
|
||||||
movw %cx,newcw
|
movw %cx,newcw
|
||||||
fldcw newcw
|
fldcw newcw
|
||||||
fld %st
|
fld %st
|
||||||
@ -315,7 +315,7 @@ const
|
|||||||
asm
|
asm
|
||||||
fnstcw oldcw
|
fnstcw oldcw
|
||||||
movw oldcw,%cx
|
movw oldcw,%cx
|
||||||
orw $0x0c3f,%cx
|
orw $0x0c00,%cx
|
||||||
movw %cx,newcw
|
movw %cx,newcw
|
||||||
fldcw newcw
|
fldcw newcw
|
||||||
fldt d
|
fldt d
|
||||||
@ -336,7 +336,7 @@ const
|
|||||||
asm
|
asm
|
||||||
fnstcw oldcw
|
fnstcw oldcw
|
||||||
movw oldcw,%cx
|
movw oldcw,%cx
|
||||||
orw $0x0c3f,%cx
|
orw $0x0c00,%cx
|
||||||
movw %cx,newcw
|
movw %cx,newcw
|
||||||
fldcw newcw
|
fldcw newcw
|
||||||
fldt d
|
fldt d
|
||||||
|
Loading…
Reference in New Issue
Block a user