Merge branch 'main' of gitlab.com:freepascal.org/fpc/source into main

This commit is contained in:
Michaël Van Canneyt 2021-08-13 14:09:43 +02:00
commit 5022d0e772
4 changed files with 252 additions and 189 deletions

View File

@ -197,7 +197,8 @@ implementation
cpuinfo,
htypechk,pass_1,procinfo,paramgr,
nbas,ncon,nflw,ninl,ncnv,nmem,ncal,nutils,
cgbase
cgbase,
optloadmodifystore
;
@ -625,6 +626,13 @@ implementation
is_constrealnode(right) and
not equal_defs(right.resultdef,left.resultdef) then
inserttypeconv(right,left.resultdef);
{$if (cs_opt_use_load_modify_store in supported_optimizerswitches)}
{ Perform the simple optimizations here when -O2 is enabled but the dedicated
cs_opt_use_load_modify_store optimization pass is not enabled. }
if (cs_opt_level2 in current_settings.optimizerswitches) and
not (cs_opt_use_load_modify_store in current_settings.optimizerswitches) then
result:=try_opt_assignmentnode(self);
{$endif}
end;

View File

@ -38,16 +38,17 @@ unit optloadmodifystore;
interface
uses
node;
node,nld;
procedure do_optloadmodifystore(var rootnode : tnode);
function try_opt_assignmentnode(assignmentnode : tassignmentnode): tnode;
implementation
uses
globtype,verbose,nutils,compinnr,
globtype,globals,verbose,nutils,compinnr,
defutil,defcmp,htypechk,pass_1,constexp,
nadd,ncal,ncon,ncnv,ninl,nld,nmat,
nadd,ncal,ncon,ncnv,ninl,nmat,
symdef;
function try_opt_assignmentnode(assignmentnode: tassignmentnode): tnode;
@ -57,6 +58,10 @@ unit optloadmodifystore;
result:=nil;
with assignmentnode do
begin
{ *** Here are simple optimizations which are performed either
when -O2 is enabled (via a call from tassignmentnode.simplify) or
when cs_opt_use_load_modify_store is enabled (in a separate pass).
}
{ replace i:=succ/pred(i) by inc/dec(i)? }
if (right.nodetype=inlinen) and
((tinlinenode(right).inlinenumber=in_succ_x) or (tinlinenode(right).inlinenumber=in_pred_x)) and
@ -273,6 +278,71 @@ unit optloadmodifystore;
taddnode(ttypeconvnode(right).left).left:=nil;
exit;
end;
{ replace i:=not i by in_not_assign_x(i)
i:=-i by in_neg_assign_x(i)
this handles the case where there are no implicit type conversions }
if (right.nodetype in [notn,unaryminusn]) and
(tunarynode(right).left.isequal(left)) and
is_integer(tunarynode(right).left.resultdef) and
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
valid_for_var(tunarynode(right).left,false) and
not(might_have_sideeffects(tunarynode(right).left)) then
begin
if right.nodetype=notn then
newinlinenodetype:=in_not_assign_x
else
newinlinenodetype:=in_neg_assign_x;
result:=cinlinenode.createintern(
newinlinenodetype,false,tunarynode(right).left);
result.localswitches:=localswitches;
result.fileinfo:=fileinfo;
result.verbosity:=verbosity;
tunarynode(right).left:=nil;
exit;
end;
{ replace i:=not i by in_not_assign_x(i)
i:=-i by in_neg_assign_x(i)
this handles the case with type conversions:
outer typeconv: right
neg/not: ttypeconvnode(right).left
inner typeconv: tunarynode(ttypeconvnode(right).left).left
right side 'i': ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left }
if (right.nodetype=typeconvn) and
(ttypeconvnode(right).convtype=tc_int_2_int) and
(ttypeconvnode(right).left.nodetype in [notn,unaryminusn]) and
is_integer(ttypeconvnode(right).left.resultdef) and
(right.resultdef.size<=ttypeconvnode(right).left.resultdef.size) and
(tunarynode(ttypeconvnode(right).left).left.nodetype=typeconvn) and
(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).convtype=tc_int_2_int) and
are_equal_ints(right.resultdef,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.isequal(left) and
is_integer(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
valid_for_var(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left,false) and
not(might_have_sideeffects(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left)) then
begin
if ttypeconvnode(right).left.nodetype=notn then
newinlinenodetype:=in_not_assign_x
else
newinlinenodetype:=in_neg_assign_x;
result:=cinlinenode.createintern(
newinlinenodetype,false,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left);
result.localswitches:=localswitches;
result.fileinfo:=fileinfo;
result.verbosity:=verbosity;
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left:=nil;
exit;
end;
if not (cs_opt_use_load_modify_store in current_settings.optimizerswitches) then
exit;
{ *** Here are more complex optimizations which are performed only
when cs_opt_use_load_modify_store is enabled.
}
{$ifdef enable_shl_shr_assign_x_y}
{ replace i:=i shl k by in_shl_assign_x_y(i,k)
i:=i shr k by in_shr_assign_x_y(i,k)
@ -555,65 +625,6 @@ unit optloadmodifystore;
exit;
end;
{$endif enable_sar_assign_x_y or enable_rox_assign_x_y}
{ replace i:=not i by in_not_assign_x(i)
i:=-i by in_neg_assign_x(i)
this handles the case, where there are no implicit type conversions }
if (right.nodetype in [notn,unaryminusn]) and
(tunarynode(right).left.isequal(left)) and
is_integer(tunarynode(right).left.resultdef) and
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
valid_for_var(tunarynode(right).left,false) and
not(might_have_sideeffects(tunarynode(right).left)) then
begin
if right.nodetype=notn then
newinlinenodetype:=in_not_assign_x
else
newinlinenodetype:=in_neg_assign_x;
result:=cinlinenode.createintern(
newinlinenodetype,false,tunarynode(right).left);
result.localswitches:=localswitches;
result.fileinfo:=fileinfo;
result.verbosity:=verbosity;
tunarynode(right).left:=nil;
exit;
end;
{ replace i:=not i by in_not_assign_x(i)
i:=-i by in_neg_assign_x(i)
this handles the case with type conversions:
outer typeconv: right
neg/not: ttypeconvnode(right).left
inner typeconv: tunarynode(ttypeconvnode(right).left).left
right side 'i': ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left }
if (right.nodetype=typeconvn) and
(ttypeconvnode(right).convtype=tc_int_2_int) and
(ttypeconvnode(right).left.nodetype in [notn,unaryminusn]) and
is_integer(ttypeconvnode(right).left.resultdef) and
(right.resultdef.size<=ttypeconvnode(right).left.resultdef.size) and
(tunarynode(ttypeconvnode(right).left).left.nodetype=typeconvn) and
(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).convtype=tc_int_2_int) and
are_equal_ints(right.resultdef,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.isequal(left) and
is_integer(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left.resultdef) and
((localswitches*[cs_check_overflow,cs_check_range])=[]) and
((right.localswitches*[cs_check_overflow,cs_check_range])=[]) and
valid_for_var(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left,false) and
not(might_have_sideeffects(ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left)) then
begin
if ttypeconvnode(right).left.nodetype=notn then
newinlinenodetype:=in_not_assign_x
else
newinlinenodetype:=in_neg_assign_x;
result:=cinlinenode.createintern(
newinlinenodetype,false,ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left);
result.localswitches:=localswitches;
result.fileinfo:=fileinfo;
result.verbosity:=verbosity;
ttypeconvnode(tunarynode(ttypeconvnode(right).left).left).left:=nil;
exit;
end;
end;
end;

View File

@ -8618,8 +8618,9 @@ unit aoptx86;
begin
if (taicpu(p).oper[0]^.typ = top_const) then
begin
if (taicpu(hp1).opcode = A_AND) and
MatchOpType(taicpu(hp1),top_const,top_reg) and
case taicpu(hp1).opcode of
A_AND:
if MatchOpType(taicpu(hp1),top_const,top_reg) and
(getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
{ the second register must contain the first one, so compare their subreg types }
(getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
@ -8636,19 +8637,56 @@ unit aoptx86;
RemoveCurrentP(p, hp1);
Result:=true;
exit;
end
else if (taicpu(hp1).opcode = A_MOVZX) and
MatchOpType(taicpu(hp1),top_reg,top_reg) and
end;
A_CMP:
if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
{ Just check that the condition on the next instruction is compatible }
GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and
(taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
then
{ change
and 2^n, reg
cmp 2^n, reg
j(c) / set(c) / cmov(c) (c is equal or not equal)
to
and 2^n, reg
test reg, reg
j(~c) / set(~c) / cmov(~c)
}
begin
{ Keep TEST instruction in, rather than remove it, because
it may trigger other optimisations such as MovAndTest2Test }
taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
taicpu(hp1).opcode := A_TEST;
DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
Result := True;
Exit;
end;
A_MOVZX:
if MatchOpType(taicpu(hp1),top_reg,top_reg) and
SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
(((taicpu(p).opsize=S_W) and
(taicpu(hp1).opsize=S_BW)) or
((taicpu(p).opsize=S_L) and
(taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}]))
(
(
(taicpu(p).opsize=S_W) and
(taicpu(hp1).opsize=S_BW)
) or
(
(taicpu(p).opsize=S_L) and
(taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
)
{$ifdef x86_64}
or
((taicpu(p).opsize=S_Q) and
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL]))
(
(taicpu(p).opsize=S_Q) and
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
)
{$endif x86_64}
) then
begin
@ -8673,9 +8711,10 @@ unit aoptx86;
{ See if there are other optimisations possible }
Continue;
end;
end
else if (taicpu(hp1).opcode = A_SHL) and
MatchOpType(taicpu(hp1),top_const,top_reg) and
end;
A_SHL:
if MatchOpType(taicpu(hp1),top_const,top_reg) and
(getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
begin
{$ifopt R+}
@ -8698,9 +8737,10 @@ unit aoptx86;
Result:=true;
exit;
end;
end
else if (taicpu(hp1).opcode = A_SHR) and
MatchOpType(taicpu(hp1),top_const,top_reg) and
end;
A_SHR:
if MatchOpType(taicpu(hp1),top_const,top_reg) and
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.val <= 63) then
begin
@ -8719,9 +8759,10 @@ unit aoptx86;
Result := True;
Exit;
end;
end
else if ((taicpu(hp1).opcode = A_MOVSX){$ifdef x86_64} or (taicpu(hp1).opcode = A_MOVSXD){$endif x86_64}) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
end;
A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
if (taicpu(hp1).oper[0]^.typ = top_reg) and
SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
begin
if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
@ -8775,6 +8816,9 @@ unit aoptx86;
Continue;
end;
end;
else
;
end;
end;
if (taicpu(hp1).is_jmp) and

View File

@ -295,7 +295,7 @@ const
fnstcw oldcw
fldt d
movw oldcw,%cx
orw $0x0c3f,%cx
orw $0x0c00,%cx
movw %cx,newcw
fldcw newcw
fld %st
@ -315,7 +315,7 @@ const
asm
fnstcw oldcw
movw oldcw,%cx
orw $0x0c3f,%cx
orw $0x0c00,%cx
movw %cx,newcw
fldcw newcw
fldt d
@ -336,7 +336,7 @@ const
asm
fnstcw oldcw
movw oldcw,%cx
orw $0x0c3f,%cx
orw $0x0c00,%cx
movw %cx,newcw
fldcw newcw
fldt d