* x86: "x and ((1 shl y) - 1)" to BZHI adapted for in_and_assign_x_y

This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-11-02 16:39:47 +00:00 committed by FPK
parent 7da9b4a988
commit c40f518de4

View File

@ -61,6 +61,7 @@ interface
{ second pass override to generate these nodes }
procedure pass_generate_code_cpu;override;
procedure second_IncludeExclude;override;
procedure second_AndOrXorShiftRot_assign;override;
procedure second_pi; override;
procedure second_arctan_real; override;
procedure second_abs_real; override;
@ -98,7 +99,7 @@ implementation
htypechk,
cgbase,pass_1,pass_2,
cpuinfo,cpubase,nutils,
ncal,ncgutil,nld,ncon,
ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
tgobj,
cga,cgutils,cgx86,cgobj,hlcgobj;
@ -619,6 +620,92 @@ implementation
end;
procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
var
opsize : tcgsize;
valuenode, indexnode, loadnode: TNode;
DestReg: TRegister;
begin
{$ifndef i8086}
if (cs_opt_level2 in current_settings.optimizerswitches) then
begin
{ Saves on a lot of typecasting and potential coding mistakes }
valuenode := tcallparanode(left).left;
loadnode := tcallparanode(tcallparanode(left).right).left;
opsize := def_cgsize(loadnode.resultdef);
{ BMI2 optimisations }
if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then
begin
{ If the second operand is "((1 shl y) - 1)", we can turn it
into a BZHI operator instead }
if (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
(valuenode.nodetype = subn) and
(taddnode(valuenode).right.nodetype = ordconstn) and
(tordconstnode(taddnode(valuenode).right).value = 1) and
(taddnode(valuenode).left.nodetype = shln) and
(tshlshrnode(taddnode(valuenode).left).left.nodetype = ordconstn) and
(tordconstnode(tshlshrnode(taddnode(valuenode).left).left).value = 1) then
begin
{ Skip the subtract and shift nodes completely }
{ Helps avoid all the awkward typecasts }
indexnode := tshlshrnode(taddnode(valuenode).left).right;
{$ifdef x86_64}
{ The code generator sometimes extends the shift result to 64-bit unnecessarily }
if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
(def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then
begin
{ Convert to the 32-bit type }
indexnode.resultdef := loadnode.resultdef;
node_reset_flags(indexnode,[nf_pass1_done]);
{ We should't be getting any new errors }
if do_firstpass(indexnode) then
InternalError(2022110202);
{ Keep things internally consistent in case indexnode changed }
tshlshrnode(taddnode(valuenode).left).right := indexnode;
end;
{$endif x86_64}
secondpass(indexnode);
secondpass(loadnode);
{ allocate registers }
hlcg.location_force_reg(
current_asmdata.CurrAsmList,
indexnode.location,
indexnode.resultdef,
loadnode.resultdef,
false
);
case loadnode.location.loc of
LOC_REFERENCE,
LOC_CREFERENCE:
begin
{ BZHI can only write to a register }
DestReg := cg.getintregister(current_asmdata.CurrAsmList,opsize);
emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.reference, DestReg);
emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], DestReg, loadnode.location.reference);
end;
LOC_REGISTER,
LOC_CREGISTER:
emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.register, loadnode.location.register);
else
InternalError(2022102110);
end;
Exit;
end;
end;
end;
{$endif not i8086}
inherited second_AndOrXorShiftRot_assign;
end;
procedure tx86inlinenode.second_pi;
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));