diff --git a/compiler/x86/nx86inl.pas b/compiler/x86/nx86inl.pas index b0ff6f5ae8..8fd0d5b989 100644 --- a/compiler/x86/nx86inl.pas +++ b/compiler/x86/nx86inl.pas @@ -61,6 +61,7 @@ interface { second pass override to generate these nodes } procedure pass_generate_code_cpu;override; procedure second_IncludeExclude;override; + procedure second_AndOrXorShiftRot_assign;override; procedure second_pi; override; procedure second_arctan_real; override; procedure second_abs_real; override; @@ -98,7 +99,7 @@ implementation htypechk, cgbase,pass_1,pass_2, cpuinfo,cpubase,nutils, - ncal,ncgutil,nld,ncon, + ncal,ncgutil,nld,ncon,nadd,nmat,constexp, tgobj, cga,cgutils,cgx86,cgobj,hlcgobj; @@ -619,6 +620,92 @@ implementation end; + procedure tx86inlinenode.second_AndOrXorShiftRot_assign; + var + opsize : tcgsize; + valuenode, indexnode, loadnode: TNode; + DestReg: TRegister; + begin +{$ifndef i8086} + if (cs_opt_level2 in current_settings.optimizerswitches) then + begin + { Saves on a lot of typecasting and potential coding mistakes } + valuenode := tcallparanode(left).left; + loadnode := tcallparanode(tcallparanode(left).right).left; + + opsize := def_cgsize(loadnode.resultdef); + + { BMI2 optimisations } + if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then + begin + { If the second operand is "((1 shl y) - 1)", we can turn it + into a BZHI operator instead } + if (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and + (valuenode.nodetype = subn) and + (taddnode(valuenode).right.nodetype = ordconstn) and + (tordconstnode(taddnode(valuenode).right).value = 1) and + (taddnode(valuenode).left.nodetype = shln) and + (tshlshrnode(taddnode(valuenode).left).left.nodetype = ordconstn) and + (tordconstnode(tshlshrnode(taddnode(valuenode).left).left).value = 1) then + begin + { Skip the subtract and shift nodes completely } + + { Helps avoid all the awkward typecasts } + indexnode := tshlshrnode(taddnode(valuenode).left).right; +{$ifdef x86_64} + { The code generator sometimes extends the shift result to 64-bit unnecessarily } + if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and + (def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then + begin + { Convert to the 32-bit type } + indexnode.resultdef := loadnode.resultdef; + node_reset_flags(indexnode,[nf_pass1_done]); + + { We should't be getting any new errors } + if do_firstpass(indexnode) then + InternalError(2022110202); + + { Keep things internally consistent in case indexnode changed } + tshlshrnode(taddnode(valuenode).left).right := indexnode; + end; +{$endif x86_64} + secondpass(indexnode); + secondpass(loadnode); + + { allocate registers } + hlcg.location_force_reg( + current_asmdata.CurrAsmList, + indexnode.location, + indexnode.resultdef, + loadnode.resultdef, + false + ); + + case loadnode.location.loc of + LOC_REFERENCE, + LOC_CREFERENCE: + begin + { BZHI can only write to a register } + DestReg := cg.getintregister(current_asmdata.CurrAsmList,opsize); + emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.reference, DestReg); + emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], DestReg, loadnode.location.reference); + end; + LOC_REGISTER, + LOC_CREGISTER: + emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.register, loadnode.location.register); + else + InternalError(2022102110); + end; + + Exit; + end; + end; + end; +{$endif not i8086} + + inherited second_AndOrXorShiftRot_assign; + end; + procedure tx86inlinenode.second_pi; begin location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));