diff --git a/compiler/aarch64/cgcpu.pas b/compiler/aarch64/cgcpu.pas index ad18f5c660..1f8183f4e1 100644 --- a/compiler/aarch64/cgcpu.pas +++ b/compiler/aarch64/cgcpu.pas @@ -84,7 +84,7 @@ interface procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override; - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; { comparison operations } procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override; procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override; @@ -1292,7 +1292,7 @@ implementation end; - procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); var bitsize: longint; begin @@ -1304,8 +1304,9 @@ implementation begin bitsize:=32; end; - { source is 0 -> dst will have to become 255 } - list.concat(taicpu.op_reg_const(A_CMP,src,0)); + if not(not_zero) then + { source is 0 -> dst will have to become 255 } + list.concat(taicpu.op_reg_const(A_CMP,src,0)); if reverse then begin list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src)); @@ -1319,10 +1320,13 @@ implementation list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst)); end; { set dst to -1 if src was 0 } - list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE)); - { mask the -1 to 255 if src was 0 (anyone find a two-instruction - branch-free version? All of mine are 3...) } - list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32))); + if not(not_zero) then + begin + list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE)); + { mask the -1 to 255 if src was 0 (anyone find a two-instruction + branch-free version? All of mine are 3...) } + list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32))); + end; end; diff --git a/compiler/arm/cgcpu.pas b/compiler/arm/cgcpu.pas index 878ffebcd8..7161e9c00b 100644 --- a/compiler/arm/cgcpu.pas +++ b/compiler/arm/cgcpu.pas @@ -106,7 +106,7 @@ unit cgcpu; procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override; { Transform unsupported methods into Internal errors } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override; { try to generate optimized 32 Bit multiplication, returns true if successful generated } function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean; @@ -1827,7 +1827,7 @@ unit cgcpu; end; - procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); + procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); begin if reverse then begin diff --git a/compiler/cghlcpu.pas b/compiler/cghlcpu.pas index 50774ea1b2..0a5b8f06c1 100644 --- a/compiler/cghlcpu.pas +++ b/compiler/cghlcpu.pas @@ -65,14 +65,14 @@ uses procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override; procedure a_call_reg(list: TAsmList; reg: tregister); override; procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override; - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; end; implementation { thlbasecgcpu } - procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); begin internalerror(2012042801); end; diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas index aaf0d4e3ca..ac82da79ef 100644 --- a/compiler/cgobj.pas +++ b/compiler/cgobj.pas @@ -267,7 +267,7 @@ unit cgobj; procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract; { bit scan instructions } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual; { Multiplication with doubling result size. dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. } @@ -3026,7 +3026,7 @@ implementation end; - procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); begin internalerror(2014070601); end; diff --git a/compiler/hlcg2ll.pas b/compiler/hlcg2ll.pas index 7a0c264162..3c1df0e16b 100644 --- a/compiler/hlcg2ll.pas +++ b/compiler/hlcg2ll.pas @@ -171,7 +171,7 @@ unit hlcg2ll; procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override; { bit scan instructions } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; { fpu move instructions } procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override; @@ -586,9 +586,9 @@ implementation cg.a_loadaddr_ref_reg(list,ref,r); end; - procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); + procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); begin - cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst); + cg.a_bit_scan_reg_reg(list,reverse,not_zero,def_cgsize(srcsize),def_cgsize(dstsize),src,dst); end; procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); diff --git a/compiler/hlcgobj.pas b/compiler/hlcgobj.pas index ba6b3a935e..a27870759d 100644 --- a/compiler/hlcgobj.pas +++ b/compiler/hlcgobj.pas @@ -310,7 +310,7 @@ unit hlcgobj; public { bit scan instructions (still need transformation to thlcgobj) } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract; { fpu move instructions } procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract; diff --git a/compiler/jvm/hlcgcpu.pas b/compiler/jvm/hlcgcpu.pas index 956e404d56..673e3d942e 100644 --- a/compiler/jvm/hlcgcpu.pas +++ b/compiler/jvm/hlcgcpu.pas @@ -114,7 +114,7 @@ uses procedure gen_exit_code(list: TAsmList); override; { unimplemented/unnecessary routines } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override; procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override; procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override; @@ -1898,7 +1898,7 @@ implementation { nothing } end; - procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); + procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); begin internalerror(2012090201); end; diff --git a/compiler/llvm/hlcgllvm.pas b/compiler/llvm/hlcgllvm.pas index 11434345ac..3919a3f709 100644 --- a/compiler/llvm/hlcgllvm.pas +++ b/compiler/llvm/hlcgllvm.pas @@ -145,7 +145,7 @@ uses {$endif cpuflags} { unimplemented or unnecessary routines } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; procedure g_stackpointer_alloc(list: TAsmList; size: longint); override; procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint); override; procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: aint); override; @@ -2019,7 +2019,7 @@ implementation end; - procedure thlcgllvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); + procedure thlcgllvm.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); begin internalerror(2012090201); end; diff --git a/compiler/loongarch64/cgcpu.pas b/compiler/loongarch64/cgcpu.pas index fb6b526f59..de1295d180 100644 --- a/compiler/loongarch64/cgcpu.pas +++ b/compiler/loongarch64/cgcpu.pas @@ -57,7 +57,7 @@ unit cgcpu; procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override; { bit scan instructions } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; { fpu move instructions } procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override; @@ -512,7 +512,7 @@ implementation end; - procedure tcgloongarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure tcgloongarch64.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); begin internalerror(2022111943); end; diff --git a/compiler/ncginl.pas b/compiler/ncginl.pas index 3981ff45c9..14ddb11ad2 100644 --- a/compiler/ncginl.pas +++ b/compiler/ncginl.pas @@ -81,7 +81,7 @@ implementation aasmbase,aasmdata, cgbase,pass_2, cpubase,procinfo, - ncon,ncal, + nadd,ncon,ncal, tgobj,ncgutil, cgutils,cgobj,hlcgobj, defcmp @@ -994,10 +994,13 @@ implementation procedure tcginlinenode.second_BsfBsr; var + not_zero, reverse: boolean; opsize: tcgsize; begin reverse:=(inlinenumber = in_bsr_x); + not_zero:=(left.nodetype=orn) and (((is_constintnode(taddnode(left).left) and (tordconstnode(taddnode(left).left).value<>0))) or + ((is_constintnode(taddnode(left).right) and (tordconstnode(taddnode(left).right).value<>0)))); secondpass(left); opsize:=tcgsize2unsigned[left.location.size]; @@ -1006,7 +1009,7 @@ implementation location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size); - cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,location.size,left.location.register,location.register); + cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,not_zero,opsize,location.size,left.location.register,location.register); end; diff --git a/compiler/ppcgen/cgppc.pas b/compiler/ppcgen/cgppc.pas index 37d191471d..fe3c7b73fc 100644 --- a/compiler/ppcgen/cgppc.pas +++ b/compiler/ppcgen/cgppc.pas @@ -35,7 +35,7 @@ unit cgppc; tcgppcgen = class(tcg) procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : tcgpara); override; - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; procedure a_call_reg(list : TAsmList;reg: tregister); override; @@ -205,7 +205,7 @@ unit cgppc; end; - procedure tcgppcgen.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure tcgppcgen.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); var tmpreg: tregister; cntlzop: tasmop; diff --git a/compiler/riscv/cgrv.pas b/compiler/riscv/cgrv.pas index 8f313fc1ad..74732e5504 100644 --- a/compiler/riscv/cgrv.pas +++ b/compiler/riscv/cgrv.pas @@ -35,7 +35,7 @@ unit cgrv; tcgrv = class(tcg) procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : tcgpara); override; - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override; procedure a_call_reg(list : TAsmList;reg: tregister); override; procedure a_call_name(list : TAsmList;const s : string; weak: boolean); override; @@ -196,7 +196,7 @@ unit cgrv; end; - procedure tcgrv.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); + procedure tcgrv.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); begin internalerror(2016060401); end; diff --git a/compiler/wasm32/hlcgcpu.pas b/compiler/wasm32/hlcgcpu.pas index 2a0cef59ed..59f4517aa9 100644 --- a/compiler/wasm32/hlcgcpu.pas +++ b/compiler/wasm32/hlcgcpu.pas @@ -133,7 +133,7 @@ uses procedure gen_stack_check_call(list: TAsmList); override; { unimplemented/unnecessary routines } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); override; procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override; procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override; procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override; @@ -2446,7 +2446,7 @@ implementation inherited; end; - procedure thlcgwasm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); + procedure thlcgwasm.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tdef; src, dst: tregister); begin internalerror(2012090201); end; diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas index 5229fd2e79..8625dced7d 100644 --- a/compiler/x86/cgx86.pas +++ b/compiler/x86/cgx86.pas @@ -84,7 +84,7 @@ unit cgx86; procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override; { bit scan instructions } - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override; { fpu move instructions } procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override; @@ -2473,7 +2473,7 @@ unit cgx86; list.concat(taicpu.op_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],tmpref)); end; - procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); + procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); var tmpreg: tregister; opsize: topsize; diff --git a/compiler/xtensa/cgcpu.pas b/compiler/xtensa/cgcpu.pas index e933b8a300..5268fe0dd7 100644 --- a/compiler/xtensa/cgcpu.pas +++ b/compiler/xtensa/cgcpu.pas @@ -68,7 +68,7 @@ interface procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override; procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override; - procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);override; procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);override; @@ -1320,7 +1320,7 @@ implementation end; - procedure tcgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); + procedure tcgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); var ai: taicpu; tmpreg: TRegister;