diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas index 6c389c5bb7..68c9b6533c 100644 --- a/compiler/x86/cgx86.pas +++ b/compiler/x86/cgx86.pas @@ -2492,14 +2492,40 @@ unit cgx86; else tmpreg:=dst; opsize:=tcgsize2opsize[srcsize]; + + { AMD docs: BSF/R dest, 0 “sets ZF to 1 and does not change the contents of the destination register.” + Intel docs: “If the content of the source operand is 0, the content of the destination operand is undefined.” + (However, Intel silently implements the same behavior as AMD, which is understandable.) + + If relying on this behavior, do + + mov tmpreg, $FF + bsx tmpreg, src + + If not relying, do + + bsx tmpreg, src + jnz .LDone + mov tmpreg, $FF +.LDone: + } + + if CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype] then + list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg)); + if not reverse then list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg)) else list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg)); - current_asmdata.getjumplabel(l); - a_jmp_cond(list,OC_NE,l); - list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg)); - a_label(list,l); + + if not (CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype]) then + begin + current_asmdata.getjumplabel(l); + a_jmp_cond(list,OC_NE,l); + list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg)); + a_label(list,l); + end; + if tmpreg<>dst then a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst); end; diff --git a/rtl/i386/i386.inc b/rtl/i386/i386.inc index 2f2f543159..9ad8efa01d 100644 --- a/rtl/i386/i386.inc +++ b/rtl/i386/i386.inc @@ -2708,9 +2708,14 @@ end; {$ifndef FPC_SYSTEM_HAS_BSF_QWORD} {$define FPC_SYSTEM_HAS_BSF_QWORD} - function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe; asm +{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1} + mov $255-32,%eax { On AMD, BSF/R are documented to not change the destination on zero input. 
} + bsfl 8(%esp),%eax { On Intel, destination is formally undefined on zero input, but in practice the behavior is the same. } + add $32,%eax + bsfl 4(%esp),%eax +{$else} bsfl 4(%esp),%eax jz .L1 ret $8 @@ -2721,6 +2726,7 @@ asm ret $8 .L2: movl $255,%eax +{$endif} end; {$endif FPC_SYSTEM_HAS_BSF_QWORD} @@ -2729,16 +2735,26 @@ end; {$define FPC_SYSTEM_HAS_BSR_QWORD} function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe; asm +{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1} + mov $255,%eax { Result for AValue = 0: with the hinted behavior, both BSRs below leave EAX unchanged on zero input. } + bsrl 4(%esp),%eax { Scan the low dword first; 8(%esp) is the high dword. } + sub $32,%eax { Cancels the ADD below, so a result taken from the low dword (or the 255 fallback) comes out unchanged. } bsrl 8(%esp),%eax - jz .L1 add $32,%eax +{$else} + mov 8(%esp),%eax + test %eax,%eax + jnz .L1 { Speculate Hi(q) = 0. } + bsrl 4(%esp),%eax + jz .L2 ret $8 .L1: - bsrl 4(%esp),%eax - jz .L2 + bsrl %eax,%eax + add $32,%eax ret $8 .L2: movl $255,%eax +{$endif} end; {$endif FPC_SYSTEM_HAS_BSR_QWORD} diff --git a/tests/test/tbsx1.pp b/tests/test/tbsx1.pp index f53d4ede7a..eafb95d69c 100644 --- a/tests/test/tbsx1.pp +++ b/tests/test/tbsx1.pp @@ -23,7 +23,7 @@ begin exit(false); end; end; - x8:=0; + x8:=random(0); { NOTE(review): presumably hides the zero from compile-time evaluation so the generated BSF code is what gets tested - confirm. } f:=BsfByte(x8); if (f<>$ff) then begin @@ -60,7 +60,7 @@ begin exit(false); end; end; - x16:=0; + x16:=random(0); f:=BsfWord(x16); if (f<>$ff) then begin @@ -97,7 +97,7 @@ begin exit(false); end; end; - x32:=0; + x32:=random(0); f:=BsfDWord(x32); if (f<>$ff) then begin @@ -132,7 +132,7 @@ begin exit(false); end; end; - x64:=0; + x64:=random(0); f:=BsfQWord(x64); if (f<>$ff) then begin