Make use of CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in Bsf*/Bsr*.

This commit is contained in:
Rika Ichinose 2024-05-11 07:37:18 +03:00 committed by FPK
parent 1ecd60e90d
commit fc1050a834
3 changed files with 54 additions and 12 deletions

View File

@ -2492,14 +2492,40 @@ unit cgx86;
else else
tmpreg:=dst; tmpreg:=dst;
opsize:=tcgsize2opsize[srcsize]; opsize:=tcgsize2opsize[srcsize];
{ AMD docs: BSF/R dest, 0 “sets ZF to 1 and does not change the contents of the destination register.”
Intel docs: “If the content of the source operand is 0, the content of the destination operand is undefined.”
(However, Intel silently implements the same behavior as AMD, which is understandable.)
If relying on this behavior, do
mov tmpreg, $FF
bsx tmpreg, src
If not relying, do
bsx tmpreg, src
jnz .LDone
mov tmpreg, $FF
.LDone:
}
if CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype] then
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
if not reverse then if not reverse then
list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg)) list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
else else
list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg)); list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
if not (CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype]) then
begin
current_asmdata.getjumplabel(l); current_asmdata.getjumplabel(l);
a_jmp_cond(list,OC_NE,l); a_jmp_cond(list,OC_NE,l);
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg)); list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
a_label(list,l); a_label(list,l);
end;
if tmpreg<>dst then if tmpreg<>dst then
a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst); a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
end; end;

View File

@ -2708,9 +2708,14 @@ end;
{$ifndef FPC_SYSTEM_HAS_BSF_QWORD} {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
{$define FPC_SYSTEM_HAS_BSF_QWORD} {$define FPC_SYSTEM_HAS_BSF_QWORD}
function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe; function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
asm asm
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
mov $255-32,%eax { On AMD, BSF/R are documented to not change the destination on zero input. }
bsfl 8(%esp),%eax { On Intel, destination is formally undefined on zero input, but in practice the behavior is the same. }
add $32,%eax
bsfl 4(%esp),%eax
{$else}
bsfl 4(%esp),%eax bsfl 4(%esp),%eax
jz .L1 jz .L1
ret $8 ret $8
@ -2721,6 +2726,7 @@ asm
ret $8 ret $8
.L2: .L2:
movl $255,%eax movl $255,%eax
{$endif}
end; end;
{$endif FPC_SYSTEM_HAS_BSF_QWORD} {$endif FPC_SYSTEM_HAS_BSF_QWORD}
@ -2729,16 +2735,26 @@ end;
{$define FPC_SYSTEM_HAS_BSR_QWORD} {$define FPC_SYSTEM_HAS_BSR_QWORD}
function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe; function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
asm asm
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
mov $255,%eax
bsrl 4(%esp),%eax
sub $32,%eax
bsrl 8(%esp),%eax bsrl 8(%esp),%eax
jz .L1
add $32,%eax add $32,%eax
ret $8 {$else}
.L1: mov 8(%esp),%eax
test %eax,%eax
jnz .L1 { Speculate Hi(q) = 0. }
bsrl 4(%esp),%eax bsrl 4(%esp),%eax
jz .L2 jz .L2
ret $8 ret $8
.L1:
bsrl %eax,%eax
add $32,%eax
ret $8
.L2: .L2:
movl $255,%eax movl $255,%eax
{$endif}
end; end;
{$endif FPC_SYSTEM_HAS_BSR_QWORD} {$endif FPC_SYSTEM_HAS_BSR_QWORD}

View File

@ -23,7 +23,7 @@ begin
exit(false); exit(false);
end; end;
end; end;
x8:=0; x8:=random(0);
f:=BsfByte(x8); f:=BsfByte(x8);
if (f<>$ff) then if (f<>$ff) then
begin begin
@ -60,7 +60,7 @@ begin
exit(false); exit(false);
end; end;
end; end;
x16:=0; x16:=random(0);
f:=BsfWord(x16); f:=BsfWord(x16);
if (f<>$ff) then if (f<>$ff) then
begin begin
@ -97,7 +97,7 @@ begin
exit(false); exit(false);
end; end;
end; end;
x32:=0; x32:=random(0);
f:=BsfDWord(x32); f:=BsfDWord(x32);
if (f<>$ff) then if (f<>$ff) then
begin begin
@ -132,7 +132,7 @@ begin
exit(false); exit(false);
end; end;
end; end;
x64:=0; x64:=random(0);
f:=BsfQWord(x64); f:=BsfQWord(x64);
if (f<>$ff) then if (f<>$ff) then
begin begin