mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 11:38:19 +02:00
Make use of CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in Bsf*/Bsr*.
This commit is contained in:
parent
1ecd60e90d
commit
fc1050a834
@ -2492,14 +2492,40 @@ unit cgx86;
|
||||
else
|
||||
tmpreg:=dst;
|
||||
opsize:=tcgsize2opsize[srcsize];
|
||||
|
||||
{ AMD docs: BSF/R dest, 0 “sets ZF to 1 and does not change the contents of the destination register.”
|
||||
Intel docs: “If the content of the source operand is 0, the content of the destination operand is undefined.”
|
||||
(However, Intel silently implements the same behavior as AMD, which is understandable.)
|
||||
|
||||
If relying on this behavior, do
|
||||
|
||||
mov tmpreg, $FF
|
||||
bsx tmpreg, src
|
||||
|
||||
If not relying, do
|
||||
|
||||
bsx tmpreg, src
|
||||
jnz .LDone
|
||||
mov tmpreg, $FF
|
||||
.LDone:
|
||||
}
|
||||
|
||||
if CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype] then
|
||||
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
||||
|
||||
if not reverse then
|
||||
list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
|
||||
else
|
||||
list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
|
||||
current_asmdata.getjumplabel(l);
|
||||
a_jmp_cond(list,OC_NE,l);
|
||||
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
||||
a_label(list,l);
|
||||
|
||||
if not (CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype]) then
|
||||
begin
|
||||
current_asmdata.getjumplabel(l);
|
||||
a_jmp_cond(list,OC_NE,l);
|
||||
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
||||
a_label(list,l);
|
||||
end;
|
||||
|
||||
if tmpreg<>dst then
|
||||
a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
|
||||
end;
|
||||
|
@ -2708,9 +2708,14 @@ end;
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
|
||||
{$define FPC_SYSTEM_HAS_BSF_QWORD}
|
||||
|
||||
function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
|
||||
asm
|
||||
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
|
||||
mov $255-32,%eax { On AMD, BSF/R are documented to not change the destination on zero input. }
|
||||
bsfl 8(%esp),%eax { On Intel, destination is formally undefined on zero input, but in practice the behavior is the same. }
|
||||
add $32,%eax
|
||||
bsfl 4(%esp),%eax
|
||||
{$else}
|
||||
bsfl 4(%esp),%eax
|
||||
jz .L1
|
||||
ret $8
|
||||
@ -2721,6 +2726,7 @@ asm
|
||||
ret $8
|
||||
.L2:
|
||||
movl $255,%eax
|
||||
{$endif}
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_BSF_QWORD}
|
||||
|
||||
@ -2729,16 +2735,26 @@ end;
|
||||
{$define FPC_SYSTEM_HAS_BSR_QWORD}
|
||||
{ BsrQWord returns the zero-based index of the highest set bit of AValue,
  or 255 when AValue = 0.

  The argument is read directly from the stack (no stack frame is set up):
  the low dword at 4(%esp) and the high dword at 8(%esp). }
function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
asm
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
    { Branchless variant: relies on BSR leaving the destination unchanged
      when the source is zero. }
    mov    $255,%eax       { Result when both halves are zero (255 - 32 + 32). }
    bsrl   4(%esp),%eax    { eax = Bsr(Lo), or still 255 if Lo = 0. }
    sub    $32,%eax        { Pre-compensate for the unconditional add below. }
    bsrl   8(%esp),%eax    { eax = Bsr(Hi) if Hi <> 0, otherwise unchanged. }
    add    $32,%eax        { Hi <> 0: Bsr(Hi) + 32; Hi = 0: Bsr(Lo) or 255. }
{$else}
    mov    8(%esp),%eax
    test   %eax,%eax
    jnz    .L1 { Speculate Hi(q) = 0. }
    bsrl   4(%esp),%eax    { Hi = 0: the answer is Bsr(Lo), if Lo <> 0. }
    jz     .L2
    ret    $8
.L1:
    bsrl   %eax,%eax       { eax still holds Hi, which is non-zero here. }
    add    $32,%eax
    ret    $8
.L2:
    movl   $255,%eax       { AValue = 0. }
{$endif}
end;
|
||||
{$endif FPC_SYSTEM_HAS_BSR_QWORD}
|
||||
|
||||
|
@ -23,7 +23,7 @@ begin
|
||||
exit(false);
|
||||
end;
|
||||
end;
|
||||
x8:=0;
|
||||
x8:=random(0);
|
||||
f:=BsfByte(x8);
|
||||
if (f<>$ff) then
|
||||
begin
|
||||
@ -60,7 +60,7 @@ begin
|
||||
exit(false);
|
||||
end;
|
||||
end;
|
||||
x16:=0;
|
||||
x16:=random(0);
|
||||
f:=BsfWord(x16);
|
||||
if (f<>$ff) then
|
||||
begin
|
||||
@ -97,7 +97,7 @@ begin
|
||||
exit(false);
|
||||
end;
|
||||
end;
|
||||
x32:=0;
|
||||
x32:=random(0);
|
||||
f:=BsfDWord(x32);
|
||||
if (f<>$ff) then
|
||||
begin
|
||||
@ -132,7 +132,7 @@ begin
|
||||
exit(false);
|
||||
end;
|
||||
end;
|
||||
x64:=0;
|
||||
x64:=random(0);
|
||||
f:=BsfQWord(x64);
|
||||
if (f<>$ff) then
|
||||
begin
|
||||
|
Loading…
Reference in New Issue
Block a user