mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-25 09:09:25 +02:00
Make use of CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in Bsf*/Bsr*.
This commit is contained in:
parent
1ecd60e90d
commit
fc1050a834
@ -2492,14 +2492,40 @@ unit cgx86;
|
|||||||
else
|
else
|
||||||
tmpreg:=dst;
|
tmpreg:=dst;
|
||||||
opsize:=tcgsize2opsize[srcsize];
|
opsize:=tcgsize2opsize[srcsize];
|
||||||
|
|
||||||
|
{ AMD docs: BSF/R dest, 0 “sets ZF to 1 and does not change the contents of the destination register.”
|
||||||
|
Intel docs: “If the content of the source operand is 0, the content of the destination operand is undefined.”
|
||||||
|
(However, Intel silently implements the same behavior as AMD, which is understandable.)
|
||||||
|
|
||||||
|
If relying on this behavior, do
|
||||||
|
|
||||||
|
mov tmpreg, $FF
|
||||||
|
bsx tmpreg, src
|
||||||
|
|
||||||
|
If not relying on this behavior, do
|
||||||
|
|
||||||
|
bsx tmpreg, src
|
||||||
|
jnz .LDone
|
||||||
|
mov tmpreg, $FF
|
||||||
|
.LDone:
|
||||||
|
}
|
||||||
|
|
||||||
|
if CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype] then
|
||||||
|
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
||||||
|
|
||||||
if not reverse then
|
if not reverse then
|
||||||
list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
|
list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
|
||||||
else
|
else
|
||||||
list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
|
list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
|
||||||
|
|
||||||
|
if not (CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype]) then
|
||||||
|
begin
|
||||||
current_asmdata.getjumplabel(l);
|
current_asmdata.getjumplabel(l);
|
||||||
a_jmp_cond(list,OC_NE,l);
|
a_jmp_cond(list,OC_NE,l);
|
||||||
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
|
||||||
a_label(list,l);
|
a_label(list,l);
|
||||||
|
end;
|
||||||
|
|
||||||
if tmpreg<>dst then
|
if tmpreg<>dst then
|
||||||
a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
|
a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
|
||||||
end;
|
end;
|
||||||
|
@ -2708,9 +2708,14 @@ end;
|
|||||||
|
|
||||||
{$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
|
{$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
|
||||||
{$define FPC_SYSTEM_HAS_BSF_QWORD}
|
{$define FPC_SYSTEM_HAS_BSF_QWORD}
|
||||||
|
|
||||||
function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
|
function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
|
||||||
asm
|
asm
|
||||||
|
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
|
||||||
|
mov $255-32,%eax { On AMD, BSF/R are documented to not change the destination on zero input. }
|
||||||
|
bsfl 8(%esp),%eax { On Intel, destination is formally undefined on zero input, but in practice the behavior is the same. }
|
||||||
|
add $32,%eax
|
||||||
|
bsfl 4(%esp),%eax
|
||||||
|
{$else}
|
||||||
bsfl 4(%esp),%eax
|
bsfl 4(%esp),%eax
|
||||||
jz .L1
|
jz .L1
|
||||||
ret $8
|
ret $8
|
||||||
@ -2721,6 +2726,7 @@ asm
|
|||||||
ret $8
|
ret $8
|
||||||
.L2:
|
.L2:
|
||||||
movl $255,%eax
|
movl $255,%eax
|
||||||
|
{$endif}
|
||||||
end;
|
end;
|
||||||
{$endif FPC_SYSTEM_HAS_BSF_QWORD}
|
{$endif FPC_SYSTEM_HAS_BSF_QWORD}
|
||||||
|
|
||||||
@ -2729,16 +2735,26 @@ end;
|
|||||||
{$define FPC_SYSTEM_HAS_BSR_QWORD}
|
{$define FPC_SYSTEM_HAS_BSR_QWORD}
|
||||||
function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
|
function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
|
||||||
asm
|
asm
|
||||||
|
{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
|
||||||
|
mov $255,%eax
|
||||||
|
bsrl 4(%esp),%eax
|
||||||
|
sub $32,%eax
|
||||||
bsrl 8(%esp),%eax
|
bsrl 8(%esp),%eax
|
||||||
jz .L1
|
|
||||||
add $32,%eax
|
add $32,%eax
|
||||||
ret $8
|
{$else}
|
||||||
.L1:
|
mov 8(%esp),%eax
|
||||||
|
test %eax,%eax
|
||||||
|
jnz .L1 { Speculate Hi(q) = 0. }
|
||||||
bsrl 4(%esp),%eax
|
bsrl 4(%esp),%eax
|
||||||
jz .L2
|
jz .L2
|
||||||
ret $8
|
ret $8
|
||||||
|
.L1:
|
||||||
|
bsrl %eax,%eax
|
||||||
|
add $32,%eax
|
||||||
|
ret $8
|
||||||
.L2:
|
.L2:
|
||||||
movl $255,%eax
|
movl $255,%eax
|
||||||
|
{$endif}
|
||||||
end;
|
end;
|
||||||
{$endif FPC_SYSTEM_HAS_BSR_QWORD}
|
{$endif FPC_SYSTEM_HAS_BSR_QWORD}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ begin
|
|||||||
exit(false);
|
exit(false);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
x8:=0;
|
x8:=random(0);
|
||||||
f:=BsfByte(x8);
|
f:=BsfByte(x8);
|
||||||
if (f<>$ff) then
|
if (f<>$ff) then
|
||||||
begin
|
begin
|
||||||
@ -60,7 +60,7 @@ begin
|
|||||||
exit(false);
|
exit(false);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
x16:=0;
|
x16:=random(0);
|
||||||
f:=BsfWord(x16);
|
f:=BsfWord(x16);
|
||||||
if (f<>$ff) then
|
if (f<>$ff) then
|
||||||
begin
|
begin
|
||||||
@ -97,7 +97,7 @@ begin
|
|||||||
exit(false);
|
exit(false);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
x32:=0;
|
x32:=random(0);
|
||||||
f:=BsfDWord(x32);
|
f:=BsfDWord(x32);
|
||||||
if (f<>$ff) then
|
if (f<>$ff) then
|
||||||
begin
|
begin
|
||||||
@ -132,7 +132,7 @@ begin
|
|||||||
exit(false);
|
exit(false);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
x64:=0;
|
x64:=random(0);
|
||||||
f:=BsfQWord(x64);
|
f:=BsfQWord(x64);
|
||||||
if (f<>$ff) then
|
if (f<>$ff) then
|
||||||
begin
|
begin
|
||||||
|
Loading…
Reference in New Issue
Block a user