Improved CompareDWord for i386 and x86_64.

This commit is contained in:
Rika Ichinose 2023-02-08 06:29:36 +03:00 committed by FPK
parent ee5b9e53a9
commit d36e96ea74
2 changed files with 87 additions and 48 deletions

View File

@ -639,59 +639,37 @@ end;
{$define FPC_SYSTEM_HAS_COMPAREDWORD} {$define FPC_SYSTEM_HAS_COMPAREDWORD}
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
asm asm
cmpl $32,%ecx { empirical average value, on a Athlon XP the cmp $536870912, %ecx
break even is at 12, on a Core 2 Duo > 100 } push %ebx
jg .LCmpDWordFull jnb .LUnbounded
testl %ecx,%ecx lea (%eax,%ecx,4), %ebx
je .LCmpDWordZero cmp %ebx, %eax
je .LNothing
pushl %ebx .balign 16
.LCmpDWordLoop: .LDwordwise_Body:
movl (%eax),%ebx mov (%edx), %ecx
cmpl (%edx),%ebx cmp (%eax), %ecx
leal 4(%eax),%eax jne .LDoSbb
leal 4(%edx),%edx add $4, %eax
jne .LCmpDWordExitFast add $4, %edx
decl %ecx cmp %eax, %ebx
jne .LCmpDWordLoop jne .LDwordwise_Body
.LCmpDWordExitFast: .LNothing:
xorl %eax,%eax xor %eax, %eax
movl -4(%edx),%edx // Compare failing (or equal) position pop %ebx
subl %edx,%ebx // calculate end result.
setb %dl
seta %cl
addb %cl,%al
subb %dl,%al
movsbl %al,%eax
popl %ebx
ret ret
.LCmpDWordZero: .LDoSbb:
movl $0,%eax pop %ebx
sbb %eax, %eax
and $2, %eax
sub $1, %eax
ret ret
.LCmpDWordFull: .LUnbounded:
pushl %esi mov %eax, %ebx
pushl %edi jmp .LDwordwise_Body
{$ifdef FPC_ENABLED_CLD}
cld
{$endif FPC_ENABLED_CLD}
movl %eax,%edi
movl %edx,%esi
xorl %eax,%eax
repe { Compare entire DWords}
cmpsl
movl -4(%edi),%edi // Compare failing (or equal) position
subl -4(%esi),%edi // calculate end result.
setb %dl
seta %cl
addb %cl,%al
subb %dl,%al
movsbl %al,%eax
.LCmpDwordExit:
popl %edi
popl %esi
end; end;
{$endif FPC_SYSTEM_HAS_COMPAREDWORD} {$endif FPC_SYSTEM_HAS_COMPAREDWORD}

View File

@ -772,6 +772,67 @@ end;
{$endif FPC_SYSTEM_HAS_COMPAREBYTE} {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
{ Compares len consecutive 32-bit values (dwords) of buf1 and buf2.
  Result:  0 when no differing dword is found,
          +1 when the first differing dword of buf1 is above the one of buf2,
          -1 when it is below (the comparison is unsigned).
  Blocks of four dwords are compared with 16-byte SSE2 operations; the
  remaining 0..3 dwords are compared one at a time.
  If any of the top three bits of len is set (negative len, or len >= 2^61)
  the length is not used as a bound at all, see .LUnbounded. }
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
asm
{$ifndef win64}
// Non-Win64 calling convention: the arguments arrive in rdi, rsi, rdx.
// Move them to the Win64 argument registers so the code below is shared:
// rcx = buf1, rdx = buf2, r8 = len.
mov %rdx, %r8
mov %rsi, %rdx
mov %rdi, %rcx
{$endif win64}
// rax = top three bits of len. Non-zero means len is negative or len*4
// would not fit, so no end pointer can be computed from it.
mov %r8, %rax
shr $61, %rax
jnz .LUnbounded
// Fewer than 4 dwords: skip the 16-byte loop. The lea sits between cmp and
// jle but does not modify flags. r9 = buf1 + len*4 = end of buf1.
cmp $3, %r8
lea (%rcx,%r8,4), %r9
jle .LDwordwise_Test
// r8 = buf1 + (len rounded down to a multiple of 4)*4 = end of the part
// that is processed 16 bytes at a time.
and $-4, %r8
lea (%rcx,%r8,4), %r8
.balign 16
.L4x_Body:
// Load 16 bytes from each buffer (unaligned loads) and compare them as four
// dword lanes: an equal lane becomes all ones, a differing lane all zeros.
movdqu (%rcx), %xmm1
movdqu (%rdx), %xmm0
pcmpeqd %xmm1, %xmm0
// eax = one bit per byte of the compare result; after the xor a set bit
// marks a byte that belongs to a differing dword.
pmovmskb %xmm0, %eax
xor $65535, %eax
jne .L4x_Found
add $16, %rcx
add $16, %rdx
cmp %rcx, %r8
jne .L4x_Body
.LDwordwise_Test:
// Nothing left (len = 0, or len was a multiple of 4): buffers are equal.
cmp %rcx, %r9
je .LNothing
.LDwordwise_Body:
// Scalar loop: compare one dword per iteration until r9 is reached.
// The cmp computes [buf2] - [buf1]; its carry flag is consumed at .LDoSbb.
mov (%rcx), %eax
cmp %eax, (%rdx)
jne .LDoSbb
add $4, %rcx
add $4, %rdx
cmp %rcx, %r9
jne .LDwordwise_Body
.LNothing:
// No difference found: return 0.
xor %eax, %eax
ret
.L4x_Found:
// eax = index of the lowest set mask bit = byte offset of the first
// differing dword inside the current 16-byte block (lanes are all-or-nothing,
// so the offset is a multiple of 4). Redo the compare on that dword so the
// carry flag is set the same way as in the scalar loop.
bsf %eax, %eax
mov (%rcx,%rax), %ecx
cmp %ecx, (%rdx,%rax)
.LDoSbb:
// Reached only with differing dwords. CF = 1 means buf2's dword is below
// buf1's dword (unsigned).
// sbb: rax = -1 if CF else 0; and: eax = 2 or 0; sub: rax = +1 or -1.
sbb %rax, %rax
and $2, %eax
sub $1, %rax
ret
.LUnbounded:
// End pointer := start pointer, and the loop is entered after its initial
// end test. The end test in the loop therefore cannot match until the
// pointer has wrapped all the way around; in practice the loop ends at the
// first differing dword.
mov %rcx, %r9
jmp .LDwordwise_Body
end;
{$endif FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT} {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
{ does a thread safe inc/dec } { does a thread safe inc/dec }
function declocked(var l : longint) : boolean;assembler; nostackframe; function declocked(var l : longint) : boolean;assembler; nostackframe;