Improved CompareDWord for i386 and x86_64.

This commit is contained in:
Rika Ichinose 2023-02-08 06:29:36 +03:00 committed by FPK
parent ee5b9e53a9
commit d36e96ea74
2 changed files with 87 additions and 48 deletions

View File

@ -639,59 +639,37 @@ end;
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
// CompareDWord (i386): compares len 32-bit elements of buf1 and buf2 front to
// back, treating each element as an unsigned value.
// Result: 0 if no element differs, -1 if the first differing element of buf1
// is below the matching element of buf2, +1 if it is above.
// Register calling convention: eax = buf1, edx = buf2, ecx = len.
// ebx is callee-saved, so it is pushed on entry and popped on every exit path.
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
asm
        // 536870912 = 2^29: from here on len*4 no longer fits in 32 bits.
        // The comparison is unsigned, so a negative len lands here as well.
        cmp    $536870912, %ecx
        push   %ebx                     // push leaves the flags of the cmp intact
        jnb    .LUnbounded
        lea    (%eax,%ecx,4), %ebx      // ebx = end of buf1 = buf1 + len*4
        cmp    %ebx, %eax
        je     .LNothing                // len = 0: nothing to compare
        .balign 16
.LDwordwise_Body:
        mov    (%edx), %ecx             // ecx = current element of buf2
        cmp    (%eax), %ecx             // flags of buf2 element - buf1 element
        jne    .LDoSbb
        add    $4, %eax
        add    $4, %edx
        cmp    %eax, %ebx               // stop once the buf1 cursor reaches the end
        jne    .LDwordwise_Body
.LNothing:
        xor    %eax, %eax               // every element matched
        pop    %ebx
        ret

.LDoSbb:
        // CF is set exactly when the buf2 element is below the buf1 element.
        // pop does not touch the flags, so CF is still valid for the sbb.
        pop    %ebx
        sbb    %eax, %eax               // eax = -CF (0 or -1)
        and    $2, %eax                 // eax = 0 or 2
        sub    $1, %eax                 // eax = -1 (buf1 below) or +1 (buf1 above)
        ret

.LUnbounded:
        // The end pointer cannot be computed without overflow. Using the start
        // address as the end makes the loop stop only at a difference or after
        // the cursor has wrapped around the whole address space.
        mov    %eax, %ebx
        jmp    .LDwordwise_Body
end;
{$endif FPC_SYSTEM_HAS_COMPAREDWORD}

View File

@ -772,6 +772,67 @@ end;
{$endif FPC_SYSTEM_HAS_COMPAREBYTE}
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
// CompareDWord (x86_64): compares len 32-bit elements of buf1 and buf2 front
// to back, treating each element as an unsigned value.
// Result: 0 if no element differs, -1 if the first differing element of buf1
// is below the matching element of buf2, +1 if it is above.
// Working registers after the prologue: rcx = buf1 cursor, rdx = buf2 cursor,
// r8 = len (later the end of the 4-element part), r9 = end of buf1.
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
asm
{$ifndef win64}
// Outside Win64 the arguments arrive in rdi, rsi, rdx; move them into the
// Win64 argument registers so the rest of the routine is shared.
// rdx is copied to r8 first because it is overwritten by the next move.
mov %rdx, %r8
mov %rsi, %rdx
mov %rdi, %rcx
{$endif win64}
// Any of the top three bits of len set means len*4 does not fit in 64 bits
// (this also catches a negative len): take the unbounded path.
mov %r8, %rax
shr $61, %rax
jnz .LUnbounded
cmp $3, %r8
// lea does not modify flags, so the jle below still tests len against 3.
lea (%rcx,%r8,4), %r9
jle .LDwordwise_Test
// r8 = buf1 + (len rounded down to a multiple of 4)*4: end of the part that
// is processed four elements (16 bytes) at a time.
and $-4, %r8
lea (%rcx,%r8,4), %r8
.balign 16
.L4x_Body:
// Unaligned 16-byte loads; pcmpeqd sets each equal 32-bit lane to all ones,
// pmovmskb gathers one bit per byte, so eax = 65535 when all four lanes match.
movdqu (%rcx), %xmm1
movdqu (%rdx), %xmm0
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm0, %eax
// After the xor the set bits mark the bytes of differing lanes.
xor $65535, %eax
jne .L4x_Found
add $16, %rcx
add $16, %rdx
cmp %rcx, %r8
jne .L4x_Body
.LDwordwise_Test:
// Skip the tail loop when the cursor has already reached the end of buf1.
cmp %rcx, %r9
je .LNothing
.LDwordwise_Body:
// Element-by-element tail; the cmp computes buf2 element - buf1 element.
mov (%rcx), %eax
cmp %eax, (%rdx)
jne .LDoSbb
add $4, %rcx
add $4, %rdx
cmp %rcx, %r9
jne .LDwordwise_Body
.LNothing:
// Every element matched.
xor %eax, %eax
ret
.L4x_Found:
// Lowest set bit = byte offset of the first differing element within the
// 16-byte group. Redo the scalar compare there to obtain CF for the sbb.
bsf %eax, %eax
mov (%rcx,%rax), %ecx
cmp %ecx, (%rdx,%rax)
.LDoSbb:
// CF is set exactly when the buf2 element is below the buf1 element.
// rax = -CF, then 0 or 2, then -1 (buf1 below) or +1 (buf1 above).
sbb %rax, %rax
and $2, %eax
sub $1, %rax
ret
.LUnbounded:
// The end pointer cannot be computed without overflow. Using the start
// address as the end makes the loop stop only at a difference or after the
// cursor has wrapped around.
mov %rcx, %r9
jmp .LDwordwise_Body
end;
{$endif FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
{ does a thread-safe inc/dec }
function declocked(var l : longint) : boolean;assembler; nostackframe;