mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-20 18:09:27 +02:00
Improved CompareDWord for i386 and x86_64.
This commit is contained in:
parent
ee5b9e53a9
commit
d36e96ea74
@ -639,59 +639,37 @@ end;
|
||||
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
|
||||
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
{ Compares len 32-bit words of buf1 and buf2, lowest address first, as
  unsigned values.

  Result:  0 if all len dwords are equal (also when len = 0),
           1 if the first differing dword of buf1 is above that of buf2,
          -1 if it is below.

  Register roles, as used by the code below:
    eax = current position in buf1, later the result
    edx = current position in buf2
    ecx = len on entry, then scratch for the dword loaded from buf2
    ebx = end position in buf1 (saved on entry, restored on every exit)

  A len of 2^29 or more (which includes negative values, because the check
  is unsigned) cannot be scaled by 4 without overflowing 32 bits; such calls
  take the .LUnbounded path and scan until the first mismatch. }
asm
        cmp     $536870912, %ecx        // 2^29: len*4 would not fit in 32 bits
        push    %ebx                    // push leaves the flags of the cmp intact
        jnb     .LUnbounded
        lea     (%eax,%ecx,4), %ebx     // ebx = buf1 + len*4 (end position)
        cmp     %ebx, %eax
        je      .LNothing               // len = 0: nothing to compare
        .balign 16
.LDwordwise_Body:
        mov     (%edx), %ecx
        cmp     (%eax), %ecx            // flags of buf2^ - buf1^; CF set when buf1^ > buf2^
        jne     .LDoSbb
        add     $4, %eax
        add     $4, %edx
        cmp     %eax, %ebx
        jne     .LDwordwise_Body
.LNothing:
        xor     %eax, %eax              // all dwords equal
        pop     %ebx
        ret

.LDoSbb:
        pop     %ebx                    // pop does not disturb CF from the failed compare
        sbb     %eax, %eax              // eax = -1 if CF else 0
        and     $2, %eax                // eax =  2 if CF else 0
        sub     $1, %eax                // eax =  1 if CF else -1
        ret

.LUnbounded:
        mov     %eax, %ebx              // end = start: the loop stops only at a mismatch
        jmp     .LDwordwise_Body
end;
|
||||
{$endif FPC_SYSTEM_HAS_COMPAREDWORD}
|
||||
|
||||
|
@ -772,6 +772,67 @@ end;
|
||||
{$endif FPC_SYSTEM_HAS_COMPAREBYTE}
|
||||
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
|
||||
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
|
||||
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
{ Compares len 32-bit words of buf1 and buf2, lowest address first, as
  unsigned values; returns 0 when all are equal, 1 when the first differing
  dword of buf1 is the larger one, -1 when it is the smaller one.

  Working registers after the optional argument shuffle:
    rcx = position in buf1, rdx = position in buf2, r8 = len,
    r9  = end of buf1, rax = scratch / result, xmm0 and xmm1 = scratch.

  Blocks of four dwords are compared with unaligned 16-byte loads; the
  remaining 0..3 dwords are compared one at a time. A len with any of its
  top three bits set (2^61 or more, negative values included) is not scaled
  by 4; the scan then runs dword by dword until the first mismatch. }
asm
{$ifndef win64}
        movq    %rdx, %r8               // bring rdi/rsi/rdx arguments into rcx/rdx/r8
        movq    %rsi, %rdx
        movq    %rdi, %rcx
{$endif win64}
        movq    %r8, %rax
        shrq    $61, %rax               // non-zero when len*4 would overflow 64 bits
        jnz     .LEndless
        leaq    (%rcx,%r8,4), %r9       // r9 = buf1 + len*4; lea leaves flags alone
        cmpq    $3, %r8
        jle     .LTailCheck             // fewer than four dwords: no 16-byte blocks
        andq    $-4, %r8                // round len down to a multiple of 4
        leaq    (%rcx,%r8,4), %r8       // r8 = end of the 16-byte block region
        .balign 16
.LBlockLoop:
        movdqu  (%rcx), %xmm1
        movdqu  (%rdx), %xmm0
        pcmpeqd %xmm1, %xmm0            // each equal dword becomes all ones
        pmovmskb %xmm0, %eax            // one mask bit per byte
        xorl    $65535, %eax            // invert the 16 bits: set bits mark differing bytes
        jne     .LBlockDiffers
        addq    $16, %rcx
        addq    $16, %rdx
        cmpq    %rcx, %r8
        jne     .LBlockLoop
.LTailCheck:
        cmpq    %rcx, %r9
        je      .LAllEqual
.LTailLoop:
        movl    (%rcx), %eax
        cmpl    %eax, (%rdx)            // flags of buf2^ - buf1^; CF set when buf1^ > buf2^
        jne     .LFromCarry
        addq    $4, %rcx
        addq    $4, %rdx
        cmpq    %rcx, %r9
        jne     .LTailLoop
.LAllEqual:
        xorl    %eax, %eax
        ret

.LBlockDiffers:
        bsfl    %eax, %eax              // byte offset of the first differing dword
        movl    (%rcx,%rax), %ecx       // rcx is no longer needed as a pointer here
        cmpl    %ecx, (%rdx,%rax)       // redo the compare to obtain CF, then fall through
.LFromCarry:
        sbbq    %rax, %rax              // rax = -1 if CF else 0
        andl    $2, %eax                // rax =  2 if CF else 0
        subq    $1, %rax                // rax =  1 if CF else -1
        ret

.LEndless:
        movq    %rcx, %r9               // end = start: the loop stops only at a mismatch
        jmp     .LTailLoop
end;
|
||||
{$endif FPC_SYSTEM_HAS_COMPAREDWORD}
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
|
||||
{ does a thread-safe inc/dec }
|
||||
function declocked(var l : longint) : boolean;assembler; nostackframe;
|
||||
|
Loading…
Reference in New Issue
Block a user