mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-14 21:49:32 +02:00
* Replaced the i386 assembler IndexByte with a more sophisticated implementation: larger in size, but faster by a factor of 2 (on Athlon X2 L310) to 5 (on Core2Duo E7200) for 512-byte buffers.
git-svn-id: trunk@20188 -
This commit is contained in:
parent
f984a3d74e
commit
6874aa9676
@ -301,35 +301,135 @@ end;
|
|||||||
|
|
||||||
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
|
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
|
||||||
{$define FPC_SYSTEM_HAS_INDEXBYTE}
|
{$define FPC_SYSTEM_HAS_INDEXBYTE}
|
||||||
function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler;
|
// New side of this diff: searches the bytes starting at 'buf' for the value 'b'.
// On the found paths eax ends up as (address of the match - initial 'buf');
// on the not-found path eax is set to -1.
// NOTE(review): the new code takes buf from eax, len from edx and b from cl,
// presumably per the i386 register calling convention - confirm.
function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
|
||||||
var
|
|
||||||
saveedi,saveebx : longint;
|
|
||||||
asm
|
asm
|
||||||
movl %edi,saveedi
|
// esi and edi are used as scratch registers below; both are restored at .Lexit1.
push %esi
|
||||||
movl %ebx,saveebx
|
push %edi
|
||||||
movl buf,%edi // Load String
|
push %eax { save initial value of 'buf' }
|
||||||
movb b,%bl
|
|
||||||
movl len,%ecx // Load len
|
cmp $4,%edx { less than 4 bytes, just test byte by byte. }
|
||||||
xorl %eax,%eax
|
jb .Ltail
|
||||||
testl %ecx,%ecx
|
|
||||||
jz .Lcharposnotfound
|
// Replicate the byte in cl into all four bytes of ecx (cl -> ch, then low word -> high word).
mov %cl,%ch { prepare pattern }
|
||||||
cld
|
movzwl %cx,%esi
|
||||||
movl %ecx,%edx // Copy for easy manipulation
|
shl $16,%ecx
|
||||||
movb %bl,%al
|
or %esi,%ecx
|
||||||
repne
|
|
||||||
scasb
|
// Compare single bytes until the pointer in eax is a multiple of 4; edx is decremented per byte consumed.
.Lalignloop:
|
||||||
jne .Lcharposnotfound
|
test $3,%al { align to 4 bytes if necessary }
|
||||||
incl %ecx
|
je .Laligned
|
||||||
subl %ecx,%edx
|
cmp %cl,(%eax)
|
||||||
movl %edx,%eax
|
je .Lexit
|
||||||
jmp .Lready
|
inc %eax
|
||||||
.Lcharposnotfound:
|
dec %edx
|
||||||
movl $-1,%eax
|
jmp .Lalignloop
|
||||||
.Lready:
|
|
||||||
movl saveedi,%edi
|
.balign 16 { Main loop, unrolled 4 times for speed }
|
||||||
movl saveebx,%ebx
|
|
||||||

.Lloop:
|
||||||

mov (%eax),%esi { load dword }
|
||||||

xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
|
||||||

lea -0x01010101(%esi),%edi
|
||||||

xor %esi,%edi { (x-0x01010101) xor x }
|
||||||

not %esi
|
||||||

and $0x80808080,%esi
|
||||||

and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
|
||||||

jnz .Lfound { one of the bytes matches }
|
||||||

|
||||||

// Same zero-byte test for the dword at offset 4 (and at offsets 8 and 12 further down).
mov 4(%eax),%esi
|
||||||

xor %ecx,%esi
|
||||||

lea -0x01010101(%esi),%edi
|
||||||

xor %esi,%edi
|
||||||

not %esi
|
||||||

and $0x80808080,%esi
|
||||||

and %edi,%esi
|
||||||

jnz .Lfound4
|
||||||

|
||||||

mov 8(%eax),%esi
|
||||||

xor %ecx,%esi
|
||||||

lea -0x01010101(%esi),%edi
|
||||||

xor %esi,%edi
|
||||||

not %esi
|
||||||

and $0x80808080,%esi
|
||||||

and %edi,%esi
|
||||||

jnz .Lfound8
|
||||||

|
||||||

mov 12(%eax),%esi
|
||||||

xor %ecx,%esi
|
||||||

lea -0x01010101(%esi),%edi
|
||||||

xor %esi,%edi
|
||||||

not %esi
|
||||||

and $0x80808080,%esi
|
||||||

and %edi,%esi
|
||||||

jnz .Lfound12
|
||||||

|
||||||

add $16,%eax
|
||||||

// Entry point once eax is aligned: the unrolled loop body runs only if the subtraction does not borrow.
.Laligned:
|
||||||

sub $16,%edx
|
||||||

jae .Lloop { Still more than 16 bytes remaining }
|
||||||

|
||||||

{ Process remaining bytes (<16 left at this point) }
|
||||||

{ length is offset by -16 at this point }
|
||||||

.Lloop2:
|
||||||

cmp $4-16,%edx { < 4 bytes left? }
|
||||||

jb .Ltail
|
||||||

|
||||||

mov (%eax),%esi
|
||||||

xor %ecx,%esi
|
||||||

lea -0x01010101(%esi),%edi
|
||||||

xor %esi,%edi
|
||||||

not %esi
|
||||||

and $0x80808080,%esi
|
||||||

and %edi,%esi
|
||||||

jne .Lfound
|
||||||

|
||||||

add $4,%eax
|
||||||

sub $4,%edx
|
||||||

jmp .Lloop2
|
||||||

|
||||||

.Ltail: { Less than 4 bytes remaining, check one by one }
|
||||||

// Keeping only the low two bits also discards the -16 offset applied before .Lloop2 (16 is a multiple of 4).
and $3, %edx
|
||||||

jz .Lnotfound
|
||||||

.Lloop3:
|
||||||

cmp %cl,(%eax)
|
||||||

je .Lexit
|
||||||

inc %eax
|
||||||

dec %edx
|
||||||

jnz .Lloop3
|
||||||

|
||||||

.Lnotfound:
|
||||||

// No match: result is -1; skip the pointer-to-index conversion at .Lexit.
or $-1,%eax
|
||||||

jmp .Lexit1
|
||||||

|
||||||

{ add missing source pointer increments }
|
||||||

.Lfound12:
|
||||||

add $4,%eax
|
||||||

.Lfound8:
|
||||||

add $4,%eax
|
||||||

.Lfound4:
|
||||||

add $4,%eax
|
||||||

|
||||||

.Lfound:
|
||||||

// esi holds the 0x80 flags from the zero-byte test. Check the three low bytes in order, advancing eax
// past each unflagged one; if none of them is flagged, eax ends up on the fourth byte.
test $0xff,%esi
|
||||||

jnz .Lexit
|
||||||

inc %eax
|
||||||

|
||||||

test $0xff00,%esi
|
||||||

jnz .Lexit
|
||||||

inc %eax
|
||||||

|
||||||

test $0xff0000,%esi
|
||||||

jnz .Lexit
|
||||||

inc %eax
|
||||||

|
||||||

.Lexit:
|
||||||

// Turn the match address into an index by subtracting the 'buf' value saved on top of the stack.
sub (%esp),%eax
|
||||||

.Lexit1:
|
||||||

pop %ecx { removes initial 'buf' value }
|
||||||

pop %edi
|
||||||

pop %esi
|
||||||
end;
|
end;
|
||||||
{$endif FPC_SYSTEM_HAS_FILLDWORD}
|
{$endif FPC_SYSTEM_HAS_INDEXBYTE}
|
||||||
|
|
||||||
|
|
||||||
{$ifndef FPC_SYSTEM_HAS_INDEXWORD}
|
{$ifndef FPC_SYSTEM_HAS_INDEXWORD}
|
||||||
|
Loading…
Reference in New Issue
Block a user