- Integrate i386/strlen.inc and remove it.

+ int_str assembler implementations for i386
 + fpc_shortstr_to_shortstr assembler implementation for ARM
 + fpc_shortstr_assign assembler implementation for ARM
 + fpc_Pchar_length assembler implementation for ARM

git-svn-id: trunk@9582 -
This commit is contained in:
daniel 2007-12-30 11:19:10 +00:00
parent 6db4748644
commit d8bffd27fc
4 changed files with 254 additions and 37 deletions

1
.gitattributes vendored
View File

@ -4814,7 +4814,6 @@ rtl/i386/setjump.inc svneol=native#text/plain
rtl/i386/setjumph.inc svneol=native#text/plain
rtl/i386/strings.inc svneol=native#text/plain
rtl/i386/stringss.inc svneol=native#text/plain
rtl/i386/strlen.inc svneol=native#text/plain
rtl/i386/strpas.inc svneol=native#text/plain
rtl/inc/aliases.inc svneol=native#text/plain
rtl/inc/astrings.inc svneol=native#text/plain

View File

@ -307,6 +307,168 @@ end;
{$endif FPC_SYSTEM_HAS_MOVE}
{****************************************************************************
String
****************************************************************************}
{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$else}
procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
{$endif}
{ Copies the short string sstr to the result, truncated to at most len
  characters. The length byte written to the result is min(length(sstr),len).

  Registers on entry:
    r0: __RESULT (destination: length byte followed by the characters)
    r1: len      (upper bound for the number of characters copied)
    r2: sstr     (source: first byte is its length)
  r1 (once len has been consumed), r3 and r12 are used as scratch. }
asm
  ldrb r12,[r2],#1        (* r12 = length of source, r2 -> first character *)
  cmp r12,r1
  movgt r12,r1            (* Clamp the length to len, so that the destination
                             is never written beyond len characters. *)
  strb r12,[r0],#1        (* Store the (clamped) length byte. *)
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r0,#0
  ands r3,r3,#3           (* r3 = bytes needed to align r0 (0..3), Z set if 0 *)
  sub r12,r12,r3          (* Safe: r12 >= 6 here. Does not touch the flags. *)
  (* Up to three conditional byte copies; as soon as r3 reaches zero the
     Z flag makes all remaining "ne" instructions no-ops. *)
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
  ldrneb r1,[r2],#1
  strneb r1,[r0],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r2,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2      (* r3 = number of whole words, Z set if none *)
  and r12,r12,#3          (* r12 = bytes left for the tail; flags untouched *)
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r1,[r2],#4
  strne r1,[r0],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. This path also performs the whole copy for strings
     shorter than 6 characters and for unaligned sources. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r1,[r2],#1
  strb r1,[r0],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
{ Copies the short string at sstr to dstr, truncated to at most len
  characters. The length byte written to dstr is min(length of sstr, len).

  Registers on entry:
    r0: len   (upper bound for the number of characters copied)
    r1: sstr  (source: first byte is its length)
    r2: dstr  (destination: length byte followed by the characters)
  r0 (once len has been consumed), r3 and r12 are used as scratch. }
asm
  ldrb r12,[r1],#1        (* r12 = length of source, r1 -> first character *)
  cmp r12,r0
  movgt r12,r0            (* Clamp the length to len, so that the destination
                             is never written beyond len characters. *)
  strb r12,[r2],#1        (* Store the (clamped) length byte. *)
  cmp r12,#6 (* 6 seems to be the break even point. *)
  blt .LStartTailCopy
  (* Align destination on 32bits. This is the only place where unrolling
     really seems to help, since in the common case, sstr is aligned on
     32 bits, therefore in the common case we need to copy 3 bytes to
     align, i.e. in the case of a loop, you wouldn't branch out early.*)
  rsb r3,r2,#0
  ands r3,r3,#3           (* r3 = bytes needed to align r2 (0..3), Z set if 0 *)
  sub r12,r12,r3          (* Safe: r12 >= 6 here. Does not touch the flags. *)
  (* Up to three conditional byte copies; as soon as r3 reaches zero the
     Z flag makes all remaining "ne" instructions no-ops. *)
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
  ldrneb r0,[r1],#1
  strneb r0,[r2],#1
  subnes r3,r3,#1
.LDoneAlign:
  (* Destination should be aligned now, but source might not be aligned,
     if this is the case, do a byte-per-byte copy. *)
  tst r1,#3
  bne .LStartTailCopy
  (* Start the main copy, 32 bit at a time. *)
  movs r3,r12,lsr #2      (* r3 = number of whole words, Z set if none *)
  and r12,r12,#3          (* r12 = bytes left for the tail; flags untouched *)
  beq .LStartTailCopy
.LNext4bytes:
  (* Unrolling this loop would save a little bit of time for long strings
     (>20 chars), but alas, it hurts for short strings and they are the
     common case.*)
  ldrne r0,[r1],#4
  strne r0,[r2],#4
  subnes r3,r3,#1
  bne .LNext4bytes
.LStartTailCopy:
  (* Do remaining bytes. This path also performs the whole copy for strings
     shorter than 6 characters and for unaligned sources. *)
  cmp r12,#0
  beq .LDoneTail
.LNextChar3:
  ldrb r0,[r1],#1
  strb r0,[r2],#1
  subs r12,r12,#1
  bne .LNextChar3
.LDoneTail:
end;
{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_Pchar_length(p:Pchar):longint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
{ Returns the number of characters in the zero-terminated string p, not
  counting the terminating zero.

  r0 holds p on entry and the length on exit; r1 is the running pointer,
  r2, r3 and r12 are scratch. While the running pointer is 32-bit aligned the
  string is scanned one word at a time, otherwise byte per byte.

  NOTE(review): p is dereferenced without a nil check - confirm that callers
  never pass nil. }
asm
mov r1,r0
.Lnextchar:
(*Are we aligned?*)
tst r1,#3
bne .Ltest_unaligned (*No, do byte per byte.*)
ldr r3,.L01010101
.Ltest_aligned:
(*Aligned, load 4 bytes at a time.*)
ldr r12,[r1],#4
(*Check whether r12 contains a 0 byte:
  (x - $01010101) and (not x) and $80808080 is non-zero exactly when at
  least one byte of x is zero.*)
sub r2,r12,r3
mvn r12,r12
and r2,r2,r12
ands r2,r2,r3,lsl #7 (*r3 lsl 7 = $80808080*)
beq .Ltest_aligned (*No 0 byte, repeat.*)
(*The word contains a 0 byte: step back and locate it byte per byte.*)
sub r1,r1,#4
.Ltest_unaligned:
ldrb r12,[r1],#1
cmp r12,#1 (*r12<1 same as r12=0, but result in carry flag*)
bcs .Lnextchar (*Carry set: byte was not 0, continue scanning.*)
(*Dirty trick: we need to subtract 1 extra because we have counted the
terminating 0, due to the known carry flag sbc can do this.*)
sbc r0,r1,r0 (*Carry is clear here, so r0 = r1 - r0 - 1.*)
(*Explicit return, so that execution does not run into the literal below.*)
mov pc,lr
.L01010101:
.long 0x01010101
end;
{$endif}
var
fpc_system_lock: longint; export name 'fpc_system_lock';
@ -439,3 +601,4 @@ end;
{include hand-optimized assembler division code}
{$i divide.inc}

View File

@ -1013,7 +1013,25 @@ end;
{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
function fpc_pchar_length(p:pchar):longint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; compilerproc;
{ Returns the number of characters in the zero-terminated string p, not
  counting the terminating zero.

  The body used to live in strlen.inc; it is inlined here, so the include
  directive must not be present any more (the file is removed).

  edi is saved in a local variable and restored before returning; eax and ecx
  are overwritten, and cld clears the direction flag. }
var
  saveedi : longint;
asm
        movl    %edi,saveedi
{$ifdef REGCALL}
        movl    %eax,%edi          { p arrives in eax }
{$else}
        movl    p,%edi
{$endif}
        movl    $0xffffffff,%ecx   { maximum repeat count }
        xorl    %eax,%eax          { al = 0, the byte to search for }
        cld
        repne
        scasb                      { ecx is decremented once per byte scanned,
                                     including the terminating zero }
        movl    $0xfffffffe,%eax
        subl    %ecx,%eax          { $fffffffe - ecx = bytes scanned - 1 }
        movl    saveedi,%edi
end;
{$endif FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
{$IFNDEF INTERNAL_BACKTRACE}
@ -1073,7 +1091,79 @@ Function Sptr : Pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$end
asm
movl %esp,%eax
end;
{****************************************************************************
Str()
****************************************************************************}
{$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
{$define FPC_SYSTEM_HAS_INT_STR_LONGINT}

{ Entry point of the conversion code shared by both int_str variants. It is
  located inside the longint variant and expects:
    eax = absolute value to convert
    edx = 1 if a '-' sign has to be written, 0 otherwise
    edi = address of the destination string
  with edi and ebx (in this order) pushed on the stack. }
label str_int_shortcut;

{$asmmode intel}

procedure int_str(l:longword;var s:string);assembler;nostackframe;

{ Converts the unsigned value l to its decimal representation in s.
  The code takes l from eax and the address of s from edx. It only sets up
  the state expected at str_int_shortcut (no sign) and continues there. }

asm
  push edi
  push ebx
  mov edi,edx
  xor edx,edx            {Unsigned: there is never a sign.}
  jmp str_int_shortcut
end;

procedure int_str(l:longint;var s:string);assembler;nostackframe;

{ Converts the signed value l to its decimal representation in s.
  The code takes l from eax and the address of s from edx.

  Optimized for speed, but balanced with size.}

const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
                                      100000,1000000,10000000,
                                      100000000,1000000000);

asm
  push edi
  push ebx
  mov edi,edx

  { Calculate absolute value and put sign in edx}
  cdq
  xor eax,edx
  sub eax,edx
  neg edx
str_int_shortcut:
  {Calculate amount of digits in ecx.
   bsr leaves its destination undefined when the source is 0, so scan
   (eax or 1) instead: this is identical for every non-zero value and makes
   0 behave like a 1 bit number, i.e. one digit.}
  mov ecx,eax
  or ecx,1
  bsr ecx,ecx
  inc ecx                {ecx = number of significant bits.}
  imul ecx,1233          {1233/4096 ~ log10(2): estimate of digits-1...}
  shr ecx,12
  cmp eax,[digits+4*ecx] {...corrected by comparing with 10^estimate.}
  cmc
  adc ecx,0 {Nr. digits ready in ecx.}

  {Write length & sign. The '-' lands in s[1]; without a sign it is
   overwritten by the first digit.}
  lea ebx,[edx+ecx]
  mov bh,'-'
  mov [edi],bx
  add edi,edx

  {Write out digits, last digit first.}
  mov edx,eax
@loop:
  mov eax,$cccccccd {Divide by 10 using mul+shr}
  lea ebx,[edx+'0'] {Pre-add '0'}
  mul edx
  shr edx,3
  lea eax,[8*edx+edx] {x mod 10 = x-10*(x div 10)}
  sub ebx,edx
  sub ebx,eax
  mov [edi+ecx],bl
  dec ecx
  jnz @loop
  pop ebx
  pop edi
end;

{$asmmode att}
{****************************************************************************
Bounds Check

View File

@ -1,35 +0,0 @@
{
This file is part of the Free Pascal run time library.
Copyright (c) 1999-2000 by the Free Pascal development team
Processor specific implementation of strlen
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{ Body of a strlen-style routine: this file is pulled in with $include
  directly after the header of fpc_pchar_length, which declares the
  parameter p used below.
  Leaves the number of bytes before the terminating zero in eax. }
var
saveedi : longint;
asm
{ edi is used as scan pointer; keep its original value in a local. }
movl %edi,saveedi
{$ifdef REGCALL}
movl %eax,%edi
{$else}
movl p,%edi
{$endif}
{ Search for a zero byte (al = 0) with the maximum repeat count. }
movl $0xffffffff,%ecx
xorl %eax,%eax
cld
repne
scasb
{ ecx was decremented once per byte scanned, including the terminating
  zero, so $fffffffe - ecx is the number of bytes scanned minus one. }
movl $0xfffffffe,%eax
subl %ecx,%eax
movl saveedi,%edi
end;