+ initial revision

This commit is contained in:
florian 2003-01-06 19:40:18 +00:00
parent 76a01b9847
commit d94534348b
2 changed files with 255 additions and 0 deletions

13
rtl/x86_64/makefile.cpu Normal file
View File

@ -0,0 +1,13 @@
# $Id$
#
# Processor dependent include file names for the x86_64 RTL directory.
#
# CPUNAMES holds the base names of the processor specific include files.
CPUNAMES=x86_64
# CPUINCNAMES is CPUNAMES with the ".inc" suffix appended to every entry.
CPUINCNAMES=$(addsuffix .inc,$(CPUNAMES))
#
# $Log$
# Revision 1.1 2003-01-06 19:40:18 florian
# + initial revision
#
#

242
rtl/x86_64/x86_64.inc Normal file
View File

@ -0,0 +1,242 @@
{
$Id$
This file is part of the Free Pascal run time library.
Copyright (c) 2002 by Florian Klaempfl.
Member of the Free Pascal development team
Parts of this code are derived from the x86-64 linux port
Copyright 2002 Andi Kleen
Processor dependent implementation for the system unit for
the x86-64 architecture
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$asmmode DIRECT}
{****************************************************************************
Primitives
****************************************************************************}
{$define FPC_SYSTEM_HAS_MOVE}
procedure Move(const source;var dest;count:longint);assembler;
asm
    { Copies count bytes from source to dest. Overlapping regions are
      handled; a count of zero or less copies nothing.

      Incoming registers (System V AMD64, Pascal parameter order):
        rdi   address of source
        rsi   address of dest
        edx   count (signed 32 bit)

      The copy loops were written for the C memcpy register layout, so
      rdi and rsi are exchanged on entry. From then on:
        rdi   destination pointer
        rsi   source pointer
        rdx   bytes remaining (sign extended count)
        rcx   loop counter
        r8-r11, rax  scratch
      Only volatile registers are used, so nothing has to be saved.
      All labels carry a .Lmove_ prefix so that they stay local and do not
      clash with labels of other routines in the same assembler file. }
    movslq %edx,%rdx               { count is a longint: widen before 64 bit use }
    testq %rdx,%rdx
    jle .Lmove_exit                { nothing to do for count <= 0 }
    xchgq %rdi,%rsi                { rdi = dest, rsi = source }
    movq %rdi,%rax
    subq %rsi,%rax                 { rax = dest - source }
    jz .Lmove_exit                 { source and dest are the same address }
    cmpq %rdx,%rax
    jb .Lmove_backward             { dest lies inside source..source+count-1 }
    prefetcht0 (%rsi)              { for more hopefully the hw prefetch will kick in }
    movl %edi,%ecx
    andl $7,%ecx
    jnz .Lmove_bad_alignment       { destination is not 8 byte aligned }
.Lmove_aligned:
    movq %rdx,%rcx
    shrq $6,%rcx                   { number of whole 64 byte blocks }
    jz .Lmove_tail
.Lmove_loop_64:
    { no prefetch because we assume the hw prefetcher does it already
      and we have no specific temporal hint to give. XXX or give a nta
      hint for the source? }
    movq (%rsi),%r11
    movq 8(%rsi),%r8
    movq 16(%rsi),%r9
    movq 24(%rsi),%r10
    movnti %r11,(%rdi)
    movnti %r8,8(%rdi)
    movnti %r9,16(%rdi)
    movnti %r10,24(%rdi)
    movq 32(%rsi),%r11
    movq 40(%rsi),%r8
    movq 48(%rsi),%r9
    movq 56(%rsi),%r10
    movnti %r11,32(%rdi)
    movnti %r8,40(%rdi)
    movnti %r9,48(%rdi)
    movnti %r10,56(%rdi)
    addq $64,%rsi
    addq $64,%rdi
    decq %rcx
    jnz .Lmove_loop_64
.Lmove_tail:
    movl %edx,%ecx
    andl $63,%ecx
    shrl $3,%ecx                   { whole quadwords left after the 64 byte blocks }
    jz .Lmove_handle_7
.Lmove_loop_8:
    movq (%rsi),%r8
    movnti %r8,(%rdi)
    addq $8,%rdi
    addq $8,%rsi
    decl %ecx
    jnz .Lmove_loop_8
.Lmove_handle_7:
    movl %edx,%ecx
    andl $7,%ecx                   { trailing bytes, 0..7 }
    jz .Lmove_done
.Lmove_loop_1:
    movb (%rsi),%r8b
    movb %r8b,(%rdi)
    incq %rdi
    incq %rsi
    decl %ecx
    jnz .Lmove_loop_1
.Lmove_done:
    sfence                         { order the non temporal stores before returning }
.Lmove_exit:
    ret
    { align destination }
    { This is simpleminded. For bigger blocks it may make sense to align
      src and dst to their aligned subset and handle the rest separately }
.Lmove_bad_alignment:
    movl $8,%r9d
    subl %ecx,%r9d                 { r9 = bytes up to the next 8 byte boundary, 1..7 }
    movl %r9d,%ecx
    subq %r9,%rdx
    jle .Lmove_small               { fewer bytes than needed to get past the boundary }
.Lmove_align_1:
    movb (%rsi),%r8b
    movb %r8b,(%rdi)
    incq %rdi
    incq %rsi
    decl %ecx
    jnz .Lmove_align_1
    jmp .Lmove_aligned
.Lmove_small:
    addq %r9,%rdx                  { restore the count, it is at most 7 here }
    jmp .Lmove_handle_7
    { Overlap with dest above source: copy from the end towards the start
      so that no source byte is overwritten before it has been read.
      Plain stores are used here, so no sfence is required. }
.Lmove_backward:
    addq %rdx,%rsi                 { point one past the end of both regions }
    addq %rdx,%rdi
    movl %edx,%ecx
    andl $7,%ecx
    jz .Lmove_back_quads
.Lmove_back_1:
    decq %rsi
    decq %rdi
    movb (%rsi),%al
    movb %al,(%rdi)
    decl %ecx
    jnz .Lmove_back_1
.Lmove_back_quads:
    shrq $3,%rdx                   { remaining whole quadwords }
    jz .Lmove_back_done
.Lmove_back_8:
    subq $8,%rsi
    subq $8,%rdi
    movq (%rsi),%rax
    movq %rax,(%rdi)
    decq %rdx
    jnz .Lmove_back_8
.Lmove_back_done:
    ret
end;
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;
asm
    { Sets count bytes starting at x to value. A count of zero or less
      writes nothing.

      Incoming registers (System V AMD64, Pascal parameter order):
        rdi   address of x (destination)
        esi   count (signed 32 bit)
        dl    value (fill byte)

      Register roles in the body:
        rax   fill byte replicated into all eight byte lanes
        r11   bytes remaining
        r9    misalignment of the destination, 0..7
        r8    bytes skipped to reach the next 8 byte boundary
        rcx   loop counter
      Only volatile registers are used. All labels carry a .Lfillchar_
      prefix so that they stay local and do not clash with labels of other
      routines in the same assembler file. }
    movslq %esi,%r11               { count is a longint: widen before 64 bit use }
    testq %r11,%r11
    jle .Lfillchar_exit            { nothing to do for count <= 0 }
    { expand byte value }
    movzbl %dl,%ecx
    movabs $0x0101010101010101,%rax
    mul %rcx                       { rax = value in every byte; clobbers rdx }
    { align dst }
    movl %edi,%r9d
    andl $7,%r9d
    jnz .Lfillchar_bad_alignment
.Lfillchar_aligned:
    movq %r11,%rcx
    shrq $6,%rcx                   { number of whole 64 byte blocks }
    jz .Lfillchar_tail
.Lfillchar_loop_64:
    movnti %rax,(%rdi)
    movnti %rax,8(%rdi)
    movnti %rax,16(%rdi)
    movnti %rax,24(%rdi)
    movnti %rax,32(%rdi)
    movnti %rax,40(%rdi)
    movnti %rax,48(%rdi)
    movnti %rax,56(%rdi)
    addq $64,%rdi
    decq %rcx
    jnz .Lfillchar_loop_64
    { Handle tail in loops. The loops should be faster than hard
      to predict jump tables. }
.Lfillchar_tail:
    movl %r11d,%ecx
    andl $56,%ecx                  { 63 and not 7: bytes held in whole quadwords }
    jz .Lfillchar_handle_7
    shrl $3,%ecx
.Lfillchar_loop_8:
    movnti %rax,(%rdi)
    addq $8,%rdi
    decl %ecx
    jnz .Lfillchar_loop_8
.Lfillchar_handle_7:
    movl %r11d,%ecx
    andl $7,%ecx                   { trailing bytes, 0..7 }
    jz .Lfillchar_done
.Lfillchar_loop_1:
    movb %al,(%rdi)
    incq %rdi
    decl %ecx
    jnz .Lfillchar_loop_1
.Lfillchar_done:
    sfence                         { order the non temporal stores before returning }
.Lfillchar_exit:
    ret
.Lfillchar_bad_alignment:
    cmpq $7,%r11
    jbe .Lfillchar_handle_7        { 7 bytes or fewer: plain byte loop, no sfence needed but harmless }
    movnti %rax,(%rdi)             { unaligned store, covers the bytes up to the boundary }
    movl $8,%r8d
    subq %r9,%r8                   { r8 = bytes up to the next 8 byte boundary, 1..7 }
    addq %r8,%rdi
    subq %r8,%r11
    jmp .Lfillchar_aligned
end;
{ thread safe increment and decrement of a longint }
function declocked(var l : longint) : boolean;assembler;
asm
    {
      Decrements l by one and returns true when the new value is zero.
      l:      %rdi (address of the longint)
      result: %al
    }
{$ifdef MT}
    { this check should be done because a lock takes a lot }
    { of time! }
    { IsMultithread is addressed relative to rip so that the code also }
    { links when it is built position independent }
    cmpb $0,IsMultithread(%rip)
    jz .Ldeclockednolock
    lock
    decl (%rdi)
    jmp .Ldeclockedend
.Ldeclockednolock:
{$endif MT}
    decl (%rdi)
.Ldeclockedend:
    { the zero flag still comes from the decl: jmp does not touch the flags }
    setzb %al
end;
procedure inclocked(var l : longint);assembler;
asm
    {
      Increments l by one.
      l: %rdi (address of the longint)
    }
{$ifdef MT}
    { this check should be done because a lock takes a lot }
    { of time! }
    { IsMultithread is addressed relative to rip so that the code also }
    { links when it is built position independent }
    cmpb $0,IsMultithread(%rip)
    jz .Linclockednolock
    lock
    incl (%rdi)
    jmp .Linclockedend
.Linclockednolock:
{$endif MT}
    incl (%rdi)
.Linclockedend:
end;
{
$Log$
Revision 1.1 2003-01-06 19:40:18 florian
+ initial revision
}