mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-14 20:09:27 +02:00
+ x86_64 assembler implementations of Move and FillChar. Does not use SIMD, so probably not the fastest for large move sizes, but for small to medium sizes it should be competitive.
* Extended the related test with checks for medium and large move sizes, to improve coverage for different code paths that are used depending on size. git-svn-id: trunk@17249 -
This commit is contained in:
parent
da7ef036ba
commit
6e09d76b07
@ -72,181 +72,387 @@ asm
|
||||
.Lg_a_null:
|
||||
end ['RAX'];
|
||||
|
||||
(*
|
||||
{$define FPC_SYSTEM_HAS_MOVE}
|
||||
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;
|
||||
procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
|
||||
{ Linux: rdi source, rsi dest, rdx count
|
||||
win64: rcx source, rdx dest, r8 count }
|
||||
asm
|
||||
{ rdi destination
|
||||
rsi source
|
||||
rdx count
|
||||
}
|
||||
pushq %rbx
|
||||
prefetcht0 (%rsi) // for more hopefully the hw prefetch will kick in
|
||||
movq %rdi,%rax
|
||||
{$ifndef win64}
|
||||
mov %rdx, %r8
|
||||
mov %rsi, %rdx
|
||||
mov %rdi, %rcx
|
||||
{$endif win64}
|
||||
|
||||
movl %edi,%ecx
|
||||
andl $7,%ecx
|
||||
jnz .Lbad_alignment
|
||||
.Lafter_bad_alignment:
|
||||
movq %rdx,%rcx
|
||||
movl $64,%ebx
|
||||
shrq $6,%rcx
|
||||
jz .Lhandle_tail
|
||||
mov %r8, %rax
|
||||
sub %rdx, %rcx { rcx = src - dest }
|
||||
jz .Lquit { exit if src=dest }
|
||||
jnb .L1 { src>dest => forward move }
|
||||
|
||||
.Lloop_64:
|
||||
{ no prefetch because we assume the hw prefetcher does it already
|
||||
and we have no specific temporal hint to give. XXX or give a nta
|
||||
hint for the source? }
|
||||
movq (%rsi),%r11
|
||||
movq 8(%rsi),%r8
|
||||
movq 2*8(%rsi),%r9
|
||||
movq 3*8(%rsi),%r10
|
||||
movnti %r11,(%rdi)
|
||||
movnti %r8,1*8(%rdi)
|
||||
movnti %r9,2*8(%rdi)
|
||||
movnti %r10,3*8(%rdi)
|
||||
add %rcx, %rax { rcx is negative => r8+rcx > 0 if regions overlap }
|
||||
jb .Lback { if no overlap, still do forward move }
|
||||
|
||||
movq 4*8(%rsi),%r11
|
||||
movq 5*8(%rsi),%r8
|
||||
movq 6*8(%rsi),%r9
|
||||
movq 7*8(%rsi),%r10
|
||||
movnti %r11,4*8(%rdi)
|
||||
movnti %r8,5*8(%rdi)
|
||||
movnti %r9,6*8(%rdi)
|
||||
movnti %r10,7*8(%rdi)
|
||||
.L1:
|
||||
cmp $8, %r8
|
||||
jl .Lless8f { signed compare, negative count not allowed }
|
||||
test $7, %dl
|
||||
je .Ldestaligned
|
||||
|
||||
addq %rbx,%rsi
|
||||
addq %rbx,%rdi
|
||||
loop .Lloop_64
|
||||
test $1, %dl { align dest by moving first 1+2+4 bytes }
|
||||
je .L2f
|
||||
mov (%rcx,%rdx,1),%al
|
||||
dec %r8
|
||||
mov %al, (%rdx)
|
||||
add $1, %rdx
|
||||
.L2f:
|
||||
test $2, %dl
|
||||
je .L4f
|
||||
mov (%rcx,%rdx,1),%ax
|
||||
sub $2, %r8
|
||||
mov %ax, (%rdx)
|
||||
add $2, %rdx
|
||||
.L4f:
|
||||
test $4, %dl
|
||||
je .Ldestaligned
|
||||
mov (%rcx,%rdx,1),%eax
|
||||
sub $4, %r8
|
||||
mov %eax, (%rdx)
|
||||
add $4, %rdx
|
||||
|
||||
.Lhandle_tail:
|
||||
movl %edx,%ecx
|
||||
andl $63,%ecx
|
||||
shrl $3,%ecx
|
||||
jz .Lhandle_7
|
||||
movl $8,%ebx
|
||||
.Lloop_8:
|
||||
movq (%rsi),%r8
|
||||
movnti %r8,(%rdi)
|
||||
addq %rbx,%rdi
|
||||
addq %rbx,%rsi
|
||||
loop .Lloop_8
|
||||
.Ldestaligned:
|
||||
mov %r8, %r9
|
||||
shr $5, %r9
|
||||
jne .Lmore32
|
||||
|
||||
.Lhandle_7:
|
||||
movl %edx,%ecx
|
||||
andl $7,%ecx
|
||||
jz .Lende
|
||||
.Lloop_1:
|
||||
movb (%rsi),%r8b
|
||||
movb %r8b,(%rdi)
|
||||
incq %rdi
|
||||
incq %rsi
|
||||
loop .Lloop_1
|
||||
.Ltail:
|
||||
mov %r8, %r9
|
||||
shr $3, %r9
|
||||
je .Lless8f
|
||||
|
||||
jmp .Lende
|
||||
.balign 16
|
||||
.Lloop8f: { max. 8 iterations }
|
||||
mov (%rcx,%rdx,1),%rax
|
||||
mov %rax, (%rdx)
|
||||
add $8, %rdx
|
||||
dec %r9
|
||||
jne .Lloop8f
|
||||
and $7, %r8
|
||||
|
||||
{ align destination }
|
||||
{ This is simpleminded. For bigger blocks it may make sense to align
|
||||
src and dst to their aligned subset and handle the rest separately }
|
||||
.Lbad_alignment:
|
||||
movl $8,%r9d
|
||||
subl %ecx,%r9d
|
||||
movl %r9d,%ecx
|
||||
subq %r9,%rdx
|
||||
js .Lsmall_alignment
|
||||
jz .Lsmall_alignment
|
||||
.Lalign_1:
|
||||
movb (%rsi),%r8b
|
||||
movb %r8b,(%rdi)
|
||||
incq %rdi
|
||||
incq %rsi
|
||||
loop .Lalign_1
|
||||
jmp .Lafter_bad_alignment
|
||||
.Lsmall_alignment:
|
||||
addq %r9,%rdx
|
||||
jmp .Lhandle_7
|
||||
.Lless8f:
|
||||
test %r8, %r8
|
||||
jle .Lquit
|
||||
|
||||
.Lende:
|
||||
sfence
|
||||
popq %rbx
|
||||
.balign 16
|
||||
.Lloop1f:
|
||||
mov (%rcx,%rdx,1),%al
|
||||
mov %al,(%rdx)
|
||||
inc %rdx
|
||||
dec %r8
|
||||
jne .Lloop1f
|
||||
.Lquit:
|
||||
retq
|
||||
|
||||
|
||||
.Lmore32:
|
||||
cmp $0x2000, %r9 { this limit must be processor-specific (1/2 L2 cache size) }
|
||||
jnae .Lloop32
|
||||
cmp $0x1000, %rcx { but don't bother bypassing cache if src and dest }
|
||||
jnb .Lntloopf { are close to each other}
|
||||
|
||||
.balign 16
|
||||
.Lloop32:
|
||||
add $32,%rdx
|
||||
mov -32(%rcx,%rdx,1),%rax
|
||||
mov -24(%rcx,%rdx,1),%r10
|
||||
mov %rax,-32(%rdx)
|
||||
mov %r10,-24(%rdx)
|
||||
dec %r9
|
||||
mov -16(%rcx,%rdx,1),%rax
|
||||
mov -8(%rcx,%rdx,1),%r10
|
||||
mov %rax,-16(%rdx)
|
||||
mov %r10,-8(%rdx)
|
||||
jne .Lloop32
|
||||
|
||||
and $0x1f, %r8
|
||||
jmpq .Ltail
|
||||
|
||||
.Lntloopf:
|
||||
mov $32, %eax
|
||||
|
||||
.balign 16
|
||||
.Lpref:
|
||||
prefetchnta (%rcx,%rdx,1)
|
||||
prefetchnta 0x40(%rcx,%rdx,1)
|
||||
add $0x80, %rdx
|
||||
dec %eax
|
||||
jne .Lpref
|
||||
|
||||
sub $0x1000, %rdx
|
||||
mov $64, %eax
|
||||
|
||||
.balign 16
|
||||
.Loop64:
|
||||
add $64, %rdx
|
||||
mov -64(%rcx,%rdx,1), %r9
|
||||
mov -56(%rcx,%rdx,1), %r10
|
||||
movnti %r9, -64(%rdx)
|
||||
movnti %r10, -56(%rdx)
|
||||
|
||||
mov -48(%rcx,%rdx,1), %r9
|
||||
mov -40(%rcx,%rdx,1), %r10
|
||||
movnti %r9, -48(%rdx)
|
||||
movnti %r10, -40(%rdx)
|
||||
dec %eax
|
||||
mov -32(%rcx,%rdx,1), %r9
|
||||
mov -24(%rcx,%rdx,1), %r10
|
||||
movnti %r9, -32(%rdx)
|
||||
movnti %r10, -24(%rdx)
|
||||
|
||||
mov -16(%rcx,%rdx,1), %r9
|
||||
mov -8(%rcx,%rdx,1), %r10
|
||||
movnti %r9, -16(%rdx)
|
||||
movnti %r10, -8(%rdx)
|
||||
jne .Loop64
|
||||
|
||||
sub $0x1000, %r8
|
||||
cmp $0x1000, %r8
|
||||
jae .Lntloopf
|
||||
|
||||
mfence
|
||||
jmpq .Ldestaligned { go handle remaining bytes }
|
||||
|
||||
{ backwards move }
|
||||
.Lback:
|
||||
add %r8, %rdx { points to the end of dest }
|
||||
cmp $8, %r8
|
||||
jl .Lless8b { signed compare, negative count not allowed }
|
||||
test $7, %dl
|
||||
je .Ldestalignedb
|
||||
test $1, %dl
|
||||
je .L2b
|
||||
dec %rdx
|
||||
mov (%rcx,%rdx,1), %al
|
||||
dec %r8
|
||||
mov %al, (%rdx)
|
||||
.L2b:
|
||||
test $2, %dl
|
||||
je .L4b
|
||||
sub $2, %rdx
|
||||
mov (%rcx,%rdx,1), %ax
|
||||
sub $2, %r8
|
||||
mov %ax, (%rdx)
|
||||
.L4b:
|
||||
test $4, %dl
|
||||
je .Ldestalignedb
|
||||
sub $4, %rdx
|
||||
mov (%rcx,%rdx,1), %eax
|
||||
sub $4, %r8
|
||||
mov %eax, (%rdx)
|
||||
|
||||
.Ldestalignedb:
|
||||
mov %r8, %r9
|
||||
shr $5, %r9
|
||||
jne .Lmore32b
|
||||
|
||||
.Ltailb:
|
||||
mov %r8, %r9
|
||||
shr $3, %r9
|
||||
je .Lless8b
|
||||
|
||||
.Lloop8b:
|
||||
sub $8, %rdx
|
||||
mov (%rcx,%rdx,1), %rax
|
||||
dec %r9
|
||||
mov %rax, (%rdx)
|
||||
jne .Lloop8b
|
||||
and $7, %r8
|
||||
|
||||
.Lless8b:
|
||||
test %r8, %r8
|
||||
jle .Lquit2
|
||||
|
||||
.balign 16
|
||||
.Lsmallb:
|
||||
dec %rdx
|
||||
mov (%rcx,%rdx,1), %al
|
||||
dec %r8
|
||||
mov %al,(%rdx)
|
||||
jnz .Lsmallb
|
||||
.Lquit2:
|
||||
retq
|
||||
|
||||
.Lmore32b:
|
||||
cmp $0x2000, %r9
|
||||
jnae .Lloop32b
|
||||
cmp $0xfffffffffffff000,%rcx
|
||||
jb .Lntloopb
|
||||
|
||||
.balign 16
|
||||
.Lloop32b:
|
||||
sub $32, %rdx
|
||||
mov 24(%rcx,%rdx,1), %rax
|
||||
mov 16(%rcx,%rdx,1), %r10
|
||||
mov %rax, 24(%rdx)
|
||||
mov %r10, 16(%rdx)
|
||||
dec %r9
|
||||
mov 8(%rcx,%rdx,1),%rax
|
||||
mov (%rcx,%rdx,1), %r10
|
||||
mov %rax, 8(%rdx)
|
||||
mov %r10, (%rdx)
|
||||
jne .Lloop32b
|
||||
and $0x1f, %r8
|
||||
jmpq .Ltailb
|
||||
|
||||
|
||||
.Lntloopb:
|
||||
mov $32, %eax
|
||||
|
||||
.balign 16
|
||||
.Lprefb:
|
||||
sub $0x80, %rdx
|
||||
prefetchnta (%rcx,%rdx,1)
|
||||
prefetchnta 0x40(%rcx,%rdx,1)
|
||||
dec %eax
|
||||
jnz .Lprefb
|
||||
|
||||
add $0x1000, %rdx
|
||||
mov $0x40, %eax
|
||||
|
||||
.balign 16
|
||||
.Lloop64b:
|
||||
sub $64, %rdx
|
||||
mov 56(%rcx,%rdx,1), %r9
|
||||
mov 48(%rcx,%rdx,1), %r10
|
||||
movnti %r9, 56(%rdx)
|
||||
movnti %r10, 48(%rdx)
|
||||
|
||||
mov 40(%rcx,%rdx,1), %r9
|
||||
mov 32(%rcx,%rdx,1), %r10
|
||||
movnti %r9, 40(%rdx)
|
||||
movnti %r10, 32(%rdx)
|
||||
dec %eax
|
||||
mov 24(%rcx,%rdx,1), %r9
|
||||
mov 16(%rcx,%rdx,1), %r10
|
||||
movnti %r9, 24(%rdx)
|
||||
movnti %r10, 16(%rdx)
|
||||
|
||||
mov 8(%rcx,%rdx,1), %r9
|
||||
mov (%rcx,%rdx,1), %r10
|
||||
movnti %r9, 8(%rdx)
|
||||
movnti %r10, (%rdx)
|
||||
jne .Lloop64b
|
||||
|
||||
sub $0x1000, %r8
|
||||
cmp $0x1000, %r8
|
||||
jae .Lntloopb
|
||||
mfence
|
||||
jmpq .Ldestalignedb
|
||||
end;
|
||||
*)
|
||||
|
||||
(*
|
||||
{$define FPC_SYSTEM_HAS_FILLCHAR}
|
||||
Procedure FillChar(var x;count:longint;value:byte);assembler;
|
||||
Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
|
||||
asm
|
||||
{ rdi destination
|
||||
rsi value (char)
|
||||
rdx count (bytes)
|
||||
}
|
||||
movq %rdi,%r10
|
||||
movq %rdx,%r11
|
||||
{ win64: rcx dest, rdx count, r8b value
|
||||
linux: rdi dest, rsi count, rdx value }
|
||||
{$ifndef win64}
|
||||
mov %rdx, %r8
|
||||
mov %rsi, %rdx
|
||||
mov %rdi, %rcx
|
||||
{$endif win64}
|
||||
|
||||
cmp $8, %rdx
|
||||
jl .Ltiny
|
||||
|
||||
{ expand byte value }
|
||||
movzbl %sil,%ecx
|
||||
movabs $0x0101010101010101,%rax
|
||||
mul %rcx { with rax, clobbers rdx }
|
||||
movzbl %r8b, %r8
|
||||
mov $0x0101010101010101,%r9
|
||||
imul %r9, %r8
|
||||
|
||||
{ align dst }
|
||||
movl %edi,%r9d
|
||||
andl $7,%r9d
|
||||
jnz .Lbad_alignment
|
||||
.Lafter_bad_alignment:
|
||||
test $7, %cl
|
||||
je .Laligned
|
||||
|
||||
movq %r11,%rcx
|
||||
movl $64,%r8d
|
||||
shrq $6,%rcx
|
||||
jz .Lhandle_tail
|
||||
{ align dest to 8 bytes }
|
||||
test $1, %cl
|
||||
je .L2
|
||||
movb %r8b, (%rcx)
|
||||
add $1, %rcx
|
||||
sub $1, %rdx
|
||||
.L2:
|
||||
test $2, %cl
|
||||
je .L4
|
||||
movw %r8w, (%rcx)
|
||||
add $2, %rcx
|
||||
sub $2, %rdx
|
||||
.L4:
|
||||
test $4, %cl
|
||||
je .Laligned
|
||||
movl %r8d, (%rcx)
|
||||
add $4, %rcx
|
||||
sub $4, %rdx
|
||||
|
||||
.Lloop_64:
|
||||
movnti %rax,(%rdi)
|
||||
movnti %rax,8(%rdi)
|
||||
movnti %rax,16(%rdi)
|
||||
movnti %rax,24(%rdi)
|
||||
movnti %rax,32(%rdi)
|
||||
movnti %rax,40(%rdi)
|
||||
movnti %rax,48(%rdi)
|
||||
movnti %rax,56(%rdi)
|
||||
addq %r8,%rdi
|
||||
loop .Lloop_64
|
||||
.Laligned:
|
||||
mov %rdx, %rax
|
||||
and $0x3f, %rdx
|
||||
shr $6, %rax
|
||||
jne .Lmore64
|
||||
|
||||
{ Handle tail in loops. The loops should be faster than hard
|
||||
to predict jump tables. }
|
||||
.Lhandle_tail:
|
||||
movl %r11d,%ecx
|
||||
andl $56,%ecx
|
||||
jz .Lhandle_7
|
||||
shrl $3,%ecx
|
||||
.Lloop_8:
|
||||
movnti %rax,(%rdi)
|
||||
addq $8,%rdi
|
||||
loop .Lloop_8
|
||||
.Lhandle_7:
|
||||
movl %r11d,%ecx
|
||||
andl $7,%ecx
|
||||
jz .Lende
|
||||
.Lloop_1:
|
||||
movb %al,(%rdi)
|
||||
addq $1,%rdi
|
||||
loop .Lloop_1
|
||||
.Lless64:
|
||||
mov %rdx, %rax
|
||||
and $7, %rdx
|
||||
shr $3, %rax
|
||||
je .Ltiny
|
||||
|
||||
jmp .Lende
|
||||
.balign 16
|
||||
.Lloop8: { max. 8 iterations }
|
||||
mov %r8, (%rcx)
|
||||
add $8, %rcx
|
||||
dec %rax
|
||||
jne .Lloop8
|
||||
.Ltiny:
|
||||
test %rdx, %rdx
|
||||
jle .Lquit
|
||||
.Lloop1:
|
||||
movb %r8b, (%rcx)
|
||||
inc %rcx
|
||||
dec %rdx
|
||||
jnz .Lloop1
|
||||
.Lquit:
|
||||
retq
|
||||
|
||||
.Lbad_alignment:
|
||||
cmpq $7,%r11
|
||||
jbe .Lhandle_7
|
||||
movnti %rax,(%rdi) (* unaligned store *)
|
||||
movq $8,%r8
|
||||
subq %r9,%r8
|
||||
addq %r8,%rdi
|
||||
subq %r8,%r11
|
||||
jmp .Lafter_bad_alignment
|
||||
.Lmore64:
|
||||
cmp $0x2000,%rax
|
||||
jae .Lloop64nti
|
||||
|
||||
.Lende:
|
||||
movq %r10,%rax
|
||||
.balign 16
|
||||
.Lloop64:
|
||||
add $64, %rcx
|
||||
mov %r8, -64(%rcx)
|
||||
mov %r8, -56(%rcx)
|
||||
mov %r8, -48(%rcx)
|
||||
mov %r8, -40(%rcx)
|
||||
dec %rax
|
||||
mov %r8, -32(%rcx)
|
||||
mov %r8, -24(%rcx)
|
||||
mov %r8, -16(%rcx)
|
||||
mov %r8, -8(%rcx)
|
||||
jne .Lloop64
|
||||
jmp .Lless64
|
||||
|
||||
.balign 16
|
||||
.Lloop64nti:
|
||||
add $64, %rcx
|
||||
movnti %r8, -64(%rcx)
|
||||
movnti %r8, -56(%rcx)
|
||||
movnti %r8, -48(%rcx)
|
||||
movnti %r8, -40(%rcx)
|
||||
dec %rax
|
||||
movnti %r8, -32(%rcx)
|
||||
movnti %r8, -24(%rcx)
|
||||
movnti %r8, -16(%rcx)
|
||||
movnti %r8, -8(%rcx)
|
||||
jnz .Lloop64nti
|
||||
mfence
|
||||
jmp .Lless64
|
||||
end;
|
||||
*)
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
|
||||
|
@ -16,9 +16,7 @@ var
|
||||
dst_arraybyte : array[1..MAX_TABLE] of byte;
|
||||
src_arraybyte : array[1..MAX_TABLE] of byte;
|
||||
dst_arrayword : array[1..MAX_TABLE] of word;
|
||||
src_arrayword : array[1..MAX_TABLE] of word;
|
||||
dst_arraylongword : array[1..MAX_TABLE] of longword;
|
||||
src_arratlongword : array[1..MAX_TABLE] of longword;
|
||||
i: integer;
|
||||
|
||||
|
||||
@ -70,6 +68,8 @@ procedure test_fillchar;
|
||||
for i := 1 to MAX_TABLE do
|
||||
dst_arraybyte[i] := DEFAULT_VALUE;
|
||||
fillchar(dst_arraybyte, -1, FILL_VALUE);
|
||||
for i := 1 to MAX_TABLE do
|
||||
test(dst_arraybyte[i], DEFAULT_VALUE);
|
||||
writeln('Passed!');
|
||||
end;
|
||||
|
||||
@ -103,7 +103,7 @@ begin
|
||||
test(dst_arraybyte[i], FILL_VALUE);
|
||||
writeln('Passed!');
|
||||
{ zero move count }
|
||||
write('test move (zero count)...');
|
||||
write('testing move (zero count)...');
|
||||
for i := 1 to MAX_TABLE do
|
||||
begin
|
||||
dst_arraybyte[i] := DEFAULT_VALUE;
|
||||
@ -114,11 +114,75 @@ begin
|
||||
test(dst_arraybyte[i], DEFAULT_VALUE);
|
||||
writeln('Passed!');
|
||||
{ negative move count }
|
||||
write('test move (negative count)...');
|
||||
write('testing move (negative count)...');
|
||||
move(src_arraybyte,dst_arraybyte,-12);
|
||||
writeln('Passed!');
|
||||
end;
|
||||
|
||||
|
||||
{ Exercises Move on two heap buffers of `size` LongInts each.

  Four scenarios are checked in turn:
    1. disjoint buffers, src -> dst, with a two-element guard band on each
       side of the destination range that must stay untouched;
    2. the same with the roles of the two buffers swapped;
    3. an overlapping move inside one buffer towards higher addresses
       (destination = source + 100 elements);
    4. an overlapping move inside one buffer towards lower addresses
       (destination = source - 100 elements).

  `size` is an element count, not a byte count. The procedure indexes
  element 100 and element size-101, so it assumes size is comfortably
  larger than 100. Every comparison goes through the unit's `test`
  routine. }
procedure test_move_large(size: longint);
var
  src, dst: PLongInt;
  i: LongInt;
begin
  GetMem(src, size*sizeof(LongInt));
  GetMem(dst, size*sizeof(LongInt));

  { Scenario 1: disjoint buffers, src -> dst. }
  write('testing move of ',size,' dwords ...');
  for i := 0 to size-1 do
  begin
    src[i] := i;
    dst[i] := -1;
  end;
  move(src[0], dst[2], (size-4)*sizeof(LongInt));
  { The two elements before and after the destination range are guards. }
  test(dst[0], -1);
  test(dst[1], -1);
  test(dst[size-1], -1);
  test(dst[size-2], -1);
  for i := 2 to size-3 do
    test(dst[i], i-2);
  writeln('Passed!');

  // repeat with source and dest swapped (maybe move in opposite direction)
  // current implementations detect that regions don't overlap and move forward,
  // so this test is mostly useless. But it won't harm anyway.
  write('testing move of ',size,' dwords, opposite direction...');
  for i := 0 to size-1 do
  begin
    dst[i] := i;
    src[i] := -1;
  end;
  move(dst[0], src[2], (size-4)*sizeof(LongInt));
  test(src[0], -1);
  test(src[1], -1);
  test(src[size-1], -1);
  test(src[size-2], -1);
  for i := 2 to size-3 do
    test(src[i], i-2);
  writeln('Passed!');

  { Scenario 3: destination lies 100 elements above the source in the same
    buffer, so a naive low-to-high copy would overwrite unread source data. }
  write('testing move of ',size,' dwords, overlapping forward...');
  for i := 0 to size-1 do
    src[i] := i;
  move(src[0], src[100], (size-100)*sizeof(LongInt));
  { Elements below the destination range must be unchanged. }
  for i := 0 to 99 do
    test(src[i], i);
  { The move wrote elements 100..size-1; verify all of them, including the
    final 100, so that tail handling of the copy is covered as well. }
  for i := 100 to size-1 do
    test(src[i], i-100);
  writeln('Passed!');

  { Scenario 4: destination lies 100 elements below the source. }
  write('testing move of ',size,' dwords, overlapping backward...');
  for i := 0 to size-1 do
    src[i] := i;
  move(src[100], src[0], (size-100)*sizeof(LongInt));
  for i := 0 to size-101 do
    test(src[i], i+100);
  { The last 100 elements are outside the destination range and keep
    their initial values. }
  for i := size-100 to size-1 do
    test(src[i], i);
  writeln('Passed!');

  FreeMem(dst);
  FreeMem(src);
end;
|
||||
|
||||
{$ifdef fpc}
|
||||
procedure test_fillword;
|
||||
var
|
||||
@ -271,6 +335,8 @@ end;
|
||||
begin
|
||||
test_fillchar;
|
||||
test_move;
|
||||
test_move_large(500); // 500 longints = 2000 bytes
|
||||
test_move_large(500000);
|
||||
{$ifdef fpc}
|
||||
test_fillword;
|
||||
test_filldword;
|
||||
|
Loading…
Reference in New Issue
Block a user