mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-17 04:59:33 +02:00
+ new, complete implementation of move procedure (including support for
overlapping regions)
This commit is contained in:
parent
6874cceff4
commit
b7970bf7a4
@ -24,80 +24,148 @@
|
||||
|
||||
{$define FPC_SYSTEM_HAS_MOVE}
|
||||
|
||||
procedure Move(var source;var dest;count:longint);
|
||||
begin
|
||||
{ register usage:
|
||||
r3 source
|
||||
r4 dest
|
||||
r5 count
|
||||
r13 ptr to end of source
|
||||
r14 ptr to end of dest
|
||||
r15 counter 1
|
||||
r16 counter 2
|
||||
r17 addr increment
|
||||
r18 ptr to current source block
|
||||
r19 ptr to current dest block
|
||||
r20-24 buffer
|
||||
f1-4 buffer
|
||||
ctr Loop counter
|
||||
notes:
|
||||
Move uses FPRs for increased bandwidth
|
||||
}
|
||||
asm
|
||||
{ do some param checking, initialization }
|
||||
cmplwi cr2,r3,0
|
||||
cmplwi cr3,r4,0
|
||||
cmplw cr4,r3,r4
|
||||
add r13,r3,r5
|
||||
add r14,r4,r5
|
||||
bt cr2,.MoveEnd //end if source=nil
|
||||
bt cr3,.MoveEnd //end if dest=nil
|
||||
bt cr4,.MoveEnd //end if source=dest
|
||||
{ see if source and dest overlap }
|
||||
cmplw cr2,r13,r4
|
||||
cmplw cr3,r4,r3
|
||||
srawi. r15,r5,$5 //r15 := count div 32
|
||||
andi r16,r5,$1F //r16 := count mod 32
|
||||
crand cr3,cr2,cr3
|
||||
mtctr r15 //Load loop counter
|
||||
bgt cr3,.MoveRL //dest overlaps source on right
|
||||
li r17,$8 //Offset 8 bytes per doubleword copy
|
||||
sub r18,r17,r3 //calculate the starting source
|
||||
sub r19,r17,r4 // and dest ptrs
|
||||
beq .MoveByByte //If count<32 skip 32 byte block copy
|
||||
srawi. r15,r16,$2 //r15 := r16 div 4
|
||||
andi r16,r15,$3 //r16 := r15 mod 4
|
||||
cmpwi cr2,r16,0 //r16 = 0 ?
|
||||
crand cr3,cr2,cr0 //r15 = 0 AND r16 = 0 ?
|
||||
.MoveBlockLoop: //32 Byte block copy (fully optimized)
|
||||
lfdux f1,r18,r17
|
||||
lfdux f2,r18,r17
|
||||
lfdux f3,r18,r17
|
||||
lfdux f4,r18,r17
|
||||
stfdux f1,r19,r17
|
||||
stfdux f2,r19,r17
|
||||
stfdux f3,r19,r17
|
||||
stfdux f4,r19,r17
|
||||
bdnz .MoveBlockLoop
|
||||
procedure Move(var source;var dest;count:longint);assembler;
|
||||
asm
|
||||
{ count <= 0 ? }
|
||||
cmpwi cr0,r5,0
|
||||
{ check if we have to do the move backwards because of overlap }
|
||||
sub r30,r4,r3
|
||||
{ carry := boolean(dest-source >= count) = boolean(no overlap) }
|
||||
subc r30,r30,r5
|
||||
|
||||
{ count < 11 ? (to decide whether we will move dwords or bytes) }
|
||||
cmpwi cr1,r5,11
|
||||
|
||||
{ if overlap, then r30 := -1 else r30 := 0 }
|
||||
subfe r30,r30,r30
|
||||
|
||||
{ count < 39 ? (32 + max. alignment (7)) }
|
||||
cmpwi cr7,r5,39
|
||||
|
||||
{ if count <= 0, stop }
|
||||
ble cr0,LMoveDone
|
||||
|
||||
{ if overlap, then r29 := count else r29 := 0 }
|
||||
and r29,r5,r30
|
||||
{ if overlap, then point source and dest to the end }
|
||||
add r3,r3,r29
|
||||
add r4,r4,r29
|
||||
{ if overlap, then r29 := 0, else r29 := -1 }
|
||||
not r29,r30
|
||||
{ if overlap, then r30 := -2, else r30 := 0 }
|
||||
slwi r30,r30,1
|
||||
{ if overlap, then r30 := -1, else r30 := 1 }
|
||||
addi r30,r30,1
|
||||
{ if no overlap, then source/dest += -1, otherwise they stay }
|
||||
{ After the next instruction, r3/r4 + r30 = next position }
|
||||
{ to load/store from/to }
|
||||
add r3,r3,r29
|
||||
add r4,r4,r29
|
||||
|
||||
bt cr3,MoveEnd //Nothing left to do...
|
||||
mtspr 1,r16 //XER := r16
|
||||
beq .MoveBytes //There are fewer than 4 bytes left
|
||||
mtctr r15 //load counter
|
||||
andi r15,r15,$3 //r15 := r15 mod 4
|
||||
srawi r17,$1 //Offset := Offset div 2
|
||||
.MoveWordLoop: //4 byte copy
|
||||
lwzux r20,r18,r17
|
||||
stwux r20,r19,r17
|
||||
bdnz .WordCopyLoop
|
||||
{ if count < 11, copy everything byte by byte }
|
||||
blt cr1,LMoveBytes
|
||||
|
||||
bt cr2,MoveEnd //Nothing left to do...
|
||||
.MoveBytes: //Copy remaining stragglers
|
||||
lswx r20,r0,r18
|
||||
stswx r20,r0,r19
|
||||
.MoveEnd:
|
||||
End;
|
||||
End;
|
||||
{ otherwise, guarantee 4 byte alignment for dest for starters }
|
||||
LMove4ByteAlignLoop:
|
||||
lbzux r29,r3,r30
|
||||
stbux r29,r4,r30
|
||||
{ is dest now 4 aligned? }
|
||||
andi. r29,r4,3
|
||||
subi r5,r5,1
|
||||
{ while not aligned, continue }
|
||||
bne cr0,LMove4ByteAlignLoop
|
||||
|
||||
{ check for 8 byte alignment }
|
||||
andi. r29,r4,7
|
||||
{ we are going to copy one byte again (the one at the newly }
|
||||
{ aligned address), so increase count again }
|
||||
addi r5,r5,1
|
||||
{ count div 4 for number of dwords to copy }
|
||||
srwi r29,r5,2
|
||||
{ if 11 <= count < 39, copy using dwords }
|
||||
blt cr7,LMoveDWords
|
||||
|
||||
beq cr0,L8BytesAligned
|
||||
|
||||
{ count >= 39 -> align to 8 byte boundary and then use the FPU }
|
||||
{ since we're already at 4 byte alignment, use dword store }
|
||||
lwzux r29,r3,r30
|
||||
stwux r29,r4,r30
|
||||
L8BytesAligned:
|
||||
{ count div 32 ( >= 1, since count was >=39 ) }
|
||||
srwi r29,r5,5
|
||||
{ remainder }
|
||||
andi. r5,r5,31
|
||||
{ to decide if we will do some dword stores afterwards or not }
|
||||
cmpwi cr1,r5,11
|
||||
mtctr r29
|
||||
|
||||
{ r29 := count div 4, will be moved to ctr when copying dwords }
|
||||
srwi r29,r5,2
|
||||
|
||||
{ adjust the update count: it will now be 8 or -8 depending on overlap }
|
||||
slwi r30,r30,3
|
||||
|
||||
{ adjust source and dest pointers: because of the above loop, dest is now }
|
||||
{ aligned to 8 bytes. So if we subtract r30 we will still have an 8 bytes }
|
||||
{ aligned address }
|
||||
sub r3,r3,r30
|
||||
sub r4,r4,r30
|
||||
|
||||
LMove32ByteLoop:
|
||||
lfdux f31,r3,r30
|
||||
lfdux f30,r3,r30
|
||||
lfdux f29,r3,r30
|
||||
lfdux f28,r3,r30
|
||||
stfdux f31,r4,r30
|
||||
stfdux f30,r4,r30
|
||||
stfdux f29,r4,r30
|
||||
stfdux f28,r4,r30
|
||||
bdnz LMove32ByteLoop
|
||||
|
||||
{ cr0*4+eq is true if "count and 31" = 0 }
|
||||
beq cr0,LMoveDone
|
||||
|
||||
{ make r30 again -1 or 1, but first adjust source/dest pointers }
|
||||
add r3,r3,r30
|
||||
add r4,r4,r30
|
||||
srawi r30,r30,3
|
||||
sub r3,r3,r30
|
||||
sub r4,r4,r30
|
||||
|
||||
{ cr1 contains whether count <= 11 }
|
||||
ble cr1,LMoveBytes
|
||||
add r3,r3,r30
|
||||
add r4,r4,r30
|
||||
|
||||
LMoveDWords:
|
||||
mtctr r29
|
||||
andi. r5,r5,3
|
||||
{ r30 * 4 }
|
||||
slwi r30,r30,2
|
||||
sub r3,r3,r30
|
||||
sub r4,r4,r30
|
||||
|
||||
LMoveDWordsLoop:
|
||||
lwzux r29,r3,r30
|
||||
stwux r29,r4,r30
|
||||
bdnz LMoveDWordsLoop
|
||||
|
||||
beq cr0,LMoveDone
|
||||
{ make r30 again -1 or 1 }
|
||||
add r3,r3,r30
|
||||
add r4,r4,r30
|
||||
srawi r30,r30,2
|
||||
sub r3,r3,r30
|
||||
sub r4,r4,r30
|
||||
LMoveBytes:
|
||||
mtctr r5
|
||||
LMoveBytesLoop:
|
||||
lbzux r29,r3,r30
|
||||
stbux r29,r4,r30
|
||||
bdnz LMoveBytesLoop
|
||||
LMoveDone:
|
||||
end ['R3','R4','R5','R29','R30','F28','F29','F30','F31','CTR','CR0','CR1','CR7'];
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_FILLCHAR}
|
||||
@ -380,7 +448,11 @@ end ['r3','r4','r5','r29','r30','cr0','ctr'];
|
||||
|
||||
{
|
||||
$Log$
|
||||
Revision 1.2 2001-02-11 17:59:46 jonas
|
||||
Revision 1.3 2001-03-02 13:24:10 jonas
|
||||
+ new, complete implementation of move procedure (including support for
|
||||
overlapping regions)
|
||||
|
||||
Revision 1.2 2001/02/11 17:59:46 jonas
|
||||
* implemented several more procedures
|
||||
|
||||
Revision 1.1 2000/07/27 07:32:12 jonas
|
||||
|
Loading…
Reference in New Issue
Block a user