* fixed some bugs in move()

This commit is contained in:
Jonas Maebe 2003-05-02 19:03:25 +00:00
parent 5f71d48015
commit b11c927856

View File

@ -152,21 +152,22 @@ asm
{ if overlap, then point source and dest to the end } { if overlap, then point source and dest to the end }
add r3,r3,r0 add r3,r3,r0
add r4,r4,r0 add r4,r4,r0
{ if overlap, then r0 := 0, else r0 := -1 } { if overlap, then r6 := 0, else r6 := -1 }
not r0,r10 not r6,r10
{ if overlap, then r10 := -2, else r10 := 0 } { if overlap, then r10 := -2, else r10 := 0 }
slwi r10,r10,1 slwi r10,r10,1
{ if overlap, then r10 := -1, else r10 := 1 } { if overlap, then r10 := -1, else r10 := 1 }
addi r10,r10,1 addi r10,r10,1
{ if overlap, then source/dest += -1, otherwise they stay }
{ After the next instruction, r3/r4 + r10 = next position }
{ to load/store from/to }
add r3,r3,r0
add r4,r4,r0
{ if count < 15, copy everything byte by byte } { if count < 15, copy everything byte by byte }
blt cr1,LMoveBytes blt cr1,LMoveBytes
{ if no overlap, then source/dest += -1, otherwise they stay }
{ After the next instruction, r3/r4 + r10 = next position to }
{ load/store from/to }
add r3,r3,r6
add r4,r4,r6
{ otherwise, guarantee 4 byte alignment for dest for starters } { otherwise, guarantee 4 byte alignment for dest for starters }
LMove4ByteAlignLoop: LMove4ByteAlignLoop:
lbzux r0,r3,r10 lbzux r0,r3,r10
@ -189,6 +190,10 @@ LMove4ByteAlignLoop:
{ multiply the update count with 4 } { multiply the update count with 4 }
slwi r10,r10,2 slwi r10,r10,2
slwi r6,r6,2
{ and adapt the source and dest }
add r3,r3,r6
add r4,r4,r6
beq cr0,L8BytesAligned beq cr0,L8BytesAligned
@ -212,12 +217,13 @@ L8BytesAligned:
{ adjust the update count: it will now be 8 or -8 depending on overlap } { adjust the update count: it will now be 8 or -8 depending on overlap }
slwi r10,r10,1 slwi r10,r10,1
slwi r6,r6,1
{ adjust source and dest pointers: because of the above loop, dest is now } { adjust source and dest pointers: because of the above loop, dest is now }
{ aligned to 8 bytes. So if we substract r10 we will still have an 8 bytes } { aligned to 8 bytes. So if we add r6 we will still have an 8 bytes }
{ aligned address) } { aligned address) }
sub r3,r3,r10 add r3,r3,r6
sub r4,r4,r10 add r4,r4,r6
LMove32ByteLoop: LMove32ByteLoop:
lfdux f0,r3,r10 lfdux f0,r3,r10
@ -234,24 +240,22 @@ LMove32ByteLoop:
beq cr0,LMoveDone beq cr0,LMoveDone
{ make r10 again -1 or 1, but first adjust source/dest pointers } { make r10 again -1 or 1, but first adjust source/dest pointers }
add r3,r3,r10 sub r3,r3,r6
add r4,r4,r10 sub r4,r4,r6
srawi r10,r10,3 srawi r10,r10,3
sub r3,r3,r10 srawi r6,r6,3
sub r4,r4,r10
{ cr1 contains whether count <= 11 } { cr1 contains whether count <= 11 }
ble cr1,LMoveBytes ble cr1,LMoveBytes
add r3,r3,r10
add r4,r4,r10
LMoveDWords: LMoveDWords:
mtctr r0 mtctr r0
andi. r5,r5,3 andi. r5,r5,3
{ r10 * 4 } { r10 * 4 }
slwi r10,r10,2 slwi r10,r10,2
sub r3,r3,r10 slwi r6,r6,2
sub r4,r4,r10 add r3,r3,r6
add r4,r4,r6
LMoveDWordsLoop: LMoveDWordsLoop:
lwzux r0,r3,r10 lwzux r0,r3,r10
@ -260,12 +264,13 @@ LMoveDWordsLoop:
beq cr0,LMoveDone beq cr0,LMoveDone
{ make r10 again -1 or 1 } { make r10 again -1 or 1 }
add r3,r3,r10 sub r3,r3,r6
add r4,r4,r10 sub r4,r4,r6
srawi r10,r10,2 srawi r10,r10,2
sub r3,r3,r10 srawi r6,r6,2
sub r4,r4,r10
LMoveBytes: LMoveBytes:
add r3,r3,r6
add r4,r4,r6
mtctr r5 mtctr r5
LMoveBytesLoop: LMoveBytesLoop:
lbzux r0,r3,r10 lbzux r0,r3,r10
@ -881,7 +886,10 @@ end ['R3','R10'];
{ {
$Log$ $Log$
Revision 1.38 2003-04-27 16:24:44 jonas Revision 1.39 2003-05-02 19:03:25 jonas
* fixed some bugs in move()
Revision 1.38 2003/04/27 16:24:44 jonas
- disabled fpc_shortstr_concat because it's called differently than that - disabled fpc_shortstr_concat because it's called differently than that
routine is declared routine is declared