{
  Copyright (c) 2004, John O'Harrow (john@almcrest.demon.co.uk)

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the
use of this software.

Permission is granted to anyone to use this software for any purpose, including
commercial applications, and to alter it and redistribute it freely, subject to
the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim
   that you wrote the original software. If you use this software in a product,
   an acknowledgment in the product documentation would be appreciated but is
   not required.

2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.

-------------------------------------------------------------------------------

Version: 1.40 - 16-SEP-2004
}

{ ALTERED SOURCE VERSION - this is not the original FastMove software.
  Visible alterations in this file: conditional compilation wrappers,
  simple fallback copy routines used when running under valgrind, run-time
  selection of the copy routines through procedure pointers, and unsigned
  (instead of signed) address comparisons in Move. }

{$ifdef USE_FASTMOVE}

{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}

{$asmmode intel}

{-------------------------------------------------------------------------}
(*
{Just to show that a good Pascal algorithm can beat the default BASM}
procedure MoveJOH_PAS_3(const Source; var Dest; Count : Integer);
var
  S, D       : PtrUInt;
  Temp, C, I : PtrInt;
  L          : PPtrInt;
begin
  S := Cardinal(@Source);
  D := Cardinal(@Dest);
  if S = D then
    Exit;
  if Count <= 4 then
    case Count of
      1 : PByte(@Dest)^ := PByte(S)^;
      2 : PWord(@Dest)^ := PWord(S)^;
      3 : if D > S then
            begin
              PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
              PWord(@Dest)^ := PWord(S)^;
            end
          else
            begin
              PWord(@Dest)^ := PWord(S)^;
              PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
            end;
      4 : PInteger(@Dest)^ := PInteger(S)^
      else Exit; {Count <= 0}
    end
  else
    if D > S then
      begin
        Temp := PInteger(S)^;
        I := Integer(@Dest);
        C := Count - 4;
        L := PInteger(Integer(@Dest) + C);
        Inc(S, C);
        repeat
          L^ := PInteger(S)^;
          if Count <= 8 then
            Break;
          Dec(Count, 4);
          Dec(S, 4);
          Dec(L);
        until False;
        PInteger(I)^ := Temp;
      end
    else
      begin
        C := Count - 4;
        Temp := PInteger(S + Cardinal(C))^;
        I := Integer(@Dest) + C;
        L := @Dest;
        repeat
          L^ := PInteger(S)^;
          if Count <= 8 then
            Break;
          Dec(Count, 4);
          Inc(S, 4);
          Inc(L);
        until False;
        PInteger(I)^ := Temp;
      end;
end; {MoveJOH_PAS}
*)

const
  { Largest byte count handled directly by the jump-table routines
    SmallForwardMove_3 / SmallBackwardMove_3 (their tables have entries
    for 0..36). }
  SMALLMOVESIZE = 36;

{-------------------------------------------------------------------------}
{Perform Forward Move of 0..36 Bytes}
{On Entry, ECX = Count, EAX = Source+Count, EDX = Dest+Count.  Destroys ECX}
{ Dispatches through a jump table indexed by ECX; each entry point copies
  from the lowest address upwards using negative offsets from the end
  pointers, falling through a chain of 4-byte copies.  Every chunk is loaded
  into ECX before it is stored.  ECX must be within 0..36: the table has no
  further entries and the index is not range checked here. }
procedure SmallForwardMove_3;assembler;nostackframe;
asm
  jmp     dword ptr @@FwdJumpTable[ecx*4]
  align   16
@@FwdJumpTable:
  dd      @@Done {Removes need to test for zero size move}
  dd      @@Fwd01,@@Fwd02,@@Fwd03,@@Fwd04,@@Fwd05,@@Fwd06,@@Fwd07,@@Fwd08
  dd      @@Fwd09,@@Fwd10,@@Fwd11,@@Fwd12,@@Fwd13,@@Fwd14,@@Fwd15,@@Fwd16
  dd      @@Fwd17,@@Fwd18,@@Fwd19,@@Fwd20,@@Fwd21,@@Fwd22,@@Fwd23,@@Fwd24
  dd      @@Fwd25,@@Fwd26,@@Fwd27,@@Fwd28,@@Fwd29,@@Fwd30,@@Fwd31,@@Fwd32
  dd      @@Fwd33,@@Fwd34,@@Fwd35,@@Fwd36
  {Counts that are a multiple of 4: dword copies only}
@@Fwd36:
  mov     ecx,[eax-36]
  mov     [edx-36],ecx
@@Fwd32:
  mov     ecx,[eax-32]
  mov     [edx-32],ecx
@@Fwd28:
  mov     ecx,[eax-28]
  mov     [edx-28],ecx
@@Fwd24:
  mov     ecx,[eax-24]
  mov     [edx-24],ecx
@@Fwd20:
  mov     ecx,[eax-20]
  mov     [edx-20],ecx
@@Fwd16:
  mov     ecx,[eax-16]
  mov     [edx-16],ecx
@@Fwd12:
  mov     ecx,[eax-12]
  mov     [edx-12],ecx
@@Fwd08:
  mov     ecx,[eax-8]
  mov     [edx-8],ecx
@@Fwd04:
  mov     ecx,[eax-4]
  mov     [edx-4],ecx
  ret
  {Counts of the form 4n+3 (n >= 1): the final dword overlaps the previous one by one byte}
@@Fwd35:
  mov     ecx,[eax-35]
  mov     [edx-35],ecx
@@Fwd31:
  mov     ecx,[eax-31]
  mov     [edx-31],ecx
@@Fwd27:
  mov     ecx,[eax-27]
  mov     [edx-27],ecx
@@Fwd23:
  mov     ecx,[eax-23]
  mov     [edx-23],ecx
@@Fwd19:
  mov     ecx,[eax-19]
  mov     [edx-19],ecx
@@Fwd15:
  mov     ecx,[eax-15]
  mov     [edx-15],ecx
@@Fwd11:
  mov     ecx,[eax-11]
  mov     [edx-11],ecx
@@Fwd07:
  mov     ecx,[eax-7]
  mov     [edx-7],ecx
  mov     ecx,[eax-4]
  mov     [edx-4],ecx
  ret
  {Exactly 3 bytes: one word followed by one byte}
@@Fwd03:
  movzx   ecx, word ptr [eax-3]
  mov     [edx-3],cx
  movzx   ecx, byte ptr [eax-1]
  mov     [edx-1],cl
  ret
  {Counts of the form 4n+2: dwords followed by a trailing word}
@@Fwd34:
  mov     ecx,[eax-34]
  mov     [edx-34],ecx
@@Fwd30:
  mov     ecx,[eax-30]
  mov     [edx-30],ecx
@@Fwd26:
  mov     ecx,[eax-26]
  mov     [edx-26],ecx
@@Fwd22:
  mov     ecx,[eax-22]
  mov     [edx-22],ecx
@@Fwd18:
  mov     ecx,[eax-18]
  mov     [edx-18],ecx
@@Fwd14:
  mov     ecx,[eax-14]
  mov     [edx-14],ecx
@@Fwd10:
  mov     ecx,[eax-10]
  mov     [edx-10],ecx
@@Fwd06:
  mov     ecx,[eax-6]
  mov     [edx-6],ecx
@@Fwd02:
  movzx   ecx, word ptr [eax-2]
  mov     [edx-2],cx
  ret
  {Counts of the form 4n+1: dwords followed by a trailing byte}
@@Fwd33:
  mov     ecx,[eax-33]
  mov     [edx-33],ecx
@@Fwd29:
  mov     ecx,[eax-29]
  mov     [edx-29],ecx
@@Fwd25:
  mov     ecx,[eax-25]
  mov     [edx-25],ecx
@@Fwd21:
  mov     ecx,[eax-21]
  mov     [edx-21],ecx
@@Fwd17:
  mov     ecx,[eax-17]
  mov     [edx-17],ecx
@@Fwd13:
  mov     ecx,[eax-13]
  mov     [edx-13],ecx
@@Fwd09:
  mov     ecx,[eax-9]
  mov     [edx-9],ecx
@@Fwd05:
  mov     ecx,[eax-5]
  mov     [edx-5],ecx
@@Fwd01:
  movzx   ecx, byte ptr [eax-1]
  mov     [edx-1],cl
@@Done:
end; {SmallForwardMove}

{-------------------------------------------------------------------------}
{Perform Backward Move of 0..36 Bytes}
{On Entry, ECX = Count, EAX = Source, EDX = Dest.  Destroys ECX}
{ Mirror image of SmallForwardMove_3: dispatches through a jump table
  indexed by ECX and copies from the highest address downwards using
  non-negative offsets from the start pointers.  ECX must be within 0..36;
  the index is not range checked here. }
procedure SmallBackwardMove_3;assembler;nostackframe;
asm
  jmp     dword ptr @@BwdJumpTable[ecx*4]
  align   16
@@BwdJumpTable:
  dd      @@Done {Removes need to test for zero size move}
  dd      @@Bwd01,@@Bwd02,@@Bwd03,@@Bwd04,@@Bwd05,@@Bwd06,@@Bwd07,@@Bwd08
  dd      @@Bwd09,@@Bwd10,@@Bwd11,@@Bwd12,@@Bwd13,@@Bwd14,@@Bwd15,@@Bwd16
  dd      @@Bwd17,@@Bwd18,@@Bwd19,@@Bwd20,@@Bwd21,@@Bwd22,@@Bwd23,@@Bwd24
  dd      @@Bwd25,@@Bwd26,@@Bwd27,@@Bwd28,@@Bwd29,@@Bwd30,@@Bwd31,@@Bwd32
  dd      @@Bwd33,@@Bwd34,@@Bwd35,@@Bwd36
  {Counts that are a multiple of 4: dword copies only}
@@Bwd36:
  mov     ecx,[eax+32]
  mov     [edx+32],ecx
@@Bwd32:
  mov     ecx,[eax+28]
  mov     [edx+28],ecx
@@Bwd28:
  mov     ecx,[eax+24]
  mov     [edx+24],ecx
@@Bwd24:
  mov     ecx,[eax+20]
  mov     [edx+20],ecx
@@Bwd20:
  mov     ecx,[eax+16]
  mov     [edx+16],ecx
@@Bwd16:
  mov     ecx,[eax+12]
  mov     [edx+12],ecx
@@Bwd12:
  mov     ecx,[eax+8]
  mov     [edx+8],ecx
@@Bwd08:
  mov     ecx,[eax+4]
  mov     [edx+4],ecx
@@Bwd04:
  mov     ecx,[eax]
  mov     [edx],ecx
  ret
  {Counts of the form 4n+3 (n >= 1): the final dword overlaps the previous one by one byte}
@@Bwd35:
  mov     ecx,[eax+31]
  mov     [edx+31],ecx
@@Bwd31:
  mov     ecx,[eax+27]
  mov     [edx+27],ecx
@@Bwd27:
  mov     ecx,[eax+23]
  mov     [edx+23],ecx
@@Bwd23:
  mov     ecx,[eax+19]
  mov     [edx+19],ecx
@@Bwd19:
  mov     ecx,[eax+15]
  mov     [edx+15],ecx
@@Bwd15:
  mov     ecx,[eax+11]
  mov     [edx+11],ecx
@@Bwd11:
  mov     ecx,[eax+7]
  mov     [edx+7],ecx
@@Bwd07:
  mov     ecx,[eax+3]
  mov     [edx+3],ecx
  mov     ecx,[eax]
  mov     [edx],ecx
  ret
  {Exactly 3 bytes: upper word first, then the first byte}
@@Bwd03:
  movzx   ecx, word ptr [eax+1]
  mov     [edx+1],cx
  movzx   ecx, byte ptr [eax]
  mov     [edx],cl
  ret
  {Counts of the form 4n+2: dwords followed by the leading word}
@@Bwd34:
  mov     ecx,[eax+30]
  mov     [edx+30],ecx
@@Bwd30:
  mov     ecx,[eax+26]
  mov     [edx+26],ecx
@@Bwd26:
  mov     ecx,[eax+22]
  mov     [edx+22],ecx
@@Bwd22:
  mov     ecx,[eax+18]
  mov     [edx+18],ecx
@@Bwd18:
  mov     ecx,[eax+14]
  mov     [edx+14],ecx
@@Bwd14:
  mov     ecx,[eax+10]
  mov     [edx+10],ecx
@@Bwd10:
  mov     ecx,[eax+6]
  mov     [edx+6],ecx
@@Bwd06:
  mov     ecx,[eax+2]
  mov     [edx+2],ecx
@@Bwd02:
  movzx   ecx, word ptr [eax]
  mov     [edx],cx
  ret
  {Counts of the form 4n+1: dwords followed by the leading byte}
@@Bwd33:
  mov     ecx,[eax+29]
  mov     [edx+29],ecx
@@Bwd29:
  mov     ecx,[eax+25]
  mov     [edx+25],ecx
@@Bwd25:
  mov     ecx,[eax+21]
  mov     [edx+21],ecx
@@Bwd21:
  mov     ecx,[eax+17]
  mov     [edx+17],ecx
@@Bwd17:
  mov     ecx,[eax+13]
  mov     [edx+13],ecx
@@Bwd13:
  mov     ecx,[eax+9]
  mov     [edx+9],ecx
@@Bwd09:
  mov     ecx,[eax+5]
  mov     [edx+5],ecx
@@Bwd05:
  mov     ecx,[eax+1]
  mov     [edx+1],ecx
@@Bwd01:
  movzx   ecx, byte ptr [eax]
  mov     [edx],cl
@@Done:
end; {SmallBackwardMove}


{ At least valgrind up to 3.3 has a bug which prevents the default code from
  working, so we use a rather simple implementation here. }
{ Forward byte copy: moves ECX bytes from EAX to EDX with rep movsb.
  ESI and EDI are saved and restored. }
procedure Forwards_Valgrind;assembler;nostackframe;
asm
{$ifdef FPC_ENABLED_CLD}
  cld
{$endif FPC_ENABLED_CLD}
  push    esi
  push    edi
  mov     esi,eax
  mov     edi,edx
  rep     movsb
  pop     edi
  pop     esi
end;

{ At least valgrind up to 3.3 has a bug which prevents the default code from
  working, so we use a rather simple implementation here. }
{ Backward byte copy: moves ECX bytes from EAX to EDX, starting at the last
  byte and working down.  The loop decrements ECX before testing it, so ECX
  must be non-zero on entry; Move only dispatches here for counts above
  SMALLMOVESIZE.  Destroys AL; ESI and EDI are saved and restored. }
procedure Backwards_Valgrind;assembler;nostackframe;
asm
  push    esi
  push    edi
  lea     esi,[eax+ecx-1]
  lea     edi,[edx+ecx-1]
@@repeat:
  mov     al,[esi]
  mov     [edi],al
  dec     esi
  dec     edi
  dec     ecx
  jnz     @@repeat
  pop     edi
  pop     esi
end;

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
{ Move also selects the forward routines when EAX < EDX and the regions do
  not overlap.  The first 8 source bytes are loaded up front and stored last,
  which covers the unaligned head of the destination; the body is copied
  16 bytes per iteration with x87 fild/fistp to QWORD aligned destination
  addresses, and the remaining 0..15 tail bytes are passed on to
  SmallForwardMove_3.  EBX is saved and restored. }
procedure Forwards_IA32_3;assembler;nostackframe;
asm
  push    ebx
  mov     ebx,edx                 {EBX = Original Dest}
  fild    qword ptr [eax]         {First 8 Bytes, stored after the loop}
  add     eax,ecx {QWORD Align Writes}
  add     ecx,edx
  add     edx,7
  and     edx,-8
  sub     ecx,edx
  add     edx,ecx {Now QWORD Aligned}
  sub     ecx,16
  neg     ecx                     {ECX = negative offset from the end pointers}
@FwdLoop:
  fild    qword ptr [eax+ecx-16]
  fistp   qword ptr [edx+ecx-16]
  fild    qword ptr [eax+ecx-8]
  fistp   qword ptr [edx+ecx-8]
  add     ecx,16
  jle     @FwdLoop
  fistp   qword ptr [ebx]         {First 8 Bytes}
  neg     ecx
  add     ecx,16                  {ECX = Remaining Bytes (0..15)}
  pop     ebx
  jmp     SmallForwardMove_3
end; {Forwards_IA32}

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
{ The last 8 source bytes are loaded up front and stored last, which covers
  the unaligned end of the destination; the body is copied 16 bytes per
  iteration from the top down with x87 fild/fistp, and the remaining 0..15
  leading bytes are passed on to SmallBackwardMove_3.  EBX is saved and
  restored. }
procedure Backwards_IA32_3;assembler;nostackframe;
asm
  push    ebx
  fild    qword ptr [eax+ecx-8]   {Last 8 Bytes, stored after the loop}
  lea     ebx,[edx+ecx] {QWORD Align Writes}
  and     ebx,7
  sub     ecx,ebx
  add     ebx,ecx {Now QWORD Aligned, EBX = Original Length}
  sub     ecx,16
@BwdLoop:
  fild    qword ptr [eax+ecx]
  fild    qword ptr [eax+ecx+8]
  fistp   qword ptr [edx+ecx+8]   {Stores pop in reverse order of the loads}
  fistp   qword ptr [edx+ecx]
  sub     ecx,16
  jge     @BwdLoop
  fistp   qword ptr [edx+ebx-8]   {Last 8 Bytes}
  add     ecx,16                  {ECX = Remaining Bytes (0..15)}
  pop     ebx
  jmp     SmallBackwardMove_3
end; {Backwards_IA32}

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
{ Move also selects the forward routines when EAX < EDX and the regions do
  not overlap.  Three size classes:
    ECX < 72          - handed to Forwards_IA32_3
    72 <= ECX < 1024  - 32 bytes per iteration through MMX registers with
                        QWORD aligned writes, tail via SmallForwardMove_3
    ECX >= 1024       - destination is first aligned to 16 bytes, then
                        64 byte blocks through MMX registers, remainder via
                        rep movsd / rep movsb
  EBX, ESI and EDI are saved and restored where used. }
procedure Forwards_MMX_3;assembler;nostackframe;
const
  LARGESIZE = 1024;
asm
  cmp     ecx,LARGESIZE
  jge     @FwdLargeMove
  cmp     ecx,72 {Size at which using MMX becomes worthwhile}
  jl      Forwards_IA32_3
  push    ebx
  mov     ebx,edx                 {EBX = Original Dest}
  movq    mm0,[eax] {First 8 Characters}
  {QWORD Align Writes}
  add     eax,ecx
  add     ecx,edx
  add     edx,7
  and     edx,-8
  sub     ecx,edx
  add     edx,ecx
  {Now QWORD Aligned}
  sub     ecx,32
  neg     ecx                     {ECX = negative offset from the end pointers}
@FwdLoopMMX:
  movq    mm1,[eax+ecx-32]
  movq    mm2,[eax+ecx-24]
  movq    mm3,[eax+ecx-16]
  movq    mm4,[eax+ecx- 8]
  movq    [edx+ecx-32],mm1
  movq    [edx+ecx-24],mm2
  movq    [edx+ecx-16],mm3
  movq    [edx+ecx- 8],mm4
  add     ecx,32
  jle     @FwdLoopMMX
  movq    [ebx],mm0 {First 8 Characters}
  emms
  pop     ebx
  neg     ecx
  add     ecx,32                  {ECX = Remaining Bytes (0..31)}
  jmp     SmallForwardMove_3
@FwdLargeMove:
  push    ebx
  mov     ebx,ecx                 {EBX = Count}
  test    edx,15
  jz      @FwdAligned
  {16 byte Align Destination}
  mov     ecx,edx
  add     ecx,15
  and     ecx,-16
  sub     ecx,edx                 {ECX = Bytes needed to align Dest (1..15)}
  add     eax,ecx
  add     edx,ecx
  sub     ebx,ecx
  {Destination now 16 Byte Aligned}
  call    SmallForwardMove_3      {Copies the ECX bytes below EAX/EDX}
@FwdAligned:
  mov     ecx,ebx
  and     ecx,-16
  sub     ebx,ecx {EBX = Remainder}
  push    esi
  push    edi
  mov     esi,eax          {ESI = Source}
  mov     edi,edx          {EDI = Dest}
  mov     eax,ecx          {EAX = Count}
  and     eax,-64          {EAX = No of Bytes to Blocks Moves}
  and     ecx,$3F          {ECX = Remaining Bytes to Move (0..63)}
  add     esi,eax
  add     edi,eax
  shr     eax,3            {EAX = No of QWORD's to Block Move}
  neg     eax
@MMXcopyloop:
  movq    mm0,[esi+eax*8   ]
  movq    mm1,[esi+eax*8+ 8]
  movq    mm2,[esi+eax*8+16]
  movq    mm3,[esi+eax*8+24]
  movq    mm4,[esi+eax*8+32]
  movq    mm5,[esi+eax*8+40]
  movq    mm6,[esi+eax*8+48]
  movq    mm7,[esi+eax*8+56]
  movq    [edi+eax*8   ],mm0
  movq    [edi+eax*8+ 8],mm1
  movq    [edi+eax*8+16],mm2
  movq    [edi+eax*8+24],mm3
  movq    [edi+eax*8+32],mm4
  movq    [edi+eax*8+40],mm5
  movq    [edi+eax*8+48],mm6
  movq    [edi+eax*8+56],mm7
  add     eax,8
  jnz     @MMXcopyloop
  emms                   {Empty MMX State}
{$ifdef FPC_ENABLED_CLD}
  cld
{$endif FPC_ENABLED_CLD}
  add     ecx,ebx          {ECX = All Bytes still to Move}
  shr     ecx,2
  rep     movsd
  mov     ecx,ebx
  and     ecx,3
  rep     movsb
  pop     edi
  pop     esi
  pop     ebx
end; {Forwards_MMX}

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
{ Counts below 72 are handed to Backwards_IA32_3.  Otherwise the last 8
  source bytes are loaded up front and stored last, the body is copied
  32 bytes per iteration from the top down through MMX registers with QWORD
  aligned writes, and the remaining 0..31 leading bytes are passed on to
  SmallBackwardMove_3.  EBX is saved and restored. }
procedure Backwards_MMX_3;assembler;nostackframe;
asm
  cmp     ecx,72 {Size at which using MMX becomes worthwhile}
  jl      Backwards_IA32_3
  push    ebx
  movq    mm0,[eax+ecx-8] {Get Last QWORD}
  {QWORD Align Writes}
  lea     ebx,[edx+ecx]
  and     ebx,7
  sub     ecx,ebx
  add     ebx,ecx                 {EBX = Original Length}
  {Now QWORD Aligned}
  sub     ecx,32
@BwdLoopMMX:
  movq    mm1,[eax+ecx   ]
  movq    mm2,[eax+ecx+ 8]
  movq    mm3,[eax+ecx+16]
  movq    mm4,[eax+ecx+24]
  movq    [edx+ecx+24],mm4
  movq    [edx+ecx+16],mm3
  movq    [edx+ecx+ 8],mm2
  movq    [edx+ecx   ],mm1
  sub     ecx,32
  jge     @BwdLoopMMX
  movq    [edx+ebx-8], mm0 {Last QWORD}
  emms
  add     ecx,32                  {ECX = Remaining Bytes (0..31)}
  pop     ebx
  jmp     SmallBackwardMove_3
end; {Backwards_MMX}

{$ifndef FASTMOVE_DISABLE_SSE3}
{-------------------------------------------------------------------------}
{Dest MUST be 16-Byte Aligned, Count MUST be multiple of 16 }
{ On entry EAX = Source, EDX = Dest, ECX = Count.  Copies 128 byte blocks
  through XMM0..XMM7, then the remaining 0..112 bytes 16 at a time.  Aligned
  loads are used when the source is 16 byte aligned as well, unaligned loads
  otherwise.  Above 256K the block loops prefetch the source and use
  non-temporal stores followed by sfence.
  NOTE(review): each block loop tests its counter after the body, so one
  128 byte block is copied even when Count < 128.  The only caller visible
  in this file, Forwards_SSE_3, passes at least 2032 bytes.
  Destroys EAX, ECX and EDX; ESI is saved and restored. }
procedure AlignedFwdMoveSSE_3(const Source; var Dest; Count: Integer);assembler;nostackframe;
const
  Prefetch = 512;
asm
  push    esi
  mov     esi,eax             {ESI = Source}
  mov     eax,ecx             {EAX = Count}
  and     eax,-128            {EAX = No of Bytes to Block Move}
  add     esi,eax
  add     edx,eax
  shr     eax,3               {EAX = No of QWORD's to Block Move}
  neg     eax
  cmp     eax, -(32*1024)     {Count > 256K}
  jl      @Large
@Small: {Count<=256K}
  test    esi,15              {Check if Both Source/Dest Aligned}
  jnz     @SmallUnaligned
@SmallAligned: {Both Source and Dest 16-Byte Aligned}
@SmallAlignedLoop:
  movaps  xmm0,[esi+8*eax]
  movaps  xmm1,[esi+8*eax+16]
  movaps  xmm2,[esi+8*eax+32]
  movaps  xmm3,[esi+8*eax+48]
  movaps  [edx+8*eax],xmm0
  movaps  [edx+8*eax+16],xmm1
  movaps  [edx+8*eax+32],xmm2
  movaps  [edx+8*eax+48],xmm3
  movaps  xmm4,[esi+8*eax+64]
  movaps  xmm5,[esi+8*eax+80]
  movaps  xmm6,[esi+8*eax+96]
  movaps  xmm7,[esi+8*eax+112]
  movaps  [edx+8*eax+64],xmm4
  movaps  [edx+8*eax+80],xmm5
  movaps  [edx+8*eax+96],xmm6
  movaps  [edx+8*eax+112],xmm7
  add     eax,16
  js      @SmallAlignedLoop
  jmp     @Remainder
@SmallUnaligned: {Source Not 16-Byte Aligned}
@SmallUnalignedLoop:
  movups  xmm0,[esi+8*eax]
  movups  xmm1,[esi+8*eax+16]
  movups  xmm2,[esi+8*eax+32]
  movups  xmm3,[esi+8*eax+48]
  movaps  [edx+8*eax],xmm0
  movaps  [edx+8*eax+16],xmm1
  movaps  [edx+8*eax+32],xmm2
  movaps  [edx+8*eax+48],xmm3
  movups  xmm4,[esi+8*eax+64]
  movups  xmm5,[esi+8*eax+80]
  movups  xmm6,[esi+8*eax+96]
  movups  xmm7,[esi+8*eax+112]
  movaps  [edx+8*eax+64],xmm4
  movaps  [edx+8*eax+80],xmm5
  movaps  [edx+8*eax+96],xmm6
  movaps  [edx+8*eax+112],xmm7
  add     eax,16
  js      @SmallUnalignedLoop
  jmp     @Remainder
@Large: {Count>256K}
  test    esi,15              {Check if Both Source/Dest Aligned}
  jnz     @LargeUnaligned
@LargeAligned: {Both Source and Dest 16-Byte Aligned}
@LargeAlignedLoop:
  prefetchnta  [esi+8*eax+Prefetch]
  prefetchnta  [esi+8*eax+Prefetch+64]
  movaps  xmm0,[esi+8*eax]
  movaps  xmm1,[esi+8*eax+16]
  movaps  xmm2,[esi+8*eax+32]
  movaps  xmm3,[esi+8*eax+48]
  movntps [edx+8*eax],xmm0
  movntps [edx+8*eax+16],xmm1
  movntps [edx+8*eax+32],xmm2
  movntps [edx+8*eax+48],xmm3
  movaps  xmm4,[esi+8*eax+64]
  movaps  xmm5,[esi+8*eax+80]
  movaps  xmm6,[esi+8*eax+96]
  movaps  xmm7,[esi+8*eax+112]
  movntps [edx+8*eax+64],xmm4
  movntps [edx+8*eax+80],xmm5
  movntps [edx+8*eax+96],xmm6
  movntps [edx+8*eax+112],xmm7
  add     eax,16
  js      @LargeAlignedLoop
  sfence
  jmp     @Remainder
@LargeUnaligned: {Source Not 16-Byte Aligned}
@LargeUnalignedLoop:
  prefetchnta  [esi+8*eax+Prefetch]
  prefetchnta  [esi+8*eax+Prefetch+64]
  movups  xmm0,[esi+8*eax]
  movups  xmm1,[esi+8*eax+16]
  movups  xmm2,[esi+8*eax+32]
  movups  xmm3,[esi+8*eax+48]
  movntps [edx+8*eax],xmm0
  movntps [edx+8*eax+16],xmm1
  movntps [edx+8*eax+32],xmm2
  movntps [edx+8*eax+48],xmm3
  movups  xmm4,[esi+8*eax+64]
  movups  xmm5,[esi+8*eax+80]
  movups  xmm6,[esi+8*eax+96]
  movups  xmm7,[esi+8*eax+112]
  movntps [edx+8*eax+64],xmm4
  movntps [edx+8*eax+80],xmm5
  movntps [edx+8*eax+96],xmm6
  movntps [edx+8*eax+112],xmm7
  add     eax,16
  js      @LargeUnalignedLoop
  sfence
@Remainder:
  and     ecx,$7F {ECX = Remainder (0..112 - Multiple of 16)}
  jz      @Done
  add     esi,ecx
  add     edx,ecx
  neg     ecx
@RemainderLoop:
  movups  xmm0,[esi+ecx]
  movaps  [edx+ecx],xmm0
  add     ecx,16
  jnz     @RemainderLoop
@Done:
  pop     esi
end; {AlignedFwdMoveSSE}

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
{ Move also selects the forward routines when EAX < EDX and the regions do
  not overlap.  Three size classes:
    ECX <= 68         - first 32 bytes through XMM0/XMM1, the rest via
                        SmallForwardMove_3
    68 < ECX < 2048   - 32 bytes per iteration with 16 byte aligned writes;
                        the first 16 bytes are loaded up front and stored
                        last; tail via SmallForwardMove_3
    ECX >= 2048       - destination is first aligned to 16 bytes, the bulk
                        goes through AlignedFwdMoveSSE_3 and the final
                        0..15 bytes through SmallForwardMove_3
  EBX is saved and restored where used. }
procedure Forwards_SSE_3;assembler;nostackframe;
const
  LARGESIZE = 2048;
asm
  cmp     ecx,LARGESIZE
  jge     @FwdLargeMove
  cmp     ecx,SMALLMOVESIZE+32
  movups  xmm0,[eax]              {First 16 Bytes; movups leaves the flags of cmp intact}
  jg      @FwdMoveSSE
  movups  xmm1,[eax+16]
  movups  [edx],xmm0
  movups  [edx+16],xmm1
  add     eax,ecx
  add     edx,ecx
  sub     ecx,32                  {ECX = Remaining Bytes (5..36)}
  jmp     SmallForwardMove_3
@FwdMoveSSE:
  push    ebx
  mov     ebx,edx                 {EBX = Original Dest}
  {Align Writes}
  add     eax,ecx
  add     ecx,edx
  add     edx,15
  and     edx,-16
  sub     ecx,edx
  add     edx,ecx
  {Now Aligned}
  sub     ecx,32
  neg     ecx                     {ECX = negative offset from the end pointers}
@FwdLoopSSE:
  movups  xmm1,[eax+ecx-32]
  movups  xmm2,[eax+ecx-16]
  movaps  [edx+ecx-32],xmm1
  movaps  [edx+ecx-16],xmm2
  add     ecx,32
  jle     @FwdLoopSSE
  movups  [ebx],xmm0 {First 16 Bytes}
  neg     ecx
  add     ecx,32                  {ECX = Remaining Bytes (0..31)}
  pop     ebx
  jmp     SmallForwardMove_3
@FwdLargeMove:
  push    ebx
  mov     ebx,ecx                 {EBX = Count}
  test    edx,15
  jz      @FwdLargeAligned
  {16 byte Align Destination}
  mov     ecx,edx
  add     ecx,15
  and     ecx,-16
  sub     ecx,edx                 {ECX = Bytes needed to align Dest (1..15)}
  add     eax,ecx
  add     edx,ecx
  sub     ebx,ecx
  {Destination now 16 Byte Aligned}
  call    SmallForwardMove_3      {Copies the ECX bytes below EAX/EDX, destroys ECX}
  mov     ecx,ebx
@FwdLargeAligned:
  and     ecx,-16
  sub     ebx,ecx {EBX = Remainder}
  {AlignedFwdMoveSSE_3 destroys EAX, ECX and EDX, so keep copies on the stack}
  push    edx
  push    eax
  push    ecx
  call    AlignedFwdMoveSSE_3
  pop     ecx
  pop     eax
  pop     edx
  add     ecx,ebx                 {ECX = Bytes from aligned start to the end}
  add     eax,ecx                 {EAX = End of Source}
  add     edx,ecx                 {EDX = End of Dest}
  mov     ecx,ebx                 {ECX = Remaining Bytes (0..15)}
  pop     ebx
  jmp     SmallForwardMove_3
end; {Forwards_SSE}

{-------------------------------------------------------------------------}
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
{ Counts up to 68 copy the last 32 bytes through XMM1/XMM2 and pass the
  leading ECX-32 bytes to SmallBackwardMove_3.  Larger counts load the last
  16 bytes up front and store them last, copy 32 bytes per iteration from
  the top down with 16 byte aligned writes, and pass the remaining 0..31
  leading bytes to SmallBackwardMove_3.  EBX is saved and restored where
  used. }
procedure Backwards_SSE_3;assembler;nostackframe;
asm
  cmp     ecx,SMALLMOVESIZE+32
  jg      @BwdMoveSSE
  sub     ecx,32                  {ECX = Remaining Bytes (5..36)}
  movups  xmm1,[eax+ecx]
  movups  xmm2,[eax+ecx+16]
  movups  [edx+ecx],xmm1
  movups  [edx+ecx+16],xmm2
  jmp     SmallBackwardMove_3
@BwdMoveSSE:
  push    ebx
  movups  xmm0,[eax+ecx-16] {Last 16 Bytes}
  {Align Writes}
  lea     ebx,[edx+ecx]
  and     ebx,15
  sub     ecx,ebx
  add     ebx,ecx                 {EBX = Original Length}
  {Now Aligned}
  sub     ecx,32
@BwdLoop:
  movups  xmm1,[eax+ecx]
  movups  xmm2,[eax+ecx+16]
  movaps  [edx+ecx],xmm1
  movaps  [edx+ecx+16],xmm2
  sub     ecx,32
  jge     @BwdLoop
  movups  [edx+ebx-16],xmm0  {Last 16 Bytes}
  add     ecx,32                  {ECX = Remaining Bytes (0..31)}
  pop     ebx
  jmp     SmallBackwardMove_3
end; {Backwards_SSE}
{$endif ndef FASTMOVE_DISABLE_SSE3}

const
  { Routines Move jumps to for counts above SMALLMOVESIZE.  They default to
    the plain IA32 versions and are reassigned by setup_fastmove. }
  fastmoveproc_forward : pointer = @Forwards_IA32_3;
  fastmoveproc_backward : pointer = @Backwards_IA32_3;

{ Copies count bytes from source to dest; the regions may overlap.
  On entry EAX = address of source, EDX = address of dest, ECX = count.
    count <= 0       - nothing is copied
    source = dest    - nothing is copied
    count <= 36      - SmallForwardMove_3 when source is above dest,
                       SmallBackwardMove_3 when it is below
    count > 36       - jumps through fastmoveproc_backward when source is
                       below dest and source+count reaches past dest,
                       otherwise through fastmoveproc_forward
  Addresses are compared as unsigned values.  A signed comparison picks the
  wrong direction for overlapping buffers whose addresses, or whose
  source+count, have the top bit set, and that corrupts the copy. }
procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
asm
  cmp     ecx,SMALLMOVESIZE
  ja      @Large                  {Unsigned: also taken for negative counts}
  cmp     eax,edx
  lea     eax,[eax+ecx]           {lea keeps the flags of the cmp above}
  jbe     @SmallCheck             {Source <= Dest (unsigned)}
@SmallForward:
  add     edx,ecx
  jmp     SmallForwardMove_3
@SmallCheck:
  je      @Done {For Compatibility with Delphi's move for Source = Dest}
  sub     eax,ecx
  jmp     SmallBackwardMove_3
@Large:
  jng     @Done {For Compatibility with Delphi's move for Count < 0}
  cmp     eax,edx
  ja      @moveforward            {Source > Dest (unsigned)}
  je      @Done {For Compatibility with Delphi's move for Source = Dest}
  push    eax
  add     eax,ecx
  cmp     eax,edx
  pop     eax                     {push/pop keep the flags of the cmp above}
  ja      @movebackward           {Source+Count > Dest (unsigned)}
@moveforward:
  jmp     dword ptr fastmoveproc_forward
@movebackward:
  jmp     dword ptr fastmoveproc_backward {Source/Dest Overlap}
@Done:
end;

{$asmmode att}

{$ifndef FPC_HAS_INDIRECT_ENTRY_INFORMATION}
var
  valgrind_used : boolean;external name '__fpc_valgrind';
{$endif FPC_HAS_INDIRECT_ENTRY_INFORMATION}

{ Selects the routines Move uses for counts above SMALLMOVESIZE:
  the byte-wise valgrind fallbacks when valgrind_used is set, otherwise the
  SSE versions when has_sse_support is true (unless compiled out with
  FASTMOVE_DISABLE_SSE3), otherwise the MMX versions when has_mmx_support is
  true.  When none applies the IA32 defaults stay in place.
  has_sse_support and has_mmx_support are declared outside this file. }
procedure setup_fastmove;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { workaround valgrind bug }
{$ifdef FPC_HAS_INDIRECT_ENTRY_INFORMATION}
  if EntryInformation.valgrind_used then
{$else FPC_HAS_INDIRECT_ENTRY_INFORMATION}
  if valgrind_used then
{$endif FPC_HAS_INDIRECT_ENTRY_INFORMATION}
    begin
      fastmoveproc_forward:=@Forwards_Valgrind;
      fastmoveproc_backward:=@Backwards_Valgrind;
    end
{$ifndef FASTMOVE_DISABLE_SSE3}
  else if has_sse_support then
    begin
      fastmoveproc_forward:=@Forwards_SSE_3;
      fastmoveproc_backward:=@Backwards_SSE_3;
    end
{$endif ndef FASTMOVE_DISABLE_SSE3}
  else if has_mmx_support then
    begin
      fastmoveproc_forward:=@Forwards_MMX_3;
      fastmoveproc_backward:=@Backwards_MMX_3;
    end;
end;

{$endif FPC_SYSTEM_HAS_MOVE}

{$endif}