mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-10-31 20:31:51 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			1130 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1130 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| {
 | |
| 
 | |
|     This file is part of the Free Pascal run time library.
 | |
|     Copyright (c) 2003 by the Free Pascal development team.
 | |
| 
 | |
|     Processor dependent implementation for the system unit for
 | |
|     ARM
 | |
| 
 | |
|     See the file COPYING.FPC, included in this distribution,
 | |
|     for details about the copyright.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
|  **********************************************************************}
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_MOVE}
 | |
| {$define FPC_SYSTEM_FPC_MOVE}
 | |
| {$endif FPC_SYSTEM_HAS_MOVE}
 | |
| 
 | |
| {$ifdef FPC_SYSTEM_FPC_MOVE}
 | |
| const
 | |
|   cpu_has_edsp : boolean = false;
 | |
|   in_edsp_test : boolean = false;
 | |
| {$endif FPC_SYSTEM_FPC_MOVE}
 | |
| 
 | |
| {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_SYSINITFPU}
 | |
| {$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV3_D16)}
 | |
| Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
 | |
|   asm
 | |
|     rfs r0
 | |
|     and r0,r0,#0xffe0ffff
 | |
|     orr r0,r0,#0x00070000
 | |
|     wfs r0
 | |
|   end;
 | |
| end;
 | |
| {$else}
 | |
| Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
 | |
|   asm
 | |
|     fmrx r0,fpscr
 | |
|     // set "round to nearest" mode
 | |
|     and  r0,r0,#0xff3fffff
 | |
|     // mask "exception happened" and overflow flags
 | |
|     and  r0,r0,#0xffffff20
 | |
|     // mask exception flags
 | |
|     and  r0,r0,#0xffff40ff
 | |
| {$ifndef darwin}
 | |
|     // Floating point exceptions cause kernel panics on iPhoneOS 2.2.1...
 | |
| 
 | |
|     // disable flush-to-zero mode (IEEE math compliant)
 | |
|     and  r0,r0,#0xfeffffff
 | |
|     // enable invalid operation, div-by-zero and overflow exceptions
 | |
|     orr  r0,r0,#0x00000700
 | |
| {$endif}
 | |
|     fmxr fpscr,r0
 | |
|   end;
 | |
| end;
 | |
| {$endif}
 | |
| {$endif}
 | |
| 
 | |
| procedure fpc_cpuinit;
 | |
| begin
 | |
|   { don't let libraries influence the FPU cw set by the host program }
 | |
|   if not IsLibrary then
 | |
|     SysInitFPU;
 | |
| end;
 | |
| 
 | |
| {$ifdef wince}
 | |
| function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_SYSRESETFPU}
 | |
| Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
| end;
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_SYSINITFPU}
 | |
| Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
 | |
|   { FPU precision 64 bit, rounding to nearest, affine infinity }
 | |
|   _controlfp($000C0003, $030F031F);
 | |
| end;
 | |
| {$endif wince}
 | |
| 
 | |
| {****************************************************************************
 | |
|                        stack frame related stuff
 | |
| ****************************************************************************}
 | |
| 
 | |
| {$IFNDEF INTERNAL_BACKTRACE}
 | |
| {$define FPC_SYSTEM_HAS_GET_FRAME}
 | |
| function get_frame:pointer;assembler;nostackframe;
 | |
| asm
 | |
| {$ifndef darwin}
 | |
|   mov    r0,r11
 | |
| {$else}
 | |
|   mov    r0,r7
 | |
| {$endif}
 | |
| end;
 | |
| {$ENDIF not INTERNAL_BACKTRACE}
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
 | |
| function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
 | |
| asm
 | |
|   cmp r0,#0
 | |
| {$ifndef darwin}
 | |
|   ldrne r0,[r0,#-4]
 | |
| {$else}
 | |
|   ldrne r0,[r0,#4]
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
 | |
| function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;assembler;nostackframe;
 | |
| asm
 | |
|   cmp r0,#0
 | |
| {$ifndef darwin}
 | |
|   ldrne r0,[r0,#-12]
 | |
| {$else}
 | |
|   ldrne r0,[r0]
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_SPTR}
 | |
| Function Sptr : pointer;assembler;nostackframe;
 | |
| asm
 | |
|   mov    r0,sp
 | |
| end;
 | |
| 
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
 | |
| {$define FPC_SYSTEM_HAS_FILLCHAR}
 | |
| Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
 | |
| asm
 | |
|         // less than 0?
 | |
|         cmp     r1,#0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|         bxle    lr
 | |
| {$else}
 | |
|         movle   pc,lr
 | |
| {$endif}
 | |
|         mov     r3,r0
 | |
| 
 | |
|         orr     r2,r2,r2,lsl #8
 | |
|         orr     r2,r2,r2,lsl #16
 | |
| 
 | |
|         tst     r3, #3  // Aligned?
 | |
|         bne     .LFillchar_do_align
 | |
| 
 | |
| .LFillchar_is_aligned:
 | |
|         subs    r1,r1,#8
 | |
|         bmi     .LFillchar_less_than_8bytes
 | |
| 
 | |
|         mov     ip,r2
 | |
| .LFillchar_at_least_8bytes:
 | |
|         // Do 16 bytes per loop
 | |
|         // More unrolling is uncessary, as we'll just stall on the write buffers
 | |
|         stmia   r3!,{r2,ip}
 | |
|         subs    r1,r1,#8
 | |
|         stmplia r3!,{r2,ip}
 | |
|         subpls  r1,r1,#8
 | |
|         bpl     .LFillchar_at_least_8bytes
 | |
| 
 | |
| .LFillchar_less_than_8bytes:
 | |
|         // Do the rest
 | |
|         adds    r1, r1, #8
 | |
| 
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|         bxeq    lr
 | |
| {$else}
 | |
|         moveq   pc,lr
 | |
| {$endif}
 | |
| 
 | |
|         tst     r1, #4
 | |
|         strne   r2,[r3],#4
 | |
|         tst     r1, #2
 | |
|         strneh  r2,[r3],#2
 | |
|         tst     r1, #1
 | |
|         strneb  r2,[r3],#1
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|         bx  lr
 | |
| {$else}
 | |
|         mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| // Special case for unaligned start
 | |
| // We make a maximum of 3 loops here
 | |
| .LFillchar_do_align:
 | |
|         strb r2,[r3],#1
 | |
|         subs r1, r1, #1
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|         bxeq  lr
 | |
| {$else}
 | |
|         moveq pc,lr
 | |
| {$endif}
 | |
|         tst r3,#3
 | |
|         bne .LFillchar_do_align
 | |
|         b .LFillchar_is_aligned
 | |
| end;
 | |
| {$endif FPC_SYSTEM_HAS_FILLCHAR}
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_MOVE}
 | |
| {$define FPC_SYSTEM_HAS_MOVE}
 | |
| {$ifdef CPUARM_HAS_EDSP}
 | |
| procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
 | |
| {$else CPUARM_HAS_EDSP}
 | |
| procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
 | |
| {$endif CPUARM_HAS_EDSP}
 | |
| asm
 | |
|   // pld [r0]
 | |
|   // encode this using .long so the rtl assembles also with instructions sets not supporting pld
 | |
|   .long 0xf5d0f000
 | |
| 
 | |
|   // count <=0 ?
 | |
|   cmp r2,#0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxle  lr
 | |
| {$else}
 | |
|   movle pc,lr
 | |
| {$endif}
 | |
|   // overlap?
 | |
|   subs   r3, r1, r0    // if (dest > source) and
 | |
|   cmphi  r2, r3        //    (count > dest - src) then
 | |
|   bhi    .Loverlapped  //   DoReverseByteCopy;
 | |
| 
 | |
|   cmp r2,#8            // if (count < 8) then
 | |
|   blt .Lbyteloop       //    DoForwardByteCopy;
 | |
|   // Any way to avoid the above jump and fuse the next two instructions?
 | |
|   tst   r0, #3         // if (source and 3) <> 0 or
 | |
|   tsteq r1, #3         //    (dest and 3) <> 0 then
 | |
|   bne   .Lbyteloop     //   DoForwardByteCopy;
 | |
| 
 | |
|   // pld   [r0,#32]
 | |
|   // encode this using .long so the rtl assembles also with instructions sets not supporting pld
 | |
|   .long 0xf5d0f020
 | |
| 
 | |
| .Ldwordloop:
 | |
|   ldmia r0!, {r3, ip}
 | |
| 
 | |
|   // preload
 | |
|   // pld   [r0,#64]
 | |
|   // encode this using .long so the rtl assembles also with instructions sets not supporting pld
 | |
|   .long 0xf5d0f040
 | |
| 
 | |
|   sub   r2,r2,#8
 | |
|   cmp   r2, #8
 | |
|   stmia r1!, {r3, ip}
 | |
|   bge   .Ldwordloop
 | |
|   cmp   r2,#0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxeq  lr
 | |
| {$else}
 | |
|   moveq pc,lr
 | |
| {$endif}
 | |
| .Lbyteloop:
 | |
|   subs r2,r2,#1
 | |
|   ldrb r3,[r0],#1
 | |
|   strb r3,[r1],#1
 | |
|   bne .Lbyteloop
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| .Loverlapped:
 | |
|   subs r2,r2,#1
 | |
|   ldrb r3,[r0,r2]
 | |
|   strb r3,[r1,r2]
 | |
|   bne .Loverlapped
 | |
| end;
 | |
| 
 | |
| {$ifndef CPUARM_HAS_EDSP}
 | |
| procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
 | |
| asm
 | |
|   // count <=0 ?
 | |
|   cmp r2,#0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxle  lr
 | |
| {$else}
 | |
|   movle pc,lr
 | |
| {$endif}
 | |
|   // overlap?
 | |
|   subs   r3, r1, r0    // if (dest > source) and
 | |
|   cmphi  r2, r3        //    (count > dest - src) then
 | |
|   bhi    .Loverlapped  //   DoReverseByteCopy;
 | |
| 
 | |
|   cmp r2,#8            // if (count < 8) then
 | |
|   blt .Lbyteloop       //    DoForwardByteCopy;
 | |
|   // Any way to avoid the above jump and fuse the next two instructions?
 | |
|   tst   r0, #3         // if (source and 3) <> 0 or
 | |
|   tsteq r1, #3         //    (dest and 3) <> 0 then
 | |
|   bne   .Lbyteloop     //   DoForwardByteCopy;
 | |
| 
 | |
| .Ldwordloop:
 | |
|   ldmia r0!, {r3, ip}
 | |
|   sub   r2,r2,#8
 | |
|   cmp   r2, #8
 | |
|   stmia r1!, {r3, ip}
 | |
|   bge   .Ldwordloop
 | |
|   cmp   r2,#0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxeq  lr
 | |
| {$else}
 | |
|   moveq pc,lr
 | |
| {$endif}
 | |
| .Lbyteloop:
 | |
|   subs r2,r2,#1
 | |
|   ldrb r3,[r0],#1
 | |
|   strb r3,[r1],#1
 | |
|   bne .Lbyteloop
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| .Loverlapped:
 | |
|   subs r2,r2,#1
 | |
|   ldrb r3,[r0,r2]
 | |
|   strb r3,[r1,r2]
 | |
|   bne .Loverlapped
 | |
| end;
 | |
| 
 | |
| const
 | |
|   moveproc : pointer = @move_blended;
 | |
| 
 | |
| procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
 | |
| asm
 | |
|   ldr ip,.Lmoveproc
 | |
|   ldr pc,[ip]
 | |
| .Lmoveproc:
 | |
|   .long moveproc
 | |
| end;
 | |
| {$endif CPUARM_HAS_EDSP}
 | |
| 
 | |
| {$endif FPC_SYSTEM_HAS_MOVE}
 | |
| 
 | |
| {****************************************************************************
 | |
|                                  String
 | |
| ****************************************************************************}
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
 | |
| {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
 | |
| 
 | |
| procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
 | |
| {r0: __RESULT
 | |
|  r1: len
 | |
|  r2: sstr}
 | |
| 
 | |
| asm
 | |
|     ldrb r12,[r2],#1
 | |
|     cmp  r12,r1
 | |
|     movgt r12,r1
 | |
|     strb r12,[r0],#1
 | |
|     cmp  r12,#6 (* 6 seems to be the break even point. *)
 | |
|     blt  .LStartTailCopy
 | |
|     (* Align destination on 32bits. This is the only place where unrolling
 | |
|        really seems to help, since in the common case, sstr is aligned on
 | |
|        32 bits, therefore in the common case we need to copy 3 bytes to
 | |
|        align, i.e. in the case of a loop, you wouldn't branch out early.*)
 | |
|     rsb  r3,r0,#0
 | |
|     ands  r3,r3,#3
 | |
|     sub  r12,r12,r3
 | |
|     ldrneb r1,[r2],#1
 | |
|     strneb r1,[r0],#1
 | |
|     subnes  r3,r3,#1
 | |
|     ldrneb r1,[r2],#1
 | |
|     strneb r1,[r0],#1
 | |
|     subnes  r3,r3,#1
 | |
|     ldrneb r1,[r2],#1
 | |
|     strneb r1,[r0],#1
 | |
|     subnes  r3,r3,#1
 | |
| .LDoneAlign:
 | |
|     (* Destination should be aligned now, but source might not be aligned,
 | |
|        if this is the case, do a byte-per-byte copy. *)
 | |
|     tst r2,#3
 | |
|     bne .LStartTailCopy
 | |
|     (* Start the main copy, 32 bit at a time. *)
 | |
|     movs r3,r12,lsr #2
 | |
|     and r12,r12,#3
 | |
|     beq  .LStartTailCopy
 | |
| .LNext4bytes:
 | |
|     (* Unrolling this loop would save a little bit of time for long strings
 | |
|        (>20 chars), but alas, it hurts for short strings and they are the
 | |
|        common case.*)
 | |
|     ldrne r1,[r2],#4
 | |
|     strne r1,[r0],#4
 | |
|     subnes  r3,r3,#1
 | |
|     bne .LNext4bytes
 | |
| .LStartTailCopy:
 | |
|     (* Do remaining bytes. *)
 | |
|     cmp r12,#0
 | |
|     beq .LDoneTail
 | |
| .LNextChar3:
 | |
|     ldrb r1,[r2],#1
 | |
|     strb r1,[r0],#1
 | |
|     subs  r12,r12,#1
 | |
|     bne .LNextChar3
 | |
| .LDoneTail:
 | |
| end;
 | |
| 
 | |
| procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
 | |
| 
 | |
| {r0: len
 | |
|  r1: sstr
 | |
|  r2: dstr}
 | |
| 
 | |
| asm
 | |
|     ldrb r12,[r1],#1
 | |
|     cmp  r12,r0
 | |
|     movgt r12,r0
 | |
|     strb r12,[r2],#1
 | |
|     cmp  r12,#6 (* 6 seems to be the break even point. *)
 | |
|     blt  .LStartTailCopy
 | |
|     (* Align destination on 32bits. This is the only place where unrolling
 | |
|        really seems to help, since in the common case, sstr is aligned on
 | |
|        32 bits, therefore in the common case we need to copy 3 bytes to
 | |
|        align, i.e. in the case of a loop, you wouldn't branch out early.*)
 | |
|     rsb  r3,r2,#0
 | |
|     ands  r3,r3,#3
 | |
|     sub  r12,r12,r3
 | |
|     ldrneb r0,[r1],#1
 | |
|     strneb r0,[r2],#1
 | |
|     subnes  r3,r3,#1
 | |
|     ldrneb r0,[r1],#1
 | |
|     strneb r0,[r2],#1
 | |
|     subnes  r3,r3,#1
 | |
|     ldrneb r0,[r1],#1
 | |
|     strneb r0,[r2],#1
 | |
|     subnes  r3,r3,#1
 | |
| .LDoneAlign:
 | |
|     (* Destination should be aligned now, but source might not be aligned,
 | |
|        if this is the case, do a byte-per-byte copy. *)
 | |
|     tst r1,#3
 | |
|     bne .LStartTailCopy
 | |
|     (* Start the main copy, 32 bit at a time. *)
 | |
|     movs r3,r12,lsr #2
 | |
|     and r12,r12,#3
 | |
|     beq  .LStartTailCopy
 | |
| .LNext4bytes:
 | |
|     (* Unrolling this loop would save a little bit of time for long strings
 | |
|        (>20 chars), but alas, it hurts for short strings and they are the
 | |
|        common case.*)
 | |
|     ldrne r0,[r1],#4
 | |
|     strne r0,[r2],#4
 | |
|     subnes  r3,r3,#1
 | |
|     bne .LNext4bytes
 | |
| .LStartTailCopy:
 | |
|     (* Do remaining bytes. *)
 | |
|     cmp r12,#0
 | |
|     beq .LDoneTail
 | |
| .LNextChar3:
 | |
|     ldrb r0,[r1],#1
 | |
|     strb r0,[r2],#1
 | |
|     subs  r12,r12,#1
 | |
|     bne .LNextChar3
 | |
| .LDoneTail:
 | |
| end;
 | |
| {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
 | |
| {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
 | |
| function fpc_Pchar_length(p:Pchar):sizeint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
 | |
| 
 | |
| asm
 | |
|     cmp r0,#0
 | |
|     mov r1,r0
 | |
|     beq .Ldone
 | |
| .Lnextchar:
 | |
|     (*Are we aligned?*)
 | |
|     tst r1,#3
 | |
|     bne .Ltest_unaligned    (*No, do byte per byte.*)
 | |
|     ldr r3,.L01010101
 | |
| .Ltest_aligned:
 | |
|     (*Aligned, load 4 bytes at a time.*)
 | |
|     ldr r12,[r1],#4
 | |
|     (*Check wether r12 contains a 0 byte.*)
 | |
|     sub r2,r12,r3
 | |
|     mvn r12,r12
 | |
|     and r2,r2,r12
 | |
|     ands r2,r2,r3,lsl #7    (*r3 lsl 7 = $80808080*)
 | |
|     beq .Ltest_aligned      (*No 0 byte, repeat.*)
 | |
|     sub r1,r1,#4
 | |
| .Ltest_unaligned:
 | |
|     ldrb r12,[r1],#1
 | |
|     cmp r12,#1              (*r12<1 same as r12=0, but result in carry flag*)
 | |
|     bcs .Lnextchar
 | |
|     (*Dirty trick: we need to subtract 1 extra because we have counted the
 | |
|       terminating 0, due to the known carry flag sbc can do this.*)
 | |
|     sbc r0,r1,r0
 | |
| .Ldone:
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|     bx  lr
 | |
| {$else}
 | |
|     mov pc,lr
 | |
| {$endif}
 | |
| .L01010101:
 | |
|     .long 0x01010101
 | |
| end;
 | |
| {$endif}
 | |
| 
 | |
| 
 | |
| {$ifndef darwin}
 | |
| {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
 | |
| Procedure fpc_ansistr_decr_ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF'];assembler;nostackframe; compilerproc;
 | |
| asm
 | |
|   ldr     r1, [r0]
 | |
|   // On return the pointer will always be set to zero, so utilize the delay slots
 | |
|   mov     r2, #0
 | |
|   str     r2, [r0]
 | |
| 
 | |
|   // Check for a zero string
 | |
|   cmp     r1, #0
 | |
|   // Load reference counter
 | |
|   ldrne   r2, [r1, #-8]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxeq    lr
 | |
| {$else}
 | |
|   moveq   pc,lr
 | |
| {$endif}
 | |
|   // Check for a constant string
 | |
|   cmp     r2, #0
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxlt    lr
 | |
| {$else}
 | |
|   movlt   pc,lr
 | |
| {$endif}
 | |
|   stmfd   sp!, {r1, lr}
 | |
|   sub     r0, r1, #8
 | |
| {$if defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
 | |
|   blx     InterLockedDecrement
 | |
| {$else defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
 | |
|   bl      InterLockedDecrement
 | |
| {$endif defined(CPUARM_HAS_BLX_LABEL) and not(defined(WINCE))}
 | |
|   // InterLockedDecrement is a nice guy and sets the z flag for us
 | |
|   // if the reference count dropped to 0
 | |
|   ldmnefd sp!, {r1, pc}
 | |
|   ldmfd sp!, {r0, lr}
 | |
|   // We currently can not use constant symbols in ARM-Assembly
 | |
|   // but we need to stay backward compatible with 2.6
 | |
|   sub     r0, r0, #12
 | |
|   // Jump without a link, so freemem directly returns to our caller
 | |
|   b       FPC_FREEMEM
 | |
| end;
 | |
| {$endif not darwin}
 | |
| 
 | |
| var
 | |
|   fpc_system_lock: longint; export name 'fpc_system_lock';
 | |
| 
 | |
| function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
 | |
| asm
 | |
| {$ifdef CPUARM_HAS_LDREX}
 | |
| .Lloop:
 | |
|   ldrex r1, [r0]
 | |
|   sub   r1, r1, #1
 | |
|   strex r2, r1, [r0]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
|   movs r0, r1
 | |
|   bx  lr
 | |
| {$else}
 | |
| {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
 | |
|   stmfd r13!, {lr}
 | |
|   mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
 | |
| .Latomic_dec_loop:
 | |
|   ldr r0, [r2]   // Load the current value
 | |
| 
 | |
|   // We expect this to work without looping most of the time
 | |
|   // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
 | |
|   // loop here again, we have to reload the value. Normaly this just fills the
 | |
|   // load stall-cycles from the above ldr so in reality we'll not get any additional
 | |
|   // delays because of this
 | |
|   // Don't use ldr to load r3 to avoid cacheline trashing
 | |
|   // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
 | |
|   // the kuser_cmpxchg entry point
 | |
|   mvn r3, #0x0000f000
 | |
|   sub r3, r3, #0x3F
 | |
| 
 | |
|   sub r1, r0, #1 // Decrement value
 | |
| {$ifdef CPUARM_HAS_BLX}
 | |
|   blx r3	 // Call kuser_cmpxchg, sets C-Flag on success
 | |
| {$else}
 | |
|   mov lr, pc
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  r3
 | |
| {$else}
 | |
|   mov pc, r3
 | |
| {$endif}
 | |
| {$endif}
 | |
|   // MOVS sets the Z flag when the result reaches zero, this can be used later on
 | |
|   // The C-Flag will not be modified by this because we're not doing any shifting
 | |
|   movcss r0, r1	 // We expect that to work most of the time so keep it pipeline friendly
 | |
|   ldmcsfd r13!, {pc}
 | |
|   b .Latomic_dec_loop // kuser_cmpxchg sets C flag on error
 | |
| 
 | |
| {$else}
 | |
| // lock
 | |
|   ldr r3, .Lfpc_system_lock
 | |
|   mov r1, #1
 | |
| .Lloop:
 | |
|   swp r2, r1, [r3]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
| // do the job
 | |
|   ldr r1, [r0]
 | |
|   sub r1, r1, #1
 | |
|   str r1, [r0]
 | |
|   movs r0, r1
 | |
| // unlock and return
 | |
|   str r2, [r3]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| .Lfpc_system_lock:
 | |
|   .long fpc_system_lock
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| 
 | |
| {$ifndef darwin}
 | |
| {$define FPC_SYSTEM_HAS_ANSISTR_INCR_REF}
 | |
| 
 | |
| Procedure fpc_ansistr_incr_ref (S : Pointer); [Public,Alias:'FPC_ANSISTR_INCR_REF'];assembler;nostackframe; compilerproc;
 | |
| asm
 | |
|   // Null string?
 | |
|   cmp     r0, #0
 | |
|   // Load reference counter
 | |
|   ldrne   r1, [r0, #-8]
 | |
|   // pointer to counter, calculate here for delay slot utilization
 | |
|   subne   r0, r0, #8
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bxeq    lr
 | |
| {$else}
 | |
|   moveq   pc,lr
 | |
| {$endif}
 | |
|   // Check for a constant string
 | |
|   cmp     r1, #0
 | |
|   // Tailcall
 | |
|   // Hopefully the linker will place InterLockedIncrement as layed out here
 | |
|   bge     InterLockedIncrement
 | |
|   // Freepascal will generate a proper return here, save some cachespace
 | |
| end;
 | |
| {$endif not darwin}
 | |
| 
 | |
| function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
 | |
| asm
 | |
| {$ifdef CPUARM_HAS_LDREX}
 | |
| .Lloop:
 | |
|   ldrex r1, [r0]
 | |
|   add   r1, r1, #1
 | |
|   strex r2, r1, [r0]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
|   mov r0, r1
 | |
|   bx  lr
 | |
| {$else}
 | |
| {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
 | |
|   stmfd r13!, {lr}
 | |
|   mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
 | |
| .Latomic_inc_loop:
 | |
|   ldr r0, [r2]   // Load the current value
 | |
| 
 | |
|   // We expect this to work without looping most of the time
 | |
|   // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
 | |
|   // loop here again, we have to reload the value. Normaly this just fills the
 | |
|   // load stall-cycles from the above ldr so in reality we'll not get any additional
 | |
|   // delays because of this
 | |
|   // Don't use ldr to load r3 to avoid cacheline trashing
 | |
|   // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
 | |
|   // the kuser_cmpxchg entry point
 | |
|   mvn r3, #0x0000f000
 | |
|   sub r3, r3, #0x3F
 | |
| 
 | |
|   add r1, r0, #1 // Increment value
 | |
| {$ifdef CPUARM_HAS_BLX}
 | |
|   blx r3	 // Call kuser_cmpxchg, sets C-Flag on success
 | |
| {$else}
 | |
|   mov lr, pc
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  r3
 | |
| {$else}
 | |
|   mov pc, r3
 | |
| {$endif}
 | |
| {$endif}
 | |
|   movcs r0, r1	 // We expect that to work most of the time so keep it pipeline friendly
 | |
|   ldmcsfd r13!, {pc}
 | |
|   b .Latomic_inc_loop // kuser_cmpxchg sets C flag on error
 | |
| 
 | |
| {$else}
 | |
| // lock
 | |
|   ldr r3, .Lfpc_system_lock
 | |
|   mov r1, #1
 | |
| .Lloop:
 | |
|   swp r2, r1, [r3]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
| // do the job
 | |
|   ldr r1, [r0]
 | |
|   add r1, r1, #1
 | |
|   str r1, [r0]
 | |
|   mov r0, r1
 | |
| // unlock and return
 | |
|   str r2, [r3]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| .Lfpc_system_lock:
 | |
|   .long fpc_system_lock
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| 
 | |
| function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 | |
| asm
 | |
| {$ifdef CPUARM_HAS_LDREX}
 | |
| // swp is deprecated on ARMv6 and above
 | |
| .Lloop:
 | |
|   ldrex r2, [r0]
 | |
|   strex r3, r1, [r0]
 | |
|   cmp r3, #0
 | |
|   bne .Lloop
 | |
|   mov r0, r2
 | |
|   bx  lr
 | |
| {$else}
 | |
| {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
 | |
|   stmfd r13!, {r4, lr}
 | |
|   mov r2, r0   // kuser_cmpxchg does not clobber r2 (and r1) by definition
 | |
| .Latomic_add_loop:
 | |
|   ldr r0, [r2]   // Load the current value
 | |
| 
 | |
|   // We expect this to work without looping most of the time
 | |
|   // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
 | |
|   // loop here again, we have to reload the value. Normaly this just fills the
 | |
|   // load stall-cycles from the above ldr so in reality we'll not get any additional
 | |
|   // delays because of this
 | |
|   // Don't use ldr to load r3 to avoid cacheline trashing
 | |
|   // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
 | |
|   // the kuser_cmpxchg entry point
 | |
|   mvn r3, #0x0000f000
 | |
|   sub r3, r3, #0x3F
 | |
|   mov r4, r0     // save the current value because kuser_cmpxchg clobbers r0
 | |
| {$ifdef CPUARM_HAS_BLX}
 | |
|   blx r3	 // Call kuser_cmpxchg, sets C-Flag on success
 | |
| {$else}
 | |
|   mov lr, pc
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  r3
 | |
| {$else}
 | |
|   mov pc, r3
 | |
| {$endif}
 | |
| {$endif}
 | |
|   // restore the original value if needed
 | |
|   movcs   r0, r4
 | |
|   ldmcsfd r13!, {r4, pc}
 | |
| 
 | |
|   b .Latomic_add_loop // kuser_cmpxchg failed, loop back
 | |
| {$else}
 | |
| // lock
 | |
|   ldr r3, .Lfpc_system_lock
 | |
|   mov r2, #1
 | |
| .Lloop:
 | |
|   swp r2, r2, [r3]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
| // do the job
 | |
|   ldr r2, [r0]
 | |
|   str r1, [r0]
 | |
|   mov r0, r2
 | |
| // unlock and return
 | |
|   mov r2, #0
 | |
|   str r2, [r3]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| .Lfpc_system_lock:
 | |
|   .long fpc_system_lock
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 | |
| asm
 | |
| {$ifdef CPUARM_HAS_LDREX}
 | |
| .Lloop:
 | |
|   ldrex r2, [r0]
 | |
|   add   r12, r1, r2
 | |
|   strex r3, r12, [r0]
 | |
|   cmp r3, #0
 | |
|   bne .Lloop
 | |
|   mov  r0, r2
 | |
|   bx  lr
 | |
| {$else}
 | |
| {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
 | |
|   stmfd r13!, {r4, lr}
 | |
|   mov r2, r0   // kuser_cmpxchg does not clobber r2 by definition
 | |
|   mov r4, r1   // Save addend
 | |
| .Latomic_add_loop:
 | |
|   ldr r0, [r2]   // Load the current value
 | |
| 
 | |
|   // We expect this to work without looping most of the time
 | |
|   // R3 gets clobbered in kuser_cmpxchg so in the unlikely case that we have to
 | |
|   // loop here again, we have to reload the value. Normaly this just fills the
 | |
|   // load stall-cycles from the above ldr so in reality we'll not get any additional
 | |
|   // delays because of this
 | |
|   // Don't use ldr to load r3 to avoid cacheline trashing
 | |
|   // Load 0xffff0fff into r3 and substract to 0xffff0fc0,
 | |
|   // the kuser_cmpxchg entry point
 | |
|   mvn r3, #0x0000f000
 | |
|   sub r3, r3, #0x3F
 | |
| 
 | |
|   add r1, r0, r4 // Add to value
 | |
| {$ifdef CPUARM_HAS_BLX}
 | |
|   blx r3	 // Call kuser_cmpxchg, sets C-Flag on success
 | |
| {$else}
 | |
|   mov lr, pc
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  r3
 | |
| {$else}
 | |
|   mov pc, r3
 | |
| {$endif}
 | |
| {$endif}
 | |
|   // r1 does not get clobbered, so just get back the original value
 | |
|   // Otherwise we would have to allocate one more register and store the
 | |
|   // temporary value
 | |
|   subcs   r0, r1, r4
 | |
|   ldmcsfd r13!, {r4, pc}
 | |
| 
 | |
|   b .Latomic_add_loop // kuser_cmpxchg failed, loop back
 | |
| 
 | |
| {$else}
 | |
| // lock
 | |
|   ldr r3, .Lfpc_system_lock
 | |
|   mov r2, #1
 | |
| .Lloop:
 | |
|   swp r2, r2, [r3]
 | |
|   cmp r2, #0
 | |
|   bne .Lloop
 | |
| // do the job
 | |
|   ldr r2, [r0]
 | |
|   add r1, r1, r2
 | |
|   str r1, [r0]
 | |
|   mov r0, r2
 | |
| // unlock and return
 | |
|   mov r2, #0
 | |
|   str r2, [r3]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| .Lfpc_system_lock:
 | |
|   .long fpc_system_lock
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| 
 | |
| function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
 | |
| asm
 | |
| {$ifdef CPUARM_HAS_LDREX}
 | |
| .Lloop:
 | |
|   ldrex    r3, [r0]
 | |
|   mov      r12, #0
 | |
|   cmp      r3, r2
 | |
|   strexeq  r12, r1, [r0]
 | |
|   cmp      r12, #0
 | |
|   bne      .Lloop
 | |
|   mov      r0, r3
 | |
|   bx       lr
 | |
| {$else}
 | |
| {$ifdef SYSTEM_HAS_KUSER_CMPXCHG}
 | |
|   stmfd r13!, {r4, lr}
 | |
|   mvn   r3, #0x0000f000
 | |
|   sub   r3, r3, #0x3F
 | |
| 
 | |
|   mov   r4, r2 // Swap parameters around
 | |
|   mov   r2, r0
 | |
|   mov   r0, r4 // Use r4 because we'll need the new value for later
 | |
| 
 | |
|   // r1 and r2 will not be clobbered by kuser_cmpxchg
 | |
|   // If we have to loop, r0 will be set to the original Comperand
 | |
|   .Linterlocked_compare_exchange_loop:
 | |
| {$ifdef CPUARM_HAS_BLX}
 | |
|   blx r3	 // Call kuser_cmpxchg, sets C-Flag on success
 | |
| {$else}
 | |
|   mov lr, pc
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  r3
 | |
| {$else}
 | |
|   mov pc, r3
 | |
| {$endif}
 | |
| {$endif}
 | |
|   movcs r0, r4   // Return the previous value on success
 | |
|   ldmcsfd r13!, {r4, pc}
 | |
|   // The error case is a bit tricky, kuser_cmpxchg does not return the current value
 | |
|   // So we may need to loop to avoid race conditions
 | |
|   // The loop case is HIGHLY unlikely, it would require that we got rescheduled between
 | |
|   // calling kuser_cmpxchg and the ldr. While beeing rescheduled another process/thread
 | |
|   // would have the set the value to our comperand
 | |
|   ldr	r0, [r2] // Load the currently set value
 | |
|   cmp   r0, r4   // Return if Comperand != current value, otherwise loop again
 | |
|   ldmnefd r13!, {r4, pc}
 | |
|   // If we need to loop here, we have to
 | |
|   b	.Linterlocked_compare_exchange_loop
 | |
| 
 | |
| {$else}
 | |
| // lock
 | |
|   ldr r12, .Lfpc_system_lock
 | |
|   mov r3, #1
 | |
| .Lloop:
 | |
|   swp r3, r3, [r12]
 | |
|   cmp r3, #0
 | |
|   bne .Lloop
 | |
| // do the job
 | |
|   ldr r3, [r0]
 | |
|   cmp r3, r2
 | |
|   streq r1, [r0]
 | |
|   mov r0, r3
 | |
| // unlock and return
 | |
|   mov r3, #0
 | |
|   str r3, [r12]
 | |
| {$ifdef CPUARM_HAS_BX}
 | |
|   bx  lr
 | |
| {$else}
 | |
|   mov pc,lr
 | |
| {$endif}
 | |
| 
 | |
| .Lfpc_system_lock:
 | |
|   .long fpc_system_lock
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
 | |
| function declocked(var l: longint) : boolean; inline;
 | |
| begin
 | |
|   Result:=InterLockedDecrement(l) = 0;
 | |
| end;
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
 | |
| procedure inclocked(var l: longint); inline;
 | |
| begin
 | |
|   InterLockedIncrement(l);
 | |
| end;
 | |
| 
 | |
| procedure fpc_cpucodeinit;
 | |
| begin
 | |
| {$ifdef FPC_SYSTEM_FPC_MOVE}
 | |
| {$ifndef CPUARM_HAS_EDSP}
 | |
|   cpu_has_edsp:=true;
 | |
|   in_edsp_test:=true;
 | |
|   asm
 | |
|     bic r0,sp,#7
 | |
| 
 | |
|     // ldrd r0,r1,[r0]
 | |
|     // encode this using .long so the rtl assembles also with instructions sets not supporting pld
 | |
|     .long 0xe1c000d0
 | |
|   end;
 | |
|   in_edsp_test:=false;
 | |
|   if cpu_has_edsp then
 | |
|     moveproc:=@move_pld
 | |
|   else
 | |
|     moveproc:=@move_blended;
 | |
| {$else CPUARM_HAS_EDSP}
 | |
|   cpu_has_edsp:=true;
 | |
| {$endif CPUARM_HAS_EDSP}
 | |
| {$endif FPC_SYSTEM_FPC_MOVE}
 | |
| end;
 | |
| 
 | |
| {$define FPC_SYSTEM_HAS_SWAPENDIAN}
 | |
| 
 | |
| { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
 | |
| function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
|   begin
 | |
|     { the extra Word type cast is necessary because the "AValue shr 8" }
 | |
|     { is turned into "longint(AValue) shr 8", so if AValue < 0 then    }
 | |
|     { the sign bits from the upper 16 bits are shifted in rather than  }
 | |
|     { zeroes.                                                          }
 | |
|     Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
 | |
|   end;
 | |
| 
 | |
| 
 | |
| function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
|   begin
 | |
|     Result := Word((AValue shr 8) or (AValue shl 8));
 | |
|   end;
 | |
| 
 | |
| (*
 | |
| This is kept for reference. Thats what the compiler COULD generate in these cases.
 | |
| But FPC currently does not support inlining of asm-functions, so the whole call-overhead
 | |
| is bigger than the gain of the optimized function.
 | |
| function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
 | |
| asm
 | |
| 	// We're starting with 4321
 | |
| {$if defined(CPUARM_HAS_REV)}
 | |
| 	rev r0, r0		// Reverse byteorder    r0 = 1234
 | |
| 	mov r0, r0, shr #16	// Shift down to 16bits r0 = 0012
 | |
| {$else}
 | |
| 	mov r0, r0, shl #16	// Shift to make that 2100
 | |
| 	mov r0, r0, ror #24	// Rotate to 1002
 | |
| 	orr r0, r0, r0 shr #16  // Shift and combine into 0012
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| *)
 | |
| 
 | |
| {
 | |
|   These used to be an assembler-function, but with newer improvements to the compiler this
 | |
|   generates a perfect 4 cycle code sequence and can be inlined.
 | |
| }
 | |
| function SwapEndian(const AValue: LongWord): LongWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   Result:= AValue xor rordword(AValue,16);
 | |
|   Result:= Result and $FF00FFFF;
 | |
|   Result:= (Result shr 8) xor rordword(AValue,8);
 | |
| end;
 | |
| 
 | |
| function SwapEndian(const AValue: LongInt): LongInt;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   Result:=LongInt(SwapEndian(DWord(AValue)));
 | |
| end;
 | |
| 
 | |
| {
 | |
|   Currently freepascal will not generate a good assembler sequence for
 | |
|   Result:=(SwapEndian(longword(lo(AValue))) shl 32) or
 | |
|           (SwapEndian(longword(hi(AValue))));
 | |
| 
 | |
|   So we keep an assembly version for now
 | |
| }
 | |
| 
 | |
| function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
 | |
| asm
 | |
| // fpc >2.6.0 adds the "rev" instruction in the internal assembler
 | |
| {$if defined(CPUARM_HAS_REV)}
 | |
|         rev r2, r0
 | |
|         rev r0, r1
 | |
|         mov r1, r2
 | |
| {$else}
 | |
|         mov ip, r1
 | |
| 
 | |
|         // We're starting with r0 = $87654321
 | |
|         eor r1, r0, r0, ror #16          // r1 = $C444C444
 | |
|         bic r1, r1, #16711680            // r1 = r1 and $ff00ffff = $C400C444
 | |
|         mov r0, r0, ror #8               // r0 = $21876543
 | |
|         eor r1, r0, r1, lsr #8           // r1 = $21436587
 | |
| 
 | |
|         eor r0, ip, ip, ror #16
 | |
|         bic r0, r0, #16711680
 | |
|         mov ip, ip, ror #8
 | |
|         eor r0, ip, r0, lsr #8
 | |
| 
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| function SwapEndian(const AValue: QWord): QWord; {$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   Result:=QWord(SwapEndian(Int64(AValue)));
 | |
| end;
 | |
| 
 | |
| {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
 | |
| {$define FPC_SYSTEM_HAS_MEM_BARRIER}
 | |
| 
 | |
| { Generic read/readwrite barrier code.  }
 | |
| procedure barrier; assembler; nostackframe;
 | |
| asm
 | |
|   // manually encode the instructions to avoid bootstrap and -march external
 | |
|   // assembler settings 
 | |
| {$ifdef CPUARM_HAS_DMB}
 | |
|   .long 0xf57ff05f  // dmb sy
 | |
| {$else}
 | |
| {$ifdef CPUARMV6}
 | |
|   mov r0, #0
 | |
|   .long 0xee070fba  // mcr 15, 0, r0, cr7, cr10, {5}
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| procedure ReadBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   barrier;
 | |
| end;
 | |
| 
 | |
| procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   { reads imply barrier on earlier reads depended on; not required on ARM }
 | |
| end;
 | |
| 
 | |
| procedure ReadWriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
 | |
| begin
 | |
|   barrier;
 | |
| end;
 | |
| 
 | |
| procedure WriteBarrier; assembler; nostackframe;
 | |
| asm
 | |
|   // specialize the write barrier because according to ARM, implementations for
 | |
|   // "dmb st" may be more optimal than the more generic "dmb sy"
 | |
| {$ifdef CPUARM_HAS_DMB}
 | |
|   .long 0xf57ff05e  // dmb st
 | |
| {$else}
 | |
| {$ifdef CPUARMV6}
 | |
|   mov r0, #0
 | |
|   .long 0xee070fba  // mcr 15, 0, r0, cr7, cr10, {5} 
 | |
| {$endif}
 | |
| {$endif}
 | |
| end;
 | |
| 
 | |
| {$endif}
 | |
| 
 | |
| {include hand-optimized assembler division code}
 | |
| {$i divide.inc}
 | 
