diff --git a/compiler/arm/aasmcpu.pas b/compiler/arm/aasmcpu.pas index 2932d6d6b3..9301bc54af 100644 --- a/compiler/arm/aasmcpu.pas +++ b/compiler/arm/aasmcpu.pas @@ -735,7 +735,7 @@ implementation { check for pre/post indexed } result := operand_read; //Thumb2 - A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV,A_MOVT: + A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS: { operand 0 of these opcodes is reported as operand_write. NOTE(review): MOVT presumably preserves the lower halfword of its destination - confirm a write-only classification is right for it } if opnr in [0] then result:=operand_write else diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index 1700f1bd40..b2ab717244 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -342,7 +342,8 @@ Implementation {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same} not ( (taicpu(p).opcode in [A_MLA, A_MUL]) and - (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) + (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and + (current_settings.cputype < cpu_armv6) { the MUL/MLA restriction is only enforced for CPU types ordered before cpu_armv6 } ) and { Take care to only do this for instructions which REALLY load to the first register. Otherwise @@ -1170,7 +1171,10 @@ Implementation add reg2, ...
} if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then - RemoveSuperfluousMove(p, hp1, 'DataMov2Data'); + begin + if (taicpu(p).ops=3) then { only the three-operand form is handed to RemoveSuperfluousMove } + RemoveSuperfluousMove(p, hp1, 'DataMov2Data'); + end; end; A_MVN: begin @@ -1260,6 +1264,52 @@ Implementation asml.remove(p); p.free; p:=hp1; + end + { + change + uxtb reg2,reg1 + uxtb reg3,reg2 + dealloc reg2 + to + uxtb reg3,reg1 + } + else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and { hp1 must read reg2 as its source; without this check an hp1 that merely overwrites reg2 from another register would be rewritten to read reg1 } MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or + (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then + begin + DebugMsg('Peephole UxtbUxtb2Uxtb done', p); + { hp1 already is an UXTB (checked above); only its source operand changes } + taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=hp1; + end + { + change + uxth reg2,reg1 + uxth reg3,reg2 + dealloc reg2 + to + uxth reg3,reg1 + } + else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and { NOTE(review): the next case label after this hunk is A_UXTH, so this branch appears to sit in the preceding case arm - confirm it is reachable there } + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and { same source-operand requirement as for the UXTB pair } MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or + (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and + { reg1 might not be modified inbetween } + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then + begin + DebugMsg('Peephole UxthUxth2Uxth done', p); + { hp1 already is an UXTH (checked above); only its source operand changes } + taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=hp1; end; end; A_UXTH: @@ -1858,7 +1908,17 @@ Implementation result:=true; end else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None,PF_S]) and + MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and + (taicpu(p).ops = 3) and + MatchOperand(taicpu(p).oper[0]^,
taicpu(p).oper[1]^) and + (taicpu(p).oper[2]^.typ=top_reg) then + begin + taicpu(p).ops := 2; { rewrite add rX,rX,rY into the two-operand form add rX,rY } + taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg); + result:=true; + end + else if (p.typ=ait_instruction) and + MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and (taicpu(p).ops = 3) and MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and (taicpu(p).oper[2]^.typ=top_reg) and @@ -1873,7 +1933,7 @@ Implementation result:=true; end else if (p.typ=ait_instruction) and - MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and + MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and (taicpu(p).ops = 3) and MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then @@ -1885,6 +1945,33 @@ Implementation taicpu(p).ops := 2; result:=true; end + else if (p.typ=ait_instruction) and + MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and + (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ=top_shifterop) and + (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and + MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and + (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then + begin + asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); + asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p); + IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); + taicpu(p).oppostfix:=PF_S; { the flags were checked to be unused and are allocated around p above } + taicpu(p).ops := 2; { oper[2] is still read below for the shift amount and the shift mode } + + if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then + taicpu(p).loadreg(1, taicpu(p).oper[2]^.shifterop^.rs) + else + taicpu(p).loadconst(1, taicpu(p).oper[2]^.shifterop^.shiftimm); + + case taicpu(p).oper[2]^.shifterop^.shiftmode of { the MOV becomes the shift instruction matching its shifter operand } + SM_LSL: taicpu(p).opcode:=A_LSL; + SM_LSR: taicpu(p).opcode:=A_LSR; + SM_ASR: taicpu(p).opcode:=A_ASR; + SM_ROR: taicpu(p).opcode:=A_ROR; + end; + result:=true; + end else if (p.typ=ait_instruction) and MatchInstruction(p, [A_AND], [], [PF_None]) and (taicpu(p).ops = 2) and @@ -1917,6 +2004,76 @@ Implementation
result := true; end + { + Turn + mul reg0, z,w + sub/add x, y, reg0 + dealloc reg0 + + into + + mls/mla x,z,w,y + } + else if (p.typ=ait_instruction) and + MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and + (taicpu(p).ops=3) and + (taicpu(p).oper[0]^.typ = top_reg) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oper[2]^.typ = top_reg) and + GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and + (((taicpu(hp1).ops=3) and + (taicpu(hp1).oper[2]^.typ=top_reg) and { reg0 must not be both source operands, the product can only replace one of them } not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(hp1).oper[2]^)) and + (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or + (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and + (taicpu(hp1).opcode=A_ADD)))) or + ((taicpu(hp1).ops=2) and + (taicpu(hp1).oper[1]^.typ=top_reg) and { in the two-operand form the destination is also the other source, so it must differ from reg0 } not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and + assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and + not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then + begin + if taicpu(hp1).opcode=A_ADD then + begin + taicpu(hp1).opcode:=A_MLA; + { MLA rd,rn,rm,ra computes rn*rm+ra: the addend goes into operand 3, the two factors of the MUL into operands 1 and 2. Operand 3 is loaded first because the addend is read from the operands that are overwritten afterwards } + if taicpu(hp1).ops=2 then + taicpu(hp1).loadreg(3,taicpu(hp1).oper[0]^.reg) + else if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then + taicpu(hp1).loadreg(3,taicpu(hp1).oper[2]^.reg) + else + taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg); + taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg); + DebugMsg('MulAdd2MLA done', p); + taicpu(hp1).ops:=4; + + asml.remove(p); + p.free; + p:=hp1; + end + else + begin + taicpu(hp1).opcode:=A_MLS; + { MLS rd,rn,rm,ra computes ra-rn*rm: the minuend goes into operand 3, the two factors of the MUL into operands 1 and 2 } + if taicpu(hp1).ops=2 then + taicpu(hp1).loadreg(3,taicpu(hp1).oper[0]^.reg) + else + taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg); + taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg); + DebugMsg('MulSub2MLS done', p); + + taicpu(hp1).ops:=4; + + asml.remove(p); + p.free; + p:=hp1; + end; + + result:=true; + end {else if (p.typ=ait_instruction) and MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and (taicpu(p).oper[1]^.typ=top_const) and diff --git a/compiler/arm/cgcpu.pas
b/compiler/arm/cgcpu.pas index 5f882ba56e..0f9cbe4845 100644 --- a/compiler/arm/cgcpu.pas +++ b/compiler/arm/cgcpu.pas @@ -160,6 +160,8 @@ unit cgcpu; procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override; procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override; + procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override; { bit scan of src into dst; reverse selects the CLZ-only code path of the implementation } + function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override; procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override; @@ -3170,24 +3172,12 @@ unit cgcpu; begin if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then internalerror(2002090902); - if is_shifter_const(a,imm_shift) then + if is_thumb_imm(a) then { the constant itself is accepted by is_thumb_imm: a single MOV is emitted } list.concat(taicpu.op_reg_const(A_MOV,reg,a)) - { loading of constants with mov and orr } - else if (is_shifter_const(a-byte(a),imm_shift)) then - begin - list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a))); - list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a))); - end - else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then - begin - list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a))); - list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a))); - end - else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then - begin - list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8)); - list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8)); - end + else if is_thumb_imm(not(a)) then { the inverted constant is accepted: it is loaded with MVN } + list.concat(taicpu.op_reg_const(A_MVN,reg,not(a))) + else if (a and $FFFF)=a then { no bits set above bit 15: MOVW is emitted } + list.concat(taicpu.op_reg_const(A_MOVW,reg,a)) else begin reference_reset(hr,4); @@ -3198,6 +3188,7 @@ unit cgcpu;
current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a))); hr.symbol:=l; + hr.base:=NR_PC; { the literal emitted above is addressed with PC as base register } list.concat(taicpu.op_reg_ref(A_LDR,reg,hr)); end; end; @@ -3478,6 +3469,35 @@ unit cgcpu; so.shiftimm:=l1; list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so)); end + { for example : b=a*7 -> b=a*8-a with rsb instruction and shl } + else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then + begin + if l1>32 then{does this ever happen?} + internalerror(201205181); + shifterop_reset(so); + so.shiftmode:=SM_LSL; + so.shiftimm:=l1; + list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so)); + end + else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then + begin + { nothing to do on success } + end + { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops. + Just using mov x, #0 might allow some easier optimizations down the line. } + else if (op = OP_AND) and (dword(a)=0) then + list.concat(taicpu.op_reg_const(A_MOV,dst,0)) + { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations } + else if (op = OP_AND) and (not(dword(a))=0) then + list.concat(taicpu.op_reg_reg(A_MOV,dst,src)) + { BIC clears the specified bits, while AND keeps them, using BIC allows to use a + broader range of shifterconstants.} + {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then + list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))} + else if (op = OP_AND) and is_thumb_imm(a) then { the mask is directly encodable: emit a real AND; MOV has no register+immediate three-operand form and would not mask anything } + list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a))) + else if (op = OP_AND) and is_thumb_imm(not(dword(a))) then { only the inverted mask is encodable: clear the unwanted bits with BIC } + list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a)))) else begin tmpreg:=getintregister(list,size); @@ -3810,6 +3830,22 @@ unit cgcpu; list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14)); end; + procedure Tthumb2cgarm.a_bit_scan_reg_reg(list: TAsmList; reverse:
boolean; size: TCGSize; src, dst: TRegister); + begin + if reverse then + begin { highest set bit: 31-clz(src); src=0 gives -1, which UXTB reduces to 255 } + list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src)); + list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31)); + list.Concat(taicpu.op_reg_reg(A_UXTB,dst,dst)); + end + else + begin { lowest set bit: clz(rbit(src)) already is the bit index, so it must not be subtracted from 31; src=0 gives 32 and is mapped to 255 like in the reverse case } + list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src)); + list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst)); + a_reg_alloc(list,NR_DEFAULTFLAGS); list.Concat(taicpu.op_reg_const(A_CMP,dst,32)); list.Concat(taicpu.op_cond(A_IT,C_EQ)); + list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ)); a_reg_dealloc(list,NR_DEFAULTFLAGS); + end + end; function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; var diff --git a/compiler/arm/cpubase.pas b/compiler/arm/cpubase.pas index e0883843dc..93ab6535f5 100644 --- a/compiler/arm/cpubase.pas +++ b/compiler/arm/cpubase.pas @@ -365,6 +365,7 @@ unit cpubase; function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE} function is_shifter_const(d : aint;var imm_shift : byte) : boolean; + function is_thumb_imm(d : aint) : boolean; { Doesn't handle ROR_C detection } function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean; function dwarf_reg(r:tregister):shortint; @@ -550,6 +551,30 @@ unit cpubase; result:=false; end; + { Returns true if the low 32 bits of d can be encoded as a Thumb-2 modified immediate constant } + function is_thumb_imm(d: aint): boolean; + var + v : dword; + i : longint; + begin + result:=true; + v:=dword(d); + { plain 8-bit value: 0x000000XY } + if (v and $FF)=v then + exit; + { both halfwords identical and of the form 0x00XY00XY, 0xXY00XY00 or 0xXYXYXYXY } + if ((v shr 16)=(v and $FFFF)) and + (((v and $FF00FF00)=0) or + ((v and $00FF00FF)=0) or + (((v shr 8) and $FF)=(v and $FF))) then + exit; + { 8-bit value shifted left by 1..24 bits; any shift amount is allowed, but the value may not wrap around bit 31 } + for i:=1 to 24 do + if ((v shr i)<=$FF) and (((v shr i) shl i)=v) then + exit; + result:=false; + end; + function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean; var d, i, i2: Dword; diff --git a/compiler/arm/cpuinfo.pas b/compiler/arm/cpuinfo.pas index
4cf999fd60..2ace88bd8d 100644 --- a/compiler/arm/cpuinfo.pas +++ b/compiler/arm/cpuinfo.pas @@ -1066,6 +1066,7 @@ Const CPUARM_HAS_CLZ, { CPU supports the CLZ instruction } CPUARM_HAS_EDSP, { CPU supports the PLD,STRD,LDRD,MCRR and MRRC instructions } CPUARM_HAS_REV, { CPU supports the REV instruction } + CPUARM_HAS_RBIT, { CPU supports the RBIT instruction } CPUARM_HAS_LDREX, CPUARM_HAS_IDIV ); @@ -1088,8 +1089,8 @@ Const { cpu_armv7 } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX], { cpu_armv7a } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX], { cpu_armv7r } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX], - { cpu_armv7m } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV], - { cpu_armv7em } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV] + { cpu_armv7m } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV], + { cpu_armv7em } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV] { NOTE(review): only cpu_armv7m and cpu_armv7em receive CPUARM_HAS_RBIT; the cpu_armv7, cpu_armv7a and cpu_armv7r entries above are left without it - confirm this is intended } ); Implementation diff --git a/compiler/options.pas b/compiler/options.pas index 4950598d5a..bdb460539a 100644 --- a/compiler/options.pas +++ b/compiler/options.pas @@ -3268,7 +3268,11 @@ if (target_info.abi = abi_eabihf) then this is not perfect but the current implementation bsf/bsr does not allow another solution } if CPUARM_HAS_CLZ in cpu_capabilities[init_settings.cputype] then - def_system_macro('FPC_HAS_INTERNAL_BSR'); + begin + def_system_macro('FPC_HAS_INTERNAL_BSR'); + if CPUARM_HAS_RBIT in cpu_capabilities[init_settings.cputype] then { FPC_HAS_INTERNAL_BSF is only defined when both the CLZ and the RBIT capability are present } + def_system_macro('FPC_HAS_INTERNAL_BSF'); + end; {$endif} diff --git
a/rtl/arm/thumb2.inc b/rtl/arm/thumb2.inc index ddd39b3f23..ce3cfa5db3 100644 --- a/rtl/arm/thumb2.inc +++ b/rtl/arm/thumb2.inc @@ -505,140 +505,67 @@ asm end; {$endif} - -var - fpc_system_lock: longint; export name 'fpc_system_lock'; - function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe; asm -// lock - ldr r3, .Lfpc_system_lock - mov r1, #1 .Lloop: - ldrex r2, [r3] - cmp r2, #0 - itt eq - strexeq r2, r1, [r3] - cmpeq r2, #0 + ldrex ip, [r0] // exclusive load from [r0]; r0 presumably holds the address of Target - confirm against the calling convention + sub ip, #1 + strex r3, ip, [r0] // r3 receives the store status that is tested below + cmp r3, #0 bne .Lloop -// do the job - ldr r1, [r0] - sub r1, r1, #1 - str r1, [r0] - mov r0, r1 -// unlock and return - str r2, [r3] - mov pc, lr -.Lfpc_system_lock: - .long fpc_system_lock + mov r0, ip // the decremented value is left in r0 end; function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe; asm -// lock - ldr r3, .Lfpc_system_lock - mov r1, #1 .Lloop: - ldrex r2, [r3] - cmp r2, #0 - itt eq - strexeq r2, r1, [r3] - cmpeq r2, #0 + ldrex ip, [r0] + add ip, #1 + strex r3, ip, [r0] + cmp r3, #0 // non-zero store status: repeat the load/modify/store sequence bne .Lloop -// do the job - ldr r1, [r0] - add r1, r1, #1 - str r1, [r0] - mov r0, r1 -// unlock and return - str r2, [r3] - mov pc, lr - -.Lfpc_system_lock: - .long fpc_system_lock + + mov r0, ip // the incremented value is left in r0 end; function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe; asm - -// lock - ldr r3, .Lfpc_system_lock - mov r2, #1 .Lloop: - ldrex r2, [r3] - cmp r2, #0 - itt eq - strexeq r2, r12, [r3] - cmpeq r2, #0 + ldrex ip, [r0] // ip keeps the value found at [r0] before the store + strex r3, r1, [r0] // r1 presumably holds Source - confirm against the calling convention + cmp r3, #0 bne .Lloop -// do the job - ldr r2, [r0] - str r1, [r0] - mov r0, r2 -// unlock and return - mov r2, #0 - str r2, [r3] - mov pc, lr - -.Lfpc_system_lock: - .long fpc_system_lock + + mov r0, ip // the previously loaded value is left in r0 end; function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe; asm -// lock - ldr r3, .Lfpc_system_lock - mov r2, #1 .Lloop: - ldrex r2, [r3] - cmp r2, #0 - itt eq - strexeq r2, r12, [r3] - cmpeq r2, #0 + ldrex ip, [r0] + add r2, ip, r1 // r2 := loaded value + r1; ip itself stays unchanged +
strex r3, r2, [r0] + cmp r3, #0 // non-zero store status: repeat the load/add/store sequence bne .Lloop -// do the job - ldr r2, [r0] - add r1, r1, r2 - str r1, [r0] - mov r0, r2 -// unlock and return - mov r2, #0 - str r2, [r3] - mov pc, lr - -.Lfpc_system_lock: - .long fpc_system_lock + + mov r0, ip // the value loaded before the addition is left in r0 end; - function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe; asm -// lock - ldr r12, .Lfpc_system_lock - mov r3, #1 .Lloop: - ldrex r2, [r12] - cmp r2, #0 - itt eq - strexeq r2, r1, [r12] - cmpeq r2, #0 + ldrex ip, [r0] + cmp ip, r2 // r2 presumably holds Comperand and r1 NewValue - confirm against the calling convention + ite eq + strexeq r3, r1, [r0] // values equal: try to store r1, r3 receives the store status + movne r3, #0 // values differ: nothing is stored, r3:=0 lets the loop test below fall through + cmp r3, #0 bne .Lloop -// do the job - ldr r3, [r0] - cmp r3, r2 - it eq - streq r1, [r0] - mov r0, r3 -// unlock and return - mov r3, #0 - str r3, [r12] - mov pc, lr - -.Lfpc_system_lock: - .long fpc_system_lock + + mov r0, ip // the value loaded from [r0] is left in r0 in both cases end; {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}