mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-20 18:09:27 +02:00
o patch by Nico Erfurth: Support Assembly optimized functions of SwapEndian on ARM
Currently the ARM port uses generic functions for SwapEndian, which are relatively slow. This patch adds optimized functions for the 32-bit and 64-bit cases. The 16-bit case is still handled with a normal function: while the generated code is far from optimal, the inlining (which is not possible with asm functions) makes it faster than the optimized function. Some numbers from my 1.2GHz Kirkwood (ARMv5): Old New Result SwapEndian(Integer) 12.168s 5.411s 44.47% SwapEndian(Int64) 168.28s 9.015s 5.36% Test code was: begin I := $FFFFFFF; while I > 0 do begin Val2 := MySwapEndian(Val); Dec(I); end; end. Currently only the ARM implementation is tested. ARMv6+ includes a rev instruction; while I've implemented the rev-based variants, I was not able to test them. git-svn-id: trunk@20685 -
This commit is contained in:
parent
e0ae28b967
commit
df0201799e
121
rtl/arm/arm.inc
121
rtl/arm/arm.inc
@ -855,6 +855,127 @@ begin
|
||||
{$endif FPC_SYSTEM_FPC_MOVE}
|
||||
end;
|
||||
|
||||
{$define FPC_SYSTEM_HAS_SWAPENDIAN}
|
||||
|
||||
{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
|
||||
{ Returns AValue with its two bytes exchanged (16-bit signed variant). }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
var
  Bits: Word;
begin
  { Work on an unsigned 16-bit copy: shifting the SmallInt directly would }
  { evaluate "longint(AValue) shr 8", and for AValue < 0 the sign bits of }
  { the upper 16 bits would be shifted in instead of zeroes.              }
  Bits := Word(AValue);
  Result := SmallInt((Bits shl 8) or (Bits shr 8));
end;
|
||||
|
||||
|
||||
{ Returns AValue with its two bytes exchanged (16-bit unsigned variant). }
function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Low byte moves up, high byte moves down; the Word cast drops anything }
  { above bit 15 of the widened intermediate result.                      }
  Result := Word(((AValue and $00FF) shl 8) or ((AValue and $FF00) shr 8));
end;
|
||||
|
||||
(*
|
||||
This is kept for reference. That's what the compiler COULD generate in these cases.
|
||||
But FPC currently does not support inlining of asm-functions, so the whole call-overhead
|
||||
is bigger than the gain of the optimized function.
|
||||
function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
|
||||
asm
|
||||
// We're starting with 4321
|
||||
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
|
||||
mov r0, r0, shl #16 // Shift to make that 2100
|
||||
mov r0, r0, ror #24 // Rotate to 1002
|
||||
orr r0, r0, r0 shr #16 // Shift and combine into 0012
|
||||
{$else}
|
||||
rev r0, r0 // Reverse byteorder r0 = 1234
|
||||
mov r0, r0, shr #16 // Shift down to 16bits r0 = 0012
|
||||
{$endif}
|
||||
end;
|
||||
|
||||
*)
|
||||
|
||||
{ Reverses the byte order of a 32-bit signed value.
  In:  r0 = AValue, written below as bytes 4321 (most significant first)
  Out: r0 = 1234
  Scratch registers on the pre-ARMv6 path: r1, r2, r3 }
function SwapEndian(const AValue: LongInt): LongInt;assembler;nostackframe;
asm
  // We're starting with r0 = 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  // No rev instruction on this path: isolate the two middle bytes,
  // swap the two outer bytes, then merge everything.
  and r1, r0, #0x0000FF00     // r1 = 0020
  and r2, r0, #0x00FF0000     // r2 = 0300
  mov r3, r0, lsl #24         // r3 = 1000
  orr r3, r3, r0, lsr #24     // r3 = 1004
  orr r3, r3, r1, lsl #8      // r3 = 1204
  orr r0, r3, r2, lsr #8      // r0 = 1234
{$else}
  rev r0, r0                  // r0 = 1234
{$endif}
end;
|
||||
|
||||
{ Reverses the byte order of a 32-bit unsigned value.
  In:  r0 = AValue, written below as bytes 4321 (most significant first)
  Out: r0 = 1234
  Scratch registers on the pre-ARMv6 path: r1, r2, r3 }
function SwapEndian(const AValue: DWord): DWord;assembler;nostackframe;
asm
  // We're starting with r0 = 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  // No rev instruction on this path: isolate the two middle bytes,
  // swap the two outer bytes, then merge everything.
  and r1, r0, #0x0000FF00     // r1 = 0020
  and r2, r0, #0x00FF0000     // r2 = 0300
  mov r3, r0, lsl #24         // r3 = 1000
  orr r3, r3, r0, lsr #24     // r3 = 1004
  orr r3, r3, r1, lsl #8      // r3 = 1204
  orr r0, r3, r2, lsr #8      // r0 = 1234
{$else}
  rev r0, r0                  // r0 = 1234
{$endif}
end;
|
||||
|
||||
{ Reverses the byte order of a 64-bit signed value.
  In:  r0 = 4321, r1 = 8765 (the two 32-bit halves of AValue, bytes written
       most significant first)
  Out: r0 = 5678, r1 = 1234 (each half byte-reversed and the halves exchanged)
  Scratch registers on the pre-ARMv6 path: r2, r3, ip }
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
asm
  // We're starting with r0 = 4321 r1 = 8765
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1                  // ip = 8765, saved because r1 receives a result below

  // Byte-reverse r0 into r1
  mov r2, r0, lsr #24         // r2 = 0004
  and r3, r0, #16711680       // r3 = 0300
  orr r2, r2, r0, lsl #24     // r2 = 1004
  orr r2, r2, r3, lsr #8      // r2 = 1034
  and r0, r0, #65280          // r0 = 0020
  orr r1, r2, r0, lsl #8      // r1 = 1234

  // Byte-reverse the saved r1 (ip) into r0
  mov r2, ip, lsr #24         // r2 = 0008
  and r3, ip, #16711680       // r3 = 0700
  orr r2, r2, ip, lsl #24     // r2 = 5008
  orr r2, r2, r3, lsr #8      // r2 = 5078
  and ip, ip, #65280          // ip = 0060
  orr r0, r2, ip, lsl #8      // r0 = 5678
  // No explicit "bx lr" here: the rev-based path below and the 32-bit
  // SwapEndian variants also end without an explicit return, and bx is
  // not available on cores older than ARMv4T, which this path targets.
{$else}
  rev r2, r0                  // r2 = 1234
  rev r0, r1                  // r0 = 5678
  mov r1, r2                  // r1 = 1234
{$endif}
end;
|
||||
|
||||
{ Reverses the byte order of a 64-bit unsigned value.
  In:  r0 = 4321, r1 = 8765 (the two 32-bit halves of AValue, bytes written
       most significant first)
  Out: r0 = 5678, r1 = 1234 (each half byte-reversed and the halves exchanged)
  Scratch registers on the pre-ARMv6 path: r2, r3, ip }
function SwapEndian(const AValue: QWord): QWord; assembler; nostackframe;
asm
  // We're starting with r0 = 4321 r1 = 8765
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1                  // ip = 8765, saved because r1 receives a result below

  // Byte-reverse r0 into r1
  mov r2, r0, lsr #24         // r2 = 0004
  and r3, r0, #16711680       // r3 = 0300
  orr r2, r2, r0, lsl #24     // r2 = 1004
  orr r2, r2, r3, lsr #8      // r2 = 1034
  and r0, r0, #65280          // r0 = 0020
  orr r1, r2, r0, lsl #8      // r1 = 1234

  // Byte-reverse the saved r1 (ip) into r0
  mov r2, ip, lsr #24         // r2 = 0008
  and r3, ip, #16711680       // r3 = 0700
  orr r2, r2, ip, lsl #24     // r2 = 5008
  orr r2, r2, r3, lsr #8      // r2 = 5078
  and ip, ip, #65280          // ip = 0060
  orr r0, r2, ip, lsl #8      // r0 = 5678
  // No explicit "bx lr" here: the rev-based path below and the 32-bit
  // SwapEndian variants also end without an explicit return, and bx is
  // not available on cores older than ARMv4T, which this path targets.
{$else}
  rev r2, r0                  // r2 = 1234
  rev r0, r1                  // r0 = 5678
  mov r1, r2                  // r1 = 1234
{$endif}
end;
|
||||
|
||||
{include hand-optimized assembler division code}
|
||||
{$i divide.inc}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user