o patch by Nico Erfurth: Support Assembly optimized functions of SwapEndian on ARM

Currently the ARM-Port uses generic functions for SwapEndian, which are
relatively slow.

This patch adds optimized functions for the 32-bit and 64-bit cases. The
16-bit case is still handled with a normal function: while the generated
code is far from optimal, the inlining (which is not possible with
asm-functions) makes it faster than the optimized function.

Some Numbers from my 1.2GHz Kirkwood (ARMv5):

                        Old     New     Result
SwapEndian(Integer)     12.168s 5.411s  44.47%
SwapEndian(Int64)       168.28s 9.015s   5.36%

Testcode was
begin
        I := $FFFFFFF;
        while I > 0 do
        begin
                Val2 := MySwapEndian(Val);
                Dec(I);
        end;
end.

Currently only the ARM implementation is tested. ARMv6+ includes a rev
instruction; while I've implemented variants using it, I was not able to test them.

git-svn-id: trunk@20685 -
This commit is contained in:
florian 2012-04-01 17:31:49 +00:00
parent e0ae28b967
commit df0201799e

View File

@ -855,6 +855,127 @@ begin
{$endif FPC_SYSTEM_FPC_MOVE}
end;
{$define FPC_SYSTEM_HAS_SWAPENDIAN}
{ SwapEndian(<16 Bit>) being inlined is faster than using assembler }
function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Exchanges the two bytes of a signed 16-bit value.                  }
  { AValue is reinterpreted as Word before each shift: a plain         }
  { "AValue shr 8" is evaluated as "longint(AValue) shr 8", so for a   }
  { negative AValue the sign-extended upper 16 bits would be shifted   }
  { into the result instead of zeroes.                                 }
  SwapEndian := SmallInt((Word(AValue) shl 8) or (Word(AValue) shr 8));
end;
function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { Exchanges the two bytes of an unsigned 16-bit value: the low byte  }
  { is moved up, the high byte is moved down, and the Word cast drops  }
  { whatever was shifted beyond bit 15.                                }
  SwapEndian := Word((AValue shl 8) or (AValue shr 8));
end;
(*
This is kept for reference. That's what the compiler COULD generate in these cases.
But FPC currently does not support inlining of asm-functions, so the whole call-overhead
is bigger than the gain of the optimized function.
function AsmSwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif};assembler;nostackframe;
asm
// We're starting with 4321
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
mov r0, r0, shl #16 // Shift to make that 2100
mov r0, r0, ror #24 // Rotate to 1002
orr r0, r0, r0 shr #16 // Shift and combine into 0012
{$else}
rev r0, r0 // Reverse byteorder r0 = 1234
mov r0, r0, shr #16 // Shift down to 16bits r0 = 0012
{$endif}
end;
*)
function SwapEndian(const AValue: LongInt): LongInt;assembler;nostackframe;
{ Reverses the byte order of a 32-bit value.
  Notation: one digit per byte, leftmost digit = most significant byte.
  In:      r0 = 4321
  Out:     r0 = 1234
  Scratch: r1, r2 on the pre-ARMv6 path; nothing on the rev path. }
asm
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  // No rev instruction on these cores: build the result in r2 from the
  // least significant byte position upwards, then add the top byte last.
  and r1, r0, #65280          // r1 = 0020  (mask = $0000FF00)
  mov r2, r0, lsr #24         // r2 = 0004
  orr r2, r2, r1, lsl #8      // r2 = 0204
  and r1, r0, #16711680       // r1 = 0300  (mask = $00FF0000)
  orr r2, r2, r1, lsr #8      // r2 = 0234
  orr r0, r2, r0, lsl #24     // r0 = 1234
{$else}
  rev r0, r0                  // r0 = 1234
{$endif}
end;
function SwapEndian(const AValue: DWord): DWord;assembler;nostackframe;
{ Reverses the byte order of an unsigned 32-bit value.
  Notation: one digit per byte, leftmost digit = most significant byte.
  In:      r0 = 4321
  Out:     r0 = 1234
  Scratch: r1, r2 on the pre-ARMv6 path; nothing on the rev path. }
asm
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  // No rev instruction on these cores: build the result in r2 from the
  // least significant byte position upwards, then add the top byte last.
  and r1, r0, #65280          // r1 = 0020  (mask = $0000FF00)
  mov r2, r0, lsr #24         // r2 = 0004
  orr r2, r2, r1, lsl #8      // r2 = 0204
  and r1, r0, #16711680       // r1 = 0300  (mask = $00FF0000)
  orr r2, r2, r1, lsr #8      // r2 = 0234
  orr r0, r2, r0, lsl #24     // r0 = 1234
{$else}
  rev r0, r0                  // r0 = 1234
{$endif}
end;
function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
{ Reverses the byte order of a 64-bit value.
  Notation: one digit per byte, leftmost digit = most significant byte
  of the register.
  In:      r0 = 4321, r1 = 8765
  Out:     r0 = 5678, r1 = 1234  (each word is byte-reversed and the two
           words trade places)
  Scratch: r2, r3, ip on the pre-ARMv6 path; r2 on the rev path.

  Neither path ends in an explicit return instruction; both fall through
  to "end", the same way the 32-bit SwapEndian overloads do (presumably
  the compiler supplies the return for nostackframe routines). In
  particular there is no "bx lr" on the pre-ARMv6 path: that path is also
  selected for ARMv3/ARMv4, where bx is not available without Thumb
  support. }
asm
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1                  // ip = 8765; frees r1 for the result
  // Reverse the word that arrived in r0; it is returned in r1.
  mov r2, r0, lsr #24         // r2 = 0004
  and r3, r0, #16711680       // r3 = 0300  (mask = $00FF0000)
  orr r2, r2, r0, lsl #24     // r2 = 1004
  orr r2, r2, r3, lsr #8      // r2 = 1034
  and r0, r0, #65280          // r0 = 0020  (mask = $0000FF00)
  orr r1, r2, r0, lsl #8      // r1 = 1234
  // Reverse the word that arrived in r1 (now in ip); it is returned in r0.
  mov r2, ip, lsr #24         // r2 = 0008
  and r3, ip, #16711680       // r3 = 0700
  orr r2, r2, ip, lsl #24     // r2 = 5008
  orr r2, r2, r3, lsr #8      // r2 = 5078
  and ip, ip, #65280          // ip = 0060
  orr r0, r2, ip, lsl #8      // r0 = 5678
{$else}
  rev r2, r0                  // r2 = 1234
  rev r0, r1                  // r0 = 5678
  mov r1, r2                  // r1 = 1234
{$endif}
end;
function SwapEndian(const AValue: QWord): QWord; assembler; nostackframe;
{ Reverses the byte order of an unsigned 64-bit value.
  Notation: one digit per byte, leftmost digit = most significant byte
  of the register.
  In:      r0 = 4321, r1 = 8765
  Out:     r0 = 5678, r1 = 1234  (each word is byte-reversed and the two
           words trade places)
  Scratch: r2, r3, ip on the pre-ARMv6 path; r2 on the rev path.

  Neither path ends in an explicit return instruction; both fall through
  to "end", the same way the 32-bit SwapEndian overloads do (presumably
  the compiler supplies the return for nostackframe routines). In
  particular there is no "bx lr" on the pre-ARMv6 path: that path is also
  selected for ARMv3/ARMv4, where bx is not available without Thumb
  support. }
asm
{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
  mov ip, r1                  // ip = 8765; frees r1 for the result
  // Reverse the word that arrived in r0; it is returned in r1.
  mov r2, r0, lsr #24         // r2 = 0004
  and r3, r0, #16711680       // r3 = 0300  (mask = $00FF0000)
  orr r2, r2, r0, lsl #24     // r2 = 1004
  orr r2, r2, r3, lsr #8      // r2 = 1034
  and r0, r0, #65280          // r0 = 0020  (mask = $0000FF00)
  orr r1, r2, r0, lsl #8      // r1 = 1234
  // Reverse the word that arrived in r1 (now in ip); it is returned in r0.
  mov r2, ip, lsr #24         // r2 = 0008
  and r3, ip, #16711680       // r3 = 0700
  orr r2, r2, ip, lsl #24     // r2 = 5008
  orr r2, r2, r3, lsr #8      // r2 = 5078
  and ip, ip, #65280          // ip = 0060
  orr r0, r2, ip, lsl #8      // r0 = 5678
{$else}
  rev r2, r0                  // r2 = 1234
  rev r0, r1                  // r0 = 5678
  mov r1, r2                  // r1 = 1234
{$endif}
end;
{include hand-optimized assembler division code}
{$i divide.inc}