* patch by ab: Optimized default hash asm in generics.hashes, resolves #40035

* generics.hashes now uses the cpu unit to detect crc32 cpu support

(cherry picked from commit e1bbcf7050)
This commit is contained in:
florian 2022-12-14 21:56:04 +01:00 committed by marcoonthegit
parent 5d878ac342
commit 48ffc20502

View File

@ -122,6 +122,10 @@ var
mORMotHasher: THasher; mORMotHasher: THasher;
implementation implementation
{$ifdef CPUINTEL}
uses
cpu;
{$endif CPUINTEL}
function SimpleChecksumHash(AKey: Pointer; ALength: SizeInt): UInt32; function SimpleChecksumHash(AKey: Pointer; ALength: SizeInt): UInt32;
var var
@ -1241,210 +1245,80 @@ end;
{$ifdef CPUINTEL} {$ifdef CPUINTEL}
type
TRegisters = record
eax,ebx,ecx,edx: cardinal;
end;
{$ifdef CPU64} {$ifdef CPU64}
procedure GetCPUID(Param: Cardinal; var Registers: TRegisters); nostackframe; assembler; function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal; nostackframe; assembler;
asm asm
{$ifdef win64} mov eax, crc
mov eax, ecx test len, len
mov r9, rdx jz @z
{$else} test buf, buf
mov eax, edi jz @z
mov r9, rsi
{$endif win64}
mov r10, rbx // preserve rbx
xor ebx, ebx
xor ecx, ecx
xor edx, edx
cpuid
mov TRegisters(r9).&eax, eax
mov TRegisters(r9).&ebx, ebx
mov TRegisters(r9).&ecx, ecx
mov TRegisters(r9).&edx, edx
mov rbx, r10
end;
function crc32csse42(crc: cardinal; buf: Pointer; len: cardinal): cardinal; nostackframe; assembler;
asm // ecx=crc, rdx=buf, r8=len (Linux: edi,rsi,rdx)
{$ifdef win64}
mov eax, ecx
{$else}
mov eax, edi
mov r8, rdx
mov rdx, rsi
{$endif win64}
not eax not eax
test rdx, rdx mov ecx, len
jz @0 shr len, 3
test r8, r8 jnz @by8 // we don't care for read alignment
jz @0 @0: test cl, 4
@7: test dl, 7 jz @4
jz @8 // align to 8 bytes boundary crc32 eax, dword ptr [buf]
crc32 eax, byte ptr[rdx] add buf, 4
inc rdx @4: test cl, 2
dec r8
jz @0
test dl, 7
jnz @7
@8: mov rcx, r8
shr r8, 3
jz @2 jz @2
@1: crc32 eax, word ptr [buf]
crc32 rax, qword [rdx] // hash 8 bytes per loop add buf, 2
dec r8 @2: test cl, 1
lea rdx, [rdx + 8] jz @1
jnz @1 crc32 eax, byte ptr [buf]
@2: and ecx, 7 @1: not eax
jz @0 @z: ret
cmp ecx, 4 align 16
jb @4 @by8: crc32 rax, qword ptr [buf] // hash 8 bytes per loop
crc32 eax, dword ptr[rdx] add buf, 8
sub ecx, 4 sub len, 1
lea rdx, [rdx + 4] jnz @by8
jz @0 jmp @0
@4: crc32 eax, byte ptr[rdx]
dec ecx
jz @0
crc32 eax, byte ptr[rdx + 1]
dec ecx
jz @0
crc32 eax, byte ptr[rdx + 2]
@0: not eax
end; end;
{$endif CPU64} {$else}
function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal; nostackframe; assembler;
{$ifdef CPUX86}
procedure GetCPUID(Param: Cardinal; var Registers: TRegisters);
asm asm
push esi // eax=crc, edx=buf, ecx=len
push edi
mov esi, edx
mov edi, eax
pushfd
pop eax
mov edx, eax
xor eax, $200000
push eax
popfd
pushfd
pop eax
xor eax, edx
jz @nocpuid
push ebx
mov eax, edi
xor ecx, ecx
cpuid
mov TRegisters(esi).&eax, eax
mov TRegisters(esi).&ebx, ebx
mov TRegisters(esi).&ecx, ecx
mov TRegisters(esi).&edx, edx
pop ebx
@nocpuid:
pop edi
pop esi
end;
function crc32csse42(crc: cardinal; buf: Pointer; len: cardinal): cardinal;
asm // eax=crc, edx=buf, ecx=len
not eax not eax
test ecx, ecx test ecx, ecx
jz @0 jz @0
test edx, edx test edx, edx
jz @0 jz @0
@3: test edx, 3 jmp @align
jz @8 // align to 4 bytes boundary @a: crc32 eax, byte ptr [edx]
crc32 eax, byte ptr[edx]
inc edx inc edx
dec ecx dec ecx
jz @0 jz @0
test edx, 3 @align: test dl, 3
jnz @3 jnz @a
@8: push ecx push ecx
shr ecx, 3 shr ecx, 3
jnz @by8
@rem: pop ecx
test cl, 4
jz @4
crc32 eax, dword ptr [edx]
add edx, 4
@4: test cl, 2
jz @2 jz @2
@1: crc32 eax, word ptr [edx]
crc32 eax, dword ptr[edx] add edx, 2
crc32 eax, dword ptr[edx + 4] @2: test cl, 1
dec ecx
lea edx, [edx + 8]
jnz @1
@2: pop ecx
and ecx, 7
jz @0 jz @0
cmp ecx, 4 crc32 eax, byte ptr [edx]
jb @4
crc32 eax, dword ptr[edx]
sub ecx, 4
lea edx, [edx + 4]
jz @0
@4:
crc32 eax, byte ptr[edx]
dec ecx
jz @0
crc32 eax, byte ptr[edx + 1]
dec ecx
jz @0
crc32 eax, byte ptr[edx + 2]
@0: not eax @0: not eax
ret
@by8: crc32 eax, dword ptr [edx]
crc32 eax, dword ptr [edx + 4]
add edx, 8
dec ecx
jnz @by8
jmp @rem
end; end;
{$endif CPUX86} {$endif}
type
/// the potential features, retrieved from an Intel CPU
// - see https://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
TIntelCpuFeature =
( { in EDX }
cfFPU, cfVME, cfDE, cfPSE, cfTSC, cfMSR, cfPAE, cfMCE,
cfCX8, cfAPIC, cf_d10, cfSEP, cfMTRR, cfPGE, cfMCA, cfCMOV,
cfPAT, cfPSE36, cfPSN, cfCLFSH, cf_d20, cfDS, cfACPI, cfMMX,
cfFXSR, cfSSE, cfSSE2, cfSS, cfHTT, cfTM, cfIA64, cfPBE,
{ in ECX }
cfSSE3, cfCLMUL, cfDS64, cfMON, cfDSCPL, cfVMX, cfSMX, cfEST,
cfTM2, cfSSSE3, cfCID, cfSDBG, cfFMA, cfCX16, cfXTPR, cfPDCM,
cf_c16, cfPCID, cfDCA, cfSSE41, cfSSE42, cfX2A, cfMOVBE, cfPOPCNT,
cfTSC2, cfAESNI, cfXS, cfOSXS, cfAVX, cfF16C, cfRAND, cfHYP,
{ extended features in EBX, ECX }
cfFSGS, cf_b01, cfSGX, cfBMI1, cfHLE, cfAVX2, cf_b06, cfSMEP,
cfBMI2, cfERMS, cfINVPCID, cfRTM, cfPQM, cf_b13, cfMPX, cfPQE,
cfAVX512F, cfAVX512DQ, cfRDSEED, cfADX, cfSMAP, cfAVX512IFMA, cfPCOMMIT, cfCLFLUSH,
cfCLWB, cfIPT, cfAVX512PF, cfAVX512ER, cfAVX512CD, cfSHA, cfAVX512BW, cfAVX512VL,
cfPREFW1, cfAVX512VBMI, cfUMIP, cfPKU, cfOSPKE, cf_c05, cf_c06, cf_c07,
cf_c08, cf_c09, cf_c10, cf_c11, cf_c12, cf_c13, cfAVX512VPC, cf_c15,
cf_cc16, cf_c17, cf_c18, cf_c19, cf_c20, cf_c21, cfRDPID, cf_c23,
cf_c24, cf_c25, cf_c26, cf_c27, cf_c28, cf_c29, cfSGXLC, cf_c31,
cf_d0, cf_d1, cfAVX512NNI, cfAVX512MAS, cf_d4, cf_d5, cf_d6, cf_d7);
/// all features, as retrieved from an Intel CPU
TIntelCpuFeatures = set of TIntelCpuFeature;
var
/// the available CPU features, as recognized at program startup
CpuFeatures: TIntelCpuFeatures;
procedure TestIntelCpuFeatures;
var regs: TRegisters;
begin
regs.edx := 0;
regs.ecx := 0;
GetCPUID(1,regs);
PIntegerArray(@CpuFeatures)^[0] := regs.edx;
PIntegerArray(@CpuFeatures)^[1] := regs.ecx;
GetCPUID(7,regs);
PIntegerArray(@CpuFeatures)^[2] := regs.ebx;
PIntegerArray(@CpuFeatures)^[3] := regs.ecx;
PByte(@PIntegerArray(@CpuFeatures)^[4])^ := regs.edx;
// assert(sizeof(CpuFeatures)=4*4+1);
{$ifdef Darwin}
{$ifdef CPU64}
// SSE42 asm does not (yet) work on Darwin x64 ...
Exclude(CpuFeatures, cfSSE42);
{$endif}
{$endif}
end;
{$endif CPUINTEL} {$endif CPUINTEL}
var var
@ -1600,8 +1474,7 @@ end;
begin begin
{$ifdef CPUINTEL} {$ifdef CPUINTEL}
TestIntelCpuFeatures; if SSE42Support then
if cfSSE42 in CpuFeatures then
begin begin
crc32c := @crc32csse42; crc32c := @crc32csse42;
mORMotHasher := @crc32csse42; mORMotHasher := @crc32csse42;