* patch by ab: Optimized default hash asm in generics.hashes, resolves #40035

* generics.hashes now uses the cpu unit (SSE42Support) to detect CPU support for the CRC32 instruction

(cherry picked from commit e1bbcf7050)
This commit is contained in:
florian 2022-12-14 21:56:04 +01:00 committed by marcoonthegit
parent 5d878ac342
commit 48ffc20502

View File

@ -122,6 +122,10 @@ var
mORMotHasher: THasher;
implementation
{$ifdef CPUINTEL}
uses
cpu;
{$endif CPUINTEL}
function SimpleChecksumHash(AKey: Pointer; ALength: SizeInt): UInt32;
var
@ -1241,210 +1245,80 @@ end;
{$ifdef CPUINTEL}
type
TRegisters = record
eax,ebx,ecx,edx: cardinal;
end;
{$ifdef CPU64}
procedure GetCPUID(Param: Cardinal; var Registers: TRegisters); nostackframe; assembler;
function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal; nostackframe; assembler;
asm
{$ifdef win64}
mov eax, ecx
mov r9, rdx
{$else}
mov eax, edi
mov r9, rsi
{$endif win64}
mov r10, rbx // preserve rbx
xor ebx, ebx
xor ecx, ecx
xor edx, edx
cpuid
mov TRegisters(r9).&eax, eax
mov TRegisters(r9).&ebx, ebx
mov TRegisters(r9).&ecx, ecx
mov TRegisters(r9).&edx, edx
mov rbx, r10
end;
function crc32csse42(crc: cardinal; buf: Pointer; len: cardinal): cardinal; nostackframe; assembler;
asm // ecx=crc, rdx=buf, r8=len (Linux: edi,rsi,rdx)
{$ifdef win64}
mov eax, ecx
{$else}
mov eax, edi
mov r8, rdx
mov rdx, rsi
{$endif win64}
mov eax, crc
test len, len
jz @z
test buf, buf
jz @z
not eax
test rdx, rdx
jz @0
test r8, r8
jz @0
@7: test dl, 7
jz @8 // align to 8 bytes boundary
crc32 eax, byte ptr[rdx]
inc rdx
dec r8
jz @0
test dl, 7
jnz @7
@8: mov rcx, r8
shr r8, 3
mov ecx, len
shr len, 3
jnz @by8 // we don't care for read alignment
@0: test cl, 4
jz @4
crc32 eax, dword ptr [buf]
add buf, 4
@4: test cl, 2
jz @2
@1:
crc32 rax, qword [rdx] // hash 8 bytes per loop
dec r8
lea rdx, [rdx + 8]
jnz @1
@2: and ecx, 7
jz @0
cmp ecx, 4
jb @4
crc32 eax, dword ptr[rdx]
sub ecx, 4
lea rdx, [rdx + 4]
jz @0
@4: crc32 eax, byte ptr[rdx]
dec ecx
jz @0
crc32 eax, byte ptr[rdx + 1]
dec ecx
jz @0
crc32 eax, byte ptr[rdx + 2]
@0: not eax
crc32 eax, word ptr [buf]
add buf, 2
@2: test cl, 1
jz @1
crc32 eax, byte ptr [buf]
@1: not eax
@z: ret
align 16
@by8: crc32 rax, qword ptr [buf] // hash 8 bytes per loop
add buf, 8
sub len, 1
jnz @by8
jmp @0
end;
{$endif CPU64}
{$ifdef CPUX86}
procedure GetCPUID(Param: Cardinal; var Registers: TRegisters);
{$else}
function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal; nostackframe; assembler;
asm
push esi
push edi
mov esi, edx
mov edi, eax
pushfd
pop eax
mov edx, eax
xor eax, $200000
push eax
popfd
pushfd
pop eax
xor eax, edx
jz @nocpuid
push ebx
mov eax, edi
xor ecx, ecx
cpuid
mov TRegisters(esi).&eax, eax
mov TRegisters(esi).&ebx, ebx
mov TRegisters(esi).&ecx, ecx
mov TRegisters(esi).&edx, edx
pop ebx
@nocpuid:
pop edi
pop esi
end;
function crc32csse42(crc: cardinal; buf: Pointer; len: cardinal): cardinal;
asm // eax=crc, edx=buf, ecx=len
// eax=crc, edx=buf, ecx=len
not eax
test ecx, ecx
jz @0
test edx, edx
jz @0
@3: test edx, 3
jz @8 // align to 4 bytes boundary
crc32 eax, byte ptr[edx]
jmp @align
@a: crc32 eax, byte ptr [edx]
inc edx
dec ecx
jz @0
test edx, 3
jnz @3
@8: push ecx
@align: test dl, 3
jnz @a
push ecx
shr ecx, 3
jnz @by8
@rem: pop ecx
test cl, 4
jz @4
crc32 eax, dword ptr [edx]
add edx, 4
@4: test cl, 2
jz @2
@1:
crc32 eax, dword ptr[edx]
crc32 eax, dword ptr[edx + 4]
dec ecx
lea edx, [edx + 8]
jnz @1
@2: pop ecx
and ecx, 7
crc32 eax, word ptr [edx]
add edx, 2
@2: test cl, 1
jz @0
cmp ecx, 4
jb @4
crc32 eax, dword ptr[edx]
sub ecx, 4
lea edx, [edx + 4]
jz @0
@4:
crc32 eax, byte ptr[edx]
dec ecx
jz @0
crc32 eax, byte ptr[edx + 1]
dec ecx
jz @0
crc32 eax, byte ptr[edx + 2]
crc32 eax, byte ptr [edx]
@0: not eax
ret
@by8: crc32 eax, dword ptr [edx]
crc32 eax, dword ptr [edx + 4]
add edx, 8
dec ecx
jnz @by8
jmp @rem
end;
{$endif CPUX86}
{$endif}
type
/// the potential features, retrieved from an Intel CPU
// - see https://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
// - the declaration order matters: TestIntelCpuFeatures writes raw CPUID
//   registers over the memory of a TIntelCpuFeatures set, 32 items per
//   register, in the order of the sections below
// - cf_XNN items are placeholders which only keep the following items at
//   their expected position
TIntelCpuFeature =
( { GetCPUID(1): EDX - items 0..31 }
cfFPU, cfVME, cfDE, cfPSE, cfTSC, cfMSR, cfPAE, cfMCE,
cfCX8, cfAPIC, cf_d10, cfSEP, cfMTRR, cfPGE, cfMCA, cfCMOV,
cfPAT, cfPSE36, cfPSN, cfCLFSH, cf_d20, cfDS, cfACPI, cfMMX,
cfFXSR, cfSSE, cfSSE2, cfSS, cfHTT, cfTM, cfIA64, cfPBE,
{ GetCPUID(1): ECX - items 32..63 }
cfSSE3, cfCLMUL, cfDS64, cfMON, cfDSCPL, cfVMX, cfSMX, cfEST,
cfTM2, cfSSSE3, cfCID, cfSDBG, cfFMA, cfCX16, cfXTPR, cfPDCM,
cf_c16, cfPCID, cfDCA, cfSSE41, cfSSE42, cfX2A, cfMOVBE, cfPOPCNT,
cfTSC2, cfAESNI, cfXS, cfOSXS, cfAVX, cfF16C, cfRAND, cfHYP,
{ extended features from GetCPUID(7): EBX - items 64..95 }
cfFSGS, cf_b01, cfSGX, cfBMI1, cfHLE, cfAVX2, cf_b06, cfSMEP,
cfBMI2, cfERMS, cfINVPCID, cfRTM, cfPQM, cf_b13, cfMPX, cfPQE,
cfAVX512F, cfAVX512DQ, cfRDSEED, cfADX, cfSMAP, cfAVX512IFMA, cfPCOMMIT, cfCLFLUSH,
cfCLWB, cfIPT, cfAVX512PF, cfAVX512ER, cfAVX512CD, cfSHA, cfAVX512BW, cfAVX512VL,
{ extended features from GetCPUID(7): ECX - items 96..127 }
cfPREFW1, cfAVX512VBMI, cfUMIP, cfPKU, cfOSPKE, cf_c05, cf_c06, cf_c07,
cf_c08, cf_c09, cf_c10, cf_c11, cf_c12, cf_c13, cfAVX512VPC, cf_c15,
cf_cc16, cf_c17, cf_c18, cf_c19, cf_c20, cf_c21, cfRDPID, cf_c23,
cf_c24, cf_c25, cf_c26, cf_c27, cf_c28, cf_c29, cfSGXLC, cf_c31,
{ extended features from GetCPUID(7): lowest byte of EDX - items 128..135 }
cf_d0, cf_d1, cfAVX512NNI, cfAVX512MAS, cf_d4, cf_d5, cf_d6, cf_d7);
/// all features, as retrieved from an Intel CPU
// - NOTE(review): filled by raw memory writes, so this assumes the compiler
//   stores the set as a bit array in ordinal order - confirm for the target
TIntelCpuFeatures = set of TIntelCpuFeature;
var
/// the available CPU features, as recognized at program startup
CpuFeatures: TIntelCpuFeatures;
/// fill the CpuFeatures global set from two GetCPUID() queries
// - GetCPUID(1) supplies EDX then ECX, GetCPUID(7) supplies EBX, ECX and the
//   lowest byte of EDX; each register is written as-is over the memory of
//   the set, so the TIntelCpuFeature declaration order must match
// - on Darwin 64-bit, cfSSE42 is removed afterwards, whatever the CPU reports
procedure TestIntelCpuFeatures;
var regs: TRegisters;
begin
// the x86 GetCPUID leaves regs untouched when it finds no CPUID support,
// so the two registers read after the first query start from zero
// NOTE(review): regs.eax/regs.ebx are not cleared here, although regs.ebx
// is read after the second query - confirm whether this is intended
regs.edx := 0;
regs.ecx := 0;
GetCPUID(1,regs);
// slots [0] and [1] of the set memory receive EDX and ECX
// (presumably 32-bit slots: PIntegerArray is declared outside this view)
PIntegerArray(@CpuFeatures)^[0] := regs.edx;
PIntegerArray(@CpuFeatures)^[1] := regs.ecx;
GetCPUID(7,regs);
// slots [2] and [3] receive EBX and ECX of the second query
PIntegerArray(@CpuFeatures)^[2] := regs.ebx;
PIntegerArray(@CpuFeatures)^[3] := regs.ecx;
// only one byte of EDX is stored, at the address of slot [4]: this
// matches the last 8 items of TIntelCpuFeature (cf_d0..cf_d7)
PByte(@PIntegerArray(@CpuFeatures)^[4])^ := regs.edx;
// disabled size check: four 32-bit registers plus one byte are expected
// assert(sizeof(CpuFeatures)=4*4+1);
{$ifdef Darwin}
{$ifdef CPU64}
// SSE42 asm does not (yet) work on Darwin x64 ...
Exclude(CpuFeatures, cfSSE42);
{$endif}
{$endif}
end;
{$endif CPUINTEL}
var
@ -1600,8 +1474,7 @@ end;
begin
{$ifdef CPUINTEL}
TestIntelCpuFeatures;
if cfSSE42 in CpuFeatures then
if SSE42Support then
begin
crc32c := @crc32csse42;
mORMotHasher := @crc32csse42;