{----------------------------------------------------------------------------- The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.mozilla.org/NPL/NPL-1_1Final.html Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. The Original Code is: mwStringHashList.pas, released December 18, 2000. The Initial Developer of the Original Code is Martin Waldenburg (Martin.Waldenburg@T-Online.de). Portions created by Martin Waldenburg are Copyright (C) 2000 Martin Waldenburg. All Rights Reserved. Contributor(s): ___________________. Last Modified: 1/2/2001 Current Version: 2.0 Notes: This is a very fast Hash list for strings. Known Issues: -----------------------------------------------------------------------------} {$ifdef fpc} {$mode delphi} {$H+} {$endif} {$R-} unit mwStringHashList; interface uses {$IFDEF UNIX}clocale, cwstring,{$ENDIF} Classes, SysUtils, LCLIntf; var mwHashTable: array[#0..#255] of byte; mwInsensitiveHashTable: array[#0..#255] of Byte; type TmwStringHash = function(const aString: string): Integer; TmwStringHashCompare = function(const Str1: string; const Str2: string): Boolean; TmwHashWord = class S: string; Id: Integer; ExID: Integer; constructor Create(aString: string; anId, anExId: Integer); end; PHashPointerList = ^THashPointerList; THashPointerList = array[1..1] of TObject; TmwBaseStringHashList = class(TObject) FList: PHashPointerList; fCapacity: Integer; protected fHash: TmwStringHash; function Get(Index: Integer): Pointer; procedure Put(Index: Integer; Item: Pointer); procedure SetCapacity(NewCapacity: Integer); public destructor Destroy; override; procedure Clear; property Capacity: Integer read fCapacity; property Items[Index: Integer]: Pointer read Get write Put; default; end; TmwHashStrings = class(TmwBaseStringHashList) public procedure AddString(aString: string; anId, anExId: Integer); end; TmwHashItems = class(TmwBaseStringHashList) public constructor Create(aHash: TmwStringHash); procedure AddString(aString: string; anId, anExId: Integer); end; TmwStringHashList = class(TmwBaseStringHashList) private fSecondaryHash: TmwStringHash; fCompare: TmwStringHashCompare; public constructor Create(Primary, Secondary: TmwStringHash; aCompare: TmwStringHashCompare); procedure AddString(aString: string; anId, anExId: Integer); function Hash(const S: string; var anId: Integer; var anExId: Integer): Boolean; function HashEX(const S: string; var anId: Integer; var anExId: Integer; HashValue: Integer): Boolean; end; function CrcHash(const aString: string): integer; function ICrcHash(const aString: string): integer; function SmallCrcHash(const aString: string): integer; function ISmallCrcHash(const aString: string): integer; function TinyHash(const aString: string): Integer; function ITinyHash(const aString: string): Integer; function HashCompare(const Str1: string; const Str2: string): Boolean; function IHashCompare(const Str1: string; const Str2: string): Boolean; function HashSecondaryOne(const aString: string): Integer; function HashSecondaryTwo(const aString: string): Integer; procedure InitTables; implementation procedure InitTables; var I, K: Char; Temp: Byte; begin for I := #0 to #255 do begin mwHashTable[I] := Ord(I); end; RandSeed := 255; for I := #1 to #255 do begin repeat K := Char(Random(255)); until K <> #0; Temp := mwHashTable[I]; mwHashTable[I] := mwHashTable[K]; mwHashTable[K] := Temp; end; for I := #0 to #255 do mwInsensitiveHashTable[I] := mwHashTable[AnsiLowerCase(string(I))[1]]; end; { based on a Hasch function by Cyrille de Brebisson } function CrcHash(const aString: string): integer; var I: Integer; begin Result := 0; for i := 1 to length(aString) do begin Result := (Result shr 4) xor (((Result xor mwHashTable[aString[I]]) and $F) * $1000); Result := (Result shr 4) xor (((Result xor (ord(mwHashTable[aString[I]]) shr 4)) and $F) * $1000); end; if Result = 0 then Result := Length(aString) mod 8 + 1; end; function ICrcHash(const aString: string): integer; var I: Integer; begin Result := 0; for i := 1 to length(aString) do begin Result := (Result shr 4) xor (((Result xor mwInsensitiveHashTable[aString[I]]) and $F) * $1000); Result := (Result shr 4) xor (((Result xor (ord(mwInsensitiveHashTable[aString[I]]) shr 4)) and $F) * $1000); end; if Result = 0 then Result := Length(aString) mod 8 + 1; end; function SmallCrcHash(const aString: string): integer; var I: Integer; begin Result := 0; for i := 1 to length(aString) do begin Result := (Result shr 4) xor (((Result xor mwHashTable[aString[I]]) and $F) * $80); Result := (Result shr 4) xor (((Result xor (ord(mwHashTable[aString[I]]) shr 4)) and $F) * $80); if I = 3 then break; end; if Result = 0 then Result := Length(aString) mod 8 + 1; end; function ISmallCrcHash(const aString: string): integer; var I: Integer; begin Result := 0; for i := 1 to length(aString) do begin Result := (Result shr 4) xor (((Result xor mwInsensitiveHashTable[aString[I]]) and $F) * $80); Result := (Result shr 4) xor (((Result xor (ord(mwInsensitiveHashTable[aString[I]]) shr 4)) and $F) * $80); if I = 3 then break; end; if Result = 0 then Result := Length(aString) mod 8 + 1; end; function TinyHash(const aString: string): Integer; var I: Integer; begin Result := Length(aString); for i := 1 to length(aString) do begin inc(Result, mwHashTable[aString[I]]); Result := Result mod 128 + 1; if I = 2 then break; end; end; function ITinyHash(const aString: string): Integer; var I: Integer; begin Result := Length(aString); for i := 1 to length(aString) do begin inc(Result, mwInsensitiveHashTable[aString[I]]); Result := Result mod 128 + 1; if I = 2 then break; end; end; function HashCompare(const Str1: string; const Str2: string): Boolean; var I: Integer; begin if Length(Str1) <> Length(Str2) then begin Result := False; Exit; end; Result := True; for I := 1 to Length(Str1) do if Str1[I] <> Str2[I] then begin Result := False; Exit; end; end; function IHashCompare(const Str1: string; const Str2: string): Boolean; var I: Integer; begin if Length(Str1) <> Length(Str2) then begin Result := False; Exit; end; Result := True; for I := 1 to Length(Str1) do if mwInsensitiveHashTable[Str1[I]] <> mwInsensitiveHashTable[Str2[I]] then begin Result := False; Exit; end; end; function HashSecondaryOne(const aString: string): Integer; begin Result := Length(aString); inc(Result, mwInsensitiveHashTable[aString[Length(aString)]]); Result := Result mod 16 + 1; inc(Result, mwInsensitiveHashTable[aString[1]]); Result := Result mod 16 + 1; end; function HashSecondaryTwo(const aString: string): Integer; var I: Integer; begin Result := Length(aString); for I := Length(aString) downto 1 do begin inc(Result, mwInsensitiveHashTable[aString[I]]); Result := Result mod 32 + 1; end; end; { TmwHashString } constructor TmwHashWord.Create(aString: string; anId, anExId: Integer); begin inherited Create; S := aString; Id := anId; ExID := anExId; end; { TmwBaseStringHashList } procedure TmwBaseStringHashList.Clear; var I: Integer; begin for I := 1 to fCapacity do if fList[I] <> nil then fList[I].Free; ReallocMem(FList, 0); fCapacity := 0; end; destructor TmwBaseStringHashList.Destroy; begin Clear; inherited Destroy; end; function TmwBaseStringHashList.Get(Index: Integer): Pointer; begin Result := nil; if (Index > 0) and (Index <= fCapacity) then Result := fList[Index]; end; procedure TmwBaseStringHashList.Put(Index: Integer; Item: Pointer); begin if (Index > 0) and (Index <= fCapacity) then fList[Index] := Item; end; procedure TmwBaseStringHashList.SetCapacity(NewCapacity: Integer); var I, OldCapacity: Integer; begin if NewCapacity > fCapacity then begin ReallocMem(FList, (NewCapacity) * SizeOf(Pointer)); OldCapacity := fCapacity; FCapacity := NewCapacity; for I := OldCapacity + 1 to NewCapacity do Items[I] := nil; end; end; { TmwHashStrings } procedure TmwHashStrings.AddString(aString: string; anId, anExId: Integer); begin SetCapacity(Capacity + 1); fList[Capacity] := TmwHashWord.Create(aString, anId, anExId); end; { TmwHashItems } procedure TmwHashItems.AddString(aString: string; anId, anExId: Integer); var HashWord: TmwHashWord; HashStrings: TmwHashStrings; HashVal: Integer; begin HashVal := fHash(aString); SetCapacity(HashVal); if Items[HashVal] = nil then begin Items[HashVal] := TmwHashWord.Create(aString, anId, anExId); end else if fList[HashVal] is TmwHashStrings then begin TmwHashStrings(Items[HashVal]).AddString(aString, anId, anExId); end else begin HashWord := Items[HashVal]; HashStrings := TmwHashStrings.Create; Items[HashVal] := HashStrings; HashStrings.AddString(HashWord.S, HashWord.Id, HashWord.ExId); HashWord.Free; HashStrings.AddString(aString, anId, anExId) end; end; constructor TmwHashItems.Create(aHash: TmwStringHash); begin inherited Create; fHash := aHash; end; { TmwStringHashList } constructor TmwStringHashList.Create(Primary, Secondary: TmwStringHash; aCompare: TmwStringHashCompare); begin inherited Create; fHash := Primary; fSecondaryHash := Secondary; fCompare := aCompare; end; procedure TmwStringHashList.AddString(aString: string; anId, anExId: Integer); var HashWord: TmwHashWord; HashValue: Integer; HashItems: TmwHashItems; begin HashValue := fHash(aString); if HashValue >= fCapacity then SetCapacity(HashValue); if Items[HashValue] = nil then begin Items[HashValue] := TmwHashWord.Create(aString, anId, anExId); end else if fList[HashValue] is TmwHashItems then begin TmwHashItems(Items[HashValue]).AddString(aString, anId, anExId); end else begin HashWord := Items[HashValue]; HashItems := TmwHashItems.Create(fSecondaryHash); Items[HashValue] := HashItems; HashItems.AddString(HashWord.S, HashWord.Id, HashWord.ExId); HashWord.Free; HashItems.AddString(aString, anId, anExId); end; end; function TmwStringHashList.Hash(const S: string; var anId: Integer; var anExId: Integer): Boolean; begin Result := HashEX(S, anId, anExId, fHash(S)); end; function TmwStringHashList.HashEX(const S: string; var anId: Integer; var anExId: Integer; HashValue: Integer): Boolean; var Temp: TObject; Hashword: TmwHashWord; HashItems: TmwHashItems; I, ItemHash: Integer; begin Result := False; anID := -1; anExId := -1; if HashValue < 1 then Exit; if HashValue > Capacity then Exit; if Items[HashValue] <> nil then begin if fList[HashValue] is TmwHashWord then begin Hashword := Items[HashValue]; Result := fCompare(HashWord.S, S); if Result then begin anID := HashWord.Id; anExId := HashWord.ExID; end; end else begin HashItems := Items[HashValue]; ItemHash := HashItems.fHash(S); if ItemHash > HashItems.Capacity then Exit; Temp := HashItems[ItemHash]; if Temp <> nil then if Temp is TmwHashWord then begin Result := fCompare(TmwHashWord(Temp).S, S); if Result then begin anID := TmwHashWord(Temp).Id; anExId := TmwHashWord(Temp).ExID; end; end else for I := 1 to TmwHashStrings(Temp).Capacity do begin HashWord := TmwHashStrings(Temp)[I]; Result := fCompare(HashWord.S, S); if Result then begin anID := HashWord.Id; anExId := HashWord.ExID; exit; end; end; end; end; end; initialization InitTables; {$R+} end.