LazUtils: New function UTF8CompareLatinTextFast. Use in IDE instead of UTF8CompareText.

git-svn-id: trunk@64385 -
This commit is contained in:
juha 2021-01-14 13:52:23 +00:00
parent 42850cfe4d
commit 751852a44a
24 changed files with 109 additions and 85 deletions

View File

@ -53,7 +53,7 @@ begin
for i:=0 to List.Count-1 do begin
case Cmp of
cstCaseSensitive: if List[i]=s then exit(i);
cstCaseInsensitive: if UTF8CompareText(List[i],s)=0 then exit(i);
cstCaseInsensitive: if UTF8CompareLatinTextFast(List[i],s)=0 then exit(i);
cstFilename: if CompareFilenames(List[i],s)=0 then exit(i);
end;
end;

View File

@ -282,7 +282,7 @@ begin
if CaseSensitive then
Result := CompareStr(FnExt, Ext)
else
Result := UTF8CompareText(FnExt, Ext);
Result := UTF8CompareLatinTextFast(FnExt, Ext);
if Result < 0 then
Result := -1
else if Result > 0 then

View File

@ -173,10 +173,19 @@ function UTF8CompareStr(const S1, S2: string): PtrInt; inline;
function UTF8CompareStrP(S1, S2: PChar): PtrInt;
function UTF8CompareStr(S1: PChar; Count1: SizeInt; S2: PChar; Count2: SizeInt): PtrInt;
function UTF8CompareText(const S1, S2: string): PtrInt;
function UTF8CompareLatinTextFast(const S1, S2: String): PtrInt;
function UTF8CompareStrCollated(const S1, S2: string): PtrInt; {$IFnDEF ACP_RTL}inline;{$endif}
function CompareStrListUTF8LowerCase(List: TStringList; Index1, Index2: Integer): Integer;
type
{ TStringListUTF8Fast }
// TStringList descendant that overrides the DoCompareText hook so that text
// comparisons go through UTF8CompareLatinTextFast.
TStringListUTF8Fast = class(TStringList)
protected // Uses UTF8CompareLatinTextFast for comparison.
function DoCompareText(const s1,s2 : string): PtrInt; override;
end;
TConvertResult = (trNoError, trNullSrc, trNullDest, trDestExhausted,
trInvalidChar, trUnfinishedChar);
@ -3078,7 +3087,7 @@ begin
TextLen := Utf8Length(AText);
SubTextLen := Utf8Length(ASubText);
if (TextLen >= SubTextLen) then
Result := (Utf8CompareText(Utf8Copy(AText,1,SubTextLen),ASubText) = 0);
Result := (UTF8CompareLatinTextFast(Utf8Copy(AText,1,SubTextLen),ASubText) = 0);
end;
end;
@ -3092,7 +3101,7 @@ begin
TextLen := Utf8Length(AText);
SubTextLen := Utf8Length(ASubText);
if (TextLen >= SubTextLen) then
Result := (Utf8CompareText(Utf8Copy(AText,TextLen-SubTextLen+1,SubTextLen),ASubText) = 0);
Result := (UTF8CompareLatinTextFast(Utf8Copy(AText,TextLen-SubTextLen+1,SubTextLen),ASubText) = 0);
end;
end;
@ -3315,7 +3324,6 @@ begin
Result:=UTF8CompareStr(S1,StrLen(S1),S2,StrLen(S2));
end;
function UTF8CompareStr(S1: PChar; Count1: SizeInt; S2: PChar; Count2: SizeInt): PtrInt;
var
Count: SizeInt;
@ -3385,47 +3393,71 @@ end;
Returns: < 0 if S1 < S2, 0 if S1 = S2, > 0 if S1 > S2.
Compare two UTF8 encoded strings, case insensitive.
This function guarantees proper collation on all supported platforms.
Internally it uses WideCompareText when codepoints have more than one byte.
Internally it uses WideCompareText.
------------------------------------------------------------------------------}
function UTF8CompareText(const S1, S2: String): PtrInt;
// Case-insensitive comparison of two UTF-8 strings.
// Both operands are always converted to UTF-16 and handed to WideCompareText;
// there is no single-byte shortcut in this function.
var
  Wide1, Wide2: UnicodeString;
begin
  Wide1 := UTF8ToUTF16(S1);
  Wide2 := UTF8ToUTF16(S2);
  Result := WideCompareText(Wide1, Wide2);
end;
function UTF8CompareLatinTextFast(const S1, S2: String): PtrInt;
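// Like UTF8CompareText, but the result is not guaranteed to follow strict alphabetical order.
// The order is deterministic and good for binary search and similar uses.
// Fast path: bytes are compared directly, only ASCII 'A'..'Z' are case-folded, and
// multi-byte sequences that are byte-identical in both strings are skipped without decoding.
// When differing bytes involve a non-ASCII byte, the rest is compared with WideCompareText.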
var
i, Count, Count1, Count2: sizeint;
Count, Count1, Count2: sizeint;
Chr1, Chr2: Char;
P1, P2: PChar;
P1LastBytePointOffset: PChar;
begin
Count1 := Length(S1);
Count2 := Length(S2);
if Count1>Count2 then
if Count1 > Count2 then
Count := Count2
else
Count := Count1;
i := 0;
if Count>0 then
if Count > 0 then
begin
P1 := @S1[1];
P2 := @S2[1];
while i < Count do
P1LastBytePointOffset := P1;
while Count > 0 do
begin
if (P1^ > #191) or (P2^ > #191) then // Multi-byte encoding.
begin
//WriteLn('UTF8CompareText: Calling WideCompareText for "'+S1+'" <> "'+S2+'"');
Exit(WideCompareText(UTF8ToUTF16(S1),UTF8ToUTF16(S2)));
end;
Chr1 := P1^;
Chr2 := P2^;
if Chr1 <> Chr2 then
begin
if Chr1 in ['A'..'Z'] then
Inc(Chr1,32);
if Chr2 in ['A'..'Z'] then
Inc(Chr2,32);
if Chr1 <> Chr2 then
Break;
end;
Inc(P1); Inc(P2); Inc(i);
if (ord(Chr1) or ord(Chr2)) < 128 then
begin
P1LastBytePointOffset := P1;
if (Chr1 in ['A'..'Z']) then
inc(Chr1, $20);
if (Chr2 in ['A'..'Z']) then
inc(Chr2, $20);
if Chr1 <> Chr2 then
break;
end
else
begin
p2 := p2 + (P1LastBytePointOffset - P1);
p1 := P1LastBytePointOffset;
Exit(WideCompareText(
UTF8ToUTF16(p1, Length(s1) - (p1 - @S1[1])),
UTF8ToUTF16(p2, Length(s2) - (p2 - @S2[1]))
));
end;
end
else
if (ord(Chr1) or ord(Chr2)) < 128 then
P1LastBytePointOffset := P1;
Inc(P1); Inc(P2);
Dec(Count);
end;
end;
if i < Count then
if Count > 0 then
Result := Byte(Chr1)-Byte(Chr2)
else
Result := Count1-Count2;
@ -3996,6 +4028,14 @@ begin
end;
end;
{ TStringListUTF8Fast }
function TStringListUTF8Fast.DoCompareText(const s1, s2: string): PtrInt;
// Comparison hook override: hands both strings to UTF8CompareLatinTextFast
// and returns its result unchanged.
begin
  Exit(UTF8CompareLatinTextFast(s1, s2));
end;
initialization
InitFPUpchars;
InitLazUtf8;

View File

@ -402,14 +402,14 @@ begin
begin
if (ArgErr2 = 0) then
begin
Result := Utf8CompareText(Extr1, Extr2) = 0;
Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
end
else
begin
//Extr2 can have dangling %'s
//e.g. Extr1 = "%s %d" Extr2 = "%s %d {%H}", it does not make sense, but it's not illegal
if (ArgErr2 = Utf8Length(Extr1)+1) and not (ArgErr2 > Utf8Length(Extr2)) then Extr2 := Utf8Copy(Extr2,1,ArgErr2-1);
Result := Utf8CompareText(Extr1, Extr2) = 0;
Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
end;
end
else
@ -418,7 +418,7 @@ begin
//Only compare until the last valid argument in Extr1
if (ArgErr1 = Utf8Length(Extr1)) then Utf8Delete(Extr1, ArgErr1, 1);
if Utf8Length(Extr2) > Utf8Length(Extr1) then Extr2 := Utf8Copy(Extr2, 1, Utf8Length(Extr1));
Result := Utf8CompareText(Extr1, Extr2) = 0;
Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
end;
//writeln('CompareFormatArgs: Result = ',Result);
end;

View File

@ -557,7 +557,7 @@ end;
function ListSortFunc(List: TStringList; Index1, Index2: Integer): Integer;
// Compares the strings stored at Index1 and Index2 of List, ignoring case.
// Returns < 0, 0 or > 0 following the usual compare convention.
// NOTE(review): the signature looks like a TStringList custom-sort callback - confirm at the call site.
begin
Result := Utf8CompareText(List.Strings[Index1], List.Strings[Index2]);
Result := UTF8CompareLatinTextFast(List.Strings[Index1], List.Strings[Index2]);
end;
function TPoCheckerForm.LangFilterIndexToLangID(Index: Integer): TLangID;

View File

@ -98,7 +98,7 @@ var
begin
if BuildModes=nil then exit;
for i:=0 to BuildModes.Count-1 do
if UTF8CompareText(BuildModes[i].GetCaption,ModeComboBox.Text)=0
if UTF8CompareLatinTextFast(BuildModes[i].GetCaption,ModeComboBox.Text)=0
then begin
fBaseMode:=BuildModes[i];
FillDiffTreeView;

View File

@ -359,7 +359,7 @@ end;
function TBuildLazarusProfiles.IndexByName(AName: string): integer;
// Scans the items from the last one down to index 0 and returns the index of
// the first item met whose Name equals AName ignoring case.
// Returns -1 when no item matches (the loop runs Result below zero).
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(Items[Result].Name,AName)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(Items[Result].Name,AName)<>0) do
dec(Result);
end;

View File

@ -517,7 +517,7 @@ begin
TokenStartX:=length(s)+1;
j:=length(TemplateName);
while (j>0)
and (UTF8CompareText(copy(TemplateName,1,j),copy(s,TokenStartX-j,j))<>0) do
and (UTF8CompareLatinTextFast(copy(TemplateName,1,j),copy(s,TokenStartX-j,j))<>0) do
dec(j);
dec(TokenStartX,j);
AEditor.BlockBegin := Point(TokenStartX, p.y);

View File

@ -3399,7 +3399,7 @@ end;
function TEditOptLangList.FindByName(const Name: String): Integer;
// Scans the items from the last one down to index 0 and returns the index of
// the first item met whose SynClass.GetLanguageName equals Name ignoring case.
// Returns -1 when no item matches.
begin
Result := Count - 1;
while (Result >= 0) and (UTF8CompareText(
while (Result >= 0) and (UTF8CompareLatinTextFast(
Items[Result].SynClass.GetLanguageName, Name) <> 0) do
dec(Result);
end;

View File

@ -436,8 +436,8 @@ var
begin
i:=TypeRadiogroup.ItemIndex;
for Result:=Low(TEncloseSelectionType) to High(TEncloseSelectionType) do
if UTF8CompareText(TypeRadiogroup.Items[i],
EncloseSelectionTypeDescription(Result))=0
if UTF8CompareLatinTextFast(TypeRadiogroup.Items[i],
EncloseSelectionTypeDescription(Result))=0
then
exit;
RaiseGDBException('TEncloseSelectionDialog.GetEncloseType');

View File

@ -157,6 +157,7 @@ end;
procedure TEditorMouseOptionsAdvFrame.ActionGridCompareCells(Sender: TObject; ACol, ARow, BCol,
BRow: Integer; var Result: integer);
function CompareCol(i : Integer) : Integer;
var
j: Integer;
@ -175,22 +176,26 @@ procedure TEditorMouseOptionsAdvFrame.ActionGridCompareCells(Sender: TObject; AC
Result := ord(TSynEditMouseAction(TStringGrid(Sender).Objects[0, BRow]).ClickDir)
- ord(TSynEditMouseAction(TStringGrid(Sender).Objects[0, ARow]).ClickDir);
else
Result := UTF8CompareText(TStringGrid(Sender).Cells[i, ARow], TStringGrid(Sender).Cells[i, BRow]);
Result := UTF8CompareLatinTextFast(TStringGrid(Sender).Cells[i, ARow],
TStringGrid(Sender).Cells[i, BRow]);
end;
end;
var
i: Integer;
begin
Result := 0;
if Sender = nil then exit;
if Sender = OtherActionGrid then begin
for i := 1 to 4 do if result = 0 then
Result := CompareCol(FOtherSort[i]);
for i := 1 to 4 do
if result = 0 then
Result := CompareCol(FOtherSort[i]);
if Result = 0 then
Result := CompareCol(9); // Priority
end else begin
for i := 1 to 4 do if result = 0 then
Result := CompareCol(FSort[i]);
for i := 1 to 4 do
if result = 0 then
Result := CompareCol(FSort[i]);
if Result = 0 then
Result := CompareCol(7); // Priority
end;

View File

@ -801,7 +801,7 @@ end;
function THistoryLists.IndexOfName(const Name: string): integer;
// Backward linear search: returns the highest index whose item Name equals
// Name ignoring case, or -1 when there is no such item.
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(Items[Result].Name,Name)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(Items[Result].Name,Name)<>0) do
dec(Result);
end;

View File

@ -517,7 +517,7 @@ end;
function TNewLazIDEItemCategories.IndexOf(const CategoryName: string): integer;
// Backward linear search: returns the highest index whose item Name equals
// CategoryName ignoring case, or -1 when there is no such item.
begin
Result := Count - 1;
while (Result >= 0) and (UTF8CompareText(CategoryName, Items[Result].Name) <> 0) do
while (Result >= 0) and (UTF8CompareLatinTextFast(CategoryName, Items[Result].Name) <> 0) do
Dec(Result);
end;

View File

@ -951,7 +951,7 @@ end;
function TLazProjectFileDescriptors.IndexOf(const Name: string): integer;
// Backward linear search: returns the highest index whose item Name equals
// Name ignoring case, or -1 when there is no such item.
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(Name,Items[Result].Name)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(Name,Items[Result].Name)<>0) do
dec(Result);
end;
@ -1075,7 +1075,7 @@ end;
function TLazProjectDescriptors.IndexOf(const Name: string): integer;
// Backward linear search: returns the highest index whose item Name equals
// Name ignoring case, or -1 when there is no such item.
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(Name,Items[Result].Name)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(Name,Items[Result].Name)<>0) do
dec(Result);
end;

View File

@ -2706,7 +2706,7 @@ begin
while (x<=length(s)) and (s[x]<>#3) do inc(x);
if x<length(s) then begin
inc(x,2);
if UTF8CompareText(CurStr,copy(s,x,length(CurStr)))=0 then begin
if UTF8CompareLatinTextFast(CurStr,copy(s,x,length(CurStr)))=0 then begin
APosition:=i;
break;
end;
@ -5018,7 +5018,7 @@ begin
else
SrcToken:=copy(Line,length(Line)-length(AToken)+1,length(AToken));
//DebugLn(['TSourceEditor.AutoCompleteChar ',AToken,' SrcToken=',SrcToken,' CatName=',CatName,' Index=',Manager.CodeTemplateModul.CompletionAttributes[i].IndexOfName(CatName)]);
if (UTF8CompareText(AToken,SrcToken)=0)
if (UTF8CompareLatinTextFast(AToken,SrcToken)=0)
and (Manager.CodeTemplateModul.CompletionAttributes[i].IndexOfName(CatName)>=0)
and ( (not FEditor.SelAvail) or
(Manager.CodeTemplateModul.CompletionAttributes[i].IndexOfName(

View File

@ -224,7 +224,7 @@ begin
m:=0;
while l<=r do begin
m:=(l+r) shr 1;
cmp:=UTF8CompareText(NewMacro.Name,Items[m].Name);
cmp:=UTF8CompareLatinTextFast(NewMacro.Name,Items[m].Name);
if cmp<0 then
r:=m-1
else if cmp>0 then
@ -232,7 +232,7 @@ begin
else
break;
end;
if (m<fItems.Count) and (UTF8CompareText(NewMacro.Name,Items[m].Name)>0) then
if (m<fItems.Count) and (UTF8CompareLatinTextFast(NewMacro.Name,Items[m].Name)>0) then
inc(m);
fItems.Insert(m,NewMacro);
//if NewMacro.MacroFunction<>nil then
@ -447,7 +447,7 @@ begin
while l<=r do begin
m:=(l+r) shr 1;
Result:=Items[m];
cmp:=UTF8CompareText(MacroName,Result.Name);
cmp:=UTF8CompareLatinTextFast(MacroName,Result.Name);
if cmp<0 then
r:=m-1
else if cmp>0 then

View File

@ -11478,13 +11478,12 @@ begin
end;
end;
function TCustomStringGrid.DoCompareCells(Acol, ARow, Bcol, BRow: Integer
): Integer;
function TCustomStringGrid.DoCompareCells(Acol, ARow, Bcol, BRow: Integer): Integer;
begin
if Assigned(OnCompareCells) then
Result:=inherited DoCompareCells(Acol, ARow, Bcol, BRow)
else begin
Result:=UTF8CompareText(Cells[ACol,ARow], Cells[BCol,BRow]);
Result:=UTF8CompareLatinTextFast(Cells[ACol,ARow], Cells[BCol,BRow]);
if SortOrder=soDescending then
result:=-result;
end;

View File

@ -923,14 +923,14 @@ var i: integer;
begin
// insert as first
if (Items.Count=0)
or (not CaseSensitive and (UTF8CompareText(Items[0],Item)<>0))
or (not CaseSensitive and (UTF8CompareLatinTextFast(Items[0],Item)<>0))
or (CaseSensitive and (Items[0]<>Item)) then
begin
Items.InsertObject(0,Item,AnObject);
end;
// delete old
for i:=Items.Count-1 downto 1 do begin
if (not CaseSensitive and (UTF8CompareText(Items[i],Item)=0))
if (not CaseSensitive and (UTF8CompareLatinTextFast(Items[i],Item)=0))
or (CaseSensitive and (Items[i]=Item)) then
Items.Delete(i);
end;

View File

@ -628,7 +628,7 @@ begin
begin
XMLFontConfigChilds := XMLFontConfig.ChildNodes;
for i := 0 to XMLFontConfigChilds.Count-1 do
if UTF8CompareText(XMLFontConfigChilds.Item[i].NodeName, 'dir') = 0 then
if UTF8CompareLatinTextFast(XMLFontConfigChilds.Item[i].NodeName, 'dir') = 0 then
FontsScanDir(XMLFontConfigChilds.Item[i].FirstChild.NodeValue,AFontPaths,AFontList);
end;
finally

View File

@ -469,21 +469,11 @@ end;
{ TQtComboStrings }
procedure TQtComboStrings.SetSorted(AValue: Boolean);
// Setter for FSorted. Returns immediately when AValue equals the current
// value; otherwise stores AValue in FSorted. Sort is only ever invoked when
// the flag has just been switched on, never when it is switched off.
var
i: Integer;
begin
if FSorted=AValue then Exit;
FSorted:=AValue;
if not FSorted then Exit;
for i := 0 to Count - 2 do
begin
if UTF8CompareText(Strings[i], Strings[i + 1]) < 0 then
begin
Sort;
Break;
end;
end;
if FSorted then
Sort;
end;
procedure TQtComboStrings.Put(Index: Integer; const S: string);

View File

@ -472,21 +472,11 @@ end;
{ TQtComboStrings }
procedure TQtComboStrings.SetSorted(AValue: Boolean);
// Setter for FSorted. Returns immediately when AValue equals the current
// value; otherwise stores AValue in FSorted. Sort is only ever invoked when
// the flag has just been switched on, never when it is switched off.
var
i: Integer;
begin
if FSorted=AValue then Exit;
FSorted:=AValue;
if not FSorted then Exit;
for i := 0 to Count - 2 do
begin
if UTF8CompareText(Strings[i], Strings[i + 1]) < 0 then
begin
Sort;
Break;
end;
end;
if FSorted then
Sort;
end;
procedure TQtComboStrings.Put(Index: Integer; const S: string);

View File

@ -1075,7 +1075,7 @@ begin
for i:=0 to FSearchItems.Count-1 do begin
Node:=THelpDBItem(FSearchItems[i]).Node;
if (Node=nil) or (not Node.IDValid) then continue;
if UTF8CompareText(Node.ID,HelpKeyword)<>0 then continue;
if UTF8CompareLatinTextFast(Node.ID,HelpKeyword)<>0 then continue;
CreateNodeQueryListAndAdd(Node,nil,ListOfNodes,true);
end;
end;
@ -1097,7 +1097,7 @@ begin
for i:=0 to FSearchItems.Count-1 do begin
Node:=THelpDBItem(FSearchItems[i]).Node;
if (Node=nil) or (not Node.IDValid) then continue;
if UTF8CompareText(Node.ID,HelpDirective)<>0 then continue;
if UTF8CompareLatinTextFast(Node.ID,HelpDirective)<>0 then continue;
CreateNodeQueryListAndAdd(Node,nil,ListOfNodes,true);
end;
end;
@ -1395,7 +1395,7 @@ end;
function THelpDatabases.IndexOf(ID: THelpDatabaseID): integer;
// Backward linear search: returns the highest index whose item ID equals
// the given ID ignoring case, or -1 when there is no such item.
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(ID,Items[Result].ID)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(ID,Items[Result].ID)<>0) do
dec(Result);
end;

View File

@ -1594,7 +1594,7 @@ begin
begin
if (Index <> i) and (FStrings.Names[i] <> '') then
begin
if (Utf8CompareText(FStrings.Names[i], NewValue) = 0) then
if (UTF8CompareLatinTextFast(FStrings.Names[i], NewValue) = 0) then
begin
Result := False;
ShowMessage(Format(rsVLEDuplicateKey,[NewValue, i + FixedRows]));

View File

@ -2185,7 +2185,7 @@ var
continue;
ShortFilename:=ExtractFilename(NewFilename);
CurName:=ExtractFileName(ConflictFile.Filename);
if (UTF8CompareText(CurName,ShortFilename)<>0)
if (UTF8CompareLatinTextFast(CurName,ShortFilename)<>0)
and (CompareFilenames(CurName,ShortFilename)<>0) then
continue;
// name clash on this or other platforms => warn
@ -6578,7 +6578,7 @@ end;
function TLazPackageDescriptors.IndexOf(const Name: string): integer;
// Backward linear search: returns the highest index whose item Name equals
// Name ignoring case, or -1 when there is no such item.
begin
Result:=Count-1;
while (Result>=0) and (UTF8CompareText(Name,Items[Result].Name)<>0) do
while (Result>=0) and (UTF8CompareLatinTextFast(Name,Items[Result].Name)<>0) do
dec(Result);
end;