lazutils: started TranslateUTF8Chars

git-svn-id: trunk@39288 -
2025-09-13 21:19:18 +02:00 · 2012-11-16 14:30:58 +00:00 · 2012-11-16 14:30:58 +00:00 · 19c0f84bd2
commit 19c0f84bd2
parent 8f2b9afb13
1 changed files with 90 additions and 3 deletions
--- a/components/lazutils/laz2_xmlutils.pas
+++ b/components/lazutils/laz2_xmlutils.pas
@ -22,7 +22,7 @@ unit laz2_xmlutils;
 interface
 uses
-  SysUtils, Classes;
+  SysUtils, Classes, LazUTF8;
 type
  TXMLUtilString = AnsiString;
@ -41,7 +41,9 @@ procedure NormalizeSpaces(var Value: TXMLUtilString);
 function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
 { beware, works in ASCII range only }
-function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
+function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
 { a simple hash table with TXMLUtilString keys }
@ -359,7 +361,7 @@ begin
  Result := c^ in [#32,#9,#10,#13];
 end;
-function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
+function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
 var
  counter: Integer;
  c1, c2: Word;
@ -386,6 +388,91 @@ begin
  result := c1 - c2;
 end;
 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
 { replaces characters in s.
  The mapping is defined by SrcChars and DstChars.
  The n-th UTF-8 character of SrcChars will be replaced with the n-th
  character of DstChars. If there is no n-th character in DstChars then the
  character will be deleted in s.
 }
 var
  p: PChar;
  unique: boolean;
  function IsASCII(const h: string): boolean;
  var
    i: Integer;
  begin
    for i:=1 to length(h) do
      if ord(h[i])>128 then exit(false);
    Result:=true;
  end;
  procedure UniqString; inline;
  var
    OldPos: SizeInt;
  begin
    if unique then exit;
    OldPos:=p-PChar(s);
    UniqueString(s);
    p:=PChar(s)+OldPos;
  end;
  procedure ReplaceASCII;
  var
    OldPos: SizeInt;
    i: SizeInt;
  begin
    p:=PChar(s);
    while p^<>#0 do begin
      i:=Pos(p^,SrcChars);
      if i<1 then begin
        // keep character
        inc(p);
      end else begin
        if i<=length(DstChars) then begin
          // replace a character
          UniqString;
          p^:=DstChars[i];
          inc(p);
        end else begin
          // delete a character
          OldPos:=p-PChar(s);
          Delete(s,OldPos+1,1);
          p:=PChar(s)+OldPos;
        end;
      end;
    end;
  end;
  procedure ReplaceMultiByte;
  var
    sp: PChar;
    clen: Integer;
    sclen: Integer;
  begin
    p:=PChar(s);
    while p^<>#0 do begin
      clen:=UTF8CharacterLength(p);
      if Pos(p^,SrcChars)>0 then begin
        sp:=PChar(SrcChars);
      end;
    end;
  end;
 begin
  if (SrcChars='') or (s='') or (SrcChars=DstChars) then exit;
  unique:=false;
  if IsASCII(SrcChars) and IsASCII(DstChars) then begin
    // search and replace single byte characters
    ReplaceASCII;
    exit;
  end;
  // search for multi byte UTF-8 characters
  ReplaceMultiByte;
 end;
 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
 begin
  Result := InitValue;