lazutils: started TranslateUTF8Chars

git-svn-id: trunk@39288 -
This commit is contained in:
mattias 2012-11-16 14:30:58 +00:00
parent 8f2b9afb13
commit 19c0f84bd2

View File

@ -22,7 +22,7 @@ unit laz2_xmlutils;
interface interface
uses uses
SysUtils, Classes; SysUtils, Classes, LazUTF8;
type type
TXMLUtilString = AnsiString; TXMLUtilString = AnsiString;
@ -41,7 +41,9 @@ procedure NormalizeSpaces(var Value: TXMLUtilString);
function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean; function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord; function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
{ beware, works in ASCII range only } { beware, works in ASCII range only }
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer; function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
{ a simple hash table with TXMLUtilString keys } { a simple hash table with TXMLUtilString keys }
@ -359,7 +361,7 @@ begin
Result := c^ in [#32,#9,#10,#13]; Result := c^ in [#32,#9,#10,#13];
end; end;
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer; function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
var var
counter: Integer; counter: Integer;
c1, c2: Word; c1, c2: Word;
@ -386,6 +388,91 @@ begin
result := c1 - c2; result := c1 - c2;
end; end;
procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
{ replaces characters in s.
The mapping is defined by SrcChars and DstChars.
The n-th UTF-8 character of SrcChars will be replaced with the n-th
character of DstChars. If there is no n-th character in DstChars then the
character will be deleted in s.
}
var
p: PChar;
unique: boolean;
function IsASCII(const h: string): boolean;
var
i: Integer;
begin
for i:=1 to length(h) do
if ord(h[i])>128 then exit(false);
Result:=true;
end;
procedure UniqString; inline;
var
OldPos: SizeInt;
begin
if unique then exit;
OldPos:=p-PChar(s);
UniqueString(s);
p:=PChar(s)+OldPos;
end;
procedure ReplaceASCII;
var
OldPos: SizeInt;
i: SizeInt;
begin
p:=PChar(s);
while p^<>#0 do begin
i:=Pos(p^,SrcChars);
if i<1 then begin
// keep character
inc(p);
end else begin
if i<=length(DstChars) then begin
// replace a character
UniqString;
p^:=DstChars[i];
inc(p);
end else begin
// delete a character
OldPos:=p-PChar(s);
Delete(s,OldPos+1,1);
p:=PChar(s)+OldPos;
end;
end;
end;
end;
procedure ReplaceMultiByte;
var
sp: PChar;
clen: Integer;
sclen: Integer;
begin
p:=PChar(s);
while p^<>#0 do begin
clen:=UTF8CharacterLength(p);
if Pos(p^,SrcChars)>0 then begin
sp:=PChar(SrcChars);
end;
end;
end;
begin
if (SrcChars='') or (s='') or (SrcChars=DstChars) then exit;
unique:=false;
if IsASCII(SrcChars) and IsASCII(DstChars) then begin
// search and replace single byte characters
ReplaceASCII;
exit;
end;
// search for multi byte UTF-8 characters
ReplaceMultiByte;
end;
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord; function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
begin begin
Result := InitValue; Result := InitValue;