mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-09-13 21:19:18 +02:00
lazutils: started TranslateUTF8Chars
git-svn-id: trunk@39288 -
This commit is contained in:
parent
8f2b9afb13
commit
19c0f84bd2
@ -22,7 +22,7 @@ unit laz2_xmlutils;
|
|||||||
interface
|
interface
|
||||||
|
|
||||||
uses
|
uses
|
||||||
SysUtils, Classes;
|
SysUtils, Classes, LazUTF8;
|
||||||
|
|
||||||
type
|
type
|
||||||
TXMLUtilString = AnsiString;
|
TXMLUtilString = AnsiString;
|
||||||
@ -41,7 +41,9 @@ procedure NormalizeSpaces(var Value: TXMLUtilString);
|
|||||||
function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
|
function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
|
||||||
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
|
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
|
||||||
{ beware, works in ASCII range only }
|
{ beware, works in ASCII range only }
|
||||||
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
|
function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
|
||||||
|
|
||||||
|
procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
|
||||||
|
|
||||||
{ a simple hash table with TXMLUtilString keys }
|
{ a simple hash table with TXMLUtilString keys }
|
||||||
|
|
||||||
@ -359,7 +361,7 @@ begin
|
|||||||
Result := c^ in [#32,#9,#10,#13];
|
Result := c^ in [#32,#9,#10,#13];
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function WStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
|
function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
|
||||||
var
|
var
|
||||||
counter: Integer;
|
counter: Integer;
|
||||||
c1, c2: Word;
|
c1, c2: Word;
|
||||||
@ -386,6 +388,91 @@ begin
|
|||||||
result := c1 - c2;
|
result := c1 - c2;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
|
||||||
|
{ replaces characters in s.
|
||||||
|
The mapping is defined by SrcChars and DstChars.
|
||||||
|
The n-th UTF-8 character of SrcChars will be replaced with the n-th
|
||||||
|
character of DstChars. If there is no n-th character in DstChars then the
|
||||||
|
character will be deleted in s.
|
||||||
|
}
|
||||||
|
var
|
||||||
|
p: PChar;
|
||||||
|
unique: boolean;
|
||||||
|
|
||||||
|
function IsASCII(const h: string): boolean;
|
||||||
|
var
|
||||||
|
i: Integer;
|
||||||
|
begin
|
||||||
|
for i:=1 to length(h) do
|
||||||
|
if ord(h[i])>128 then exit(false);
|
||||||
|
Result:=true;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure UniqString; inline;
|
||||||
|
var
|
||||||
|
OldPos: SizeInt;
|
||||||
|
begin
|
||||||
|
if unique then exit;
|
||||||
|
OldPos:=p-PChar(s);
|
||||||
|
UniqueString(s);
|
||||||
|
p:=PChar(s)+OldPos;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure ReplaceASCII;
|
||||||
|
var
|
||||||
|
OldPos: SizeInt;
|
||||||
|
i: SizeInt;
|
||||||
|
begin
|
||||||
|
p:=PChar(s);
|
||||||
|
while p^<>#0 do begin
|
||||||
|
i:=Pos(p^,SrcChars);
|
||||||
|
if i<1 then begin
|
||||||
|
// keep character
|
||||||
|
inc(p);
|
||||||
|
end else begin
|
||||||
|
if i<=length(DstChars) then begin
|
||||||
|
// replace a character
|
||||||
|
UniqString;
|
||||||
|
p^:=DstChars[i];
|
||||||
|
inc(p);
|
||||||
|
end else begin
|
||||||
|
// delete a character
|
||||||
|
OldPos:=p-PChar(s);
|
||||||
|
Delete(s,OldPos+1,1);
|
||||||
|
p:=PChar(s)+OldPos;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure ReplaceMultiByte;
|
||||||
|
var
|
||||||
|
sp: PChar;
|
||||||
|
clen: Integer;
|
||||||
|
sclen: Integer;
|
||||||
|
begin
|
||||||
|
p:=PChar(s);
|
||||||
|
while p^<>#0 do begin
|
||||||
|
clen:=UTF8CharacterLength(p);
|
||||||
|
if Pos(p^,SrcChars)>0 then begin
|
||||||
|
sp:=PChar(SrcChars);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
begin
|
||||||
|
if (SrcChars='') or (s='') or (SrcChars=DstChars) then exit;
|
||||||
|
|
||||||
|
unique:=false;
|
||||||
|
if IsASCII(SrcChars) and IsASCII(DstChars) then begin
|
||||||
|
// search and replace single byte characters
|
||||||
|
ReplaceASCII;
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
// search for multi byte UTF-8 characters
|
||||||
|
ReplaceMultiByte;
|
||||||
|
end;
|
||||||
|
|
||||||
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
|
function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
|
||||||
begin
|
begin
|
||||||
Result := InitValue;
|
Result := InitValue;
|
||||||
|
Loading…
Reference in New Issue
Block a user