mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-05-03 00:43:39 +02:00

- An attempt to unify the defines for the different scenario's in the use of (not) codepage aware ansistrings and the use of the "Utf8 in RTL" feature. It makes for better separation of code and thus better readability and ease of maintainance (and in a later stadium it makes it easier to remove code that deals with non codepage aware ansistrings (fpc < 3.0)). - Also replace (FPC_FULLVERSION >= xxxx) with FPC_HAS_CPSTRING where appropriate. - Replace the custom HasCP define with built in FPC_HAS_CPSTRING define. git-svn-id: trunk@50498 -
244 lines
5.8 KiB
PHP
244 lines
5.8 KiB
PHP
{%MainUnit ../lconvencoding.pp}
|
|
|
|
{******************************************************************************
|
|
Asian Unicode Functions
|
|
******************************************************************************
|
|
|
|
*****************************************************************************
|
|
This file is part of the Lazarus Component Library (LCL)
|
|
|
|
See the file COPYING.modifiedLGPL.txt, included in this distribution,
|
|
for details about the license.
|
|
*****************************************************************************
|
|
|
|
The clipboard is able to work with the windows and gtk behaviour/features.
|
|
}
|
|
|
|
function DBCSToUTF8(const s: string; CodeP: integer): string;
|
|
var
|
|
len: SizeInt;
|
|
Src: PChar;
|
|
Dest: PChar;
|
|
c: char;
|
|
l: Integer;
|
|
code: word;
|
|
begin
|
|
if s = '' then
|
|
begin
|
|
Result := s;
|
|
exit;
|
|
end;
|
|
len := length(s);
|
|
SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
|
|
Src := PChar(s);
|
|
Dest := PChar(Result);
|
|
repeat
|
|
c := Src^;
|
|
Inc(Src);
|
|
if Ord(c) < 128 then
|
|
begin
|
|
if (c=#0) and (Src-PChar(s)>=len) then break;
|
|
Dest^ := c;
|
|
Inc(Dest);
|
|
end
|
|
else
|
|
begin
|
|
code := Byte(c) shl 8;
|
|
c:=Src^;
|
|
if (c=#0) and (Src-PChar(s)>=len) then break;
|
|
code := code + Byte(c);
|
|
Inc(Src);
|
|
|
|
case CodeP of
|
|
936:
|
|
code := Uni936C[SearchTable(CP936CC, code)];
|
|
950:
|
|
code := Uni950C[SearchTable(CP950CC, code)];
|
|
949:
|
|
code := Uni949C[SearchTable(CP949CC, code)];
|
|
932:
|
|
code := Uni932C[SearchTable(CP932CC, code)];
|
|
else
|
|
code := 0;
|
|
end;
|
|
|
|
if code>0 then
|
|
begin
|
|
l:=UnicodeToUTF8Inline(code,Dest);
|
|
inc(Dest,l);
|
|
end;
|
|
end;
|
|
until false;
|
|
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
|
|
end;
|
|
|
|
function CP936ToUTF8(const s: string): string;
|
|
begin
|
|
Result := DBCSToUTF8(s, 936);
|
|
end;
|
|
|
|
function CP950ToUTF8(const s: string): string;
|
|
begin
|
|
Result := DBCSToUTF8(s, 950);
|
|
end;
|
|
|
|
function CP949ToUTF8(const s: string): string;
|
|
begin
|
|
Result := DBCSToUTF8(s, 949);
|
|
end;
|
|
|
|
function CP932ToUTF8(const s: string): string;
|
|
begin
|
|
Result := DBCSToUTF8(s, 932);
|
|
end;
|
|
|
|
{$IfNDef UseSystemCPConv}
|
|
function UnicodeToCP936(Unicode: cardinal): integer;
|
|
begin
|
|
case Unicode of
|
|
0..127: Result := Unicode;
|
|
else
|
|
Result := CP936CU[SearchTable(Uni936U, Unicode)];
|
|
end;
|
|
end;
|
|
|
|
function UnicodeToCP950(Unicode: cardinal): integer;
|
|
begin
|
|
case Unicode of
|
|
0..127: Result := Unicode;
|
|
else
|
|
Result := CP950CU[SearchTable(Uni950U, Unicode)];
|
|
end;
|
|
end;
|
|
|
|
function UnicodeToCP949(Unicode: cardinal): integer;
|
|
begin
|
|
case Unicode of
|
|
0..127: Result := Unicode;
|
|
else
|
|
Result := CP949CU[SearchTable(Uni949U, Unicode)];
|
|
end;
|
|
end;
|
|
|
|
function UnicodeToCP932(Unicode: cardinal): integer;
|
|
begin
|
|
case Unicode of
|
|
0..127: Result := Unicode;
|
|
else
|
|
Result := CP932CU[SearchTable(Uni932U, Unicode)];
|
|
end;
|
|
end;
|
|
{$endif}
|
|
|
|
{$ifdef FPC_HAS_CPSTRING}
|
|
procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
|
|
SetTargetCodePage: boolean;
|
|
{$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
|
|
out TheResult: RawByteString); inline;
|
|
begin
|
|
{$ifdef UseSystemCPConv}
|
|
TheResult:=s;
|
|
SetCodePage(TheResult, TargetCodePage, True);
|
|
if not SetTargetCodePage then
|
|
SetCodePage(TheResult, CP_ACP, False);
|
|
{$else}
|
|
TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
|
|
if SetTargetCodePage then
|
|
SetCodePage(TheResult, TargetCodePage, False);
|
|
{$endif}
|
|
end;
|
|
|
|
function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
|
|
begin
|
|
InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
|
|
end;
|
|
|
|
function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
|
|
begin
|
|
InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
|
|
end;
|
|
|
|
function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
|
|
begin
|
|
InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
|
|
end;
|
|
|
|
function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
|
|
begin
|
|
InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
|
|
end;
|
|
{$ELSE}
|
|
function UTF8ToCP932(const s: string): string;
|
|
begin
|
|
Result := UTF8ToDBCS(s, @UnicodeToCP932);
|
|
end;
|
|
|
|
function UTF8ToCP936(const s: string): string;
|
|
begin
|
|
Result := UTF8ToDBCS(s, @UnicodeToCP936);
|
|
end;
|
|
|
|
function UTF8ToCP949(const s: string): string;
|
|
begin
|
|
Result := UTF8ToDBCS(s, @UnicodeToCP949);
|
|
end;
|
|
|
|
function UTF8ToCP950(const s: string): string;
|
|
begin
|
|
Result := UTF8ToDBCS(s, @UnicodeToCP950);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
|
|
var
|
|
len: integer;
|
|
Src: PChar;
|
|
Dest: PChar;
|
|
c: char;
|
|
Unicode: longword;
|
|
CharLen: integer;
|
|
i: integer;
|
|
begin
|
|
if s = '' then
|
|
begin
|
|
Result := '';
|
|
exit;
|
|
end;
|
|
len := length(s);
|
|
SetLength(Result, len); // DBCS needs at most space as UTF-8
|
|
Src := PChar(s);
|
|
Dest := PChar(Result);
|
|
repeat
|
|
c := Src^;
|
|
if c < #128 then
|
|
begin
|
|
if (c=#0) and (Src-PChar(s)>=len) then break;
|
|
Dest^ := c;
|
|
Inc(Dest);
|
|
Inc(Src);
|
|
end
|
|
else
|
|
begin
|
|
Unicode := UTF8CharacterToUnicode(Src, CharLen);
|
|
Inc(Src, CharLen);
|
|
i := UTF8CharConvFunc(Unicode);
|
|
//writeln(Format('%X', [i]));
|
|
if i >= 0 then
|
|
begin
|
|
if i > $ff then
|
|
begin
|
|
Dest^ := chr(i shr 8);
|
|
Inc(Dest);
|
|
Dest^ := chr(i);
|
|
end
|
|
else
|
|
Dest^ := chr(i);
|
|
Inc(Dest);
|
|
end;
|
|
end;
|
|
until false;
|
|
//SetLength(Result, Dest - PChar(Result));
|
|
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
|
|
end;
|
|
|