lazarus/components/lazutils/asiancodepagefunctions.inc
bart 0c8df133dd LazUtils:
- An attempt to unify the defines for the different scenario's in the use of (not) codepage aware ansistrings
  and the use of the "Utf8 in RTL" feature.
  It makes for better separation of code and thus better readability and ease of maintainance (and in a later
  stadium it makes it easier to remove code that deals with non codepage aware ansistrings (fpc < 3.0)).
- Also replace (FPC_FULLVERSION >= xxxx) with FPC_HAS_CPSTRING where appropriate.
- Replace the custom HasCP define with built in FPC_HAS_CPSTRING define.

git-svn-id: trunk@50498 -
2015-11-24 16:23:18 +00:00

244 lines
5.8 KiB
PHP

{%MainUnit ../lconvencoding.pp}
{******************************************************************************
Asian Unicode Functions
******************************************************************************
*****************************************************************************
This file is part of the Lazarus Component Library (LCL)
See the file COPYING.modifiedLGPL.txt, included in this distribution,
for details about the license.
*****************************************************************************
The clipboard is able to work with the windows and gtk behaviour/features.
}
function DBCSToUTF8(const s: string; CodeP: integer): string;
var
len: SizeInt;
Src: PChar;
Dest: PChar;
c: char;
l: Integer;
code: word;
begin
if s = '' then
begin
Result := s;
exit;
end;
len := length(s);
SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
Src := PChar(s);
Dest := PChar(Result);
repeat
c := Src^;
Inc(Src);
if Ord(c) < 128 then
begin
if (c=#0) and (Src-PChar(s)>=len) then break;
Dest^ := c;
Inc(Dest);
end
else
begin
code := Byte(c) shl 8;
c:=Src^;
if (c=#0) and (Src-PChar(s)>=len) then break;
code := code + Byte(c);
Inc(Src);
case CodeP of
936:
code := Uni936C[SearchTable(CP936CC, code)];
950:
code := Uni950C[SearchTable(CP950CC, code)];
949:
code := Uni949C[SearchTable(CP949CC, code)];
932:
code := Uni932C[SearchTable(CP932CC, code)];
else
code := 0;
end;
if code>0 then
begin
l:=UnicodeToUTF8Inline(code,Dest);
inc(Dest,l);
end;
end;
until false;
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
end;
function CP936ToUTF8(const s: string): string;
begin
Result := DBCSToUTF8(s, 936);
end;
function CP950ToUTF8(const s: string): string;
begin
Result := DBCSToUTF8(s, 950);
end;
function CP949ToUTF8(const s: string): string;
begin
Result := DBCSToUTF8(s, 949);
end;
function CP932ToUTF8(const s: string): string;
begin
Result := DBCSToUTF8(s, 932);
end;
{$IfNDef UseSystemCPConv}
function UnicodeToCP936(Unicode: cardinal): integer;
begin
case Unicode of
0..127: Result := Unicode;
else
Result := CP936CU[SearchTable(Uni936U, Unicode)];
end;
end;
function UnicodeToCP950(Unicode: cardinal): integer;
begin
case Unicode of
0..127: Result := Unicode;
else
Result := CP950CU[SearchTable(Uni950U, Unicode)];
end;
end;
function UnicodeToCP949(Unicode: cardinal): integer;
begin
case Unicode of
0..127: Result := Unicode;
else
Result := CP949CU[SearchTable(Uni949U, Unicode)];
end;
end;
function UnicodeToCP932(Unicode: cardinal): integer;
begin
case Unicode of
0..127: Result := Unicode;
else
Result := CP932CU[SearchTable(Uni932U, Unicode)];
end;
end;
{$endif}
{$ifdef FPC_HAS_CPSTRING}
procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
SetTargetCodePage: boolean;
{$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
out TheResult: RawByteString); inline;
begin
{$ifdef UseSystemCPConv}
TheResult:=s;
SetCodePage(TheResult, TargetCodePage, True);
if not SetTargetCodePage then
SetCodePage(TheResult, CP_ACP, False);
{$else}
TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
if SetTargetCodePage then
SetCodePage(TheResult, TargetCodePage, False);
{$endif}
end;
function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
begin
InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
end;
function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
begin
InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
end;
function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
begin
InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
end;
function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
begin
InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
end;
{$ELSE}
function UTF8ToCP932(const s: string): string;
begin
Result := UTF8ToDBCS(s, @UnicodeToCP932);
end;
function UTF8ToCP936(const s: string): string;
begin
Result := UTF8ToDBCS(s, @UnicodeToCP936);
end;
function UTF8ToCP949(const s: string): string;
begin
Result := UTF8ToDBCS(s, @UnicodeToCP949);
end;
function UTF8ToCP950(const s: string): string;
begin
Result := UTF8ToDBCS(s, @UnicodeToCP950);
end;
{$ENDIF}
function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
var
len: integer;
Src: PChar;
Dest: PChar;
c: char;
Unicode: longword;
CharLen: integer;
i: integer;
begin
if s = '' then
begin
Result := '';
exit;
end;
len := length(s);
SetLength(Result, len); // DBCS needs at most space as UTF-8
Src := PChar(s);
Dest := PChar(Result);
repeat
c := Src^;
if c < #128 then
begin
if (c=#0) and (Src-PChar(s)>=len) then break;
Dest^ := c;
Inc(Dest);
Inc(Src);
end
else
begin
Unicode := UTF8CharacterToUnicode(Src, CharLen);
Inc(Src, CharLen);
i := UTF8CharConvFunc(Unicode);
//writeln(Format('%X', [i]));
if i >= 0 then
begin
if i > $ff then
begin
Dest^ := chr(i shr 8);
Inc(Dest);
Dest^ := chr(i);
end
else
Dest^ := chr(i);
Inc(Dest);
end;
end;
until false;
//SetLength(Result, Dest - PChar(Result));
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
end;