mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-27 08:13:49 +02:00
Patch from JiXian Yang, improves asian encoding support
git-svn-id: trunk@27176 -
This commit is contained in:
parent
a2046cf61b
commit
0ee7468af1
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -4444,6 +4444,7 @@ lcl/imglist.pp svneol=native#text/pascal
|
||||
lcl/include/actionlink.inc svneol=native#text/pascal
|
||||
lcl/include/application.inc svneol=native#text/pascal
|
||||
lcl/include/applicationproperties.inc svneol=native#text/pascal
|
||||
lcl/include/asiancodepagefunctions.inc svneol=native#text/plain
|
||||
lcl/include/asiancodepages.inc svneol=native#text/plain
|
||||
lcl/include/bevel.inc svneol=native#text/pascal
|
||||
lcl/include/bitbtn.inc svneol=native#text/pascal
|
||||
|
228
lcl/include/asiancodepagefunctions.inc
Normal file
228
lcl/include/asiancodepagefunctions.inc
Normal file
@ -0,0 +1,228 @@
|
||||
{%MainUnit ../lconvencoding.pp}
|
||||
|
||||
{******************************************************************************
|
||||
Asian Unicode Functions
|
||||
******************************************************************************
|
||||
|
||||
*****************************************************************************
|
||||
* *
|
||||
* This file is part of the Lazarus Component Library (LCL) *
|
||||
* *
|
||||
* See the file COPYING.modifiedLGPL.txt, included in this distribution, *
|
||||
* for details about the copyright. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
|
||||
* *
|
||||
*****************************************************************************
|
||||
|
||||
Conversion routines between UTF-8 and the Asian double-byte code pages CP932 (Japanese), CP936 (Simplified Chinese), CP949 (Korean) and CP950 (Traditional Chinese).
|
||||
}
|
||||
|
||||
{ Converts a string encoded in one of the Asian double-byte code pages to UTF-8.

  s     - source text in the code page selected by CodeP
  CodeP - code page number; 936, 950, 949 and 932 are handled, for any other
          value every non-ASCII character is dropped from the output

  Bytes below 128 are copied unchanged. Any byte >= 128 is treated as the lead
  byte of a two-byte character and is looked up, together with the following
  byte, in the table pair belonging to CodeP.
  NOTE(review): what SearchTable returns for a code that is not in the table
  is not visible here - confirm that the resulting index is always valid. }
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
var
  len: integer;
  i, j: integer;
  Src: PChar;
  Dest: PChar;
  c: char;
  Trail: byte;
  Utf8Char: ansistring;
  Code: integer;
begin
  if s = '' then
  begin
    Result := s;
    exit;
  end;
  len := length(s);
  SetLength(Result, len * 6); // generous upper bound, trimmed at the end
  Src := PChar(s);
  Dest := PChar(Result);
  i := 1;
  // "<=" so that the final byte of s is converted too
  while i <= len do
  begin
    c := Src^;
    Inc(Src);
    Inc(i);
    if Ord(c) < 128 then
    begin
      Dest^ := c;
      Inc(Dest);
    end
    else
    begin
      // fetch the trail byte; a lead byte at the very end is paired with 0
      if i <= len then
      begin
        Trail := Ord(Src^);
        Inc(Src);
        Inc(i);
      end
      else
        Trail := 0;
      // lead byte in bits 8..15, trail byte in bits 0..7; computed
      // arithmetically so the value does not depend on CPU byte order
      Code := (Ord(c) shl 8) or Trail;

      case CodeP of
        936:
          Code := Uni936C[SearchTable(CP936CC, Code)];
        950:
          Code := Uni950C[SearchTable(CP950CC, Code)];
        949:
          Code := Uni949C[SearchTable(CP949CC, Code)];
        932:
          Code := Uni932C[SearchTable(CP932CC, Code)];
        else
          Code := -1;
      end;

      if Code <> -1 then
      begin
        // separate variable: the UTF-8 sequence must not shrink a scratch
        // buffer that is reused on the next iteration
        Utf8Char := UnicodeToUTF8(Code);
        for j := 1 to Length(Utf8Char) do
        begin
          Dest^ := Utf8Char[j];
          Inc(Dest);
        end;
      end;
    end;
  end;
  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
end;
|
||||
|
||||
// CP936 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 936
function CP936ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 936));
end;
|
||||
|
||||
// CP950 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 950
function CP950ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 950));
end;
|
||||
|
||||
// CP949 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 949
function CP949ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 949));
end;
|
||||
|
||||
// CP932 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 932
function CP932ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 932));
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP936 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni936U and read from the parallel table CP936CU. }
function UnicodeToCP936(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP936CU[SearchTable(Uni936U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP950 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni950U and read from the parallel table CP950CU. }
function UnicodeToCP950(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP950CU[SearchTable(Uni950U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP949 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni949U and read from the parallel table CP949CU. }
function UnicodeToCP949(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP949CU[SearchTable(Uni949U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP932 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni932U and read from the parallel table CP932CU. }
function UnicodeToCP932(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP932CU[SearchTable(Uni932U, Unicode)];
end;
|
||||
|
||||
{ Converts a UTF-8 string to a single/double-byte code page.

  s                - UTF-8 encoded source text
  UTF8CharConvFunc - callback mapping one Unicode code point to the target
                     code; a negative result means "no mapping" and the
                     character is dropped, a result above $FF is written as
                     two bytes (high byte first), anything else as one byte

  Bytes below 128 are copied unchanged without calling the callback. }
function UTF8ToSingleByteEx(const s: string;
  const UTF8CharConvFunc: TUnicodeToCharID): string;
var
  len: integer;
  Src: PChar;
  Dest: PChar;
  c: char;
  Unicode: longword;
  CharLen: integer;
  i: integer;
begin
  if s = '' then
  begin
    Result := '';
    exit;
  end;
  len := length(s);
  // Every loop pass consumes at least one input byte and writes at most two
  // output bytes, so 2*len can never be overrun - not even when a malformed
  // single byte decodes to a code point that has a double-byte mapping.
  SetLength(Result, len * 2);
  Src := PChar(s);
  Dest := PChar(Result);
  while len > 0 do
  begin
    c := Src^;
    if c < #128 then
    begin
      Dest^ := c;
      Inc(Dest);
      Inc(Src);
      Dec(len);
    end
    else
    begin
      Unicode := UTF8CharacterToUnicode(Src, CharLen);
      // always advance, otherwise the loop could never terminate
      if CharLen < 1 then
        CharLen := 1;
      Inc(Src, CharLen);
      Dec(len, CharLen);
      i := UTF8CharConvFunc(Unicode);
      if i >= 0 then
      begin
        if i > $ff then
        begin
          // double-byte code: emit the lead byte first
          Dest^ := chr((i shr 8) and $ff);
          Inc(Dest);
        end;
        // mask explicitly so the conversion is valid with range checks on
        Dest^ := chr(i and $ff);
        Inc(Dest);
      end;
    end;
  end;
  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
end;
|
||||
|
||||
// UTF-8 -> CP936, delegates to UTF8ToSingleByteEx with UnicodeToCP936
function UTF8ToCP936(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP936));
end;
|
||||
|
||||
// UTF-8 -> CP950, delegates to UTF8ToSingleByteEx with UnicodeToCP950
function UTF8ToCP950(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP950));
end;
|
||||
|
||||
// UTF-8 -> CP949, delegates to UTF8ToSingleByteEx with UnicodeToCP949
function UTF8ToCP949(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP949));
end;
|
||||
|
||||
// UTF-8 -> CP932, delegates to UTF8ToSingleByteEx with UnicodeToCP932
function UTF8ToCP932(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP932));
end;
|
||||
|
@ -67,13 +67,8 @@ function CP850ToUTF8(const s: string): string; // DOS western europe
|
||||
function CP866ToUTF8(const s: string): string; // DOS and Windows console's cyrillic
|
||||
function CP874ToUTF8(const s: string): string; // thai
|
||||
function KOI8ToUTF8(const s: string): string; // russian cyrillic
|
||||
function CP936ToUTF8(const s: string): string; // Chinese
|
||||
function CP950ToUTF8(const s: string): string; // Chinese Complex
|
||||
function CP949ToUTF8(const s: string): string; // korea
|
||||
function CP932ToUTF8(const s: string): string; // japanese
|
||||
function SingleByteToUTF8(const s: string;
|
||||
const Table: TCharToUTF8Table): string;
|
||||
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
|
||||
function UCS2LEToUTF8(const s: string): string; // UCS2-LE 2byte little endian
|
||||
function UCS2BEToUTF8(const s: string): string; // UCS2-BE 2byte big endian
|
||||
|
||||
@ -94,16 +89,27 @@ function UTF8ToCP850(const s: string): string; // DOS western europe
|
||||
function UTF8ToCP866(const s: string): string; // DOS and Windows console's cyrillic
|
||||
function UTF8ToCP874(const s: string): string; // thai
|
||||
function UTF8ToKOI8(const s: string): string; // russian cyrillic
|
||||
function UTF8ToSingleByte(const s: string;
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian
|
||||
function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian
|
||||
|
||||
// Asian encodings
|
||||
|
||||
function CP936ToUTF8(const s: string): string; // Chinese
|
||||
function CP950ToUTF8(const s: string): string; // Chinese Complex
|
||||
function CP949ToUTF8(const s: string): string; // korea
|
||||
function CP932ToUTF8(const s: string): string; // japanese
|
||||
|
||||
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
|
||||
|
||||
function UTF8ToCP936(const s: string): string; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
|
||||
function UTF8ToCP950(const s: string): string; // Chinese Complex
|
||||
function UTF8ToCP949(const s: string): string; // korea
|
||||
function UTF8ToCP932(const s: string): string; // japanese
|
||||
function UTF8ToSingleByte(const s: string;
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
|
||||
function UTF8ToSingleByteEx(const s: string;
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian
|
||||
function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
|
||||
procedure GetSupportedEncodings(List: TStrings);
|
||||
|
||||
@ -113,11 +119,12 @@ implementation
|
||||
uses Windows;
|
||||
{$ENDIF}
|
||||
|
||||
{$include include/asiancodepages.inc}
|
||||
|
||||
var EncodingValid: boolean = false;
|
||||
DefaultTextEncoding: string = EncodingAnsi;
|
||||
|
||||
{$include include/asiancodepages.inc}
|
||||
{$include include/asiancodepagefunctions.inc}
|
||||
|
||||
{$IFDEF Windows}
|
||||
function GetWindowsEncoding: string;
|
||||
var
|
||||
@ -4461,109 +4468,6 @@ begin
|
||||
SetLength(Result,PtrUInt(Dest)-PtrUInt(Result));
|
||||
end;
|
||||
|
||||
{ Converts a string encoded in one of the Asian double-byte code pages to UTF-8.

  s     - source text in the code page selected by CodeP
  CodeP - code page number; 936, 950, 949 and 932 are handled, for any other
          value every non-ASCII character is dropped from the output

  Bytes below 128 are copied unchanged. Any byte >= 128 is treated as the lead
  byte of a two-byte character and is looked up, together with the following
  byte, in the table pair belonging to CodeP.
  NOTE(review): what SearchTable returns for a code that is not in the table
  is not visible here - confirm that the resulting index is always valid. }
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
var
  len: integer;
  i, j: integer;
  Src: PChar;
  Dest: PChar;
  c: char;
  Trail: byte;
  Utf8Char: ansistring;
  Code: integer;
begin
  if s = '' then
  begin
    Result := s;
    exit;
  end;
  len := length(s);
  SetLength(Result, len * 6); // generous upper bound, trimmed at the end
  Src := PChar(s);
  Dest := PChar(Result);
  i := 1;
  // "<=" so that the final byte of s is converted too
  while i <= len do
  begin
    c := Src^;
    Inc(Src);
    Inc(i);
    if Ord(c) < 128 then
    begin
      Dest^ := c;
      Inc(Dest);
    end
    else
    begin
      // fetch the trail byte; a lead byte at the very end is paired with 0
      if i <= len then
      begin
        Trail := Ord(Src^);
        Inc(Src);
        Inc(i);
      end
      else
        Trail := 0;
      // lead byte in bits 8..15, trail byte in bits 0..7; computed
      // arithmetically so the value does not depend on CPU byte order
      Code := (Ord(c) shl 8) or Trail;

      case CodeP of
        936:
          Code := Uni936C[SearchTable(CP936CC, Code)];
        950:
          Code := Uni950C[SearchTable(CP950CC, Code)];
        949:
          Code := Uni949C[SearchTable(CP949CC, Code)];
        932:
          Code := Uni932C[SearchTable(CP932CC, Code)];
        else
          Code := -1;
      end;

      if Code <> -1 then
      begin
        // separate variable: the UTF-8 sequence must not shrink a scratch
        // buffer that is reused on the next iteration
        Utf8Char := UnicodeToUTF8(Code);
        for j := 1 to Length(Utf8Char) do
        begin
          Dest^ := Utf8Char[j];
          Inc(Dest);
        end;
      end;
    end;
  end;
  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
end;
|
||||
|
||||
// CP936 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 936
function CP936ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 936));
end;
|
||||
|
||||
// CP950 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 950
function CP950ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 950));
end;
|
||||
|
||||
// CP949 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 949
function CP949ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 949));
end;
|
||||
|
||||
// CP932 -> UTF-8, delegates to SingleByteToUTF8Ex with code page 932
function CP932ToUTF8(const s: string): string;
begin
  Exit(SingleByteToUTF8Ex(s, 932));
end;
|
||||
|
||||
function UCS2LEToUTF8(const s: string): string;
|
||||
var
|
||||
len: Integer;
|
||||
@ -5496,43 +5400,6 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
{ Maps one Unicode code point to its CP936 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni936U and read from the parallel table CP936CU. }
function UnicodeToCP936(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP936CU[SearchTable(Uni936U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP950 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni950U and read from the parallel table CP950CU. }
function UnicodeToCP950(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP950CU[SearchTable(Uni950U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP949 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni949U and read from the parallel table CP949CU. }
function UnicodeToCP949(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP949CU[SearchTable(Uni949U, Unicode)];
end;
|
||||
|
||||
{ Maps one Unicode code point to its CP932 code.
  Code points 0..127 map to themselves; everything else is looked up via
  SearchTable in Uni932U and read from the parallel table CP932CU. }
function UnicodeToCP932(Unicode: cardinal): integer;
begin
  if Unicode <= 127 then
    Result := Unicode
  else
    Result := CP932CU[SearchTable(Uni932U, Unicode)];
end;
|
||||
|
||||
function UnicodeToKOI8(Unicode: cardinal): integer;
|
||||
begin
|
||||
case Unicode of
|
||||
@ -5806,81 +5673,6 @@ begin
|
||||
SetLength(Result,Dest-PChar(Result));
|
||||
end;
|
||||
|
||||
{ Converts a UTF-8 string to a single/double-byte code page.

  s                - UTF-8 encoded source text
  UTF8CharConvFunc - callback mapping one Unicode code point to the target
                     code; a negative result means "no mapping" and the
                     character is dropped, a result above $FF is written as
                     two bytes (high byte first), anything else as one byte

  Bytes below 128 are copied unchanged without calling the callback. }
function UTF8ToSingleByteEx(const s: string;
  const UTF8CharConvFunc: TUnicodeToCharID): string;
var
  len: integer;
  Src: PChar;
  Dest: PChar;
  c: char;
  Unicode: longword;
  CharLen: integer;
  i: integer;
begin
  if s = '' then
  begin
    Result := '';
    exit;
  end;
  len := length(s);
  // Every loop pass consumes at least one input byte and writes at most two
  // output bytes, so 2*len can never be overrun - not even when a malformed
  // single byte decodes to a code point that has a double-byte mapping.
  SetLength(Result, len * 2);
  Src := PChar(s);
  Dest := PChar(Result);
  while len > 0 do
  begin
    c := Src^;
    if c < #128 then
    begin
      Dest^ := c;
      Inc(Dest);
      Inc(Src);
      Dec(len);
    end
    else
    begin
      Unicode := UTF8CharacterToUnicode(Src, CharLen);
      // always advance, otherwise the loop could never terminate
      if CharLen < 1 then
        CharLen := 1;
      Inc(Src, CharLen);
      Dec(len, CharLen);
      i := UTF8CharConvFunc(Unicode);
      if i >= 0 then
      begin
        if i > $ff then
        begin
          // double-byte code: emit the lead byte first
          Dest^ := chr((i shr 8) and $ff);
          Inc(Dest);
        end;
        // mask explicitly so the conversion is valid with range checks on
        Dest^ := chr(i and $ff);
        Inc(Dest);
      end;
    end;
  end;
  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
end;
|
||||
|
||||
// UTF-8 -> CP936, delegates to UTF8ToSingleByteEx with UnicodeToCP936
function UTF8ToCP936(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP936));
end;
|
||||
|
||||
// UTF-8 -> CP950, delegates to UTF8ToSingleByteEx with UnicodeToCP950
function UTF8ToCP950(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP950));
end;
|
||||
|
||||
// UTF-8 -> CP949, delegates to UTF8ToSingleByteEx with UnicodeToCP949
function UTF8ToCP949(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP949));
end;
|
||||
|
||||
// UTF-8 -> CP932, delegates to UTF8ToSingleByteEx with UnicodeToCP932
function UTF8ToCP932(const s: string): string;
begin
  Exit(UTF8ToSingleByteEx(s, @UnicodeToCP932));
end;
|
||||
|
||||
function UTF8ToUCS2LE(const s: string): string;
|
||||
var
|
||||
len: Integer;
|
||||
|
Loading…
Reference in New Issue
Block a user