mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-09 21:35:57 +02:00
lazutils: renamed functions to DBCSToUTF8 and UTF8ToDBCS, fixed handling of the last DBCS char, using half the space for the Asian tables
git-svn-id: trunk@38241 -
This commit is contained in:
parent
9295710c2a
commit
3f21a3c32a
@ -20,17 +20,15 @@
|
||||
The clipboard is able to work with the windows and gtk behaviour/features.
|
||||
}
|
||||
|
||||
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
|
||||
function DBCSToUTF8(const s: string; CodeP: integer): string;
|
||||
var
|
||||
len: integer;
|
||||
i, j: integer;
|
||||
len: SizeInt;
|
||||
Src: PChar;
|
||||
Dest: PChar;
|
||||
c: char;
|
||||
tempstr: ansistring;
|
||||
tempint: integer;
|
||||
l: Integer;
|
||||
code: word;
|
||||
begin
|
||||
SetLength(tempstr, 4);
|
||||
if s = '' then
|
||||
begin
|
||||
Result := s;
|
||||
@ -40,74 +38,64 @@ begin
|
||||
SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
|
||||
Src := PChar(s);
|
||||
Dest := PChar(Result);
|
||||
i := 1;
|
||||
while i < len do
|
||||
begin
|
||||
repeat
|
||||
c := Src^;
|
||||
Inc(Src);
|
||||
i := i + 1;
|
||||
if Ord(c) < 128 then
|
||||
begin
|
||||
if (c=#0) and (Src-PChar(s)>=len) then break;
|
||||
Dest^ := c;
|
||||
Inc(Dest);
|
||||
end
|
||||
else
|
||||
begin
|
||||
tempint := Byte(c) shl 8;
|
||||
if i <= len then
|
||||
begin
|
||||
tempint := tempint + Byte(Src^);
|
||||
i := i + 1;
|
||||
end;
|
||||
|
||||
code := Byte(c) shl 8;
|
||||
c:=Src^;
|
||||
if (c=#0) and (Src-PChar(s)>=len) then break;
|
||||
code := code + Byte(c);
|
||||
Inc(Src);
|
||||
|
||||
case CodeP of
|
||||
936:
|
||||
tempint := Uni936C[SearchTable(CP936CC, tempint)];
|
||||
code := Uni936C[SearchTable(CP936CC, code)];
|
||||
950:
|
||||
tempint := Uni950C[SearchTable(CP950CC, tempint)];
|
||||
code := Uni950C[SearchTable(CP950CC, code)];
|
||||
949:
|
||||
tempint := Uni949C[SearchTable(CP949CC, tempint)];
|
||||
code := Uni949C[SearchTable(CP949CC, code)];
|
||||
932:
|
||||
tempint := Uni932C[SearchTable(CP932CC, tempint)];
|
||||
code := Uni932C[SearchTable(CP932CC, code)];
|
||||
else
|
||||
tempint := -1;
|
||||
code := 0;
|
||||
end;
|
||||
|
||||
if tempint <> -1 then
|
||||
if code>0 then
|
||||
begin
|
||||
TempStr := UnicodeToUTF8(tempint);
|
||||
|
||||
for j := 1 to Length(TempStr) do
|
||||
begin
|
||||
Dest^ := TempStr[j];
|
||||
Inc(Dest);
|
||||
end;
|
||||
end;
|
||||
l:=UnicodeToUTF8Inline(code,Dest);
|
||||
inc(Dest,l);
|
||||
end;
|
||||
end;
|
||||
until false;
|
||||
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
|
||||
end;
|
||||
|
||||
function CP936ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := SingleByteToUTF8Ex(s, 936);
|
||||
Result := DBCSToUTF8(s, 936);
|
||||
end;
|
||||
|
||||
function CP950ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := SingleByteToUTF8Ex(s, 950);
|
||||
Result := DBCSToUTF8(s, 950);
|
||||
end;
|
||||
|
||||
function CP949ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := SingleByteToUTF8Ex(s, 949);
|
||||
Result := DBCSToUTF8(s, 949);
|
||||
end;
|
||||
|
||||
function CP932ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := SingleByteToUTF8Ex(s, 932);
|
||||
Result := DBCSToUTF8(s, 932);
|
||||
end;
|
||||
|
||||
function UnicodeToCP936(Unicode: cardinal): integer;
|
||||
@ -146,7 +134,7 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8ToSingleByteEx(const s: string;
|
||||
function UTF8ToDBCS(const s: string;
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
var
|
||||
len: integer;
|
||||
@ -163,24 +151,22 @@ begin
|
||||
exit;
|
||||
end;
|
||||
len := length(s);
|
||||
SetLength(Result, len);
|
||||
SetLength(Result, len); // DBCS needs at most as much space as UTF-8
|
||||
Src := PChar(s);
|
||||
Dest := PChar(Result);
|
||||
while len > 0 do
|
||||
begin
|
||||
repeat
|
||||
c := Src^;
|
||||
if c < #128 then
|
||||
begin
|
||||
if (c=#0) and (Src-PChar(s)>=len) then break;
|
||||
Dest^ := c;
|
||||
Inc(Dest);
|
||||
Inc(Src);
|
||||
Dec(len);
|
||||
end
|
||||
else
|
||||
begin
|
||||
Unicode := UTF8CharacterToUnicode(Src, CharLen);
|
||||
Inc(Src, CharLen);
|
||||
Dec(len, CharLen);
|
||||
i := UTF8CharConvFunc(Unicode);
|
||||
//writeln(Format('%X', [i]));
|
||||
if i >= 0 then
|
||||
@ -196,28 +182,28 @@ begin
|
||||
Inc(Dest);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
until false;
|
||||
//SetLength(Result, Dest - PChar(Result));
|
||||
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
|
||||
end;
|
||||
|
||||
function UTF8ToCP936(const s: string): string;
|
||||
begin
|
||||
Result := UTF8ToSingleByteEx(s, @UnicodeToCP936);
|
||||
Result := UTF8ToDBCS(s, @UnicodeToCP936);
|
||||
end;
|
||||
|
||||
function UTF8ToCP950(const s: string): string;
|
||||
begin
|
||||
Result := UTF8ToSingleByteEx(s, @UnicodeToCP950);
|
||||
Result := UTF8ToDBCS(s, @UnicodeToCP950);
|
||||
end;
|
||||
|
||||
function UTF8ToCP949(const s: string): string;
|
||||
begin
|
||||
Result := UTF8ToSingleByteEx(s, @UnicodeToCP949);
|
||||
Result := UTF8ToDBCS(s, @UnicodeToCP949);
|
||||
end;
|
||||
|
||||
function UTF8ToCP932(const s: string): string;
|
||||
begin
|
||||
Result := UTF8ToSingleByteEx(s, @UnicodeToCP932);
|
||||
Result := UTF8ToDBCS(s, @UnicodeToCP932);
|
||||
end;
|
||||
|
||||
|
@ -16,14 +16,12 @@
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
|
||||
* *
|
||||
*****************************************************************************
|
||||
|
||||
The clipboard is able to work with the windows and gtk behaviour/features.
|
||||
}
|
||||
type
|
||||
CP936Arr = array[0..22046] of Integer;
|
||||
CP950Arr = array[0..13758] of Integer;
|
||||
CP949Arr = array[0..17303] of Integer;
|
||||
CP932Arr = array[0..7979] of Integer;
|
||||
CP936Arr = array[0..22046] of word;
|
||||
CP950Arr = array[0..13758] of word;
|
||||
CP949Arr = array[0..17303] of word;
|
||||
CP932Arr = array[0..7979] of word;
|
||||
|
||||
const CP936CC: CP936Arr =
|
||||
($00,$01,$02,$03,$04,$05,$06,$07,$08,$09,$0A,$0B,$0C,$0D,$0E,$0F,$10,$11,$12,
|
||||
@ -1727,13 +1725,13 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
|
||||
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
|
||||
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
|
||||
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,$20AC,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,$4E02,$4E04,$4E05,$4E06,$4E0F,$4E12,$4E17,$4E1F,$4E20,$4E21,$4E23,$4E26,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,$20AC,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,$4E02,$4E04,$4E05,$4E06,$4E0F,$4E12,$4E17,$4E1F,$4E20,$4E21,$4E23,$4E26,
|
||||
$4E29,$4E2E,$4E2F,$4E31,$4E33,$4E35,$4E37,$4E3C,$4E40,$4E41,$4E42,$4E44,$4E46,
|
||||
$4E4A,$4E51,$4E55,$4E57,$4E5A,$4E5B,$4E62,$4E63,$4E64,$4E65,$4E67,$4E68,$4E6A,
|
||||
$4E6B,$4E6C,$4E6D,$4E6E,$4E6F,$4E72,$4E74,$4E75,$4E76,$4E77,$4E78,$4E79,$4E7A,
|
||||
@ -5104,12 +5102,12 @@ $A3EF,$A3F0,$A3F1,$A3F2,$A3F3,$A3F4,$A3F5,$A3F6,$A3F7,$A3F8,$A3F9,$A3FA,$A3FB,
|
||||
$A3FC,$A3FD,$A1AB,$A1E9,$A1EA,$A956,$A3FE,$A957,$A3A4);
|
||||
|
||||
const Uni936U: CP936Arr =
|
||||
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,$000B,
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,$000B,
|
||||
$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,$0018,
|
||||
$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,$0025,
|
||||
$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,$0032,
|
||||
@ -7862,12 +7860,12 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
|
||||
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
|
||||
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
|
||||
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
$3000,$FF0C,$3001,$3002,$FF0E,$2027,$FF1B,$FF1A,$FF1F,$FF01,$FE30,$2026,$2025,
|
||||
$FE50,$FE51,$FE52,$00B7,$FE54,$FE55,$FE56,$FE57,$FF5C,$2013,$FE31,$2014,$FE33,
|
||||
$2574,$FE34,$FE4F,$FF08,$FF09,$FE35,$FE36,$FF5B,$FF5D,$FE37,$FE38,$3014,$3015,
|
||||
@ -9964,12 +9962,12 @@ $A2FC,$A2FD,$A2FE,$A340,$A341,$A342,$A343,$A161,$A155,$A162,$A1E3,$A246,$A247,
|
||||
$A1C3,$A244);
|
||||
|
||||
const Uni950U: CP950Arr =
|
||||
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
|
||||
$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,
|
||||
$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,
|
||||
$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,
|
||||
@ -12356,12 +12354,12 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
|
||||
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
|
||||
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
|
||||
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
$AC02,$AC03,$AC05,$AC06,$AC0B,$AC0C,$AC0D,$AC0E,$AC0F,$AC18,$AC1E,$AC1F,$AC21,
|
||||
$AC22,$AC23,$AC25,$AC26,$AC27,$AC28,$AC29,$AC2A,$AC2B,$AC2E,$AC32,$AC33,$AC34,
|
||||
$AC35,$AC36,$AC37,$AC3A,$AC3B,$AC3D,$AC3E,$AC3F,$AC41,$AC42,$AC43,$AC44,$AC45,
|
||||
@ -15003,12 +15001,12 @@ $A3ED,$A3EE,$A3EF,$A3F0,$A3F1,$A3F2,$A3F3,$A3F4,$A3F5,$A3F6,$A3F7,$A3F8,$A3F9,
|
||||
$A3FA,$A3FB,$A3FC,$A3FD,$A2A6,$A1CB,$A1CC,$A1FE,$A3FE,$A1CD,$A3DC);
|
||||
|
||||
const Uni949U: CP949Arr =
|
||||
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
|
||||
$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,
|
||||
$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,
|
||||
$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,
|
||||
@ -16951,15 +16949,15 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
|
||||
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
|
||||
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
|
||||
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,$FF61,$FF62,$FF63,$FF64,$FF65,$FF66,$FF67,$FF68,$FF69,$FF6A,
|
||||
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,$FF61,$FF62,$FF63,$FF64,$FF65,$FF66,$FF67,$FF68,$FF69,$FF6A,
|
||||
$FF6B,$FF6C,$FF6D,$FF6E,$FF6F,$FF70,$FF71,$FF72,$FF73,$FF74,$FF75,$FF76,$FF77,
|
||||
$FF78,$FF79,$FF7A,$FF7B,$FF7C,$FF7D,$FF7E,$FF7F,$FF80,$FF81,$FF82,$FF83,$FF84,
|
||||
$FF85,$FF86,$FF87,$FF88,$FF89,$FF8A,$FF8B,$FF8C,$FF8D,$FF8E,$FF8F,$FF90,$FF91,
|
||||
$FF92,$FF93,$FF94,$FF95,$FF96,$FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E,
|
||||
$FF9F,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,$3000,$3001,$3002,$FF0C,$FF0E,$30FB,$FF1A,$FF1B,
|
||||
$FF9F,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,$3000,$3001,$3002,$FF0C,$FF0E,$30FB,$FF1A,$FF1B,
|
||||
$FF1F,$FF01,$309B,$309C,$00B4,$FF40,$00A8,$FF3E,$FFE3,$FF3F,$30FD,$30FE,$309D,
|
||||
$309E,$3003,$4EDD,$3005,$3006,$3007,$30FC,$2015,$2010,$FF0F,$FF3C,$FF5E,$2225,
|
||||
$FF5C,$2026,$2025,$2018,$2019,$201C,$201D,$FF08,$FF09,$3014,$3015,$FF3B,$FF3D,
|
||||
@ -18166,9 +18164,9 @@ $CE,$CF,$D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7,$D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF,$8191,
|
||||
$8192,$81CA,$EEF9,$FA54,$8150,$EEFA,$FA55,$818F);
|
||||
|
||||
const Uni932U: CP932Arr =
|
||||
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,$00,$0001,$0002,$0003,$0004,$0005,
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,$00,$0001,$0002,$0003,$0004,$0005,
|
||||
$0006,$0007,$0008,$0009,$000A,$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,
|
||||
$0013,$0014,$0015,$0016,$0017,$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,
|
||||
$0020,$0021,$0022,$0023,$0024,$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,
|
||||
@ -18779,7 +18777,7 @@ $FF8A,$FF8B,$FF8C,$FF8D,$FF8E,$FF8F,$FF90,$FF91,$FF92,$FF93,$FF94,$FF95,$FF96,
|
||||
$FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E,$FF9F,$FFE0,$FFE1,$FFE2,$FFE2,
|
||||
$FFE2,$FFE3,$FFE4,$FFE4,$FFE5);
|
||||
|
||||
function SearchTable(CodePageArr: array of integer; id: cardinal): longint;
|
||||
function SearchTable(CodePageArr: array of word; id: cardinal): word;
|
||||
var
|
||||
idMid: integer;
|
||||
idLow, idHigh: integer;
|
||||
@ -18796,7 +18794,7 @@ begin
|
||||
end
|
||||
else
|
||||
begin
|
||||
Result := -1;
|
||||
Result := 0;
|
||||
end;
|
||||
Exit;
|
||||
end;
|
||||
@ -18811,7 +18809,7 @@ begin
|
||||
if CodePageArr[idMid] < id then
|
||||
idLow := idMid + 1;
|
||||
end;
|
||||
Result := -1;
|
||||
Result := 0;
|
||||
end;
|
||||
|
||||
|
||||
|
@ -18,6 +18,8 @@ unit LConvEncoding;
|
||||
|
||||
interface
|
||||
|
||||
{ $Define DisableAsianCodePages}
|
||||
|
||||
uses
|
||||
SysUtils, Classes, dos, LazUTF8
|
||||
{$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
|
||||
@ -99,22 +101,23 @@ function UTF8ToSingleByte(const s: string;
|
||||
function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
|
||||
function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
|
||||
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
// Asian encodings
|
||||
|
||||
function CP932ToUTF8(const s: string): string; // Japanese
|
||||
function CP936ToUTF8(const s: string): string; // Chinese
|
||||
function CP949ToUTF8(const s: string): string; // Korean
|
||||
function CP950ToUTF8(const s: string): string; // Traditional Chinese
|
||||
|
||||
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string; // Note: slow, needs optimization
|
||||
function DBCSToUTF8(const s: string; CodeP: integer): string;
|
||||
|
||||
function UTF8ToCP932(const s: string): string; // Japanese
|
||||
function UTF8ToCP936(const s: string): string; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
|
||||
function UTF8ToCP949(const s: string): string; // Korean
|
||||
function UTF8ToCP950(const s: string): string; // Traditional Chinese
|
||||
|
||||
function UTF8ToSingleByteEx(const s: string;
|
||||
function UTF8ToDBCS(const s: string;
|
||||
const UTF8CharConvFunc: TUnicodeToCharID): string;
|
||||
{$ENDIF}
|
||||
|
||||
procedure GetSupportedEncodings(List: TStrings);
|
||||
|
||||
@ -127,8 +130,10 @@ uses Windows;
|
||||
var EncodingValid: boolean = false;
|
||||
DefaultTextEncoding: string = EncodingAnsi;
|
||||
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
{$include asiancodepages.inc}
|
||||
{$include asiancodepagefunctions.inc}
|
||||
{$ENDIF}
|
||||
|
||||
{$IFDEF Windows}
|
||||
// AConsole - If false, it is the general system encoding,
|
||||
@ -6198,10 +6203,15 @@ begin
|
||||
List.Add('CP852');
|
||||
List.Add('CP866');
|
||||
List.Add('CP874');
|
||||
List.Add('CP936');
|
||||
List.Add('CP950');
|
||||
List.Add('CP949');
|
||||
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
// asian
|
||||
List.Add('CP932');
|
||||
List.Add('CP936');
|
||||
List.Add('CP949');
|
||||
List.Add('CP950');
|
||||
{$ENDIF}
|
||||
|
||||
List.Add('ISO-8859-1');
|
||||
List.Add('ISO-8859-2');
|
||||
List.Add('KOI-8');
|
||||
@ -6378,26 +6388,12 @@ begin
|
||||
if ATo='cp852' then begin Result:=UTF8ToCP852(s); exit; end;
|
||||
if ATo='cp866' then begin Result:=UTF8ToCP866(s); exit; end;
|
||||
if ATo='cp874' then begin Result:=UTF8ToCP874(s); exit; end;
|
||||
if ATo = 'cp936' then
|
||||
begin
|
||||
Result := UTF8ToCP936(s);
|
||||
exit;
|
||||
end;
|
||||
if ATo = 'cp950' then
|
||||
begin
|
||||
Result := UTF8ToCP950(s);
|
||||
exit;
|
||||
end;
|
||||
if ATo = 'cp949' then
|
||||
begin
|
||||
Result := UTF8ToCP949(s);
|
||||
exit;
|
||||
end;
|
||||
if ATo = 'cp932' then
|
||||
begin
|
||||
Result := UTF8ToCP932(s);
|
||||
exit;
|
||||
end;
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
if ATo='cp936' then begin Result := UTF8ToCP936(s); exit; end;
|
||||
if ATo='cp950' then begin Result := UTF8ToCP950(s); exit; end;
|
||||
if ATo='cp949' then begin Result := UTF8ToCP949(s); exit; end;
|
||||
if ATo='cp932' then begin Result := UTF8ToCP932(s); exit; end;
|
||||
{$ENDIF}
|
||||
if ATo='koi8' then begin Result:=UTF8ToKOI8(s); exit; end;
|
||||
if ATo=EncodingUCS2LE then begin Result:=UTF8ToUCS2LE(s); exit; end;
|
||||
if ATo=EncodingUCS2BE then begin Result:=UTF8ToUCS2BE(s); exit; end;
|
||||
@ -6424,26 +6420,12 @@ begin
|
||||
if AFrom='cp852' then begin Result:=CP852ToUTF8(s); exit; end;
|
||||
if AFrom='cp866' then begin Result:=CP866ToUTF8(s); exit; end;
|
||||
if AFrom='cp874' then begin Result:=CP874ToUTF8(s); exit; end;
|
||||
if AFrom = 'cp936' then
|
||||
begin
|
||||
Result := CP936ToUTF8(s);
|
||||
exit;
|
||||
end;
|
||||
if AFrom = 'cp950' then
|
||||
begin
|
||||
Result := CP950ToUTF8(s);
|
||||
exit;
|
||||
end;
|
||||
if AFrom = 'cp949' then
|
||||
begin
|
||||
Result := CP949ToUTF8(s);
|
||||
exit;
|
||||
end;
|
||||
if AFrom = 'cp932' then
|
||||
begin
|
||||
Result := CP932ToUTF8(s);
|
||||
exit;
|
||||
end;
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
if AFrom='cp936' then begin Result := CP936ToUTF8(s); exit; end;
|
||||
if AFrom='cp950' then begin Result := CP950ToUTF8(s); exit; end;
|
||||
if AFrom='cp949' then begin Result := CP949ToUTF8(s); exit; end;
|
||||
if AFrom='cp932' then begin Result := CP932ToUTF8(s); exit; end;
|
||||
{$ENDIF}
|
||||
if AFrom='koi8' then begin Result:=KOI8ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s); exit; end;
|
||||
if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s); exit; end;
|
||||
@ -6519,6 +6501,7 @@ begin
|
||||
Result:=CP874ToUTF8(s);
|
||||
Encoded := true;
|
||||
end
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
else if AFrom = 'cp936' then
|
||||
begin
|
||||
Result := CP936ToUTF8(s);
|
||||
@ -6539,6 +6522,7 @@ begin
|
||||
Result := CP932ToUTF8(s);
|
||||
Encoded := True;
|
||||
end
|
||||
{$ENDIF}
|
||||
else if AFrom='koi8' then begin
|
||||
Result:=KOI8ToUTF8(s);
|
||||
Encoded := true;
|
||||
@ -6611,6 +6595,7 @@ begin
|
||||
Result:=UTF8ToCP874(Result);
|
||||
Encoded := true;
|
||||
end
|
||||
{$IFNDEF DisableAsianCodePages}
|
||||
else if ATo = 'cp936' then
|
||||
begin
|
||||
Result := UTF8ToCP936(Result);
|
||||
@ -6631,6 +6616,7 @@ begin
|
||||
Result := UTF8ToCP932(Result);
|
||||
Encoded := True;
|
||||
end
|
||||
{$ENDIF}
|
||||
else if ATo='koi8' then begin
|
||||
Result:=UTF8ToKOI8(Result);
|
||||
Encoded := true;
|
||||
|
Loading…
Reference in New Issue
Block a user