lazutils: renamed SingleByteToUTF8Ex and UTF8ToSingleByteEx to DBCSToUTF8 and UTF8ToDBCS, fixed conversion of the last DBCS character, Asian code page tables now use word instead of Integer (half the space)

git-svn-id: trunk@38241 -
This commit is contained in:
mattias 2012-08-14 21:01:39 +00:00
parent 9295710c2a
commit 3f21a3c32a
3 changed files with 116 additions and 146 deletions

View File

@ -20,17 +20,15 @@
The clipboard is able to work with the windows and gtk behaviour/features. The clipboard is able to work with the windows and gtk behaviour/features.
} }
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string; function DBCSToUTF8(const s: string; CodeP: integer): string;
var var
len: integer; len: SizeInt;
i, j: integer;
Src: PChar; Src: PChar;
Dest: PChar; Dest: PChar;
c: char; c: char;
tempstr: ansistring; l: Integer;
tempint: integer; code: word;
begin begin
SetLength(tempstr, 4);
if s = '' then if s = '' then
begin begin
Result := s; Result := s;
@ -40,74 +38,64 @@ begin
SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
Src := PChar(s); Src := PChar(s);
Dest := PChar(Result); Dest := PChar(Result);
i := 1; repeat
while i < len do
begin
c := Src^; c := Src^;
Inc(Src); Inc(Src);
i := i + 1;
if Ord(c) < 128 then if Ord(c) < 128 then
begin begin
if (c=#0) and (Src-PChar(s)>=len) then break;
Dest^ := c; Dest^ := c;
Inc(Dest); Inc(Dest);
end end
else else
begin begin
tempint := Byte(c) shl 8; code := Byte(c) shl 8;
if i <= len then c:=Src^;
begin if (c=#0) and (Src-PChar(s)>=len) then break;
tempint := tempint + Byte(Src^); code := code + Byte(c);
i := i + 1;
end;
Inc(Src); Inc(Src);
case CodeP of case CodeP of
936: 936:
tempint := Uni936C[SearchTable(CP936CC, tempint)]; code := Uni936C[SearchTable(CP936CC, code)];
950: 950:
tempint := Uni950C[SearchTable(CP950CC, tempint)]; code := Uni950C[SearchTable(CP950CC, code)];
949: 949:
tempint := Uni949C[SearchTable(CP949CC, tempint)]; code := Uni949C[SearchTable(CP949CC, code)];
932: 932:
tempint := Uni932C[SearchTable(CP932CC, tempint)]; code := Uni932C[SearchTable(CP932CC, code)];
else else
tempint := -1; code := 0;
end; end;
if tempint <> -1 then if code>0 then
begin begin
TempStr := UnicodeToUTF8(tempint); l:=UnicodeToUTF8Inline(code,Dest);
inc(Dest,l);
for j := 1 to Length(TempStr) do
begin
Dest^ := TempStr[j];
Inc(Dest);
end;
end;
end; end;
end; end;
until false;
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result)); SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
end; end;
function CP936ToUTF8(const s: string): string; function CP936ToUTF8(const s: string): string;
begin begin
Result := SingleByteToUTF8Ex(s, 936); Result := DBCSToUTF8(s, 936);
end; end;
function CP950ToUTF8(const s: string): string; function CP950ToUTF8(const s: string): string;
begin begin
Result := SingleByteToUTF8Ex(s, 950); Result := DBCSToUTF8(s, 950);
end; end;
function CP949ToUTF8(const s: string): string; function CP949ToUTF8(const s: string): string;
begin begin
Result := SingleByteToUTF8Ex(s, 949); Result := DBCSToUTF8(s, 949);
end; end;
function CP932ToUTF8(const s: string): string; function CP932ToUTF8(const s: string): string;
begin begin
Result := SingleByteToUTF8Ex(s, 932); Result := DBCSToUTF8(s, 932);
end; end;
function UnicodeToCP936(Unicode: cardinal): integer; function UnicodeToCP936(Unicode: cardinal): integer;
@ -146,7 +134,7 @@ begin
end; end;
end; end;
function UTF8ToSingleByteEx(const s: string; function UTF8ToDBCS(const s: string;
const UTF8CharConvFunc: TUnicodeToCharID): string; const UTF8CharConvFunc: TUnicodeToCharID): string;
var var
len: integer; len: integer;
@ -163,24 +151,22 @@ begin
exit; exit;
end; end;
len := length(s); len := length(s);
SetLength(Result, len); SetLength(Result, len); // DBCS needs at most as much space as UTF-8
Src := PChar(s); Src := PChar(s);
Dest := PChar(Result); Dest := PChar(Result);
while len > 0 do repeat
begin
c := Src^; c := Src^;
if c < #128 then if c < #128 then
begin begin
if (c=#0) and (Src-PChar(s)>=len) then break;
Dest^ := c; Dest^ := c;
Inc(Dest); Inc(Dest);
Inc(Src); Inc(Src);
Dec(len);
end end
else else
begin begin
Unicode := UTF8CharacterToUnicode(Src, CharLen); Unicode := UTF8CharacterToUnicode(Src, CharLen);
Inc(Src, CharLen); Inc(Src, CharLen);
Dec(len, CharLen);
i := UTF8CharConvFunc(Unicode); i := UTF8CharConvFunc(Unicode);
//writeln(Format('%X', [i])); //writeln(Format('%X', [i]));
if i >= 0 then if i >= 0 then
@ -196,28 +182,28 @@ begin
Inc(Dest); Inc(Dest);
end; end;
end; end;
end; until false;
//SetLength(Result, Dest - PChar(Result)); //SetLength(Result, Dest - PChar(Result));
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result)); SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
end; end;
function UTF8ToCP936(const s: string): string; function UTF8ToCP936(const s: string): string;
begin begin
Result := UTF8ToSingleByteEx(s, @UnicodeToCP936); Result := UTF8ToDBCS(s, @UnicodeToCP936);
end; end;
function UTF8ToCP950(const s: string): string; function UTF8ToCP950(const s: string): string;
begin begin
Result := UTF8ToSingleByteEx(s, @UnicodeToCP950); Result := UTF8ToDBCS(s, @UnicodeToCP950);
end; end;
function UTF8ToCP949(const s: string): string; function UTF8ToCP949(const s: string): string;
begin begin
Result := UTF8ToSingleByteEx(s, @UnicodeToCP949); Result := UTF8ToDBCS(s, @UnicodeToCP949);
end; end;
function UTF8ToCP932(const s: string): string; function UTF8ToCP932(const s: string): string;
begin begin
Result := UTF8ToSingleByteEx(s, @UnicodeToCP932); Result := UTF8ToDBCS(s, @UnicodeToCP932);
end; end;

View File

@ -16,14 +16,12 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
* * * *
***************************************************************************** *****************************************************************************
The clipboard is able to work with the windows and gtk behaviour/features.
} }
type type
CP936Arr = array[0..22046] of Integer; CP936Arr = array[0..22046] of word;
CP950Arr = array[0..13758] of Integer; CP950Arr = array[0..13758] of word;
CP949Arr = array[0..17303] of Integer; CP949Arr = array[0..17303] of word;
CP932Arr = array[0..7979] of Integer; CP932Arr = array[0..7979] of word;
const CP936CC: CP936Arr = const CP936CC: CP936Arr =
($00,$01,$02,$03,$04,$05,$06,$07,$08,$09,$0A,$0B,$0C,$0D,$0E,$0F,$10,$11,$12, ($00,$01,$02,$03,$04,$05,$06,$07,$08,$09,$0A,$0B,$0C,$0D,$0E,$0F,$10,$11,$12,
@ -1727,13 +1725,13 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A, $004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067, $005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074, $0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,$20AC,-1, $0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,$20AC,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,$4E02,$4E04,$4E05,$4E06,$4E0F,$4E12,$4E17,$4E1F,$4E20,$4E21,$4E23,$4E26, 0,$4E02,$4E04,$4E05,$4E06,$4E0F,$4E12,$4E17,$4E1F,$4E20,$4E21,$4E23,$4E26,
$4E29,$4E2E,$4E2F,$4E31,$4E33,$4E35,$4E37,$4E3C,$4E40,$4E41,$4E42,$4E44,$4E46, $4E29,$4E2E,$4E2F,$4E31,$4E33,$4E35,$4E37,$4E3C,$4E40,$4E41,$4E42,$4E44,$4E46,
$4E4A,$4E51,$4E55,$4E57,$4E5A,$4E5B,$4E62,$4E63,$4E64,$4E65,$4E67,$4E68,$4E6A, $4E4A,$4E51,$4E55,$4E57,$4E5A,$4E5B,$4E62,$4E63,$4E64,$4E65,$4E67,$4E68,$4E6A,
$4E6B,$4E6C,$4E6D,$4E6E,$4E6F,$4E72,$4E74,$4E75,$4E76,$4E77,$4E78,$4E79,$4E7A, $4E6B,$4E6C,$4E6D,$4E6E,$4E6F,$4E72,$4E74,$4E75,$4E76,$4E77,$4E78,$4E79,$4E7A,
@ -5104,12 +5102,12 @@ $A3EF,$A3F0,$A3F1,$A3F2,$A3F3,$A3F4,$A3F5,$A3F6,$A3F7,$A3F8,$A3F9,$A3FA,$A3FB,
$A3FC,$A3FD,$A1AB,$A1E9,$A1EA,$A956,$A3FE,$A957,$A3A4); $A3FC,$A3FD,$A1AB,$A1E9,$A1EA,$A956,$A3FE,$A957,$A3A4);
const Uni936U: CP936Arr = const Uni936U: CP936Arr =
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,$000B, 0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,$000B,
$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,$0018, $000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,$0018,
$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,$0025, $0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,$0025,
$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,$0032, $0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,$0032,
@ -7862,12 +7860,12 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A, $004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067, $005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074, $0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1, $0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
$3000,$FF0C,$3001,$3002,$FF0E,$2027,$FF1B,$FF1A,$FF1F,$FF01,$FE30,$2026,$2025, $3000,$FF0C,$3001,$3002,$FF0E,$2027,$FF1B,$FF1A,$FF1F,$FF01,$FE30,$2026,$2025,
$FE50,$FE51,$FE52,$00B7,$FE54,$FE55,$FE56,$FE57,$FF5C,$2013,$FE31,$2014,$FE33, $FE50,$FE51,$FE52,$00B7,$FE54,$FE55,$FE56,$FE57,$FF5C,$2013,$FE31,$2014,$FE33,
$2574,$FE34,$FE4F,$FF08,$FF09,$FE35,$FE36,$FF5B,$FF5D,$FE37,$FE38,$3014,$3015, $2574,$FE34,$FE4F,$FF08,$FF09,$FE35,$FE36,$FF5B,$FF5D,$FE37,$FE38,$3014,$3015,
@ -9964,12 +9962,12 @@ $A2FC,$A2FD,$A2FE,$A340,$A341,$A342,$A343,$A161,$A155,$A162,$A1E3,$A246,$A247,
$A1C3,$A244); $A1C3,$A244);
const Uni950U: CP950Arr = const Uni950U: CP950Arr =
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A, 0,0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017, $000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,
$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024, $0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,
$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031, $0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,
@ -12356,12 +12354,12 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A, $004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067, $005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074, $0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1, $0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
$AC02,$AC03,$AC05,$AC06,$AC0B,$AC0C,$AC0D,$AC0E,$AC0F,$AC18,$AC1E,$AC1F,$AC21, $AC02,$AC03,$AC05,$AC06,$AC0B,$AC0C,$AC0D,$AC0E,$AC0F,$AC18,$AC1E,$AC1F,$AC21,
$AC22,$AC23,$AC25,$AC26,$AC27,$AC28,$AC29,$AC2A,$AC2B,$AC2E,$AC32,$AC33,$AC34, $AC22,$AC23,$AC25,$AC26,$AC27,$AC28,$AC29,$AC2A,$AC2B,$AC2E,$AC32,$AC33,$AC34,
$AC35,$AC36,$AC37,$AC3A,$AC3B,$AC3D,$AC3E,$AC3F,$AC41,$AC42,$AC43,$AC44,$AC45, $AC35,$AC36,$AC37,$AC3A,$AC3B,$AC3D,$AC3E,$AC3F,$AC41,$AC42,$AC43,$AC44,$AC45,
@ -15003,12 +15001,12 @@ $A3ED,$A3EE,$A3EF,$A3F0,$A3F1,$A3F2,$A3F3,$A3F4,$A3F5,$A3F6,$A3F7,$A3F8,$A3F9,
$A3FA,$A3FB,$A3FC,$A3FD,$A2A6,$A1CB,$A1CC,$A1FE,$A3FE,$A1CD,$A3DC); $A3FA,$A3FB,$A3FC,$A3FD,$A2A6,$A1CB,$A1CC,$A1FE,$A3FE,$A1CD,$A3DC);
const Uni949U: CP949Arr = const Uni949U: CP949Arr =
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A, 0,0,0,$0000,$0001,$0002,$0003,$0004,$0005,$0006,$0007,$0008,$0009,$000A,
$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017, $000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,$0013,$0014,$0015,$0016,$0017,
$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024, $0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,$0020,$0021,$0022,$0023,$0024,
$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031, $0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,$0031,
@ -16951,15 +16949,15 @@ $0041,$0042,$0043,$0044,$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,
$004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A, $004E,$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,$0059,$005A,
$005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067, $005B,$005C,$005D,$005E,$005F,$0060,$0061,$0062,$0063,$0064,$0065,$0066,$0067,
$0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074, $0068,$0069,$006A,$006B,$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,
$0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,-1,-1,-1, $0075,$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$007F,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,$FF61,$FF62,$FF63,$FF64,$FF65,$FF66,$FF67,$FF68,$FF69,$FF6A, 0,0,0,0,0,$FF61,$FF62,$FF63,$FF64,$FF65,$FF66,$FF67,$FF68,$FF69,$FF6A,
$FF6B,$FF6C,$FF6D,$FF6E,$FF6F,$FF70,$FF71,$FF72,$FF73,$FF74,$FF75,$FF76,$FF77, $FF6B,$FF6C,$FF6D,$FF6E,$FF6F,$FF70,$FF71,$FF72,$FF73,$FF74,$FF75,$FF76,$FF77,
$FF78,$FF79,$FF7A,$FF7B,$FF7C,$FF7D,$FF7E,$FF7F,$FF80,$FF81,$FF82,$FF83,$FF84, $FF78,$FF79,$FF7A,$FF7B,$FF7C,$FF7D,$FF7E,$FF7F,$FF80,$FF81,$FF82,$FF83,$FF84,
$FF85,$FF86,$FF87,$FF88,$FF89,$FF8A,$FF8B,$FF8C,$FF8D,$FF8E,$FF8F,$FF90,$FF91, $FF85,$FF86,$FF87,$FF88,$FF89,$FF8A,$FF8B,$FF8C,$FF8D,$FF8E,$FF8F,$FF90,$FF91,
$FF92,$FF93,$FF94,$FF95,$FF96,$FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E, $FF92,$FF93,$FF94,$FF95,$FF96,$FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E,
$FF9F,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, $FF9F,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,$3000,$3001,$3002,$FF0C,$FF0E,$30FB,$FF1A,$FF1B, 0,0,0,0,0,0,0,0,0,$3000,$3001,$3002,$FF0C,$FF0E,$30FB,$FF1A,$FF1B,
$FF1F,$FF01,$309B,$309C,$00B4,$FF40,$00A8,$FF3E,$FFE3,$FF3F,$30FD,$30FE,$309D, $FF1F,$FF01,$309B,$309C,$00B4,$FF40,$00A8,$FF3E,$FFE3,$FF3F,$30FD,$30FE,$309D,
$309E,$3003,$4EDD,$3005,$3006,$3007,$30FC,$2015,$2010,$FF0F,$FF3C,$FF5E,$2225, $309E,$3003,$4EDD,$3005,$3006,$3007,$30FC,$2015,$2010,$FF0F,$FF3C,$FF5E,$2225,
$FF5C,$2026,$2025,$2018,$2019,$201C,$201D,$FF08,$FF09,$3014,$3015,$FF3B,$FF3D, $FF5C,$2026,$2025,$2018,$2019,$201C,$201D,$FF08,$FF09,$3014,$3015,$FF3B,$FF3D,
@ -18166,9 +18164,9 @@ $CE,$CF,$D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7,$D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF,$8191,
$8192,$81CA,$EEF9,$FA54,$8150,$EEFA,$FA55,$818F); $8192,$81CA,$EEF9,$FA54,$8150,$EEFA,$FA55,$818F);
const Uni932U: CP932Arr = const Uni932U: CP932Arr =
(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,$00,$0001,$0002,$0003,$0004,$0005, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,$00,$0001,$0002,$0003,$0004,$0005,
$0006,$0007,$0008,$0009,$000A,$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012, $0006,$0007,$0008,$0009,$000A,$000B,$000C,$000D,$000E,$000F,$0010,$0011,$0012,
$0013,$0014,$0015,$0016,$0017,$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F, $0013,$0014,$0015,$0016,$0017,$0018,$0019,$001A,$001B,$001C,$001D,$001E,$001F,
$0020,$0021,$0022,$0023,$0024,$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C, $0020,$0021,$0022,$0023,$0024,$0025,$0026,$0027,$0028,$0029,$002A,$002B,$002C,
@ -18779,7 +18777,7 @@ $FF8A,$FF8B,$FF8C,$FF8D,$FF8E,$FF8F,$FF90,$FF91,$FF92,$FF93,$FF94,$FF95,$FF96,
$FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E,$FF9F,$FFE0,$FFE1,$FFE2,$FFE2, $FF97,$FF98,$FF99,$FF9A,$FF9B,$FF9C,$FF9D,$FF9E,$FF9F,$FFE0,$FFE1,$FFE2,$FFE2,
$FFE2,$FFE3,$FFE4,$FFE4,$FFE5); $FFE2,$FFE3,$FFE4,$FFE4,$FFE5);
function SearchTable(CodePageArr: array of integer; id: cardinal): longint; function SearchTable(CodePageArr: array of word; id: cardinal): word;
var var
idMid: integer; idMid: integer;
idLow, idHigh: integer; idLow, idHigh: integer;
@ -18796,7 +18794,7 @@ begin
end end
else else
begin begin
Result := -1; Result := 0;
end; end;
Exit; Exit;
end; end;
@ -18811,7 +18809,7 @@ begin
if CodePageArr[idMid] < id then if CodePageArr[idMid] < id then
idLow := idMid + 1; idLow := idMid + 1;
end; end;
Result := -1; Result := 0;
end; end;

View File

@ -18,6 +18,8 @@ unit LConvEncoding;
interface interface
{ $Define DisableAsianCodePages}
uses uses
SysUtils, Classes, dos, LazUTF8 SysUtils, Classes, dos, LazUTF8
{$IFDEF EnableIconvEnc},iconvenc{$ENDIF}; {$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
@ -99,22 +101,23 @@ function UTF8ToSingleByte(const s: string;
function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
{$IFNDEF DisableAsianCodePages}
// Asian encodings // Asian encodings
function CP932ToUTF8(const s: string): string; // Japanese function CP932ToUTF8(const s: string): string; // Japanese
function CP936ToUTF8(const s: string): string; // Chinese function CP936ToUTF8(const s: string): string; // Chinese
function CP949ToUTF8(const s: string): string; // Korea function CP949ToUTF8(const s: string): string; // Korea
function CP950ToUTF8(const s: string): string; // Chinese Complex function CP950ToUTF8(const s: string): string; // Chinese Complex
function SingleByteToUTF8Ex(const s: string; CodeP: integer): string; // Note: slow, needs optimization function DBCSToUTF8(const s: string; CodeP: integer): string;
function UTF8ToCP932(const s: string): string; // Japanese function UTF8ToCP932(const s: string): string; // Japanese
function UTF8ToCP936(const s: string): string; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030 function UTF8ToCP936(const s: string): string; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
function UTF8ToCP949(const s: string): string; // Korea function UTF8ToCP949(const s: string): string; // Korea
function UTF8ToCP950(const s: string): string; // Chinese Complex function UTF8ToCP950(const s: string): string; // Chinese Complex
function UTF8ToSingleByteEx(const s: string; function UTF8ToDBCS(const s: string;
const UTF8CharConvFunc: TUnicodeToCharID): string; const UTF8CharConvFunc: TUnicodeToCharID): string;
{$ENDIF}
procedure GetSupportedEncodings(List: TStrings); procedure GetSupportedEncodings(List: TStrings);
@ -127,8 +130,10 @@ uses Windows;
var EncodingValid: boolean = false; var EncodingValid: boolean = false;
DefaultTextEncoding: string = EncodingAnsi; DefaultTextEncoding: string = EncodingAnsi;
{$IFNDEF DisableAsianCodePages}
{$include asiancodepages.inc} {$include asiancodepages.inc}
{$include asiancodepagefunctions.inc} {$include asiancodepagefunctions.inc}
{$ENDIF}
{$IFDEF Windows} {$IFDEF Windows}
// AConsole - If false, it is the general system encoding, // AConsole - If false, it is the general system encoding,
@ -6198,10 +6203,15 @@ begin
List.Add('CP852'); List.Add('CP852');
List.Add('CP866'); List.Add('CP866');
List.Add('CP874'); List.Add('CP874');
List.Add('CP936');
List.Add('CP950'); {$IFNDEF DisableAsianCodePages}
List.Add('CP949'); // asian
List.Add('CP932'); List.Add('CP932');
List.Add('CP936');
List.Add('CP949');
List.Add('CP950');
{$ENDIF}
List.Add('ISO-8859-1'); List.Add('ISO-8859-1');
List.Add('ISO-8859-2'); List.Add('ISO-8859-2');
List.Add('KOI-8'); List.Add('KOI-8');
@ -6378,26 +6388,12 @@ begin
if ATo='cp852' then begin Result:=UTF8ToCP852(s); exit; end; if ATo='cp852' then begin Result:=UTF8ToCP852(s); exit; end;
if ATo='cp866' then begin Result:=UTF8ToCP866(s); exit; end; if ATo='cp866' then begin Result:=UTF8ToCP866(s); exit; end;
if ATo='cp874' then begin Result:=UTF8ToCP874(s); exit; end; if ATo='cp874' then begin Result:=UTF8ToCP874(s); exit; end;
if ATo = 'cp936' then {$IFNDEF DisableAsianCodePages}
begin if ATo='cp936' then begin Result := UTF8ToCP936(s); exit; end;
Result := UTF8ToCP936(s); if ATo='cp950' then begin Result := UTF8ToCP950(s); exit; end;
exit; if ATo='cp949' then begin Result := UTF8ToCP949(s); exit; end;
end; if ATo='cp932' then begin Result := UTF8ToCP932(s); exit; end;
if ATo = 'cp950' then {$ENDIF}
begin
Result := UTF8ToCP950(s);
exit;
end;
if ATo = 'cp949' then
begin
Result := UTF8ToCP949(s);
exit;
end;
if ATo = 'cp932' then
begin
Result := UTF8ToCP932(s);
exit;
end;
if ATo='koi8' then begin Result:=UTF8ToKOI8(s); exit; end; if ATo='koi8' then begin Result:=UTF8ToKOI8(s); exit; end;
if ATo=EncodingUCS2LE then begin Result:=UTF8ToUCS2LE(s); exit; end; if ATo=EncodingUCS2LE then begin Result:=UTF8ToUCS2LE(s); exit; end;
if ATo=EncodingUCS2BE then begin Result:=UTF8ToUCS2BE(s); exit; end; if ATo=EncodingUCS2BE then begin Result:=UTF8ToUCS2BE(s); exit; end;
@ -6424,26 +6420,12 @@ begin
if AFrom='cp852' then begin Result:=CP852ToUTF8(s); exit; end; if AFrom='cp852' then begin Result:=CP852ToUTF8(s); exit; end;
if AFrom='cp866' then begin Result:=CP866ToUTF8(s); exit; end; if AFrom='cp866' then begin Result:=CP866ToUTF8(s); exit; end;
if AFrom='cp874' then begin Result:=CP874ToUTF8(s); exit; end; if AFrom='cp874' then begin Result:=CP874ToUTF8(s); exit; end;
if AFrom = 'cp936' then {$IFNDEF DisableAsianCodePages}
begin if AFrom='cp936' then begin Result := CP936ToUTF8(s); exit; end;
Result := CP936ToUTF8(s); if AFrom='cp950' then begin Result := CP950ToUTF8(s); exit; end;
exit; if AFrom='cp949' then begin Result := CP949ToUTF8(s); exit; end;
end; if AFrom='cp932' then begin Result := CP932ToUTF8(s); exit; end;
if AFrom = 'cp950' then {$ENDIF}
begin
Result := CP950ToUTF8(s);
exit;
end;
if AFrom = 'cp949' then
begin
Result := CP949ToUTF8(s);
exit;
end;
if AFrom = 'cp932' then
begin
Result := CP932ToUTF8(s);
exit;
end;
if AFrom='koi8' then begin Result:=KOI8ToUTF8(s); exit; end; if AFrom='koi8' then begin Result:=KOI8ToUTF8(s); exit; end;
if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s); exit; end; if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s); exit; end;
if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s); exit; end; if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s); exit; end;
@ -6519,6 +6501,7 @@ begin
Result:=CP874ToUTF8(s); Result:=CP874ToUTF8(s);
Encoded := true; Encoded := true;
end end
{$IFNDEF DisableAsianCodePages}
else if AFrom = 'cp936' then else if AFrom = 'cp936' then
begin begin
Result := CP936ToUTF8(s); Result := CP936ToUTF8(s);
@ -6539,6 +6522,7 @@ begin
Result := CP932ToUTF8(s); Result := CP932ToUTF8(s);
Encoded := True; Encoded := True;
end end
{$ENDIF}
else if AFrom='koi8' then begin else if AFrom='koi8' then begin
Result:=KOI8ToUTF8(s); Result:=KOI8ToUTF8(s);
Encoded := true; Encoded := true;
@ -6611,6 +6595,7 @@ begin
Result:=UTF8ToCP874(Result); Result:=UTF8ToCP874(Result);
Encoded := true; Encoded := true;
end end
{$IFNDEF DisableAsianCodePages}
else if ATo = 'cp936' then else if ATo = 'cp936' then
begin begin
Result := UTF8ToCP936(Result); Result := UTF8ToCP936(Result);
@ -6631,6 +6616,7 @@ begin
Result := UTF8ToCP932(Result); Result := UTF8ToCP932(Result);
Encoded := True; Encoded := True;
end end
{$ENDIF}
else if ATo='koi8' then begin else if ATo='koi8' then begin
Result:=UTF8ToKOI8(Result); Result:=UTF8ToKOI8(Result);
Encoded := true; Encoded := true;