mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-08 05:58:15 +02:00
LConvEncoding: reverts #05765da982ab46e2486c6932648c71fd692a54d5 and #01e627e35da5030ccea4406cffbdf5b67b85b0b9 and resolves issue #39348 in a different way (by always setting the target's codepage when converting to UTF8).
This commit is contained in:
parent
818c1c82d8
commit
cb9d685f79
@ -12,7 +12,7 @@
|
||||
The clipboard is able to work with the windows and gtk behaviour/features.
|
||||
}
|
||||
|
||||
function DBCSToUTF8(const s: string; const ArrayUni, ArrayCP: array of word; CodeP: integer; SetTargetCodePage: Boolean = False): string;
|
||||
function DBCSToUTF8(const s: string; const ArrayUni, ArrayCP: array of word; CodeP: integer): string;
|
||||
const
|
||||
cp936unodered:array[0..254] of Uint32=($a2ab,$a2ac,$a2ad,$a2ae,$a2af,$a2b0,$a2e3,$a2e4,$a2ef,$a2f0,$a2fd,$a2fe,$a4f4,$a4f5,$a4f6,$a4f7,$a4f8,$a4f9,$a4fa,$a4fb,$a4fc,$a4fd,$a4fe,$a5f7,$a5f8,$a5f9,$a5fa,$a5fb,$a5fc,$a5fd,$a5fe,$a6b9,$a6ba,$a6bb,$a6bc,$a6bd,$a6be,$a6bf,$a6c0,$a6d9,$a6da,$a6db,$a6dc,$a6dd,$a6de,$a6df,$a6ec,$a6ed,$a6f3,$a6f6,$a6f7,$a6f8,$a6f9,$a6fa,$a6fb,$a6fc,$a6fd,$a6fe,$a7c2,$a7c3,$a7c4,$a7c5,$a7c6,$a7c7,$a7c8,$a7c9,$a7ca,$a7cb,$a7cc,$a7cd,$a7ce,$a7cf,$a7d0,$a7f2,$a7f3,$a7f4,$a7f5,$a7f6,$a7f7,$a7f8,$a7f9,$a7fa,$a7fb,$a7fc,$a7fd,$a7fe,$a896,$a897,$a898,$a899,$a89a,$a89b,$a89c,$a89d,$a89e,$a89f,$a8a0,$a8bc,$a8bf,$a8c1,$a8c2,$a8c3,$a8c4,$a8ea,$a8eb,$a8ec,$a8ed,$a8ee,$a8ef,$a8f0,$a8f1,$a8f2,$a8f3,$a8f4,$a8f5,$a8f6,$a8f7,$a8f8,$a8f9,$a8fa,$a8fb,$a8fc,$a8fd,$a8fe,$a958,$a95b,$a95d,$a95e,$a95f,$a989,$a98a,$a98b,$a98c,$a98d,$a98e,$a98f,$a990,$a991,$a992,$a993,$a994,$a995,$a997,$a998,$a999,$a99a,$a99b,$a99c,$a99d,$a99e,$a99f,$a9a0,$a9a1,$a9a2,$a9a3,$a9f0,$a9f1,$a9f2,$a9f3,$a9f4,$a9f5,$a9f6,$a9f7,$a9f8,$a9f9,$a9fa,$a9fb,$a9fc,$a9fd,$a9fe,$d7fa,$d7fb,$d7fc,$d7fd,$d7fe,$fe50,$fe51,$fe52,$fe53,$fe54,$fe55,$fe56,$fe57,$fe58,$fe59,$fe5a,$fe5b,$fe5c,$fe5d,$fe5e,$fe5f,$fe60,$fe61,$fe62,$fe63,$fe64,$fe65,$fe66,$fe67,$fe68,$fe69,$fe6a,$fe6b,$fe6c,$fe6d,$fe6e,$fe6f,$fe70,$fe71,$fe72,$fe73,$fe74,$fe75,$fe76,$fe77,$fe78,$fe79,$fe7a,$fe7b,$fe7c,$fe7d,$fe7e,$fe80,$fe81,$fe82,$fe83,$fe84,$fe85,$fe86,$fe87,$fe88,$fe89,$fe8a,$fe8b,$fe8c,$fe8d,$fe8e,$fe8f,$fe90,$fe91,$fe92,$fe93,$fe94,$fe95,$fe96,$fe97,$fe98,$fe99,$fe9a,$fe9b,$fe9c,$fe9d,$fe9e,$fe9f,$fea0);
|
||||
cp936unoderedstart:Uint32=$e766;
|
||||
@ -182,28 +182,27 @@ begin
|
||||
end;
|
||||
until false;
|
||||
SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
|
||||
if SetTargetCodePage then
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
end;
|
||||
|
||||
function CP936ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP936ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := DBCSToUTF8(s, Uni936C, CP936CC,936,SetTargetCodePage);
|
||||
Result := DBCSToUTF8(s, Uni936C, CP936CC,936);
|
||||
end;
|
||||
|
||||
function CP950ToUTF8(const s: string; SetTargetCodePage: Boolean ): string;
|
||||
function CP950ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := DBCSToUTF8(s, Uni950C, CP950CC,950,SetTargetCodePage);
|
||||
Result := DBCSToUTF8(s, Uni950C, CP950CC,950);
|
||||
end;
|
||||
|
||||
function CP949ToUTF8(const s: string; SetTargetCodePage: Boolean ): string;
|
||||
function CP949ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := DBCSToUTF8(s, Uni949C, CP949CC,949,SetTargetCodePage);
|
||||
Result := DBCSToUTF8(s, Uni949C, CP949CC,949);
|
||||
end;
|
||||
|
||||
function CP932ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP932ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result := DBCSToUTF8(s, Uni932C, CP932CC,932,SetTargetCodePage);
|
||||
Result := DBCSToUTF8(s, Uni932C, CP932CC,932);
|
||||
end;
|
||||
|
||||
{$IfNDef UseSystemCPConv}
|
||||
|
@ -91,10 +91,15 @@ const
|
||||
|
||||
function GuessEncoding(const s: string): string;
|
||||
|
||||
{
|
||||
Note: Conversions to UTF8 always set the target's codepage to CP_UTF8.
|
||||
This implies that the SetTargetCodePage parameter of the ConvertEncoding() function
|
||||
is effectively ignored for conversions to UTF8.
|
||||
}
|
||||
|
||||
function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean;
|
||||
SetTargetCodePage: boolean = false): string;
|
||||
function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean;
|
||||
SetTargetCodePage: Boolean=False): string;
|
||||
function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
|
||||
// For UTF8 use the above functions, they save you one parameter
|
||||
function ConvertEncoding(const s, FromEncoding, ToEncoding: string;
|
||||
SetTargetCodePage: boolean = false): string;
|
||||
@ -108,7 +113,7 @@ function GetConsoleTextEncoding: string;
|
||||
function NormalizeEncoding(const Encoding: string): string;
|
||||
|
||||
type
|
||||
TConvertEncodingFunction = function(const s: string; SetTargetCodePage: boolean = False): string;
|
||||
TConvertEncodingFunction = function(const s: string): string;
|
||||
TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
|
||||
TCharToUTF8Table = CodepagesCommon.TCharToUTF8Table;
|
||||
TUnicodeToCharID = function(Unicode: cardinal): integer;
|
||||
@ -116,31 +121,31 @@ var
|
||||
ConvertAnsiToUTF8: TConvertEncodingFunction = nil;
|
||||
ConvertUTF8ToAnsi: TConvertUTF8ToEncodingFunc = nil;
|
||||
|
||||
function UTF8BOMToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // UTF8 with BOM
|
||||
function ISO_8859_1ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // western europe (Latin-1)
|
||||
function ISO_8859_15ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Western European languages
|
||||
function ISO_8859_2ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // eastern europe
|
||||
function CP1250ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // central europe
|
||||
function CP1251ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // cyrillic
|
||||
function CP1252ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // latin 1
|
||||
function CP1253ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // greek
|
||||
function CP1254ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // turkish
|
||||
function CP1255ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // hebrew
|
||||
function CP1256ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // arabic
|
||||
function CP1257ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // baltic
|
||||
function CP1258ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // vietnam
|
||||
function CP437ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // DOS US (original IBM PC code page)
|
||||
function CP850ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // DOS western europe
|
||||
function CP852ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // DOS central europe
|
||||
function CP866ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // DOS and Windows console's cyrillic
|
||||
function CP874ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // thai
|
||||
function KOI8RToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // russian cyrillic
|
||||
function UTF8BOMToUTF8(const s: string): string; // UTF8 with BOM
|
||||
function ISO_8859_1ToUTF8(const s: string): string; // western europe (Latin-1)
|
||||
function ISO_8859_15ToUTF8(const s: string): string; // Western European languages
|
||||
function ISO_8859_2ToUTF8(const s: string): string; // eastern europe
|
||||
function CP1250ToUTF8(const s: string): string; // central europe
|
||||
function CP1251ToUTF8(const s: string): string; // cyrillic
|
||||
function CP1252ToUTF8(const s: string): string; // latin 1
|
||||
function CP1253ToUTF8(const s: string): string; // greek
|
||||
function CP1254ToUTF8(const s: string): string; // turkish
|
||||
function CP1255ToUTF8(const s: string): string; // hebrew
|
||||
function CP1256ToUTF8(const s: string): string; // arabic
|
||||
function CP1257ToUTF8(const s: string): string; // baltic
|
||||
function CP1258ToUTF8(const s: string): string; // vietnam
|
||||
function CP437ToUTF8(const s: string): string; // DOS US (original IBM PC code page)
|
||||
function CP850ToUTF8(const s: string): string; // DOS western europe
|
||||
function CP852ToUTF8(const s: string): string; // DOS central europe
|
||||
function CP866ToUTF8(const s: string): string; // DOS and Windows console's cyrillic
|
||||
function CP874ToUTF8(const s: string): string; // thai
|
||||
function KOI8RToUTF8(const s: string): string; // russian cyrillic
|
||||
// Deprecated in Lazarus 2.2, 14.7.2021.
|
||||
function KOI8ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; deprecated 'Call KOI8RToUTF8 instead.';
|
||||
function MacintoshToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Macintosh, alias Mac OS Roman
|
||||
function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table; SetTargetCodePage: Boolean = False): string;
|
||||
function UCS2LEToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // UCS2-LE 2byte little endian
|
||||
function UCS2BEToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // UCS2-BE 2byte big endian
|
||||
function KOI8ToUTF8(const s: string): string; deprecated 'Call KOI8RToUTF8 instead.';
|
||||
function MacintoshToUTF8(const s: string): string; // Macintosh, alias Mac OS Roman
|
||||
function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table): string;
|
||||
function UCS2LEToUTF8(const s: string): string; // UCS2-LE 2byte little endian
|
||||
function UCS2BEToUTF8(const s: string): string; // UCS2-BE 2byte big endian
|
||||
|
||||
function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
|
||||
|
||||
@ -174,10 +179,10 @@ function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian with
|
||||
|
||||
{$IFnDEF DisableAsianCodePages}
|
||||
// Asian encodings
|
||||
function CP932ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Japanese
|
||||
function CP936ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Chinese
|
||||
function CP949ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Korea
|
||||
function CP950ToUTF8(const s: string; SetTargetCodePage: Boolean = False): string; // Chinese Complex
|
||||
function CP932ToUTF8(const s: string): string; // Japanese
|
||||
function CP936ToUTF8(const s: string): string; // Chinese
|
||||
function CP949ToUTF8(const s: string): string; // Korea
|
||||
function CP950ToUTF8(const s: string): string; // Chinese Complex
|
||||
|
||||
function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
|
||||
function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
|
||||
@ -327,128 +332,126 @@ begin
|
||||
if Result[i]='-' then System.Delete(Result,i,1);
|
||||
end;
|
||||
|
||||
function UTF8BOMToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function UTF8BOMToUTF8(const s: string): string;
|
||||
begin
|
||||
if s='' then exit('');
|
||||
if CompareMem(@UTF8BOM[1],@s[1],length(UTF8BOM)) then
|
||||
Result:=copy(s,4,length(s))
|
||||
else
|
||||
Result:=s;
|
||||
if SetTargetCodePage then
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
end;
|
||||
|
||||
function ISO_8859_1ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function ISO_8859_1ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_1ToUTF8, SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_1ToUTF8);
|
||||
end;
|
||||
|
||||
function ISO_8859_15ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function ISO_8859_15ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_15ToUTF8, SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_15ToUTF8);
|
||||
end;
|
||||
|
||||
function ISO_8859_2ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function ISO_8859_2ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_2ToUTF8, SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayISO_8859_2ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1250ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1250ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1250ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1250ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1251ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1251ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1251ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1251ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1252ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1252ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1252ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1252ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1253ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1253ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1253ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1253ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1254ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1254ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1254ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1254ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1255ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1255ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1255ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1255ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1256ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1256ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1256ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1256ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1257ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1257ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1257ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1257ToUTF8);
|
||||
end;
|
||||
|
||||
function CP1258ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP1258ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1258ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP1258ToUTF8);
|
||||
end;
|
||||
|
||||
function CP437ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP437ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP437ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP437ToUTF8);
|
||||
end;
|
||||
|
||||
function CP850ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP850ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP850ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP850ToUTF8);
|
||||
end;
|
||||
|
||||
function CP852ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP852ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP852ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP852ToUTF8);
|
||||
end;
|
||||
|
||||
function CP866ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP866ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP866ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP866ToUTF8);
|
||||
end;
|
||||
|
||||
function CP874ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function CP874ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayCP874ToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayCP874ToUTF8);
|
||||
end;
|
||||
|
||||
function KOI8RToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function KOI8RToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8RToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8RToUTF8);
|
||||
end;
|
||||
|
||||
function KOI8UToUTF8(const s: string; SetTargetCodePage: Boolean = False): string;
|
||||
function KOI8UToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8UToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8UToUTF8);
|
||||
end;
|
||||
|
||||
function KOI8RUToUTF8(const s: string; SetTargetCodePage: Boolean = False): string;
|
||||
function KOI8RUToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8RUToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayKOI8RUToUTF8);
|
||||
end;
|
||||
|
||||
function KOI8ToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function KOI8ToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=KOI8RUToUTF8(s,SetTargetCodePage);
|
||||
Result:=KOI8RUToUTF8(s);
|
||||
end;
|
||||
|
||||
function MacintoshToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function MacintoshToUTF8(const s: string): string;
|
||||
begin
|
||||
Result:=SingleByteToUTF8(s,ArrayMacintoshToUTF8,SetTargetCodePage);
|
||||
Result:=SingleByteToUTF8(s,ArrayMacintoshToUTF8);
|
||||
end;
|
||||
|
||||
function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table; SetTargetCodePage: Boolean): string;
|
||||
function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table): string;
|
||||
var
|
||||
len: Integer;
|
||||
i: Integer;
|
||||
@ -480,11 +483,10 @@ begin
|
||||
end;
|
||||
end;
|
||||
SetLength(Result,{%H-}PtrUInt(Dest)-PtrUInt(Result));
|
||||
if SetTargetCodePage then
|
||||
SetCodePage(RawByteString(Result), CP_Utf8, False);
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
end;
|
||||
|
||||
function UCS2LEToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function UCS2LEToUTF8(const s: string): string;
|
||||
var
|
||||
len: Integer;
|
||||
Src: PWord;
|
||||
@ -512,11 +514,10 @@ begin
|
||||
if len>length(Result) then
|
||||
raise Exception.Create('');
|
||||
SetLength(Result,len);
|
||||
if SetTargetCodePage then
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
end;
|
||||
|
||||
function UCS2BEToUTF8(const s: string; SetTargetCodePage: Boolean): string;
|
||||
function UCS2BEToUTF8(const s: string): string;
|
||||
var
|
||||
len: Integer;
|
||||
Src: PWord;
|
||||
@ -544,8 +545,7 @@ begin
|
||||
if len>length(Result) then
|
||||
raise Exception.Create('');
|
||||
SetLength(Result,len);
|
||||
if SetTargetCodePage then
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
SetCodePage(RawByteString(Result), CP_UTF8, False);
|
||||
end;
|
||||
|
||||
function UTF8ToUTF8BOM(const s: string): string;
|
||||
@ -2394,7 +2394,7 @@ begin
|
||||
Encoded:= false;
|
||||
end;
|
||||
|
||||
function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean; SetTargetCodePage: Boolean=False): string;
|
||||
function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
|
||||
var
|
||||
AFrom: string;
|
||||
begin
|
||||
@ -2402,39 +2402,39 @@ begin
|
||||
Encoded:=true;
|
||||
AFrom:=NormalizeEncoding(FromEncoding);
|
||||
|
||||
if AFrom=EncodingUTF8BOM then begin Result:=UTF8BOMToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPIso1 then begin Result:=ISO_8859_1ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPIso15 then begin Result:=ISO_8859_15ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPIso2 then begin Result:=ISO_8859_2ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1250 then begin Result:=CP1250ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1251 then begin Result:=CP1251ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1252 then begin Result:=CP1252ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1253 then begin Result:=CP1253ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1254 then begin Result:=CP1254ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1255 then begin Result:=CP1255ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1256 then begin Result:=CP1256ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1257 then begin Result:=CP1257ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP1258 then begin Result:=CP1258ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP437 then begin Result:=CP437ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP850 then begin Result:=CP850ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP852 then begin Result:=CP852ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP866 then begin Result:=CP866ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP874 then begin Result:=CP874ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingUTF8BOM then begin Result:=UTF8BOMToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPIso1 then begin Result:=ISO_8859_1ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPIso15 then begin Result:=ISO_8859_15ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPIso2 then begin Result:=ISO_8859_2ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1250 then begin Result:=CP1250ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1251 then begin Result:=CP1251ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1252 then begin Result:=CP1252ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1253 then begin Result:=CP1253ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1254 then begin Result:=CP1254ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1255 then begin Result:=CP1255ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1256 then begin Result:=CP1256ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1257 then begin Result:=CP1257ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP1258 then begin Result:=CP1258ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP437 then begin Result:=CP437ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP850 then begin Result:=CP850ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP852 then begin Result:=CP852ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP866 then begin Result:=CP866ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP874 then begin Result:=CP874ToUTF8(s); exit; end;
|
||||
{$IFnDEF DisableAsianCodePages}
|
||||
if AFrom=EncodingCP936 then begin Result:=CP936ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP950 then begin Result:=CP950ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP949 then begin Result:=CP949ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP932 then begin Result:=CP932ToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCP936 then begin Result:=CP936ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP950 then begin Result:=CP950ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP949 then begin Result:=CP949ToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCP932 then begin Result:=CP932ToUTF8(s); exit; end;
|
||||
{$ENDIF}
|
||||
if AFrom=EncodingCPKOI8R then begin Result:=KOI8RToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPKOI8U then begin Result:=KOI8UToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPKOI8RU then begin Result:=KOI8RUToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPMac then begin Result:=MacintoshToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s, SetTargetCodePage); exit; end;
|
||||
if AFrom=EncodingCPKOI8R then begin Result:=KOI8RToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPKOI8U then begin Result:=KOI8UToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPKOI8RU then begin Result:=KOI8RUToUTF8(s); exit; end;
|
||||
if AFrom=EncodingCPMac then begin Result:=MacintoshToUTF8(s); exit; end;
|
||||
if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s); exit; end;
|
||||
if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s); exit; end;
|
||||
|
||||
if (AFrom=GetDefaultTextEncoding) and Assigned(ConvertAnsiToUTF8) then begin
|
||||
Result:=ConvertAnsiToUTF8(s, SetTargetCodePage);
|
||||
Result:=ConvertAnsiToUTF8(s);
|
||||
exit;
|
||||
end;
|
||||
|
||||
@ -2475,12 +2475,12 @@ begin
|
||||
end
|
||||
else
|
||||
if ATo=EncodingUTF8 then begin
|
||||
Result:=ConvertEncodingToUTF8(s, AFrom, Encoded, SetTargetCodePage);
|
||||
Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
|
||||
if Encoded then exit;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Result:=ConvertEncodingToUTF8(s, AFrom, Encoded, SetTargetCodePage);
|
||||
Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
|
||||
if Encoded then
|
||||
Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded, SetTargetCodePage);
|
||||
if Encoded then exit;
|
||||
|
Loading…
Reference in New Issue
Block a user