{ ***************************************************************************** This file is part of LazUtils. See the file COPYING.modifiedLGPL.txt, included in this distribution, for details about the license. ***************************************************************************** Note: The functions of this unit are thread-safe. } unit LConvEncoding; {$mode objfpc}{$H+} {$i lazutils_defines.inc} interface { $Define DisableAsianCodePages} {$IFDEF UTF8_RTL} // Windows provides conversion functions. // Unix: unit cwstring provides conversion functions which are used by default UTF-8 encoding system. {$Define UseSystemCPConv} // use system conversions {$ENDIF} {$ifdef UseLCPConv}{$undef UseSystemCPConv}{$endif} uses SysUtils, Classes, dos, LazUTF8, CodepagesCommon {$IFnDEF DisableAsianCodePages},CodepagesAsian{$ENDIF} {$IFDEF EnableIconvEnc},iconvenc{$ENDIF}; type TConvertEncodingErrorMode = ( ceemSkip, ceemException, ceemReplace, ceemReturnEmpty ); var //Global variable which controls behaviour of encoding conversion error, in 3 places: //a) UTF8 to single byte encoding, b) DBCS (Asian) encoding to UTF8, c) UTF8 to DBCS ConvertEncodingErrorMode: TConvertEncodingErrorMode = ceemSkip; //encoding names const EncodingUTF8 = 'utf8'; EncodingAnsi = 'ansi'; EncodingUTF8BOM = 'utf8bom'; // UTF-8 with byte order mark EncodingUCS2LE = 'ucs2le'; // UCS 2 byte little endian EncodingUCS2BE = 'ucs2be'; // UCS 2 byte big endian EncodingCP1250 = 'cp1250'; EncodingCP1251 = 'cp1251'; EncodingCP1252 = 'cp1252'; EncodingCP1253 = 'cp1253'; EncodingCP1254 = 'cp1254'; EncodingCP1255 = 'cp1255'; EncodingCP1256 = 'cp1256'; EncodingCP1257 = 'cp1257'; EncodingCP1258 = 'cp1258'; EncodingCP437 = 'cp437'; EncodingCP850 = 'cp850'; EncodingCP852 = 'cp852'; EncodingCP865 = 'cp865'; EncodingCP866 = 'cp866'; EncodingCP874 = 'cp874'; EncodingCP932 = 'cp932'; EncodingCP936 = 'cp936'; EncodingCP949 = 'cp949'; EncodingCP950 = 'cp950'; EncodingCPMac = 'macintosh'; EncodingCPKOI8R = 'koi8r'; EncodingCPKOI8U = 'koi8u'; EncodingCPKOI8RU = 'koi8ru'; EncodingCPIso1 = 'iso88591'; EncodingCPIso2 = 'iso88592'; EncodingCPIso3 = 'iso88593'; EncodingCPIso4 = 'iso88594'; EncodingCPIso5 = 'iso88595'; EncodingCPIso7 = 'iso88597'; EncodingCPIso9 = 'iso88599'; EncodingCPIso10 = 'iso885910'; EncodingCPIso13 = 'iso885913'; EncodingCPIso14 = 'iso885914'; EncodingCPIso15 = 'iso885915'; EncodingCPIso16 = 'iso885916'; //signatures in ansi const UTF8BOM = #$EF#$BB#$BF; UTF16BEBOM = #$FE#$FF; UTF16LEBOM = #$FF#$FE; UTF32BEBOM = #0#0#$FE#$FF; UTF32LEBOM = #$FE#$FF#0#0; function GuessEncoding(const s: string): string; { Note: Conversions to UTF8 will always set the target's codepage to CP_UTF8 This implies that the SetTargetCodePage in the ConvertEncoding() function actually is ignored for conversions to UTF8. } function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean; SetTargetCodePage: boolean = false): string; function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string; // For UTF8 use the above functions, they save you one parameter function ConvertEncoding(const s, FromEncoding, ToEncoding: string; SetTargetCodePage: boolean = false): string; // This routine should obtain the encoding utilized by ansistring in the RTL function GetDefaultTextEncoding: string; // This routine returns the console text encoding, which might be different // from the normal system encoding in some Windows systems // see http://mantis.freepascal.org/view.php?id=20552 function GetConsoleTextEncoding: string; function NormalizeEncoding(const Encoding: string): string; type TConvertEncodingFunction = function(const s: string): string; TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString; TCharToUTF8Table = CodepagesCommon.TCharToUTF8Table; TUnicodeToCharID = function(Unicode: cardinal): integer; var ConvertAnsiToUTF8: TConvertEncodingFunction = nil; ConvertUTF8ToAnsi: TConvertUTF8ToEncodingFunc = nil; function UTF8BOMToUTF8(const s: string): string; // UTF8 with BOM function ISO_8859_1ToUTF8(const s: string): string; // central europe function ISO_8859_2ToUTF8(const s: string): string; // eastern europe function ISO_8859_3ToUTF8(const s: string): string; function ISO_8859_4ToUTF8(const s: string): string; function ISO_8859_5ToUTF8(const s: string): string; // Cyrillic function ISO_8859_7ToUTF8(const s: string): string; function ISO_8859_9ToUTF8(const s: string): string; // Turkish function ISO_8859_10ToUTF8(const s: string): string; function ISO_8859_13ToUTF8(const s: string): string; function ISO_8859_14ToUTF8(const s: string): string; function ISO_8859_15ToUTF8(const s: string): string; // Western European languages function ISO_8859_16ToUTF8(const s: string): string; function CP1250ToUTF8(const s: string): string; // central europe function CP1251ToUTF8(const s: string): string; // cyrillic function CP1252ToUTF8(const s: string): string; // latin 1 function CP1253ToUTF8(const s: string): string; // greek function CP1254ToUTF8(const s: string): string; // turkish function CP1255ToUTF8(const s: string): string; // hebrew function CP1256ToUTF8(const s: string): string; // arabic function CP1257ToUTF8(const s: string): string; // baltic function CP1258ToUTF8(const s: string): string; // vietnam function CP437ToUTF8(const s: string): string; // DOS central europe function CP850ToUTF8(const s: string): string; // DOS western europe function CP852ToUTF8(const s: string): string; // DOS central europe function CP865ToUTF8(const s: string): string; // DOS nordic function CP866ToUTF8(const s: string): string; // DOS and Windows console's cyrillic function CP874ToUTF8(const s: string): string; // thai function KOI8RToUTF8(const s: string): string; // russian cyrillic // Deprecated in Lazarus 2.2, 14.7.2021. function KOI8ToUTF8(const s: string): string; deprecated 'Call KOI8RToUTF8 instead.'; function MacintoshToUTF8(const s: string): string; // Macintosh, alias Mac OS Roman function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table): string; function UCS2LEToUTF8(const s: string): string; // UCS2-LE 2byte little endian function UCS2BEToUTF8(const s: string): string; // UCS2-BE 2byte big endian function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe function UTF8ToISO_8859_3(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_4(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_5(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Cyrillic function UTF8ToISO_8859_7(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_9(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Turkish function UTF8ToISO_8859_10(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_13(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_14(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages function UTF8ToISO_8859_16(const s: string; SetTargetCodePage: boolean = false): RawByteString; function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false): RawByteString; // cyrillic function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false): RawByteString; // latin 1 function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false): RawByteString; // greek function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false): RawByteString; // turkish function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false): RawByteString; // hebrew function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false): RawByteString; // arabic function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false): RawByteString; // baltic function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false): RawByteString; // vietnam function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false): RawByteString; // DOS central europe function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false): RawByteString; // DOS western europe function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false): RawByteString; // DOS central europe function UTF8ToCP865(const s: string; SetTargetCodePage: boolean = false): RawByteString; // DOS nordic function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false): RawByteString; // DOS and Windows console's cyrillic function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false): RawByteString; // thai function UTF8ToKOI8R(const s: string; SetTargetCodePage: boolean = false): RawByteString; // russian cyrillic function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false): RawByteString; // ukrainian cyrillic function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false): RawByteString; // belarussian cyrillic function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Macintosh, alias Mac OS Roman // custom conversion function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string; function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM {$IFnDEF DisableAsianCodePages} // Asian encodings function CP932ToUTF8(const s: string): string; // Japanese function CP936ToUTF8(const s: string): string; // Chinese function CP949ToUTF8(const s: string): string; // Korea function CP950ToUTF8(const s: string): string; // Chinese Complex function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030 function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese Complex // Common function used by all UTF8ToXXX functions. function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string; {$ENDIF} procedure GetSupportedEncodings(List: TStrings); implementation {$IFDEF Windows} uses Windows; {$ENDIF} var EncodingValid: boolean = false; DefaultTextEncoding: string = EncodingAnsi; function SearchTable(CodePageArr: array of word; id: cardinal): word; var idMid: integer; idLow, idHigh: integer; begin idLow := 0; idHigh := High(CodePageArr); while (idLow <= idHigh) do begin if idLow = idHigh then begin if CodePageArr[idLow] = id then begin Result := idLow; end else begin Result := 0; end; Exit; end; idMid := (idLow + idHigh) div 2; if CodePageArr[idMid] = id then begin Result := idMid; Exit; end; if CodePageArr[idMid] > id then idHigh := idMid - 1; if CodePageArr[idMid] < id then idLow := idMid + 1; end; Result := 0; end; {$IFnDEF DisableAsianCodePages} {$include asiancodepagefunctions.inc} {$ENDIF} {$IFDEF Windows} // AConsole - If false, it is the general system encoding, // if true, it is the console encoding function GetWindowsEncoding(AConsole: Boolean = False): string; var cp : UINT; {$IFDEF WinCE} // CP_UTF8 is missing in the windows unit of the Windows CE RTL const CP_UTF8 = 65001; {$ENDIF} begin if AConsole then cp := GetOEMCP else cp := GetACP; case cp of CP_UTF8: Result := EncodingUTF8; else Result:='cp'+IntToStr(cp); end; end; {$ELSE} {$IFNDEF Darwin} function GetUnixEncoding: string; var Lang: string; i: integer; begin Result:=EncodingAnsi; lang := GetEnv('LC_ALL'); if Length(lang) = 0 then begin lang := GetEnv('LC_MESSAGES'); if Length(lang) = 0 then begin lang := GetEnv('LANG'); end; end; i:=pos('.',Lang); if (i>0) and (i<=length(Lang)) then Result:=copy(Lang,i+1,length(Lang)-i); end; {$ENDIF} {$ENDIF} function GetDefaultTextEncoding: string; begin if EncodingValid then begin Result:=DefaultTextEncoding; exit; end; {$IFDEF Windows} Result:=GetWindowsEncoding; {$ELSE} {$IFDEF Darwin} Result:=EncodingUTF8; {$ELSE} Result:=GetUnixEncoding; {$ENDIF} {$ENDIF} Result:=NormalizeEncoding(Result); DefaultTextEncoding:=Result; EncodingValid:=true; end; function GetConsoleTextEncoding: string; begin {$ifdef Windows} Result:=GetWindowsEncoding(True); Result:=NormalizeEncoding(Result); {$else} Result := GetDefaultTextEncoding; {$endif} end; function NormalizeEncoding(const Encoding: string): string; var i: Integer; begin Result:=LowerCase(Encoding); for i:=length(Result) downto 1 do if Result[i]='-' then System.Delete(Result,i,1); end; function UTF8BOMToUTF8(const s: string): string; begin if s='' then exit(''); if CompareMem(@UTF8BOM[1],@s[1],length(UTF8BOM)) then Result:=copy(s,4,length(s)) else Result:=s; end; function ISO_8859_1ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_1ToUTF8); end; function ISO_8859_14ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_14ToUTF8); end; function ISO_8859_15ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_15ToUTF8); end; function ISO_8859_16ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_16ToUTF8); end; function ISO_8859_2ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_2ToUTF8); end; function ISO_8859_3ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_3ToUTF8); end; function ISO_8859_4ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_4ToUTF8); end; function ISO_8859_5ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_5ToUTF8); end; function ISO_8859_7ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_7ToUTF8); end; function ISO_8859_9ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_9ToUTF8); end; function ISO_8859_10ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_10ToUTF8); end; function ISO_8859_13ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayISO_8859_13ToUTF8); end; function CP1250ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1250ToUTF8); end; function CP1251ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1251ToUTF8); end; function CP1252ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1252ToUTF8); end; function CP1253ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1253ToUTF8); end; function CP1254ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1254ToUTF8); end; function CP1255ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1255ToUTF8); end; function CP1256ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1256ToUTF8); end; function CP1257ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1257ToUTF8); end; function CP1258ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP1258ToUTF8); end; function CP437ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP437ToUTF8); end; function CP850ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP850ToUTF8); end; function CP852ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP852ToUTF8); end; function CP865ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP865ToUTF8); end; function CP866ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP866ToUTF8); end; function CP874ToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayCP874ToUTF8); end; function KOI8RToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayKOI8RToUTF8); end; function KOI8UToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayKOI8UToUTF8); end; function KOI8RUToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayKOI8RUToUTF8); end; function KOI8ToUTF8(const s: string): string; begin Result:=KOI8RUToUTF8(s); end; function MacintoshToUTF8(const s: string): string; begin Result:=SingleByteToUTF8(s,ArrayMacintoshToUTF8); end; function SingleByteToUTF8(const s: string; const Table: TCharToUTF8Table): string; var len: Integer; i: Integer; Src: PChar; Dest: PChar; p: PChar; c: Char; begin if s='' then exit(''); len:=length(s); SetLength(Result,len*4);// UTF-8 is at most 4 bytes Src:=PChar(s); Dest:=PChar(Result); for i:=1 to len do begin c:=Src^; inc(Src); if ord(c)<128 then begin Dest^:=c; inc(Dest); end else begin p:=Table[c]; if p<>nil then begin while p^<>#0 do begin Dest^:=p^; inc(p); inc(Dest); end; end; end; end; SetLength(Result,{%H-}PtrUInt(Dest)-PtrUInt(Result)); SetCodePage(RawByteString(Result), CP_UTF8, False); end; function UCS2LEToUTF8(const s: string): string; var len: Integer; Src: PWord; Dest: PChar; i: Integer; c: Word; begin len:=length(s) div 2; if len=0 then exit(''); Src:=PWord(Pointer(s)); if (Src^=$FEFF) then // Skip BOM begin inc(Src); dec(len); end; SetLength(Result,len*3);// UTF-8 is at most 3/2 times the size Dest:=PChar(Result); for i:=1 to len do begin c:=LEtoN(Src^); inc(Src); if ord(c)<128 then begin Dest^:=chr(c); inc(Dest); end else begin inc(Dest,UnicodeToUTF8SkipErrors(c,Dest)); end; end; len:={%H-}PtrUInt(Dest)-PtrUInt(Result); if len>length(Result) then raise Exception.Create(''); SetLength(Result,len); SetCodePage(RawByteString(Result), CP_UTF8, False); end; function UCS2BEToUTF8(const s: string): string; var len: Integer; Src: PWord; Dest: PChar; i: Integer; c: Word; begin len:=length(s) div 2; if len=0 then exit(''); Src:=PWord(Pointer(s)); if (Src^=$FFFE) then // Skip BOM begin inc(Src); dec(len); end; SetLength(Result,len*3);// UTF-8 is at most three times the size Dest:=PChar(Result); for i:=1 to len do begin c:=BEtoN(Src^); inc(Src); if ord(c)<128 then begin Dest^:=chr(c); inc(Dest); end else begin inc(Dest,UnicodeToUTF8SkipErrors(c,Dest)); end; end; len:={%H-}PtrUInt(Dest)-PtrUInt(Result); if len>length(Result) then raise Exception.Create(''); SetLength(Result,len); SetCodePage(RawByteString(Result), CP_UTF8, False); end; function UTF8ToUTF8BOM(const s: string): string; begin Result:=UTF8BOM+s; end; {$IfNdef UseSystemCPConv} function UnicodeToCP1256(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=160; 162..169: Result:=Unicode; 171..185: Result:=Unicode; 187..190: Result:=Unicode; 215: Result:=215; 224: Result:=224; 226: Result:=226; 231..235: Result:=Unicode; 238..239: Result:=Unicode; 244: Result:=244; 247: Result:=247; 249: Result:=249; 251..252: Result:=Unicode; 338: Result:=140; 339: Result:=156; 402: Result:=131; 710: Result:=136; 1548: Result:=161; 1563: Result:=186; 1567: Result:=191; 1569..1590: Result:=Unicode-1376; 1591..1594: Result:=Unicode-1375; 1600..1603: Result:=Unicode-1380; 1604: Result:=225; 1605..1608: Result:=Unicode-1378; 1609..1610: Result:=Unicode-1373; 1611..1614: Result:=Unicode-1371; 1615..1616: Result:=Unicode-1370; 1617: Result:=248; 1618: Result:=250; 1657: Result:=138; 1662: Result:=129; 1670: Result:=141; 1672: Result:=143; 1681: Result:=154; 1688: Result:=142; 1705: Result:=152; 1711: Result:=144; 1722: Result:=159; 1726: Result:=170; 1729: Result:=192; 1746: Result:=255; 8204..8205: Result:=Unicode-8047; 8206..8207: Result:=Unicode-7953; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP437(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=255; 161: Result:=173; 162..163: Result:=Unicode-7; 165: Result:=157; 170: Result:=166; 171: Result:=174; 172: Result:=170; 176: Result:=248; 177: Result:=241; 178: Result:=253; 181: Result:=230; 183: Result:=250; 186: Result:=167; 187: Result:=175; 188: Result:=172; 189: Result:=171; 191: Result:=168; 196..197: Result:=Unicode-54; 198: Result:=146; 199: Result:=128; 201: Result:=144; 209: Result:=165; 214: Result:=153; 220: Result:=154; 223: Result:=225; 224: Result:=133; 225: Result:=160; 226: Result:=131; 228: Result:=132; 229: Result:=134; 230: Result:=145; 231: Result:=135; 232: Result:=138; 233: Result:=130; 234..235: Result:=Unicode-98; 236: Result:=141; 237: Result:=161; 238: Result:=140; 239: Result:=139; 241: Result:=164; 242: Result:=149; 243: Result:=162; 244: Result:=147; 246: Result:=148; 247: Result:=246; 249: Result:=151; 250: Result:=163; 251: Result:=150; 252: Result:=129; 255: Result:=152; 262: Result := 93; 263: Result := 125; 268: Result := 94; 269: Result := 126; 272: Result := 92; 273: Result := 124; 381: Result := 64; 382: Result := 96; 352: Result := 91; 353: Result := 123; 402: Result:=159; 915: Result:=226; 920: Result:=233; 931: Result:=228; 934: Result:=232; 937: Result:=234; 945: Result:=224; 948: Result:=235; 949: Result:=238; 960: Result:=227; 963: Result:=229; 964: Result:=231; 966: Result:=237; 8319: Result:=252; 8359: Result:=158; 8729: Result:=249; 8730: Result:=251; 8734: Result:=236; 8745: Result:=239; 8776: Result:=247; 8801: Result:=240; 8804: Result:=243; 8805: Result:=242; 8976: Result:=169; 8992..8993: Result:=Unicode-8748; 9472: Result:=196; 9474: Result:=179; 9484: Result:=218; 9488: Result:=191; 9492: Result:=192; 9496: Result:=217; 9500: Result:=195; 9508: Result:=180; 9516: Result:=194; 9524: Result:=193; 9532: Result:=197; 9552: Result:=205; 9553: Result:=186; 9554..9555: Result:=Unicode-9341; 9556: Result:=201; 9557: Result:=184; 9558: Result:=183; 9559: Result:=187; 9560: Result:=212; 9561: Result:=211; 9562: Result:=200; 9563: Result:=190; 9564: Result:=189; 9565: Result:=188; 9566..9567: Result:=Unicode-9368; 9568: Result:=204; 9569..9570: Result:=Unicode-9388; 9571: Result:=185; 9572..9573: Result:=Unicode-9363; 9574: Result:=203; 9575..9576: Result:=Unicode-9368; 9577: Result:=202; 9578: Result:=216; 9579: Result:=215; 9580: Result:=206; 9600: Result:=223; 9604: Result:=220; 9608: Result:=219; 9612: Result:=221; 9616: Result:=222; 9617..9619: Result:=Unicode-9441; 9632: Result:=254; else Result:=-1; end; end; function UnicodeToCP850(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=255; 161: Result:=173; 162: Result:=189; 163: Result:=156; 164: Result:=207; 165: Result:=190; 166: Result:=221; 167: Result:=245; 168: Result:=249; 169: Result:=184; 170: Result:=166; 171: Result:=174; 172: Result:=170; 173: Result:=240; 174: Result:=169; 175: Result:=238; 176: Result:=248; 177: Result:=241; 178: Result:=253; 179: Result:=252; 180: Result:=239; 181: Result:=230; 182: Result:=244; 183: Result:=250; 184: Result:=247; 185: Result:=251; 186: Result:=167; 187: Result:=175; 188: Result:=172; 189: Result:=171; 190: Result:=243; 191: Result:=168; 192: Result:=183; 193..194: Result:=Unicode-12; 195: Result:=199; 196..197: Result:=Unicode-54; 198: Result:=146; 199: Result:=128; 200: Result:=212; 201: Result:=144; 202..203: Result:=Unicode+8; 204: Result:=222; 205..207: Result:=Unicode+9; 208: Result:=209; 209: Result:=165; 210: Result:=227; 211: Result:=224; 212: Result:=226; 213: Result:=229; 214: Result:=153; 215: Result:=158; 216: Result:=157; 217: Result:=235; 218..219: Result:=Unicode+15; 220: Result:=154; 221: Result:=237; 222: Result:=232; 223: Result:=225; 224: Result:=133; 225: Result:=160; 226: Result:=131; 227: Result:=198; 228: Result:=132; 229: Result:=134; 230: Result:=145; 231: Result:=135; 232: Result:=138; 233: Result:=130; 234..235: Result:=Unicode-98; 236: Result:=141; 237: Result:=161; 238: Result:=140; 239: Result:=139; 240: Result:=208; 241: Result:=164; 242: Result:=149; 243: Result:=162; 244: Result:=147; 245: Result:=228; 246: Result:=148; 247: Result:=246; 248: Result:=155; 249: Result:=151; 250: Result:=163; 251: Result:=150; 252: Result:=129; 253: Result:=236; 254: Result:=231; 255: Result:=152; 305: Result:=213; 402: Result:=159; 8215: Result:=242; 9472: Result:=196; 9474: Result:=179; 9484: Result:=218; 9488: Result:=191; 9492: Result:=192; 9496: Result:=217; 9500: Result:=195; 9508: Result:=180; 9516: Result:=194; 9524: Result:=193; 9532: Result:=197; 9552: Result:=205; 9553: Result:=186; 9556: Result:=201; 9559: Result:=187; 9562: Result:=200; 9565: Result:=188; 9568: Result:=204; 9571: Result:=185; 9574: Result:=203; 9577: Result:=202; 9580: Result:=206; 9600: Result:=223; 9604: Result:=220; 9608: Result:=219; 9617..9619: Result:=Unicode-9441; 9632: Result:=254; else Result:=-1; end; end; // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT function UnicodeToCP852(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=255; 164: Result:=207; 167: Result:=245; 168: Result:=249; 171: Result:=174; 172: Result:=170; 173: Result:=240; 176: Result:=248; 180: Result:=239; 184: Result:=247; 187: Result:=175; 193..194: Result:=Unicode-12; 196: Result:=142; 199: Result:=128; 201: Result:=144; 203: Result:=211; 205..206: Result:=Unicode+9; 211: Result:=224; 212: Result:=226; 214: Result:=153; 215: Result:=158; 218: Result:=233; 220: Result:=154; 221: Result:=237; 223: Result:=225; 225: Result:=160; 226: Result:=131; 228: Result:=132; 231: Result:=135; 233: Result:=130; 235: Result:=137; 237: Result:=161; 238: Result:=140; 243: Result:=162; 244: Result:=147; 246: Result:=148; 247: Result:=246; 250: Result:=163; 252: Result:=129; 253: Result:=236; 258..259: Result:=Unicode-60; 260..261: Result:=Unicode-96; 262: Result:=143; 263: Result:=134; 268: Result:=172; 269: Result:=159; 270: Result:=210; 271: Result:=212; 272: Result:=209; 273: Result:=208; 280..281: Result:=Unicode-112; 282: Result:=183; 283: Result:=216; 313..314: Result:=Unicode-168; 317..318: Result:=Unicode-168; 321: Result:=157; 322: Result:=136; 323..324: Result:=Unicode-96; 327: Result:=213; 328: Result:=229; 336..337: Result:=Unicode-198; 340: Result:=232; 341: Result:=234; 344..345: Result:=Unicode-92; 346..347: Result:=Unicode-195; 350: Result:=184; 351: Result:=173; 352..353: Result:=Unicode-122; 354: Result:=221; 355: Result:=238; 356..357: Result:=Unicode-201; 366: Result:=222; 367: Result:=133; 368: Result:=235; 369: Result:=251; 377: Result:=141; 378: Result:=171; 379..380: Result:=Unicode-190; 381..382: Result:=Unicode-215; 711: Result:=243; 728: Result:=244; 729: Result:=250; 731: Result:=242; 733: Result:=241; 9472: Result:=196; 9474: Result:=179; 9484: Result:=218; 9488: Result:=191; 9492: Result:=192; 9496: Result:=217; 9500: Result:=195; 9508: Result:=180; 9516: Result:=194; 9524: Result:=193; 9532: Result:=197; 9552: Result:=205; 9553: Result:=186; 9556: Result:=201; 9559: Result:=187; 9562: Result:=200; 9565: Result:=188; 9568: Result:=204; 9571: Result:=185; 9574: Result:=203; 9577: Result:=202; 9580: Result:=206; 9600: Result:=223; 9604: Result:=220; 9608: Result:=219; 9617..9619: Result:=Unicode-9441; 9632: Result:=254; else Result:=-1; end; end; function UnicodeToCP865(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=255; 161: Result:=173; 163: Result:=163-7; 164: Result:=$AF; 170: Result:=166; 171: Result:=174; 172: Result:=170; 176: Result:=248; 177: Result:=241; 178: Result:=253; 181: Result:=230; 183: Result:=250; 186: Result:=167; 188: Result:=172; 189: Result:=171; 191: Result:=168; 196..197: Result:=Unicode-54; 198: Result:=146; 199: Result:=128; 201: Result:=144; 209: Result:=165; 214: Result:=153; 216: Result:=$9D; 220: Result:=154; 223: Result:=225; 224: Result:=133; 225: Result:=160; 226: Result:=131; 228: Result:=132; 229: Result:=134; 230: Result:=145; 231: Result:=135; 232: Result:=138; 233: Result:=130; 234..235: Result:=Unicode-98; 236: Result:=141; 237: Result:=161; 238: Result:=140; 239: Result:=139; 241: Result:=164; 242: Result:=149; 243: Result:=162; 244: Result:=147; 246: Result:=148; 247: Result:=246; 248: Result:=$9B; 249: Result:=151; 250: Result:=163; 251: Result:=150; 252: Result:=129; 255: Result:=152; 262: Result := 93; 263: Result := 125; 268: Result := 94; 269: Result := 126; 272: Result := 92; 273: Result := 124; 381: Result := 64; 382: Result := 96; 352: Result := 91; 353: Result := 123; 402: Result:=159; 915: Result:=226; 920: Result:=233; 931: Result:=228; 934: Result:=232; 937: Result:=234; 945: Result:=224; 948: Result:=235; 949: Result:=238; 960: Result:=227; 963: Result:=229; 964: Result:=231; 966: Result:=237; 8319: Result:=252; 8359: Result:=158; 8729: Result:=249; 8730: Result:=251; 8734: Result:=236; 8745: Result:=239; 8776: Result:=247; 8801: Result:=240; 8804: Result:=243; 8805: Result:=242; 8976: Result:=169; 8992..8993: Result:=Unicode-8748; 9472: Result:=196; 9474: Result:=179; 9484: Result:=218; 9488: Result:=191; 9492: Result:=192; 9496: Result:=217; 9500: Result:=195; 9508: Result:=180; 9516: Result:=194; 9524: Result:=193; 9532: Result:=197; 9552: Result:=205; 9553: Result:=186; 9554..9555: Result:=Unicode-9341; 9556: Result:=201; 9557: Result:=184; 9558: Result:=183; 9559: Result:=187; 9560: Result:=212; 9561: Result:=211; 9562: Result:=200; 9563: Result:=190; 9564: Result:=189; 9565: Result:=188; 9566..9567: Result:=Unicode-9368; 9568: Result:=204; 9569..9570: Result:=Unicode-9388; 9571: Result:=185; 9572..9573: Result:=Unicode-9363; 9574: Result:=203; 9575..9576: Result:=Unicode-9368; 9577: Result:=202; 9578: Result:=216; 9579: Result:=215; 9580: Result:=206; 9600: Result:=223; 9604: Result:=220; 9608: Result:=219; 9612: Result:=221; 9616: Result:=222; 9617..9619: Result:=Unicode-9441; 9632: Result:=254; else Result:=-1; end; end; function UnicodeToCP866(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 1040..1087 : Result := Unicode-912; 9617..9619 : Result := Unicode-9441; 9474 : Result := 179; 9508 : Result := 180; 9569 : Result := 181; 9570 : Result := 182; 9558 : Result := 183; 9557 : Result := 184; 9571 : Result := 185; 9553 : Result := 186; 9559 : Result := 187; 9565 : Result := 188; 9564 : Result := 189; 9563 : Result := 190; 9488 : Result := 191; 9492 : Result := 192; 9524 : Result := 193; 9516 : Result := 194; 9500 : Result := 195; 9472 : Result := 196; 9532 : Result := 197; 9566 : Result := 198; 9567 : Result := 199; 9562 : Result := 200; 9556 : Result := 201; 9577 : Result := 202; 9574 : Result := 203; 9568 : Result := 204; 9552 : Result := 205; 9580 : Result := 206; 9575 : Result := 207; 9576 : Result := 208; 9572 : Result := 209; 9573 : Result := 210; 9561 : Result := 211; 9560 : Result := 212; 9554 : Result := 213; 9555 : Result := 214; 9579 : Result := 215; 9578 : Result := 216; 9496 : Result := 217; 9484 : Result := 218; 9608 : Result := 219; 9604 : Result := 220; 9612 : Result := 221; 9616 : Result := 222; 9600 : Result := 223; 1088..1103 : Result := Unicode-864; 1025 : Result := 240; 1105 : Result := 241; 1028 : Result := 242; 1108 : Result := 243; 1031 : Result := 244; 1111 : Result := 245; 1038 : Result := 246; 1118 : Result := 247; 176 : Result := 248; 8729 : Result := 249; 183 : Result := 250; 8730 : Result := 251; 8470 : Result := 252; 164 : Result := 253; 9632 : Result := 254; 160 : Result := 255; else Result:=-1; end; end; function UnicodeToISO_8859_1(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:=Unicode; else Result:=-1; end; end; function UnicodeToISO_8859_9(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:=Unicode; $011E: Result:= $D0; $0130: Result:= $DD; $015E: Result:= $DE; $011F: Result:= $F0; $0131: Result:= $FD; $015F: Result:= $FE; else Result:=-1; end; end; function UnicodeToISO_8859_10(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:= Unicode; $104: Result:= $A1; $112: Result:= $A2; $122: Result:= $A3; $12A: Result:= $A4; $128: Result:= $A5; $136: Result:= $A6; $13B: Result:= $A8; $110: Result:= $A9; $160: Result:= $AA; $166: Result:= $AB; $17D: Result:= $AC; $16A: Result:= $AE; $14A: Result:= $AF; $105: Result:= $B1; $113: Result:= $B2; $123: Result:= $B3; $12B: Result:= $B4; $129: Result:= $B5; $137: Result:= $B6; $13C: Result:= $B8; $111: Result:= $B9; $161: Result:= $BA; $167: Result:= $BB; $17E: Result:= $BC; $2015: Result:= $BD; $16B: Result:= $BE; $14B: Result:= $BF; $100: Result:= $C0; $12E: Result:= $C7; $10C: Result:= $C8; $118: Result:= $CA; $116: Result:= $CC; $145: Result:= $D1; $14C: Result:= $D2; $168: Result:= $D7; $172: Result:= $D9; $101: Result:= $E0; $12F: Result:= $E7; $10D: Result:= $E8; $119: Result:= $EA; $117: Result:= $EC; $146: Result:= $F1; $14D: Result:= $F2; $169: Result:= $F7; $173: Result:= $F9; $138: Result:= $FF; else Result:=-1; end; end; function UnicodeToISO_8859_13(Unicode: cardinal): integer; begin case Unicode of 0..$A0, $A2, $A3, $A4, $A6, $A7, $A9, $AB, $AC, $AD, $AE, $B0..$B3, $B5..$B7, $B9, $BB..$BE, $C4, $C5, $C9, $D3, $D5..$D7, $DC, $DF, $E4, $E5, $E9, $F3, $F5..$F7, $FC: Result:= Unicode; $201D: Result:= $A1; $201E: Result:= $A5; $D8: Result:= $A8; $156: Result:= $AA; $C6: Result:= $AF; $201C: Result:= $B4; $F8: Result:= $B8; $157: Result:= $BA; $E6: Result:= $BF; $104: Result:= $C0; $12E: Result:= $C1; $100: Result:= $C2; $106: Result:= $C3; $118: Result:= $C6; $112: Result:= $C7; $10C: Result:= $C8; $179: Result:= $CA; $116: Result:= $CB; $122: Result:= $CC; $136: Result:= $CD; $12A: Result:= $CE; $13B: Result:= $CF; $160: Result:= $D0; $143: Result:= $D1; $145: Result:= $D2; $14C: Result:= $D4; $172: Result:= $D8; $141: Result:= $D9; $15A: Result:= $DA; $16A: Result:= $DB; $17B: Result:= $DD; $17D: Result:= $DE; $105: Result:= $E0; $12F: Result:= $E1; $101: Result:= $E2; $107: Result:= $E3; $119: Result:= $E6; $113: Result:= $E7; $10D: Result:= $E8; $17A: Result:= $EA; $117: Result:= $EB; $123: Result:= $EC; $137: Result:= $ED; $12B: Result:= $EE; $13C: Result:= $EF; $161: Result:= $F0; $144: Result:= $F1; $146: Result:= $F2; $14D: Result:= $F4; $173: Result:= $F8; $142: Result:= $F9; $15B: Result:= $FA; $16B: Result:= $FB; $17C: Result:= $FD; $17E: Result:= $FE; $2019: Result:= $FF; else Result:= -1; end; end; function UnicodeToISO_8859_14(Unicode: cardinal): integer; begin case Unicode of 0..$A0: Result:=Unicode; $A3, $A7, $A9, $AD, $AE, $B6: Result:= Unicode; $1E02: Result:= $A1; $1E03: Result:= $A2; $10A: Result:= $A4; $10B: Result:= $A5; $1E0A: Result:= $A6; $1E80: Result:= $A8; $1E82: Result:= $AA; $1E0B: Result:= $AB; $1EF2: Result:= $AC; $178: Result:= $AF; $1E1E: Result:= $B0; $1E1F: Result:= $B1; $120: Result:= $B2; $121: Result:= $B3; $1E40: Result:= $B4; $1E41: Result:= $B5; $1E56: Result:= $B7; $1E81: Result:= $B8; $1E57: Result:= $B9; $1E83: Result:= $BA; $1E60: Result:= $BB; $1EF3: Result:= $BC; $1E84: Result:= $BD; $1E85: Result:= $BE; $1E61: Result:= $BF; $C0..$CF: Result:= Unicode; $174: Result:= $D0; $D1..$D6: Result:= Unicode; $1E6A: Result:= $D7; $D8..$DD: Result:= Unicode; $176: Result:= $DE; $DF..$EF: Result:= Unicode; $175: Result:= $F0; $F1..$F6: Result:= Unicode; $1E6B: Result:= $F7; $F8..$FD: Result:= Unicode; $177: Result:= $FE; $FF: Result:= $FF; else Result:=-1; end; end; function UnicodeToISO_8859_15(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:=Unicode; 8364: Result:=164; 352: Result:=166; 353: Result:=168; 381: Result:=180; 382: Result:=184; 338: Result:=188; 339: Result:=189; 376: Result:=190; else Result:=-1; end; end; function UnicodeToISO_8859_16(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:=Unicode; $104: Result:= $A1; $105: Result:= $A2; $141: Result:= $A3; $20AC: Result:= $A4; $201E: Result:= $A5; $160: Result:= $A6; $161: Result:= $A8; $218: Result:= $AA; $179: Result:= $AC; $17A: Result:= $AE; $17B: Result:= $AF; $10C: Result:= $B2; $142: Result:= $B3; $17D: Result:= $B4; $201D: Result:= $B5; $17E: Result:= $B8; $10D: Result:= $B9; $219: Result:= $BA; $152: Result:= $BC; $153: Result:= $BD; $178: Result:= $BE; $17C: Result:= $BF; $102: Result:= $C3; $106: Result:= $C5; $110: Result:= $D0; $143: Result:= $D1; $150: Result:= $D5; $15A: Result:= $D7; $170: Result:= $D8; $118: Result:= $DD; $21A: Result:= $DE; $103: Result:= $E3; $107: Result:= $E5; $111: Result:= $F0; $144: Result:= $F1; $151: Result:= $F5; $15B: Result:= $F7; $171: Result:= $F8; $119: Result:= $FD; $21B: Result:= $FE; else Result:=-1; end; end; function UnicodeToISO_8859_2(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 128..160: Result:=Unicode; 164: Result:=164; 167..168: Result:=Unicode; 173: Result:=173; 176: Result:=176; 180: Result:=180; 184: Result:=184; 193..194: Result:=Unicode; 196: Result:=196; 199: Result:=199; 201: Result:=201; 203: Result:=203; 205..206: Result:=Unicode; 211..212: Result:=Unicode; 214..215: Result:=Unicode; 218: Result:=218; 220..221: Result:=Unicode; 223: Result:=223; 225..226: Result:=Unicode; 228: Result:=228; 231: Result:=231; 233: Result:=233; 235: Result:=235; 237..238: Result:=Unicode; 243..244: Result:=Unicode; 246..247: Result:=Unicode; 250: Result:=250; 252..253: Result:=Unicode; 258: Result:=195; 259: Result:=227; 260: Result:=161; 261: Result:=177; 262: Result:=198; 263: Result:=230; 268: Result:=200; 269: Result:=232; 270: Result:=207; 271: Result:=239; 272: Result:=208; 273: Result:=240; 280: Result:=202; 281: Result:=234; 282: Result:=204; 283: Result:=236; 313: Result:=197; 314: Result:=229; 317: Result:=165; 318: Result:=181; 321: Result:=163; 322: Result:=179; 323: Result:=209; 324: Result:=241; 327: Result:=210; 328: Result:=242; 336: Result:=213; 337: Result:=245; 340: Result:=192; 341: Result:=224; 344: Result:=216; 345: Result:=248; 346: Result:=166; 347: Result:=182; 350: Result:=170; 351: Result:=186; 352: Result:=169; 353: Result:=185; 354: Result:=222; 355: Result:=254; 356: Result:=171; 357: Result:=187; 366: Result:=217; 367: Result:=249; 368: Result:=219; 369: Result:=251; 377: Result:=172; 378: Result:=188; 379: Result:=175; 380: Result:=191; 381: Result:=174; 382: Result:=190; 711: Result:=183; 728: Result:=162; 729: Result:=255; 731: Result:=178; 733: Result:=189; else Result:=-1; end; end; function UnicodeToISO_8859_3(Unicode: cardinal): integer; begin case Unicode of 0..255: Result:= Unicode; $126: Result:= $A1; $2D8: Result:= $A2; $124: Result:= $A6; $130: Result:= $A9; $15E: Result:= $AA; $11E: Result:= $AB; $134: Result:= $AC; $17B: Result:= $AF; $127: Result:= $B1; $125: Result:= $B6; $131: Result:= $B9; $15F: Result:= $BA; $11F: Result:= $BB; $135: Result:= $BC; $17C: Result:= $BF; $10A: Result:= $C5; $108: Result:= $C6; $120: Result:= $D5; $11C: Result:= $D8; $16C: Result:= $DD; $15C: Result:= $DE; $10B: Result:= $E5; $109: Result:= $E6; $121: Result:= $F5; $11D: Result:= $F8; $16D: Result:= $FD; $15D: Result:= $FE; $2D9: Result:= $FF; else Result:= -1; end; end; function UnicodeToISO_8859_4(Unicode: cardinal): integer; begin case Unicode of 0..$A0, $A4, $A7, $A8, $AD, $AF, $B0, $B4, $B8, $C1..$C6, $C9, $CB, $CD, $CE, $D4..$D8, $DA..$DC, $DF, $E1..$E6, $E9, $EB, $ED, $EE, $F4..$F9, $FA..$FC: Result:= Unicode; $104: Result:= $A1; $138: Result:= $A2; $156: Result:= $A3; $128: Result:= $A5; $13B: Result:= $A6; $160: Result:= $A9; $112: Result:= $AA; $122: Result:= $AB; $166: Result:= $AC; $17D: Result:= $AE; $105: Result:= $B1; $2DB: Result:= $B2; $157: Result:= $B3; $129: Result:= $B5; $13C: Result:= $B6; $2C7: Result:= $B7; $161: Result:= $B9; $113: Result:= $BA; $123: Result:= $BB; $167: Result:= $BC; $14A: Result:= $BD; $17E: Result:= $BE; $14B: Result:= $BF; $100: Result:= $C0; $12E: Result:= $C7; $10C: Result:= $C8; $118: Result:= $CA; $116: Result:= $CC; $12A: Result:= $CF; $110: Result:= $D0; $145: Result:= $D1; $14C: Result:= $D2; $136: Result:= $D3; $172: Result:= $D9; $168: Result:= $DD; $16A: Result:= $DE; $101: Result:= $E0; $12F: Result:= $E7; $10D: Result:= $E8; $119: Result:= $EA; $117: Result:= $EC; $12B: Result:= $EF; $111: Result:= $F0; $146: Result:= $F1; $14D: Result:= $F2; $137: Result:= $F3; $173: Result:= $F9; $169: Result:= $FD; $16B: Result:= $FE; $2D9: Result:= $FF; else Result:= -1; end; end; function UnicodeToISO_8859_5(Unicode: cardinal): integer; begin case Unicode of 0..$A6, $A8..$FF: Result:= Unicode; $401..$40C, $40E..$44F: Result:= Unicode-($401-$A1); $2116: Result:= $F0; $451..$45C, $45E..$45F: Result:= Unicode-($451-$F1); $A7: Result:= $FD; else Result:= -1; end; end; function UnicodeToISO_8859_7(Unicode: cardinal): integer; begin case Unicode of 0..$A0, $A3, $A6..$A9, $AB..$AE, $B0..$B3, $B7, $BB, $BD, $D2, $FF: Result:= Unicode; $2BD: Result:= $A1; $2BC: Result:= $A2; $20AC: Result:= $A4; $20AF: Result:= $A5; $37A: Result:= $AA; $2015: Result:= $AF; $384..$386, $388..$38A, $38C, $38E..$3A1, $3A3..$3CE: Result:= Unicode-($384-$B4); else Result:= -1; end; end; function UnicodeToMacintosh(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=202; 161: Result:=193; 162..163: Result:=Unicode; 165: Result:=180; 167: Result:=164; 168: Result:=172; 169: Result:=169; 170: Result:=187; 171: Result:=199; 172: Result:=194; 174: Result:=168; 175: Result:=248; 176: Result:=161; 177: Result:=177; 180: Result:=171; 181: Result:=181; 182: Result:=166; 183: Result:=225; 184: Result:=252; 186: Result:=188; 187: Result:=200; 191: Result:=192; 192: Result:=203; 193: Result:=231; 194: Result:=229; 195: Result:=204; 196..197: Result:=Unicode-68; 198: Result:=174; 199: Result:=130; 200: Result:=233; 201: Result:=131; 202: Result:=230; 203: Result:=232; 204: Result:=237; 205..207: Result:=Unicode+29; 209: Result:=132; 210: Result:=241; 211..212: Result:=Unicode+27; 213: Result:=205; 214: Result:=133; 216: Result:=175; 217: Result:=244; 218..219: Result:=Unicode+24; 220: Result:=134; 223: Result:=167; 224: Result:=136; 225: Result:=135; 226: Result:=137; 227: Result:=139; 228: Result:=138; 229: Result:=140; 230: Result:=190; 231: Result:=141; 232: Result:=143; 233: Result:=142; 234..235: Result:=Unicode-90; 236: Result:=147; 237: Result:=146; 238..239: Result:=Unicode-90; 241: Result:=150; 242: Result:=152; 243: Result:=151; 244: Result:=153; 245: Result:=155; 246: Result:=154; 247: Result:=214; 248: Result:=191; 249: Result:=157; 250: Result:=156; 251..252: Result:=Unicode-93; 255: Result:=216; 305: Result:=245; 338..339: Result:=Unicode-132; 376: Result:=217; 402: Result:=196; 710: Result:=246; 711: Result:=255; 728..730: Result:=Unicode-479; 731: Result:=254; 732: Result:=247; 733: Result:=253; 916: Result:=198; 937: Result:=189; 960: Result:=185; 8211..8212: Result:=Unicode-8003; 8216..8217: Result:=Unicode-8004; 8218: Result:=226; 8220..8221: Result:=Unicode-8010; 8222: Result:=227; 8224: Result:=160; 8225: Result:=224; 8226: Result:=165; 8230: Result:=201; 8240: Result:=228; 8249..8250: Result:=Unicode-8029; 8260: Result:=218; 8364: Result:=219; 8482: Result:=170; 8706: Result:=182; 8719: Result:=184; 8721: Result:=183; 8730: Result:=195; 8734: Result:=176; 8747: Result:=186; 8776: Result:=197; 8800: Result:=173; 8804..8805: Result:=Unicode-8626; 9674: Result:=215; 57374: Result:=240; 64257..64258: Result:=Unicode-64035; else Result:=-1; end; end; {$endif} function UnicodeToKOI8R(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 160: Result:=154; 169: Result:=191; 176: Result:=156; 178: Result:=157; 183: Result:=158; 247: Result:=159; 1025: Result:=179; 1040..1041: Result:=Unicode-815; 1042: Result:=247; 1043: Result:=231; 1044..1045: Result:=Unicode-816; 1046: Result:=246; 1047: Result:=250; 1048..1055: Result:=Unicode-815; 1056..1059: Result:=Unicode-814; 1060: Result:=230; 1061: Result:=232; 1062: Result:=227; 1063: Result:=254; 1064: Result:=251; 1065: Result:=253; 1066: Result:=255; 1067: Result:=249; 1068: Result:=248; 1069: Result:=252; 1070: Result:=224; 1071: Result:=241; 1072..1073: Result:=Unicode-879; 1074: Result:=215; 1075: Result:=199; 1076..1077: Result:=Unicode-880; 1078: Result:=214; 1079: Result:=218; 1080..1087: Result:=Unicode-879; 1088..1091: Result:=Unicode-878; 1092: Result:=198; 1093: Result:=200; 1094: Result:=195; 1095: Result:=222; 1096: Result:=219; 1097: Result:=221; 1098: Result:=223; 1099: Result:=217; 1100: Result:=216; 1101: Result:=220; 1102: Result:=192; 1103: Result:=209; 1105: Result:=163; 8729: Result:=149; 8730: Result:=150; 8776: Result:=151; 8804: Result:=152; 8805: Result:=153; 8992: Result:=147; 8993: Result:=155; 9472: Result:=128; 9474: Result:=129; 9484: Result:=130; 9488: Result:=131; 9492: Result:=132; 9496: Result:=133; 9500: Result:=134; 9508: Result:=135; 9516: Result:=136; 9524: Result:=137; 9532: Result:=138; 9552..9554: Result:=Unicode-9392; 9555..9569: Result:=Unicode-9391; 9570..9580: Result:=Unicode-9390; 9600: Result:=139; 9604: Result:=140; 9608: Result:=141; 9612: Result:=142; 9616..9619: Result:=Unicode-9473; 9632: Result:=148; else Result:=-1; end; end; function UnicodeToKOI8U(Unicode: cardinal): integer; begin case Unicode of 1028: Result:=180; 1030..1031: Result:=Unicode-848; 1108: Result:=164; 1110..1111: Result:=Unicode-944; 1168: Result:=189; 1169: Result:=173; else Result:=UnicodeToKOI8R(Unicode); end; end; function UnicodeToKOI8RU(Unicode: cardinal): integer; begin case Unicode of 164 : Result:=159; 171 : Result:=157; 174 : Result:=156; 187 : Result:=155; 1038: Result:=190; 1118: Result:=174; 8212: Result:=151; 8220: Result:=147; 8221: Result:=150; 8470: Result:=152; 8482: Result:=153; else Result:=UnicodeToKOI8U(Unicode); end; end; function UnicodeToCP1250(Unicode: cardinal): integer; begin case Unicode of 0..127,129,131,136,144,152: Result:=Unicode; 160: Result:=160; 164: Result:=164; 166..169: Result:=Unicode; 171..174: Result:=Unicode; 176..177: Result:=Unicode; 180..184: Result:=Unicode; 187: Result:=187; 193..194: Result:=Unicode; 196: Result:=196; 199: Result:=199; 201: Result:=201; 203: Result:=203; 205..206: Result:=Unicode; 211..212: Result:=Unicode; 214..215: Result:=Unicode; 218: Result:=218; 220..221: Result:=Unicode; 223: Result:=223; 225..226: Result:=Unicode; 228: Result:=228; 231: Result:=231; 233: Result:=233; 235: Result:=235; 237..238: Result:=Unicode; 243..244: Result:=Unicode; 246..247: Result:=Unicode; 250: Result:=250; 252..253: Result:=Unicode; 258: Result:=195; 259: Result:=227; 260: Result:=165; 261: Result:=185; 262: Result:=198; 263: Result:=230; 268: Result:=200; 269: Result:=232; 270: Result:=207; 271: Result:=239; 272: Result:=208; 273: Result:=240; 280: Result:=202; 281: Result:=234; 282: Result:=204; 283: Result:=236; 313: Result:=197; 314: Result:=229; 317: Result:=188; 318: Result:=190; 321: Result:=163; 322: Result:=179; 323: Result:=209; 324: Result:=241; 327: Result:=210; 328: Result:=242; 336: Result:=213; 337: Result:=245; 340: Result:=192; 341: Result:=224; 344: Result:=216; 345: Result:=248; 346: Result:=140; 347: Result:=156; 350: Result:=170; 351: Result:=186; 352: Result:=138; 353: Result:=154; 354: Result:=222; 355: Result:=254; 356: Result:=141; 357: Result:=157; 366: Result:=217; 367: Result:=249; 368: Result:=219; 369: Result:=251; 377: Result:=143; 378: Result:=159; 379: Result:=175; 380: Result:=191; 381: Result:=142; 382: Result:=158; 711: Result:=161; 728: Result:=162; 729: Result:=255; 731: Result:=178; 733: Result:=189; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1251(Unicode: cardinal): integer; begin case Unicode of 0..127,152: Result:=Unicode; 160: Result:=160; 164: Result:=164; 166..167: Result:=Unicode; 169: Result:=169; 171..174: Result:=Unicode; 176..177: Result:=Unicode; 181..183: Result:=Unicode; 187: Result:=187; 1025: Result:=168; 1026..1027: Result:=Unicode-898; 1028: Result:=170; 1029: Result:=189; 1030: Result:=178; 1031: Result:=175; 1032: Result:=163; 1033: Result:=138; 1034: Result:=140; 1035: Result:=142; 1036: Result:=141; 1038: Result:=161; 1039: Result:=143; 1040..1103: Result:=Unicode-848; 1105: Result:=184; 1106: Result:=144; 1107: Result:=131; 1108: Result:=186; 1109: Result:=190; 1110: Result:=179; 1111: Result:=191; 1112: Result:=188; 1113: Result:=154; 1114: Result:=156; 1115: Result:=158; 1116: Result:=157; 1118: Result:=162; 1119: Result:=159; 1168: Result:=165; 1169: Result:=180; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=136; 8470: Result:=185; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1252(Unicode: cardinal): integer; begin case Unicode of 0..127,129,141,143,144,157: Result:=Unicode; 160..255: Result:=Unicode; 338: Result:=140; 339: Result:=156; 352: Result:=138; 353: Result:=154; 376: Result:=159; 381: Result:=142; 382: Result:=158; 402: Result:=131; 710: Result:=136; 732: Result:=152; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1253(Unicode: cardinal): integer; begin case Unicode of 0..127,129,136,138,140,141,142,143,144,152,154,156,157,158,159,170: Result:=Unicode; 160: Result:=160; 163..169: Result:=Unicode; 171..174: Result:=Unicode; 176..179: Result:=Unicode; 181..183: Result:=Unicode; 187: Result:=187; 189: Result:=189; 402: Result:=131; 900: Result:=180; 901..902: Result:=Unicode-740; 904..906: Result:=Unicode-720; 908: Result:=188; 910..975: Result:=Unicode-720; 8211..8212: Result:=Unicode-8061; 8213: Result:=175; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1254(Unicode: cardinal): integer; begin case Unicode of 0..127,129,141,142,143,144,157,158: Result:=Unicode; 160..207: Result:=Unicode; 209..220: Result:=Unicode; 223..239: Result:=Unicode; 241..252: Result:=Unicode; 255: Result:=255; 286: Result:=208; 287: Result:=240; 304: Result:=221; 305: Result:=253; 338: Result:=140; 339: Result:=156; 350: Result:=222; 351: Result:=254; 352: Result:=138; 353: Result:=154; 376: Result:=159; 402: Result:=131; 710: Result:=136; 732: Result:=152; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1255(Unicode: cardinal): integer; begin case Unicode of 0..127,129,138,140..144,154: Result:=Unicode; 156..163: Result:=Unicode; 165..169: Result:=Unicode; 171..185: Result:=Unicode; 187..191: Result:=Unicode; 215: Result:=170; 247: Result:=186; 402: Result:=131; 710: Result:=136; 732: Result:=152; 1456..1475: Result:=Unicode-1264; 1488..1516: Result:=Unicode-1264; 1517: Result:=255; 1520..1535: Result:=Unicode-1308; 8206..8207: Result:=Unicode-7953; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8362: Result:=164; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1257(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 129: Result:=129; 131: Result:=131; 136: Result:=136; 138: Result:=138; 140: Result:=140; 144: Result:=144; 152: Result:=152; 154: Result:=154; 156: Result:=156; 159..167: Result:=Unicode; 168: Result:=141; 169: Result:=169; 171..174: Result:=Unicode; 175: Result:=157; 176..183: Result:=Unicode; 184: Result:=143; 185: Result:=185; 187..190: Result:=Unicode; 196..197: Result:=Unicode; 198: Result:=175; 201: Result:=201; 211: Result:=211; 213..215: Result:=Unicode; 216: Result:=168; 220: Result:=220; 223: Result:=223; 228..229: Result:=Unicode; 230: Result:=191; 233: Result:=233; 243: Result:=243; 245..247: Result:=Unicode; 248: Result:=184; 252: Result:=252; 256: Result:=194; 257: Result:=226; 260: Result:=192; 261: Result:=224; 262: Result:=195; 263: Result:=227; 268: Result:=200; 269: Result:=232; 274: Result:=199; 275: Result:=231; 278: Result:=203; 279: Result:=235; 280: Result:=198; 281: Result:=230; 290: Result:=204; 291: Result:=236; 298: Result:=206; 299: Result:=238; 302: Result:=193; 303: Result:=225; 310: Result:=205; 311: Result:=237; 315: Result:=207; 316: Result:=239; 321: Result:=217; 322: Result:=249; 323: Result:=209; 324: Result:=241; 325: Result:=210; 326: Result:=242; 332: Result:=212; 333: Result:=244; 342: Result:=170; 343: Result:=186; 346: Result:=218; 347: Result:=250; 352: Result:=208; 353: Result:=240; 362: Result:=219; 363: Result:=251; 370: Result:=216; 371: Result:=248; 377: Result:=202; 378: Result:=234; 379: Result:=221; 380: Result:=253; 381: Result:=222; 382: Result:=254; 711: Result:=142; 729: Result:=255; 731: Result:=158; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP1258(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 129: Result:=129; 138: Result:=138; 141..144: Result:=Unicode; 154: Result:=154; 157..158: Result:=Unicode; 160..194: Result:=Unicode; 196..203: Result:=Unicode; 205..207: Result:=Unicode; 209: Result:=209; 211..212: Result:=Unicode; 214..220: Result:=Unicode; 223..226: Result:=Unicode; 228..235: Result:=Unicode; 237..239: Result:=Unicode; 241: Result:=241; 243..244: Result:=Unicode; 246..252: Result:=Unicode; 255: Result:=255; 258: Result:=195; 259: Result:=227; 272: Result:=208; 273: Result:=240; 338: Result:=140; 339: Result:=156; 376: Result:=159; 402: Result:=131; 416: Result:=213; 417: Result:=245; 431: Result:=221; 432: Result:=253; 710: Result:=136; 732: Result:=152; 768: Result:=204; 769: Result:=236; 771: Result:=222; 777: Result:=210; 803: Result:=242; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8218: Result:=130; 8220..8221: Result:=Unicode-8073; 8222: Result:=132; 8224..8225: Result:=Unicode-8090; 8226: Result:=149; 8230: Result:=133; 8240: Result:=137; 8249: Result:=139; 8250: Result:=155; 8363: Result:=254; 8364: Result:=128; 8482: Result:=153; else Result:=-1; end; end; function UnicodeToCP874(Unicode: cardinal): integer; begin case Unicode of 0..127: Result:=Unicode; 129..132: Result:=Unicode; 134..144: Result:=Unicode; 152..160: Result:=Unicode; 219..222: Result:=Unicode; 252..255: Result:=Unicode; 3585..3642: Result:=Unicode-3424; 3647..3675: Result:=Unicode-3424; 8211..8212: Result:=Unicode-8061; 8216..8217: Result:=Unicode-8071; 8220..8221: Result:=Unicode-8073; 8226: Result:=149; 8230: Result:=133; 8364: Result:=128; else Result:=-1; end; end; procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage; SetTargetCodePage: boolean; const UTF8CharConvFunc: TUnicodeToCharID; out TheResult: RawByteString); inline; begin if not Assigned(UTF8CharConvFunc) then begin TheResult:=s; SetCodePage(TheResult, TargetCodePage, True); if not SetTargetCodePage then SetCodePage(TheResult, CP_ACP, False); end else begin TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc); if SetTargetCodePage then SetCodePage(TheResult, TargetCodePage, False); end; end; function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result); end; function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result); end; function UTF8ToISO_8859_3(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28593,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_3{$endif},Result); end; function UTF8ToISO_8859_4(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28594,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_4{$endif},Result); end; function UTF8ToISO_8859_5(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28595,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_5{$endif},Result); end; function UTF8ToISO_8859_7(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28597,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_7{$endif},Result); end; function UTF8ToISO_8859_9(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28599,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_9{$endif},Result); end; function UTF8ToISO_8859_10(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28600,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_10{$endif},Result); end; function UTF8ToISO_8859_13(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28603,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_13{$endif},Result); end; function UTF8ToISO_8859_14(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28604,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_14{$endif},Result); end; function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result); end; function UTF8ToISO_8859_16(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,28606,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_16{$endif},Result); end; function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result); end; function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #152 -> using table InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result); end; function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #128 -> using table InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result); end; function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result); end; function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result); end; function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result); end; function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result); end; function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result); end; function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result); end; function UTF8ToCP437(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result); end; function UTF8ToCP850(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result); end; function UTF8ToCP852(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result); end; function UTF8ToCP865(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,865,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP865{$endif},Result); end; function UTF8ToCP866(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result); end; function UTF8ToCP874(const s: string; SetTargetCodePage: boolean): RawByteString; begin // system conversion fails for character #129 -> using table InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result); end; function UTF8ToKOI8R(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8R{$endif},Result); end; function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result); end; function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean): RawByteString; begin // KOI8-RU dont have code page InternalUTF8ToCP(s,0,SetTargetCodePage,@UnicodeToKOI8RU,Result); end; function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean): RawByteString; begin InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result); end; function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string; var len, i, CharLen: Integer; Src, Dest: PChar; c: Char; Unicode: LongWord; begin if s='' then exit(''); len:=length(s); SetLength(Result,len); Src:=PChar(s); Dest:=PChar(Result); while len>0 do begin c:=Src^; if c<#128 then begin Dest^:=c; inc(Dest); inc(Src); dec(len); end else begin Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); i:=UTF8CharConvFunc(Unicode); //writeln('UTF8ToSingleByte Unicode=',Unicode,' CharLen=',CharLen,' c="',copy(s,Src-PChar(s)+1-CharLen,CharLen),'" i=',i); if i>=0 then begin Dest^:=chr(i); inc(Dest); end else case ConvertEncodingErrorMode of ceemSkip: begin end; ceemException: raise EConvertError.Create('Cannot convert UTF8 to single byte'); ceemReplace: begin Dest^:='?'; inc(Dest); end; ceemReturnEmpty: Exit(''); end; end; end; SetLength(Result,Dest-PChar(Result)); end; function UTF8ToUCS2LE(const s: string): string; var len: Integer; Src: PChar; Dest: PWord; c: Char; Unicode: LongWord; CharLen: integer; begin if s='' then exit(''); len:=length(s); SetLength(Result,len*2); Src:=PChar(s); Dest:=PWord(Pointer(Result)); while len>0 do begin c:=Src^; if c<#128 then begin Dest^:=NtoLE(Word(ord(c))); inc(Dest); inc(Src); dec(len); end else begin Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); if Unicode<=$ffff then begin Dest^:=NtoLE(Word(Unicode)); inc(Dest); end; end; end; len:={%H-}PtrUInt(Dest)-PtrUInt(Result); if len>length(Result) then raise Exception.Create(''); SetLength(Result,len); end; function UTF8ToUCS2BE(const s: string): string; var len: Integer; Src: PChar; Dest: PWord; c: Char; Unicode: LongWord; CharLen: integer; begin if s='' then exit(''); len:=length(s); SetLength(Result,len*2); Src:=PChar(s); Dest:=PWord(Pointer(Result)); while len>0 do begin c:=Src^; if c<#128 then begin Dest^:=NtoBE(Word(ord(c))); inc(Dest); inc(Src); dec(len); end else begin Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); if Unicode<=$ffff then begin Dest^:=NtoBE(Word(Unicode)); inc(Dest); end; end; end; len:={%H-}PtrUInt(Dest)-PtrUInt(Result); if len>length(Result) then raise Exception.Create(''); SetLength(Result,len); end; procedure GetSupportedEncodings(List: TStrings); begin List.Add('UTF-8'); List.Add('UTF-8BOM'); List.Add('Ansi'); List.Add(EncodingCP1250); List.Add(EncodingCP1251); List.Add(EncodingCP1252); List.Add(EncodingCP1253); List.Add(EncodingCP1254); List.Add(EncodingCP1255); List.Add(EncodingCP1256); List.Add(EncodingCP1257); List.Add(EncodingCP1258); List.Add(EncodingCP437); List.Add(EncodingCP850); List.Add(EncodingCP852); List.Add(EncodingCP865); List.Add(EncodingCP866); List.Add(EncodingCP874); {$IFnDEF DisableAsianCodePages} List.Add(EncodingCP932); List.Add(EncodingCP936); List.Add(EncodingCP949); List.Add(EncodingCP950); {$ENDIF} List.Add('ISO-8859-1'); List.Add('ISO-8859-2'); List.Add('ISO-8859-3'); List.Add('ISO-8859-4'); List.Add('ISO-8859-5'); List.Add('ISO-8859-7'); List.Add('ISO-8859-9'); List.Add('ISO-8859-10'); List.Add('ISO-8859-13'); List.Add('ISO-8859-14'); List.Add('ISO-8859-15'); List.Add('ISO-8859-16'); List.Add('KOI8-R'); List.Add('KOI8-U'); List.Add('KOI8-RU'); List.Add('Macintosh'); // UCS2 are less common, list them last List.Add('UCS-2LE'); List.Add('UCS-2BE'); end; function GuessEncoding(const s: string): string; function CompareI(p1, p2: PChar; Count: integer): boolean; var i: Integer; Chr1: Byte; Chr2: Byte; begin for i:=1 to Count do begin Chr1 := byte(p1^); Chr2 := byte(p2^); if Chr1<>Chr2 then begin if Chr1 in [97..122] then dec(Chr1,32); if Chr2 in [97..122] then dec(Chr2,32); if Chr1<>Chr2 then exit(false); end; inc(p1); inc(p2); end; Result:=true; end; {$IFDEF VerboseIDEEncoding} function PosToStr(p: integer): string; var y: Integer; x: Integer; i: Integer; begin y:=1; x:=1; i:=1; while (i<=length(s)) and (is[i-1]) then inc(i); end else begin inc(i); inc(x); end; end; Result:='x='+IntToStr(x)+',y='+IntToStr(y); end; {$ENDIF} var l: Integer; p: PChar; EndPos: PChar; i: LongInt; begin l:=length(s); if l=0 then exit(''); p:=PChar(s); // try UTF-8 BOM (Byte Order Mark) if CompareI(p,UTF8BOM,3) then begin Result:=EncodingUTF8BOM; exit; end; // try ucs-2le BOM FF FE (ToDo: nowadays this BOM is UTF16LE) if (p^=#$FF) and (p[1]=#$FE) then begin Result:=EncodingUCS2LE; exit; end; // try ucs-2be BOM FE FF (ToDo: nowadays this BOM is UTF16BE) if (p^=#$FE) and (p[1]=#$FF) then begin Result:=EncodingUCS2BE; exit; end; // try {%encoding eee} if CompareI(p,'{%encoding ',11) then begin inc(p,length('{%encoding ')); while (p^ in [' ',#9]) do inc(p); EndPos:=p; while not (EndPos^ in ['}',' ',#9,#0]) do inc(EndPos); Result:=NormalizeEncoding(copy(s,p-PChar(s)+1,EndPos-p)); exit; end; // try UTF-8 (this includes ASCII) p:=PChar(s); repeat if ord(p^)<128 then begin // ASCII if (p^=#0) and (p-PChar(s)>=l) then begin Result:=EncodingUTF8; exit; end; inc(p); end else begin i:=UTF8CodepointStrictSize(p); //DebugLn(['GuessEncoding ',i,' ',DbgStr(s[p])]); if i=0 then begin {$IFDEF VerboseIDEEncoding} DebugLn(['GuessEncoding non UTF-8 found at ',PosToStr(p-PChar(s)+1),' ',dbgstr(copy(s,p-PChar(s)-10,20))]); {$ENDIF} break; end; inc(p,i); end; until false; // use system encoding Result:=GetDefaultTextEncoding; if NormalizeEncoding(Result)=EncodingUTF8 then begin // the system encoding is UTF-8, but the text is not UTF-8 // use ISO-8859-1 instead. This encoding has a full 1:1 mapping to unicode, // so no character is lost during conversion back and forth. Result:='ISO-8859-1'; end; end; function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean; SetTargetCodePage: boolean = false): string; var ATo: string; procedure CheckKeepCP; inline; begin if SetTargetCodePage then raise Exception.Create('ConvertEncodingFromUTF8: cannot set AnsiString codepage to "'+ATo+'"'); end; begin Result:=s; Encoded:=true; ATo:=NormalizeEncoding(ToEncoding); if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end; if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso3 then begin Result:=UTF8ToISO_8859_3(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso4 then begin Result:=UTF8ToISO_8859_4(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso5 then begin Result:=UTF8ToISO_8859_5(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso7 then begin Result:=UTF8ToISO_8859_7(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso9 then begin Result:=UTF8ToISO_8859_9(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso10 then begin Result:=UTF8ToISO_8859_10(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso13 then begin Result:=UTF8ToISO_8859_13(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso14 then begin Result:=UTF8ToISO_8859_14(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s,SetTargetCodePage); exit; end; if ATo=EncodingCPIso16 then begin Result:=UTF8ToISO_8859_16(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s,SetTargetCodePage); exit; end; if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s,SetTargetCodePage); exit; end; if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s,SetTargetCodePage); exit; end; if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s,SetTargetCodePage); exit; end; if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s,SetTargetCodePage); exit; end; if ATo=EncodingCP865 then begin Result:=UTF8ToCP865(s,SetTargetCodePage); exit; end; if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s,SetTargetCodePage); exit; end; if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s,SetTargetCodePage); exit; end; {$IFnDEF DisableAsianCodePages} if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s,SetTargetCodePage); exit; end; if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s,SetTargetCodePage); exit; end; if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s,SetTargetCodePage); exit; end; if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s,SetTargetCodePage); exit; end; {$ENDIF} if ATo=EncodingCPKOI8R then begin Result:=UTF8ToKOI8R(s,SetTargetCodePage); exit; end; if ATo=EncodingCPKOI8U then begin Result:=UTF8ToKOI8U(s,SetTargetCodePage); exit; end; if ATo=EncodingCPKOI8RU then begin Result:=UTF8ToKOI8RU(s,SetTargetCodePage); exit; end; if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s,SetTargetCodePage); exit; end; if ATo=EncodingUCS2LE then begin CheckKeepCP; Result:=UTF8ToUCS2LE(s); exit; end; if ATo=EncodingUCS2BE then begin CheckKeepCP; Result:=UTF8ToUCS2BE(s); exit; end; if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin Result:=ConvertUTF8ToAnsi(s); exit; end; Encoded:= false; end; function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string; var AFrom: string; begin Result:=s; Encoded:=true; AFrom:=NormalizeEncoding(FromEncoding); if AFrom=EncodingUTF8BOM then begin Result:=UTF8BOMToUTF8(s); exit; end; if AFrom=EncodingCPIso1 then begin Result:=ISO_8859_1ToUTF8(s); exit; end; if AFrom=EncodingCPIso2 then begin Result:=ISO_8859_2ToUTF8(s); exit; end; if AFrom=EncodingCPIso3 then begin Result:=ISO_8859_3ToUTF8(s); exit; end; if AFrom=EncodingCPIso4 then begin Result:=ISO_8859_4ToUTF8(s); exit; end; if AFrom=EncodingCPIso5 then begin Result:=ISO_8859_5ToUTF8(s); exit; end; if AFrom=EncodingCPIso7 then begin Result:=ISO_8859_7ToUTF8(s); exit; end; if AFrom=EncodingCPIso9 then begin Result:=ISO_8859_9ToUTF8(s); exit; end; if AFrom=EncodingCPIso10 then begin Result:=ISO_8859_10ToUTF8(s); exit; end; if AFrom=EncodingCPIso13 then begin Result:=ISO_8859_13ToUTF8(s); exit; end; if AFrom=EncodingCPIso14 then begin Result:=ISO_8859_14ToUTF8(s); exit; end; if AFrom=EncodingCPIso15 then begin Result:=ISO_8859_15ToUTF8(s); exit; end; if AFrom=EncodingCPIso16 then begin Result:=ISO_8859_16ToUTF8(s); exit; end; if AFrom=EncodingCP1250 then begin Result:=CP1250ToUTF8(s); exit; end; if AFrom=EncodingCP1251 then begin Result:=CP1251ToUTF8(s); exit; end; if AFrom=EncodingCP1252 then begin Result:=CP1252ToUTF8(s); exit; end; if AFrom=EncodingCP1253 then begin Result:=CP1253ToUTF8(s); exit; end; if AFrom=EncodingCP1254 then begin Result:=CP1254ToUTF8(s); exit; end; if AFrom=EncodingCP1255 then begin Result:=CP1255ToUTF8(s); exit; end; if AFrom=EncodingCP1256 then begin Result:=CP1256ToUTF8(s); exit; end; if AFrom=EncodingCP1257 then begin Result:=CP1257ToUTF8(s); exit; end; if AFrom=EncodingCP1258 then begin Result:=CP1258ToUTF8(s); exit; end; if AFrom=EncodingCP437 then begin Result:=CP437ToUTF8(s); exit; end; if AFrom=EncodingCP850 then begin Result:=CP850ToUTF8(s); exit; end; if AFrom=EncodingCP852 then begin Result:=CP852ToUTF8(s); exit; end; if AFrom=EncodingCP865 then begin Result:=CP865ToUTF8(s); exit; end; if AFrom=EncodingCP866 then begin Result:=CP866ToUTF8(s); exit; end; if AFrom=EncodingCP874 then begin Result:=CP874ToUTF8(s); exit; end; {$IFnDEF DisableAsianCodePages} if AFrom=EncodingCP936 then begin Result:=CP936ToUTF8(s); exit; end; if AFrom=EncodingCP950 then begin Result:=CP950ToUTF8(s); exit; end; if AFrom=EncodingCP949 then begin Result:=CP949ToUTF8(s); exit; end; if AFrom=EncodingCP932 then begin Result:=CP932ToUTF8(s); exit; end; {$ENDIF} if AFrom=EncodingCPKOI8R then begin Result:=KOI8RToUTF8(s); exit; end; if AFrom=EncodingCPKOI8U then begin Result:=KOI8UToUTF8(s); exit; end; if AFrom=EncodingCPKOI8RU then begin Result:=KOI8RUToUTF8(s); exit; end; if AFrom=EncodingCPMac then begin Result:=MacintoshToUTF8(s); exit; end; if AFrom=EncodingUCS2LE then begin Result:=UCS2LEToUTF8(s); exit; end; if AFrom=EncodingUCS2BE then begin Result:=UCS2BEToUTF8(s); exit; end; if (AFrom=GetDefaultTextEncoding) and Assigned(ConvertAnsiToUTF8) then begin Result:=ConvertAnsiToUTF8(s); exit; end; Encoded:= false; end; function ConvertEncoding(const s, FromEncoding, ToEncoding: string; SetTargetCodePage: boolean): string; var AFrom, ATo, SysEnc : String; Encoded : Boolean; {$ifdef EnableIconvEnc} Dummy: String; {$endif} begin AFrom:=NormalizeEncoding(FromEncoding); ATo:=NormalizeEncoding(ToEncoding); SysEnc:=GetDefaultTextEncoding; if AFrom=EncodingAnsi then AFrom:=SysEnc else if AFrom='' then AFrom:=EncodingUTF8; if ATo=EncodingAnsi then ATo:=SysEnc else if ATo='' then ATo:=EncodingUTF8; if AFrom=ATo then begin Result:=s; exit; end; if s='' then begin if ATo=EncodingUTF8BOM then Result:=UTF8BOM else Result := s; exit; end; //DebugLn(['ConvertEncoding ',AFrom,' ',ATo]); if AFrom=EncodingUTF8 then begin Result:=ConvertEncodingFromUTF8(s, ATo, Encoded, SetTargetCodePage); if Encoded then exit; end else if ATo=EncodingUTF8 then begin Result:=ConvertEncodingToUTF8(s, AFrom, Encoded); if Encoded then exit; end else begin Result:=ConvertEncodingToUTF8(s, AFrom, Encoded); if Encoded then Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded, SetTargetCodePage); if Encoded then exit; end; //cannot encode: return orig str Result:=s; {$ifdef EnableIconvEnc} try if not IconvLibFound and not InitIconv(Dummy) then begin {$IFNDEF DisableChecks} DebugLn(['Can not init iconv: ',Dummy]); {$ENDIF} Exit; end; if Iconvert(s, Result, AFrom, ATo)<>0 then begin Result:=s; Exit; end; except end; {$endif} end; end.