mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-11-04 05:59:37 +01:00 
			
		
		
		
	o support for the new codepage-aware ansistrings in the jvm branch
   o empty ansistrings are now always represented by a nil pointer rather than
     by an empty string, because an empty string also has a code page which
     can confuse code (although this will make ansistrings harder to use
     in Java code)
   o more string helpers code shared between the general and jvm rtl
   o support for indexbyte/word in the jvm rtl (warning: first parameter
     is an open array rather than an untyped parameter there, so
     indexchar(pcharvar^,10,0) will be equivalent to
     indexchar([pcharvar^],10,0) there, which is different from what is
     intended; changing it to an untyped parameter wouldn't help though)
   o default() support is not yet complete
   o calling fpcres is currently broken due to limitations in
     sysutils.executeprocess() regarding handling unix quoting and
     the compiler using the same command lines for scripts and directly
     calling external programs
   o compiling the Java compiler currently requires adding ALLOW_WARNINGS=1
     to the make command line
git-svn-id: branches/jvmbackend@20887 -
		
	
			
		
			
				
	
	
		
			223 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			223 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
    { source: http://download.java.net/jdk8/docs/technotes/guides/intl/encoding.doc.html
 | 
						|
    }
 | 
						|
    
 | 
						|
 type
   { One row of the code page translation table: associates a Windows
     code page identifier with a Java charset name. }
   twin2javacs = record
     { Windows code page identifier }
     cp: word;
     { Java charset name associated with that code page }
     name: unicodestring;
   end;
 | 
						|
 (*
 | 
						|
  * Code Page Identifiers
 | 
						|
  * http://msdn.microsoft.com/en-us/library/dd317756.aspx
 | 
						|
  *)
 | 
						|
  const
    { Windows code page -> Java charset name table.

      The entries are kept in ascending order of cp, because win2javacs
      performs a binary search on this array. When several Java names
      exist for the same code page, the first entry listed for that code
      page is the one win2javacs returns (the most commonly supported
      alias).

      Names without a known code page, not included in the table:
        (* (cp:; name:'x-MacDingbat'), not supported ? *)
        (* (cp:; name:'x-MacSymbol'), not supported ? *)
    }
    win2javacs_arr: array[0..154] of twin2javacs = (
      (cp: 37; name: 'IBM037'),
      (cp: 437; name: 'IBM437'),
      (cp: 500; name: 'IBM500'),
      (cp: 737; name: 'x-IBM737'),
      (cp: 775; name: 'IBM775'),
      (cp: 850; name: 'IBM850'),
      (cp: 852; name: 'IBM852'),
      (cp: 855; name: 'IBM855'),
      (cp: 857; name: 'IBM857'),
      (cp: 858; name: 'IBM00858'),
      (cp: 860; name: 'IBM860'),
      (cp: 861; name: 'IBM861'),
      (cp: 862; name: 'IBM862'),
      (cp: 863; name: 'IBM863'),
      (cp: 864; name: 'IBM864'),
      (cp: 865; name: 'IBM865'),
      (cp: 866; name: 'IBM866'),
      (cp: 868; name: 'IBM868'),
      (cp: 869; name: 'IBM869'),
      (cp: 870; name: 'IBM870'),
      (cp: 874; name: 'x-windows-874'),
      (cp: 918; name: 'IBM918'),
      (cp: 932; name: 'Shift_JIS'),
      (cp: 932; name: 'x-PCK'),
      (cp: 936; name: 'GB2312'),
      (cp: 936; name: 'x-mswin-936'),
      (cp: 942; name: 'x-IBM942'),
      (cp: 943; name: 'x-IBM943'),
      (cp: 948; name: 'x-IBM948'),
      (cp: 949; name: 'x-IBM949'),
      (cp: 949; name: 'x-windows-949'),
      (cp: 950; name: 'Big5'),
      (cp: 950; name: 'x-IBM950'),
      (cp: 950; name: 'x-windows-950'),
      (cp: 951; name: 'x-Big5-HKSCS-2001'),
      (cp: 951; name: 'Big5-HKSCS'),
      (cp: 951; name: 'x-MS950-HKSCS'),
      (cp: 951; name: 'x-MS950-HKSCS-XP'),
      (cp: 964; name: 'x-IBM964'),
      (cp: 970; name: 'x-IBM970'),
      (cp: 1006; name: 'x-IBM1006'),
      (cp: 1025; name: 'x-IBM1025'),
      (cp: 1026; name: 'IBM1026'),
      (cp: 1046; name: 'x-IBM1046'),
      (cp: 1047; name: 'IBM1047'),
      (cp: 1097; name: 'x-IBM1097'),
      (cp: 1098; name: 'x-IBM1098'),
      (cp: 1112; name: 'x-IBM1112'),
      (cp: 1122; name: 'x-IBM1122'),
      (cp: 1123; name: 'x-IBM1123'),
      (cp: 1124; name: 'x-IBM1124'),
      (cp: 1140; name: 'IBM01140'),
      (cp: 1141; name: 'IBM01141'),
      (cp: 1142; name: 'IBM01142'),
      (cp: 1143; name: 'IBM01143'),
      (cp: 1144; name: 'IBM01144'),
      (cp: 1145; name: 'IBM01145'),
      (cp: 1146; name: 'IBM01146'),
      (cp: 1147; name: 'IBM01147'),
      (cp: 1148; name: 'IBM01148'),
      (cp: 1149; name: 'IBM01149'),
      (cp: 1200; name: 'UTF-16LE'),
      (cp: 1200; name: 'x-UTF-16LE-BOM'),
      (cp: 1201; name: 'UTF-16'),
      (cp: 1201; name: 'UTF-16BE'),
      (cp: 1250; name: 'windows-1250'),
      (cp: 1251; name: 'windows-1251'),
      (cp: 1252; name: 'windows-1252'),
      (cp: 1253; name: 'windows-1253'),
      (cp: 1254; name: 'windows-1254'),
      (cp: 1255; name: 'windows-1255'),
      (cp: 1256; name: 'windows-1256'),
      (cp: 1257; name: 'windows-1257'),
      (cp: 1258; name: 'windows-1258'),
      (cp: 1259; name: 'windows-31j'),
      (cp: 1361; name: 'x-Johab'),
      (cp: 1381; name: 'x-IBM1381'),
      (cp: 1383; name: 'x-IBM1383'),
      (cp: 10000; name: 'MacRoman'),
      (cp: 10004; name: 'x-MacArabic'),
      (cp: 10005; name: 'x-MacHebrew'),
      (cp: 10006; name: 'x-MacGreek'),
      (cp: 10007; name: 'x-MacCyrillic'),
      (cp: 10010; name: 'x-MacRomania'),
      (cp: 10017; name: 'x-MacUkraine'),
      (cp: 10021; name: 'x-MacThai'),
      (cp: 10029; name: 'x-MacCentralEurope'),
      (cp: 10079; name: 'x-MacIceland'),
      (cp: 10081; name: 'x-MacTurkish'),
      (cp: 10082; name: 'x-MacCroatian'),
      (cp: 12000; name: 'UTF-32LE'),
      (cp: 12000; name: 'X-UTF-32LE-BOM'),
      (cp: 12001; name: 'UTF-32'),
      (cp: 12001; name: 'UTF-32BE'),
      (cp: 12001; name: 'X-UTF-32BE-BOM'),
      (cp: 20127; name: 'US-ASCII'),
      (cp: 20273; name: 'IBM273'),
      (cp: 20277; name: 'IBM277'),
      (cp: 20278; name: 'IBM278'),
      (cp: 20280; name: 'IBM280'),
      (cp: 20284; name: 'IBM284'),
      (cp: 20285; name: 'IBM285'),
      (cp: 20297; name: 'IBM297'),
      (cp: 20420; name: 'IBM420'),
      (cp: 20424; name: 'IBM424'),
      (cp: 20833; name: 'x-IBM833'),
      (cp: 20834; name: 'x-IBM834'),
      (cp: 20838; name: 'IBM-Thai'),
      (cp: 20856; name: 'x-IBM856'),
      (cp: 20866; name: 'KOI8-R'),
      (cp: 20871; name: 'IBM871'),
      (cp: 20874; name: 'x-IBM874'),
      (cp: 20875; name: 'x-IBM875'),
      (cp: 20921; name: 'x-IBM921'),
      (cp: 20922; name: 'x-IBM922'),
      (cp: 20932; name: 'EUC-JP'),
      (cp: 20932; name: 'x-JIS0208'),
      (cp: 20932; name: 'x-JISAutoDetect'),
      (cp: 21866; name: 'KOI8-U'),
      (cp: 28591; name: 'ISO-8859-1'),
      (cp: 28592; name: 'ISO-8859-2'),
      (cp: 28593; name: 'ISO-8859-3'),
      (cp: 28594; name: 'ISO-8859-4'),
      (cp: 28595; name: 'ISO-8859-5'),
      (cp: 28596; name: 'ISO-8859-6'),
      (cp: 28597; name: 'ISO-8859-7'),
      (cp: 28598; name: 'ISO-8859-8'),
      (cp: 28599; name: 'ISO-8859-9'),
      (cp: 28601; name: 'TIS-620'),
      (cp: 28601; name: 'x-iso-8859-11'),
      (cp: 28603; name: 'ISO-8859-13'),
      (cp: 28605; name: 'ISO-8859-15'),
      (cp: 33722; name: 'x-IBM33722'),
      (cp: 50220; name: 'x-windows-50220'),
      (cp: 50220; name: 'x-windows-iso2022jp'),
      (cp: 50221; name: 'JIS_X0201'),
      (cp: 50221; name: 'x-windows-50221'),
      (cp: 50222; name: 'ISO-2022-JP'),
      (* not exact, Windows does not support ISO-2022-JP-2 *)
      (cp: 50222; name: 'ISO-2022-JP-2'),
      (cp: 50222; name: 'JIS_X0212-1990'),
      (cp: 50225; name: 'ISO-2022-KR'),
      (cp: 50227; name: 'ISO-2022-CN'),
      (cp: 50227; name: 'x-ISO-2022-CN-CNS'),
      (cp: 50229; name: 'x-ISO-2022-CN-GB'),
      (cp: 50930; name: 'x-IBM930'),
      (cp: 50933; name: 'x-IBM933'),
      (cp: 50935; name: 'x-IBM935'),
      (cp: 50937; name: 'x-IBM937'),
      (cp: 50939; name: 'x-IBM939'),
      (cp: 51932; name: 'x-MS932_0213'),
      (cp: 51932; name: 'x-SJIS_0213'),
      (cp: 51949; name: 'EUC-KR'),
      (cp: 54936; name: 'GB18030'),
      (cp: 57002; name: 'x-ISCII91'),
      (cp: 65001; name: 'UTF-8')
    );
 | 
						|
 | 
						|
 | 
						|
  { Translates a Windows code page identifier into a Java charset name.

    cp: the Windows code page identifier to look up.

    Returns the name of the first win2javacs_arr entry whose cp field
    equals the argument, or the string '<unsupported>' when the table
    has no entry for it.

    The lookup is a lower-bound binary search and therefore relies on
    win2javacs_arr being sorted in ascending order of cp. The previous
    implementation rounded the midpoint upwards and only re-tested the
    last midpoint after the loop, so the entry at index
    low(win2javacs_arr) (code page 37, 'IBM037') was never compared and
    was reported as unsupported.
  }
  function win2javacs(cp: word): unicodestring;
    var
      l, h, m: longint;
    begin
      { search the half-open index range [l, h) for the first entry whose
        cp is not smaller than the requested one }
      l:=low(win2javacs_arr);
      h:=high(win2javacs_arr)+1;
      while l<h do
        begin
          m:=(l+h) shr 1;
          if win2javacs_arr[m].cp<cp then
            { everything up to and including m is too small }
            l:=m+1
          else
            { m itself may be the first match, keep it in range }
            h:=m;
        end;
      { l is now the first index with an entry cp >= the requested cp, or
        high(win2javacs_arr)+1 if all entries are smaller. The array has
        been ordered so that in case multiple alias names exist, the
        first entry for the cp is the most commonly supported one, which
        is exactly the entry the lower bound lands on. }
      if (l<=high(win2javacs_arr)) and
         (win2javacs_arr[l].cp=cp) then
        result:=win2javacs_arr[l].name
      else
        { or better raise an error? }
        result:='<unsupported>';
    end;
 | 
						|
    
 | 
						|
    
 | 
						|
  { Translates a Java charset name into a Windows code page identifier.

    cpname: the charset name; it is compared with the names stored in
            win2javacs_arr using the = operator on unicodestrings.

    Returns the cp field of the first table entry whose name equals
    cpname, or 65535 when no entry matches.
  }
  function javacs2win(cpname: unicodestring): word;
    var
      idx: longint;
    begin
      { fallback when nothing matches: rawbytestring (or better raise an
        error?) }
      result:=65535;
      { plain front-to-back walk over the table; this lookup is not a
        common operation, so no separate name-sorted array is kept }
      idx:=low(win2javacs_arr);
      while idx<=high(win2javacs_arr) do
        begin
          if win2javacs_arr[idx].name=cpname then
            begin
              result:=win2javacs_arr[idx].cp;
              break;
            end;
          inc(idx);
        end;
    end;
 | 
						|
    
 |