mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-10-31 10:11:27 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			1185 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			1185 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {
 | ||
|     This file is part of the Free Pascal run time library.
 | ||
|     Copyright (c) 2005 by Florian Klaempfl,
 | ||
|     member of the Free Pascal development team.
 | ||
| 
 | ||
|     libc based wide string support
 | ||
| 
 | ||
|     See the file COPYING.FPC, included in this distribution,
 | ||
|     for details about the copyright.
 | ||
| 
 | ||
|     This program is distributed in the hope that it will be useful,
 | ||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | ||
|  **********************************************************************}
 | ||
| 
 | ||
| {$mode objfpc}
 | ||
| {$inline on}
 | ||
| {$implicitexceptions off}
 | ||
| 
 | ||
| unit cwstring;
 | ||
| 
 | ||
| interface
 | ||
| 
 | ||
| procedure SetCWidestringManager;
 | ||
| 
 | ||
| implementation
 | ||
| 
 | ||
| {$linklib c}
 | ||
| 
 | ||
| // Linux (and maybe glibc platforms in general), have iconv in glibc.
 | ||
| {$if defined(linux) or defined(solaris)}
 | ||
|   {$define iconv_is_in_libc}
 | ||
| {$endif}
 | ||
| 
 | ||
| {$ifdef netbsd}
 | ||
|   {$ifndef DISABLE_ICONV_LIBC}
 | ||
|     {$define iconv_is_in_libc}
 | ||
|   {$endif}
 | ||
| {$endif}
 | ||
| 
 | ||
| {$ifndef iconv_is_in_libc}
 | ||
|  {$if defined(haiku)}
 | ||
|    {$linklib textencoding}
 | ||
|  {$else}
 | ||
|    {$linklib iconv}
 | ||
|  {$endif}
 | ||
|  {$define useiconv}
 | ||
| {$endif not iconv_is_in_libc}
 | ||
| 
 | ||
| {$i rtldefs.inc}
 | ||
| 
 | ||
| Uses
 | ||
|   BaseUnix,
 | ||
|   ctypes,
 | ||
|   unix,
 | ||
|   unixtype,
 | ||
|   initc,
 | ||
|   dynlibs,
 | ||
|   unixcp;
 | ||
| 
 | ||
| Const
 | ||
| {$ifndef useiconv}
 | ||
|     libiconvname='c';  // is in libc under Linux.
 | ||
|     libprefix='lib';
 | ||
| {$else}
 | ||
|   {$ifdef haiku}
 | ||
|     libiconvname='textencoding';  // is in libtextencoding under Haiku
 | ||
|     libprefix='lib';
 | ||
|   {$else}
 | ||
|     {$if defined(darwin) or defined(aix)}
 | ||
|       libiconvname='libiconv';
 | ||
|       libprefix='';
 | ||
|     {$else}
 | ||
|       libiconvname='iconv';
 | ||
|       libprefix='lib';
 | ||
|     {$endif}
 | ||
|   {$endif}
 | ||
| {$endif}
 | ||
| 
 | ||
| { helper functions from libc }
 | ||
| function towlower(__wc:wint_t):wint_t;cdecl;external clib name 'towlower';
 | ||
| function towupper(__wc:wint_t):wint_t;cdecl;external clib name 'towupper';
 | ||
| 
 | ||
| function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external clib name 'wcscoll';
 | ||
| function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external clib name 'strcoll';
 | ||
| {$ifdef netbsd}
 | ||
|   { NetBSD has a new setlocale function defined in /usr/include/locale.h
 | ||
|     that should be used }
 | ||
| function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name '__setlocale50';
 | ||
| {$else}
 | ||
| function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
 | ||
| {$endif}
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
| function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
 | ||
| function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
 | ||
| function mbrlen(const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrlen';
 | ||
| {$else beos}
 | ||
| function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
 | ||
| function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
 | ||
| function mblen(const s: pchar; n: size_t): size_t; cdecl; external clib name 'mblen';
 | ||
| {$endif beos}
 | ||
| 
 | ||
| 
 | ||
| const
 | ||
| {$if defined(linux)}
 | ||
|   __LC_CTYPE = 0;
 | ||
|   LC_ALL = 6;
 | ||
|   _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
 | ||
|   _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
 | ||
|   CODESET = _NL_CTYPE_CODESET_NAME;
 | ||
| {$elseif defined(darwin)}
 | ||
|   CODESET = 0;
 | ||
|   LC_ALL = 0;
 | ||
| {$elseif defined(FreeBSD)} // actually FreeBSD5. internationalisation is afaik not default on 4.
 | ||
|   __LC_CTYPE = 0;
 | ||
|   LC_ALL = 0;
 | ||
|   _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
 | ||
|   _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
 | ||
|   CODESET = 0; // _NL_CTYPE_CODESET_NAME;
 | ||
| {$elseif defined(solaris)}
 | ||
|   {$define ACCEPT_646}
 | ||
|   CODESET=49;
 | ||
|   LC_ALL = 6;
 | ||
| {$elseif defined(beos)}
 | ||
|   {$ifdef haiku}
 | ||
|   CODESET= 0; // Checked for Haiku
 | ||
|   LC_ALL = 0; // Checked for Haiku
 | ||
|   {$else}
 | ||
|   {$warning check correct value for BeOS}
 | ||
|   CODESET=49;
 | ||
|   LC_ALL = 6; // Checked for BeOS
 | ||
|   {$endif}
 | ||
|   ESysEILSEQ = EILSEQ;
 | ||
| {$elseif defined(OpenBSD)}
 | ||
|   {$define ACCEPT_646}
 | ||
|   CODESET = 51;
 | ||
|   LC_ALL = 0;
 | ||
| {$elseif defined(NetBSD)}
 | ||
|   {$define ACCEPT_646}
 | ||
|   CODESET = 51;
 | ||
|   LC_ALL = 0;
 | ||
| {$elseif defined(aix)}
 | ||
|   CODESET = 49;
 | ||
|   LC_ALL = -1;
 | ||
| {$elseif defined(dragonfly)}
 | ||
|   CODESET = 0;
 | ||
|   LC_ALL = 0;
 | ||
|   __LC_CTYPE = 0;
 | ||
|   _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
 | ||
|   _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
 | ||
| {$else not aix}
 | ||
| {$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
 | ||
| // and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
 | ||
| {$endif}
 | ||
| 
 | ||
| { unicode encoding name }
 | ||
| {$ifdef FPC_LITTLE_ENDIAN}
 | ||
|   unicode_encoding2 = 'UTF-16LE';
 | ||
|   unicode_encoding4 = 'UCS-4LE';
 | ||
| {$else  FPC_LITTLE_ENDIAN}
 | ||
| {$ifdef AIX}
 | ||
|   unicode_encoding2 = 'UTF-16';
 | ||
| {$else AIX}
 | ||
|   unicode_encoding2 = 'UTF-16BE';
 | ||
|   unicode_encoding4 = 'UCS-4BE';
 | ||
| {$endif AIX}
 | ||
| {$endif  FPC_LITTLE_ENDIAN}
 | ||
| 
 | ||
| { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4   }
 | ||
| { -> 10 should be enough? Should actually use MB_CUR_MAX, but }
 | ||
| { that's a libc macro mapped to internal functions/variables  }
 | ||
| { and thus not a stable external API on systems where libc    }
 | ||
| { breaks backwards compatibility every now and then           }
 | ||
|   MB_CUR_MAX = 10;
 | ||
| 
 | ||
| { Requests for iconvctl }
 | ||
|   ICONV_TRIVIALP          = 0; // int *argument
 | ||
|   ICONV_GET_TRANSLITERATE = 1; // int *argument
 | ||
|   ICONV_SET_TRANSLITERATE = 2; // const int *argument
 | ||
|   ICONV_GET_DISCARD_ILSEQ = 3; // int *argument
 | ||
|   ICONV_SET_DISCARD_ILSEQ = 4; // const int *argument
 | ||
|   ICONV_SET_HOOKS         = 5; // const struct iconv_hooks *argument
 | ||
|   ICONV_SET_FALLBACKS     = 6; // const struct iconv_fallbacks *argument
 | ||
| 
 | ||
| type
 | ||
|   piconv_t = ^iconv_t;
 | ||
|   iconv_t = pointer;
 | ||
|   nl_item = cint;
 | ||
| 
 | ||
| {$ifdef haiku}
 | ||
|   function nl_langinfo(__item:nl_item):pchar;cdecl;external 'root' name 'nl_langinfo';
 | ||
| {$else}
 | ||
|   {$ifndef beos}
 | ||
|   function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
 | ||
|   {$endif}
 | ||
| {$endif}
 | ||
| 
 | ||
| {$if (not defined(bsd) and not defined(beos)) or defined(iconv_is_in_libc) or (defined(darwin) and not defined(cpupowerpc32))}
 | ||
| function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
 | ||
| function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
 | ||
| function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
 | ||
| const
 | ||
|   iconvctlname='iconvctl';
 | ||
| {$else}
 | ||
| function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
 | ||
| function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
 | ||
| function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
 | ||
| const
 | ||
|   iconvctlname='libiconvctl';
 | ||
| {$endif}
 | ||
| var
 | ||
|   iconvctl:function(__cd:iconv_t; __request:cint; __argument:pointer):cint;cdecl;
 | ||
| 
 | ||
| procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
 | ||
| 
 | ||
| 
 | ||
| threadvar
 | ||
|   iconv_ansi2wide,
 | ||
|   iconv_wide2ansi : iconv_t;
 | ||
|   { since we cache the iconv_t converters, we have to do the same
 | ||
|     for the DefaultSystemCodePage variable since if it changes, we
 | ||
|     have to re-initialize the converters too. We can't do that via
 | ||
|     a callback in the widestring manager because DefaultSystemCodePage
 | ||
|     is not a threadvar and we can't automatically change this in all
 | ||
|     threads }
 | ||
|   current_DefaultSystemCodePage: TSystemCodePage;
 | ||
| 
 | ||
| 
 | ||
| procedure InitThread;
 | ||
| var
 | ||
|   transliterate: cint;
 | ||
| {$if not(defined(darwin) and (defined(cpuarm) or defined(cpuaarch64))) and not defined(iphonesim)}
 | ||
|   iconvindex: longint;
 | ||
| {$endif}
 | ||
|   iconvname, toencoding: rawbytestring;
 | ||
| begin
 | ||
|   current_DefaultSystemCodePage:=DefaultSystemCodePage;
 | ||
| {$if declared(iconvindex)}
 | ||
|   iconvindex:=GetCodepageData(DefaultSystemCodePage);
 | ||
|   if iconvindex<>-1 then
 | ||
|     iconvname:=UnixCpMap[iconvindex].name
 | ||
|   else
 | ||
|     { default to UTF-8 on Unix platforms }
 | ||
|     iconvname:='UTF-8';
 | ||
| {$else}
 | ||
|   { Unix locale settings are ignored on iPhoneOS/iPhoneSimulator }
 | ||
|   iconvname:='UTF-8';
 | ||
| {$endif}
 | ||
|   toencoding:=iconvname;
 | ||
|   if not assigned(iconvctl) then
 | ||
|     toencoding:=toencoding+'//TRANSLIT';
 | ||
|   iconv_wide2ansi:=iconv_open(pchar(toencoding),unicode_encoding2);
 | ||
|   iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
 | ||
|   if assigned(iconvctl) and
 | ||
|      (iconv_wide2ansi<>iconv_t(-1)) then
 | ||
|   begin
 | ||
|     transliterate:=1;
 | ||
|     iconvctl(iconv_wide2ansi,ICONV_SET_TRANSLITERATE,@transliterate);
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| procedure FiniThread;
 | ||
| begin
 | ||
|   if (iconv_wide2ansi <> iconv_t(-1)) then
 | ||
|     iconv_close(iconv_wide2ansi);
 | ||
|   if (iconv_ansi2wide <> iconv_t(-1)) then
 | ||
|     iconv_close(iconv_ansi2wide);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| {$if defined(beos) and not defined(haiku)}
 | ||
| function nl_langinfo(__item:nl_item):pchar;
 | ||
| begin
 | ||
|   {$warning TODO BeOS nl_langinfo or more uptodate port of iconv...}
 | ||
|   // Now implement the minimum required to correctly initialize WideString support
 | ||
|   case __item of
 | ||
|     CODESET : Result := 'UTF-8'; // BeOS use UTF-8
 | ||
|     else
 | ||
|     begin
 | ||
|       Assert(False, 'nl_langinfo was called with an unknown nl_item value');
 | ||
|       Result := '';
 | ||
|     end;
 | ||
|   end;
 | ||
| end;
 | ||
| {$endif}
 | ||
| 
 | ||
| 
 | ||
| function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
 | ||
|   var
 | ||
|     iconvindex: longint;
 | ||
|     toencoding: rawbytestring;
 | ||
|     transliterate: cint;
 | ||
|   begin
 | ||
|     { TODO: add caching (then we also don't need separate code for
 | ||
|       the default system page and other ones)
 | ||
| 
 | ||
|       -- typecasting an ansistring function result to pchar is
 | ||
|         unsafe normally, but these are constant strings -> no
 | ||
|         problem }
 | ||
|     open_iconv_for_cps:=iconv_t(-1);
 | ||
|     iconvindex:=GetCodepageData(cp);
 | ||
|     if iconvindex=-1 then
 | ||
|       exit;
 | ||
|     repeat
 | ||
|       if cp_is_from then
 | ||
|         open_iconv_for_cps:=iconv_open(otherencoding,pchar(UnixCpMap[iconvindex].name))
 | ||
|       else
 | ||
|       begin
 | ||
|         toencoding:=UnixCpMap[iconvindex].name;
 | ||
|         if not assigned(iconvctl) then
 | ||
|           toencoding:=toencoding+'//TRANSLIT';
 | ||
|         open_iconv_for_cps:=iconv_open(pchar(toencoding),otherencoding);
 | ||
|       end;
 | ||
|       inc(iconvindex);
 | ||
|     until (open_iconv_for_cps<>iconv_t(-1)) or
 | ||
|           (iconvindex>high(UnixCpMap)) or
 | ||
|           (UnixCpMap[iconvindex].cp<>cp);
 | ||
|     if not cp_is_from and
 | ||
|       (open_iconv_for_cps<>iconv_t(-1)) and
 | ||
|       assigned(iconvctl) then
 | ||
|     begin
 | ||
|       transliterate:=1;
 | ||
|       iconvctl(open_iconv_for_cps,ICONV_SET_TRANSLITERATE,@transliterate);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| {$ifdef aix}
 | ||
| {$i cwstraix.inc}
 | ||
| {$endif aix}
 | ||
| 
 | ||
| procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
 | ||
|   var
 | ||
|     outlength,
 | ||
|     outoffset,
 | ||
|     srclen,
 | ||
|     outleft : size_t;
 | ||
|     use_iconv: iconv_t;
 | ||
|     srcpos : pwidechar;
 | ||
|     destpos: pchar;
 | ||
|     mynil : pchar;
 | ||
|     my0 : size_t;
 | ||
|     err : longint;
 | ||
|     transliterate: cint;
 | ||
|     free_iconv: boolean;
 | ||
| {$ifdef aix}
 | ||
|     intermediate: rawbytestring;
 | ||
| {$endif aix}
 | ||
|   begin
 | ||
| {$ifdef aix}
 | ||
|     { AIX libiconv does not support converting cp866 to anything else except
 | ||
|       for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
 | ||
|     if cp=866 then
 | ||
|       begin
 | ||
|         Wide2AnsiMove(source,intermediate,28595,len);
 | ||
|         if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
 | ||
|           exit;
 | ||
|       end;
 | ||
| {$endif aix}
 | ||
|     if (cp=DefaultSystemCodePage) then
 | ||
|       begin
 | ||
|         { update iconv converter in case the DefaultSystemCodePage has been
 | ||
|           changed }
 | ||
|         if current_DefaultSystemCodePage<>DefaultSystemCodePage then
 | ||
|           begin
 | ||
|             FiniThread;
 | ||
|             InitThread;
 | ||
|           end;
 | ||
|         use_iconv:=iconv_wide2ansi;
 | ||
|         free_iconv:=false;
 | ||
|       end
 | ||
|     else
 | ||
|       begin
 | ||
|         use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
 | ||
|         if (use_iconv<>iconv_t(-1)) and
 | ||
|            assigned(iconvctl) then
 | ||
|         begin
 | ||
|           transliterate:=1;
 | ||
|           iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
 | ||
|         end;
 | ||
|         free_iconv:=true;
 | ||
|       end;
 | ||
|     { unsupported encoding -> default move }
 | ||
|     if use_iconv=iconv_t(-1) then
 | ||
|       begin
 | ||
|         DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
 | ||
|         exit;
 | ||
|       end;
 | ||
|     mynil:=nil;
 | ||
|     my0:=0;
 | ||
|     { rought estimation }
 | ||
|     setlength(dest,len*3);
 | ||
|     outlength:=len*3;
 | ||
|     srclen:=len*2;
 | ||
|     srcpos:=source;
 | ||
|     destpos:=pchar(dest);
 | ||
|     outleft:=outlength;
 | ||
|     while iconv(use_iconv,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
 | ||
|       begin
 | ||
|         err:=fpgetCerrno;
 | ||
|         case err of
 | ||
|           { last character is incomplete sequence }
 | ||
|           ESysEINVAL,
 | ||
|           { incomplete sequence in the middle }
 | ||
|           ESysEILSEQ:
 | ||
|             begin
 | ||
|               { skip and set to '?' }
 | ||
|               inc(srcpos);
 | ||
|               dec(srclen,2);
 | ||
|               destpos^:='?';
 | ||
|               inc(destpos);
 | ||
|               dec(outleft);
 | ||
|               { reset }
 | ||
|               iconv(use_iconv,@mynil,@my0,@mynil,@my0);
 | ||
|               if err=ESysEINVAL then
 | ||
|                 break;
 | ||
|             end;
 | ||
|           ESysE2BIG:
 | ||
|             begin
 | ||
|               outoffset:=destpos-pchar(dest);
 | ||
|               { extend }
 | ||
|               setlength(dest,outlength+len*3);
 | ||
|               inc(outleft,len*3);
 | ||
|               inc(outlength,len*3);
 | ||
|               { string could have been moved }
 | ||
|               destpos:=pchar(dest)+outoffset;
 | ||
|             end;
 | ||
|           else
 | ||
|             runerror(231);
 | ||
|         end;
 | ||
|       end;
 | ||
|     // truncate string
 | ||
|     setlength(dest,length(dest)-outleft);
 | ||
|     SetCodePage(dest,cp,false);
 | ||
|     if free_iconv then
 | ||
|       iconv_close(use_iconv);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
 | ||
|   var
 | ||
|     outlength,
 | ||
|     outoffset,
 | ||
|     outleft : size_t;
 | ||
|     use_iconv: iconv_t;
 | ||
|     srcpos,
 | ||
|     destpos: pchar;
 | ||
|     mynil : pchar;
 | ||
|     my0 : size_t;
 | ||
|     err: cint;
 | ||
|     iconvindex: longint;
 | ||
|     free_iconv: boolean;
 | ||
| {$ifdef aix}
 | ||
|     intermediate: rawbytestring;
 | ||
| {$endif aix}
 | ||
|   begin
 | ||
| {$ifdef aix}
 | ||
|     { AIX libiconv does not support converting cp866 to anything else except
 | ||
|       for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
 | ||
|     if cp=866 then
 | ||
|       begin
 | ||
|         if handle_aix_intermediate(source,cp,cp,intermediate,len) then
 | ||
|           source:=pchar(intermediate);
 | ||
|       end;
 | ||
| {$endif aix}
 | ||
|     if (cp=DefaultSystemCodePage) then
 | ||
|       begin
 | ||
|         { update iconv converter in case the DefaultSystemCodePage has been
 | ||
|           changed }
 | ||
|         if current_DefaultSystemCodePage<>DefaultSystemCodePage then
 | ||
|           begin
 | ||
|             FiniThread;
 | ||
|             InitThread;
 | ||
|           end;
 | ||
|         use_iconv:=iconv_ansi2wide;
 | ||
|         free_iconv:=false;
 | ||
|       end
 | ||
|     else
 | ||
|       begin
 | ||
|         { TODO: add caching (then we also don't need separate code for
 | ||
|           the default system page and other ones)
 | ||
| 
 | ||
|           -- typecasting an ansistring function result to pchar is
 | ||
|             unsafe normally, but these are constant strings -> no
 | ||
|             problem }
 | ||
|         use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
 | ||
|         free_iconv:=true;
 | ||
|       end;
 | ||
|     { unsupported encoding -> default move }
 | ||
|     if use_iconv=iconv_t(-1) then
 | ||
|       begin
 | ||
|         DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
 | ||
|         exit;
 | ||
|       end;
 | ||
|     mynil:=nil;
 | ||
|     my0:=0;
 | ||
|     // extra space
 | ||
|     outlength:=len+1;
 | ||
|     setlength(dest,outlength);
 | ||
|     srcpos:=source;
 | ||
|     destpos:=pchar(dest);
 | ||
|     outleft:=outlength*2;
 | ||
|     while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
 | ||
|       begin
 | ||
|         err:=fpgetCerrno;
 | ||
|         case err of
 | ||
|          ESysEINVAL,
 | ||
|          ESysEILSEQ:
 | ||
|             begin
 | ||
|               { skip and set to '?' }
 | ||
|               inc(srcpos);
 | ||
|               dec(len);
 | ||
|               pwidechar(destpos)^:='?';
 | ||
|               inc(destpos,2);
 | ||
|               dec(outleft,2);
 | ||
|               { reset }
 | ||
|               iconv(use_iconv,@mynil,@my0,@mynil,@my0);
 | ||
|               if err=ESysEINVAL then
 | ||
|                 break;
 | ||
|             end;
 | ||
|           ESysE2BIG:
 | ||
|             begin
 | ||
|               outoffset:=destpos-pchar(dest);
 | ||
|               { extend }
 | ||
|               setlength(dest,outlength+len);
 | ||
|               inc(outleft,len*2);
 | ||
|               inc(outlength,len);
 | ||
|               { string could have been moved }
 | ||
|               destpos:=pchar(dest)+outoffset;
 | ||
|             end;
 | ||
|           else
 | ||
|             runerror(231);
 | ||
|         end;
 | ||
|       end;
 | ||
|     // truncate string
 | ||
|     setlength(dest,length(dest)-outleft div 2);
 | ||
|     if free_iconv then
 | ||
|       iconv_close(use_iconv);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function LowerWideString(const s : WideString) : WideString;
 | ||
|   var
 | ||
|     i : SizeInt;
 | ||
|   begin
 | ||
|     SetLength(result,length(s));
 | ||
|     for i:=0 to length(s)-1 do
 | ||
|       pwidechar(result)[i]:=WideChar(towlower(wint_t(s[i+1])));
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function UpperWideString(const s : WideString) : WideString;
 | ||
|   var
 | ||
|     i : SizeInt;
 | ||
|   begin
 | ||
|     SetLength(result,length(s));
 | ||
|     for i:=0 to length(s)-1 do
 | ||
|       pwidechar(result)[i]:=WideChar(towupper(wint_t(s[i+1])));
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
 | ||
| begin
 | ||
|   if (len>length(s)) then
 | ||
|     if (length(s) < 10*256) then
 | ||
|       setlength(s,length(s)+10)
 | ||
|     else
 | ||
|       setlength(s,length(s)+length(s) shr 8);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
 | ||
| begin
 | ||
|   EnsureAnsiLen(s,index);
 | ||
|   pchar(@s[index])^:=c;
 | ||
|   inc(index);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| { concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
| procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
 | ||
| {$else not beos}
 | ||
| procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
 | ||
| {$endif beos}
 | ||
| var
 | ||
|   p     : pchar;
 | ||
|   mblen : size_t;
 | ||
| begin
 | ||
|   { we know that s is unique -> avoid uniquestring calls}
 | ||
|   p:=@s[index];
 | ||
|   if (nc<=127) then
 | ||
|     ConcatCharToAnsiStr(char(nc),s,index)
 | ||
|   else
 | ||
|     begin
 | ||
|       EnsureAnsiLen(s,index+MB_CUR_MAX);
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|       mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
 | ||
| {$else not beos}
 | ||
|       mblen:=wctomb(p,wchar_t(nc));
 | ||
| {$endif not beos}
 | ||
|       if (mblen<>size_t(-1)) then
 | ||
|         inc(index,mblen)
 | ||
|       else
 | ||
|         begin
 | ||
|           { invalid wide char }
 | ||
|           p^:='?';
 | ||
|           inc(index);
 | ||
|         end;
 | ||
|     end;
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function LowerAnsiString(const s : AnsiString) : AnsiString;
 | ||
|   var
 | ||
|     i, slen,
 | ||
|     resindex : SizeInt;
 | ||
|     mblen    : size_t;
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     ombstate,
 | ||
|     nmbstate : mbstate_t;
 | ||
| {$endif beos}
 | ||
|     wc       : wchar_t;
 | ||
|   begin
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     fillchar(ombstate,sizeof(ombstate),0);
 | ||
|     fillchar(nmbstate,sizeof(nmbstate),0);
 | ||
| {$endif beos}
 | ||
|     slen:=length(s);
 | ||
|     SetLength(result,slen+10);
 | ||
|     i:=1;
 | ||
|     resindex:=1;
 | ||
|     while (i<=slen) do
 | ||
|       begin
 | ||
|         if (s[i]<=#127) then
 | ||
|           begin
 | ||
|             wc:=wchar_t(s[i]);
 | ||
|             mblen:= 1;
 | ||
|           end
 | ||
|         else
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|           mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
 | ||
| {$else not beos}
 | ||
|           mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
 | ||
| {$endif not beos}
 | ||
|         case mblen of
 | ||
|           size_t(-2):
 | ||
|             begin
 | ||
|               { partial invalid character, copy literally }
 | ||
|               while (i<=slen) do
 | ||
|                 begin
 | ||
|                   ConcatCharToAnsiStr(s[i],result,resindex);
 | ||
|                   inc(i);
 | ||
|                 end;
 | ||
|             end;
 | ||
|           size_t(-1), 0:
 | ||
|             begin
 | ||
|               { invalid or null character }
 | ||
|               ConcatCharToAnsiStr(s[i],result,resindex);
 | ||
|               inc(i);
 | ||
|             end;
 | ||
|           else
 | ||
|             begin
 | ||
|               { a valid sequence }
 | ||
|               { even if mblen = 1, the lowercase version may have a }
 | ||
|               { different length                                     }
 | ||
|               { We can't do anything special if wchar_t is 16 bit... }
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|               ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
 | ||
| {$else not beos}
 | ||
|               ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
 | ||
| {$endif not beos}
 | ||
|               inc(i,mblen);
 | ||
|             end;
 | ||
|           end;
 | ||
|       end;
 | ||
|     SetLength(result,resindex-1);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function UpperAnsiString(const s : AnsiString) : AnsiString;
 | ||
|   var
 | ||
|     i, slen,
 | ||
|     resindex : SizeInt;
 | ||
|     mblen    : size_t;
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     ombstate,
 | ||
|     nmbstate : mbstate_t;
 | ||
| {$endif beos}
 | ||
|     wc       : wchar_t;
 | ||
|   begin
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     fillchar(ombstate,sizeof(ombstate),0);
 | ||
|     fillchar(nmbstate,sizeof(nmbstate),0);
 | ||
| {$endif beos}
 | ||
|     slen:=length(s);
 | ||
|     SetLength(result,slen+10);
 | ||
|     i:=1;
 | ||
|     resindex:=1;
 | ||
|     while (i<=slen) do
 | ||
|       begin
 | ||
|         if (s[i]<=#127) then
 | ||
|           begin
 | ||
|             wc:=wchar_t(s[i]);
 | ||
|             mblen:= 1;
 | ||
|           end
 | ||
|         else
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|           mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
 | ||
| {$else not beos}
 | ||
|           mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
 | ||
| {$endif beos}
 | ||
|         case mblen of
 | ||
|           size_t(-2):
 | ||
|             begin
 | ||
|               { partial invalid character, copy literally }
 | ||
|               while (i<=slen) do
 | ||
|                 begin
 | ||
|                   ConcatCharToAnsiStr(s[i],result,resindex);
 | ||
|                   inc(i);
 | ||
|                 end;
 | ||
|             end;
 | ||
|           size_t(-1), 0:
 | ||
|             begin
 | ||
|               { invalid or null character }
 | ||
|               ConcatCharToAnsiStr(s[i],result,resindex);
 | ||
|               inc(i);
 | ||
|             end;
 | ||
|           else
 | ||
|             begin
 | ||
|               { a valid sequence }
 | ||
|               { even if mblen = 1, the uppercase version may have a }
 | ||
|               { different length                                     }
 | ||
|               { We can't do anything special if wchar_t is 16 bit... }
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|               ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
 | ||
| {$else not beos}
 | ||
|               ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex);
 | ||
| {$endif not beos}
 | ||
|               inc(i,mblen);
 | ||
|             end;
 | ||
|           end;
 | ||
|       end;
 | ||
|     SetLength(result,resindex-1);
 | ||
|   end;
 | ||
| 
 | ||
| function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
 | ||
|   var
 | ||
|     i, slen,
 | ||
|     destindex : SizeInt;
 | ||
|     uch       : UCS4Char;
 | ||
|   begin
 | ||
|     slen:=length(s);
 | ||
|     setlength(result,slen+1);
 | ||
|     i:=1;
 | ||
|     destindex:=0;
 | ||
|     while (i<=slen) do
 | ||
|       begin
 | ||
|         uch:=UCS4Char(s[i]);
 | ||
|         if (uch=0) then
 | ||
|           result[destindex]:=32
 | ||
|         else if (uch<=$d7ff) or (uch>=$e000) then
 | ||
|           result[destindex]:=uch
 | ||
|         else if (uch<=$dbff) and
 | ||
|           (i<slen) and
 | ||
|           (s[i+1]>=#$dc00) and
 | ||
|           (s[i+1]<=#$dfff) then
 | ||
|           begin
 | ||
|             result[destindex]:=(UCS4Char(uch-$d7c0) shl 10)+(UCS4Char(s[i+1]) xor $dc00);
 | ||
|             inc(i);
 | ||
|           end
 | ||
|         else { invalid surrogate pair }
 | ||
|           result[destindex]:=uch;
 | ||
|         inc(i);
 | ||
|         inc(destindex);
 | ||
|       end;
 | ||
|     result[destindex]:=UCS4Char(0);
 | ||
|     { Trimming length in this particular case is just a waste of time,
 | ||
|       because result will be interpreted as null-terminated and discarded
 | ||
|       almost immediately }
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
 | ||
| {$if not(defined (aix) and defined(cpupowerpc32))}
 | ||
|   var
 | ||
|     hs1,hs2 : UCS4String;
 | ||
|     us1,us2 : WideString;
 | ||
| 
 | ||
|   begin
 | ||
|     { wcscoll interprets null chars as end-of-string -> filter out }
 | ||
|     if coIgnoreCase in Options then
 | ||
|       begin
 | ||
|       us1:=UpperWideString(s1);
 | ||
|       us2:=UpperWideString(s2);
 | ||
|       end
 | ||
|     else
 | ||
|       begin
 | ||
|       us1:=s1;
 | ||
|       us2:=s2;
 | ||
|       end;
 | ||
|     hs1:=WideStringToUCS4StringNoNulls(us1);
 | ||
|     hs2:=WideStringToUCS4StringNoNulls(us2);
 | ||
|     result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
 | ||
|   end;
 | ||
| {$else}
 | ||
|   { AIX/PPC32 has a 16 bit wchar_t }
 | ||
|   var
 | ||
|     i, len: longint;
 | ||
|     us1,us2 : WideString;
 | ||
|     hs1, hs2: array of widechar;
 | ||
|   begin
 | ||
|     if coIgnoreCase in Options then
 | ||
|       begin
 | ||
|       us1:=UpperWideString(s1);
 | ||
|       us2:=UpperWideString(s2);
 | ||
|       end
 | ||
|     else
 | ||
|       begin
 | ||
|       us1:=s1;
 | ||
|       us2:=s2;
 | ||
|       end;
 | ||
|     len:=length(us1);
 | ||
|     setlength(hs1,len+1);
 | ||
|     for i:=1 to len do
 | ||
|       if us1[i]<>#0 then
 | ||
|         hs1[i-1]:=us1[i]
 | ||
|       else
 | ||
|         hs1[i-1]:=#32;
 | ||
|     hs1[len]:=#0;
 | ||
| 
 | ||
|     len:=length(us2);
 | ||
|     setlength(hs2,len+1);
 | ||
|     for i:=1 to len do
 | ||
|       if us2[i]<>#0 then
 | ||
|         hs2[i-1]:=us2[i]
 | ||
|       else
 | ||
|         hs2[i-1]:=#32;
 | ||
|     hs2[len]:=#0;
 | ||
|     result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
 | ||
|   end;
 | ||
| {$endif}
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| { return value: number of code points in the string. Whenever an invalid
 | ||
|   code point is encountered, all characters part of this invalid code point
 | ||
|   are considered to form one "character" and the next character is
 | ||
|   considered to be the start of a new (possibly also invalid) code point }
 | ||
| function CharLengthPChar(const Str: PChar): PtrInt;
 | ||
|   var
 | ||
|     nextlen: ptrint;
 | ||
|     s: pchar;
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     mbstate: mbstate_t;
 | ||
| {$endif not beos}
 | ||
|   begin
 | ||
|     result:=0;
 | ||
|     s:=str;
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|     fillchar(mbstate,sizeof(mbstate),0);
 | ||
| {$endif not beos}
 | ||
|     repeat
 | ||
| {$if defined(beos) and not defined(haiku)}
 | ||
|       nextlen:=ptrint(mblen(s,MB_CUR_MAX));
 | ||
| {$else beos}
 | ||
|       nextlen:=ptrint(mbrlen(s,MB_CUR_MAX,@mbstate));
 | ||
| {$endif beos}
 | ||
|       { skip invalid/incomplete sequences }
 | ||
|       if (nextlen<0) then
 | ||
|         nextlen:=1;
 | ||
|       inc(result,1);
 | ||
|       inc(s,nextlen);
 | ||
|     until (nextlen=0);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
 | ||
| {$if not(defined(beos) and not defined(haiku))}
 | ||
|   var
 | ||
|     mbstate: mbstate_t;
 | ||
| {$endif not beos}
 | ||
|   begin
 | ||
| {$if defined(beos) and not defined(haiku)}
 | ||
|     result:=ptrint(mblen(str,maxlookahead));
 | ||
| {$else beos}
 | ||
|     fillchar(mbstate,sizeof(mbstate),0);
 | ||
|     result:=ptrint(mbrlen(str,maxlookahead,@mbstate));
 | ||
|     { mbrlen can also return -2 for "incomplete but potially valid character
 | ||
|       and data has been processed" }
 | ||
|     if result<0 then
 | ||
|       result:=-1;
 | ||
| {$endif beos}
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function StrCompAnsiIntern(s1,s2 : PChar; len1, len2: PtrInt; canmodifys1, canmodifys2: boolean): PtrInt;
 | ||
|   var
 | ||
|     a,b: pchar;
 | ||
|     i: PtrInt;
 | ||
|   begin
 | ||
|     if not(canmodifys1) then
 | ||
|       getmem(a,len1+1)
 | ||
|     else
 | ||
|       a:=s1;
 | ||
|     for i:=0 to len1-1 do
 | ||
|       if s1[i]<>#0 then
 | ||
|         a[i]:=s1[i]
 | ||
|       else
 | ||
|         a[i]:=#32;
 | ||
|     a[len1]:=#0;
 | ||
| 
 | ||
|     if not(canmodifys2) then
 | ||
|       getmem(b,len2+1)
 | ||
|     else
 | ||
|       b:=s2;
 | ||
|     for i:=0 to len2-1 do
 | ||
|       if s2[i]<>#0 then
 | ||
|         b[i]:=s2[i]
 | ||
|       else
 | ||
|         b[i]:=#32;
 | ||
|     b[len2]:=#0;
 | ||
|     result:=strcoll(a,b);
 | ||
|     if not(canmodifys1) then
 | ||
|       freemem(a);
 | ||
|     if not(canmodifys2) then
 | ||
|       freemem(b);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
 | ||
|   begin
 | ||
|     result:=StrCompAnsiIntern(pchar(s1),pchar(s2),length(s1),length(s2),false,false);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function StrCompAnsi(s1,s2 : PChar): PtrInt;
 | ||
|   begin
 | ||
|     result:=strcoll(s1,s2);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiCompareText(const S1, S2: ansistring): PtrInt;
 | ||
|   var
 | ||
|     a, b: AnsiString;
 | ||
|   begin
 | ||
|     a:=UpperAnsistring(s1);
 | ||
|     b:=UpperAnsistring(s2);
 | ||
|     result:=StrCompAnsiIntern(pchar(a),pchar(b),length(a),length(b),true,true);
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiStrIComp(S1, S2: PChar): PtrInt;
 | ||
|   begin
 | ||
|     result:=AnsiCompareText(ansistring(s1),ansistring(s2));
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
 | ||
|   var
 | ||
|     a, b: pchar;
 | ||
| begin
 | ||
|   if (maxlen=0) then
 | ||
|     exit(0);
 | ||
|   if (s1[maxlen]<>#0) then
 | ||
|     begin
 | ||
|       getmem(a,maxlen+1);
 | ||
|       move(s1^,a^,maxlen);
 | ||
|       a[maxlen]:=#0;
 | ||
|     end
 | ||
|   else
 | ||
|     a:=s1;
 | ||
|   if (s2[maxlen]<>#0) then
 | ||
|     begin
 | ||
|       getmem(b,maxlen+1);
 | ||
|       move(s2^,b^,maxlen);
 | ||
|       b[maxlen]:=#0;
 | ||
|     end
 | ||
|   else
 | ||
|     b:=s2;
 | ||
|   result:=StrCompAnsiIntern(a,b,maxlen,maxlen,a<>s1,b<>s2);
 | ||
|   if (a<>s1) then
 | ||
|     freemem(a);
 | ||
|   if (b<>s2) then
 | ||
|     freemem(b);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
 | ||
|   var
 | ||
|     a, b: ansistring;
 | ||
| begin
 | ||
|   if (maxlen=0) then
 | ||
|     exit(0);
 | ||
|   setlength(a,maxlen);
 | ||
|   move(s1^,a[1],maxlen);
 | ||
|   setlength(b,maxlen);
 | ||
|   move(s2^,b[1],maxlen);
 | ||
|   result:=AnsiCompareText(a,b);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| procedure ansi2pchar(const s: ansistring; const orgp: pchar; out p: pchar);
 | ||
| var
 | ||
|   newlen: sizeint;
 | ||
| begin
 | ||
|   newlen:=length(s);
 | ||
|   if newlen>strlen(orgp) then
 | ||
|     fpc_rangeerror;
 | ||
|   p:=orgp;
 | ||
|   if (newlen>0) then
 | ||
|     move(s[1],p[0],newlen);
 | ||
|   p[newlen]:=#0;
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiStrLower(Str: PChar): PChar;
 | ||
| var
 | ||
|   temp: ansistring;
 | ||
| begin
 | ||
|   temp:=loweransistring(str);
 | ||
|   ansi2pchar(temp,str,result);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function AnsiStrUpper(Str: PChar): PChar;
 | ||
| var
 | ||
|   temp: ansistring;
 | ||
| begin
 | ||
|   temp:=upperansistring(str);
 | ||
|   ansi2pchar(temp,str,result);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function envvarset(const varname: pchar): boolean;
 | ||
| var
 | ||
|   varval: pchar;
 | ||
| begin
 | ||
|   varval:=fpgetenv(varname);
 | ||
|   result:=
 | ||
|     assigned(varval) and
 | ||
|     (varval[0]<>#0);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
 | ||
| var
 | ||
|   langinfo: pchar;
 | ||
| begin
 | ||
| {$ifdef FPCRTL_FILESYSTEM_UTF8}
 | ||
|   if stdcp=scpFileSystemSingleByte then
 | ||
|     begin
 | ||
|       result:=CP_UTF8;
 | ||
|       exit;
 | ||
|     end;
 | ||
| {$endif}
 | ||
|   { if none of the relevant LC_* environment variables are set, fall back to
 | ||
|     UTF-8 (this happens under some versions of OS X for GUI applications, which
 | ||
|     otherwise get CP_ASCII) }
 | ||
|   if envvarset('LC_ALL') or
 | ||
|      envvarset('LC_CTYPE') or
 | ||
|      envvarset('LANG') then
 | ||
|     begin
 | ||
|       langinfo:=nl_langinfo(CODESET);
 | ||
|       { there's a bug in the Mac OS X 10.5 libc (based on FreeBSD's)
 | ||
|         that causes it to return an empty string of UTF-8 locales
 | ||
|         -> patch up (and in general, UTF-8 is a good default on
 | ||
|         Unix platforms) }
 | ||
|       if not assigned(langinfo) or
 | ||
|          (langinfo^=#0) then
 | ||
|         langinfo:='UTF-8';
 | ||
|       Result:=GetCodepageByName(ansistring(langinfo));
 | ||
|     end
 | ||
|   else
 | ||
|     Result:=unixcp.GetSystemCodepage;
 | ||
| end;
 | ||
| 
 | ||
| {$ifdef FPC_HAS_CPSTRING}
 | ||
| 
 | ||
| procedure SetStdIOCodePage(var T: Text); inline;
 | ||
| begin
 | ||
|   case TextRec(T).Mode of
 | ||
|     fmInput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleInput);
 | ||
|     fmOutput:TextRec(T).CodePage:=GetStandardCodePage(scpConsoleOutput);
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| procedure SetStdIOCodePages; inline;
 | ||
| begin
 | ||
|   SetStdIOCodePage(Input);
 | ||
|   SetStdIOCodePage(Output);
 | ||
|   SetStdIOCodePage(ErrOutput);
 | ||
|   SetStdIOCodePage(StdOut);
 | ||
|   SetStdIOCodePage(StdErr);
 | ||
| end;
 | ||
| {$endif FPC_HAS_CPSTRING}
 | ||
| 
 | ||
| var
 | ||
|   OrgWideStringManager: TUnicodeStringManager;
 | ||
| 
 | ||
| Procedure SetCWideStringManager;
 | ||
| Var
 | ||
|   CWideStringManager : TUnicodeStringManager;
 | ||
| begin
 | ||
|   GetUnicodeStringManager(OrgWideStringManager);
 | ||
|   CWideStringManager:=OrgWideStringManager;
 | ||
|   With CWideStringManager do
 | ||
|     begin
 | ||
|       Wide2AnsiMoveProc:=@Wide2AnsiMove;
 | ||
|       Ansi2WideMoveProc:=@Ansi2WideMove;
 | ||
| 
 | ||
|       UpperWideStringProc:=@UpperWideString;
 | ||
|       LowerWideStringProc:=@LowerWideString;
 | ||
| 
 | ||
|       CompareWideStringProc:=@CompareWideString;
 | ||
| //      CompareTextWideStringProc:=@CompareTextWideString;
 | ||
| 
 | ||
|       CharLengthPCharProc:=@CharLengthPChar;
 | ||
|       CodePointLengthProc:=@CodePointLength;
 | ||
| 
 | ||
|       UpperAnsiStringProc:=@UpperAnsiString;
 | ||
|       LowerAnsiStringProc:=@LowerAnsiString;
 | ||
|       CompareStrAnsiStringProc:=@CompareStrAnsiString;
 | ||
|       CompareTextAnsiStringProc:=@AnsiCompareText;
 | ||
|       StrCompAnsiStringProc:=@StrCompAnsi;
 | ||
|       StrICompAnsiStringProc:=@AnsiStrIComp;
 | ||
|       StrLCompAnsiStringProc:=@AnsiStrLComp;
 | ||
|       StrLICompAnsiStringProc:=@AnsiStrLIComp;
 | ||
|       StrLowerAnsiStringProc:=@AnsiStrLower;
 | ||
|       StrUpperAnsiStringProc:=@AnsiStrUpper;
 | ||
|       ThreadInitProc:=@InitThread;
 | ||
|       ThreadFiniProc:=@FiniThread;
 | ||
|       { Unicode }
 | ||
|       Unicode2AnsiMoveProc:=@Wide2AnsiMove;
 | ||
|       Ansi2UnicodeMoveProc:=@Ansi2WideMove;
 | ||
|       UpperUnicodeStringProc:=@UpperWideString;
 | ||
|       LowerUnicodeStringProc:=@LowerWideString;
 | ||
|       CompareUnicodeStringProc:=@CompareWideString;
 | ||
|       { CodePage }
 | ||
|       GetStandardCodePageProc:=@GetStandardCodePage;
 | ||
|     end;
 | ||
|   SetUnicodeStringManager(CWideStringManager);
 | ||
| end;
 | ||
| 
 | ||
| var
 | ||
|   iconvlib:TLibHandle;
 | ||
| 
 | ||
| initialization
 | ||
|   SetCWideStringManager;
 | ||
| 
 | ||
|   { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff  }
 | ||
|   { with the information from the environment variables according to POSIX  }
 | ||
|   { (some OSes do this automatically, but e.g. Darwin and Solaris don't)    }
 | ||
|   setlocale(LC_ALL,'');
 | ||
| 
 | ||
|   { load iconv library and iconvctl function }
 | ||
|   iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix);
 | ||
|   if iconvlib=0 then
 | ||
|     iconvlib:=LoadLibrary(libprefix+libiconvname+'.'+SharedSuffix+'.6');
 | ||
|   if iconvlib<>0 then
 | ||
|     pointer(iconvctl):=GetProcAddress(iconvlib,iconvctlname);
 | ||
| 
 | ||
|   { set the DefaultSystemCodePage }
 | ||
|   DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
 | ||
|   DefaultFileSystemCodePage:=GetStandardCodePage(scpFileSystemSingleByte);
 | ||
|   DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;
 | ||
| 
 | ||
|   {$ifdef FPC_HAS_CPSTRING}
 | ||
|   SetStdIOCodePages;
 | ||
|   {$endif FPC_HAS_CPSTRING}
 | ||
| 
 | ||
|   { init conversion tables for main program }
 | ||
|   InitThread;
 | ||
| finalization
 | ||
|   { fini conversion tables for main program }
 | ||
|   FiniThread;
 | ||
|   { unload iconv library }
 | ||
|   if iconvlib<>0 then
 | ||
|     FreeLibrary(iconvlib);
 | ||
|   { restore previous (probably default) widestring manager so that subsequent calls
 | ||
|     into the widestring manager won't trigger the finalized functionality }
 | ||
|   SetWideStringManager(OrgWideStringManager);
 | ||
| end.
 | 
