mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-11-04 09:02:22 +01:00 
			
		
		
		
	manager now has two extra parameterless procedures (ThreadInitProc
    and ThreadFiniProc) which are called whenever a thread begins/ends,
    and cwstring uses these to create separate iconv handles for
    each thread (via threadvars)
  * renamed UCS4 to UCS-4BE/LE, because UCS4 is not recognised by most
    systems
  * clean up all iconv handles on exit, and check whether they are
    valid before doing so
git-svn-id: trunk@7949 -
		
	
			
		
			
				
	
	
		
			377 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			377 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
{
 | 
						||
    This file is part of the Free Pascal run time library.
 | 
						||
    Copyright (c) 2005 by Florian Klaempfl,
 | 
						||
    member of the Free Pascal development team.
 | 
						||
 | 
						||
    libc based wide string support
 | 
						||
 | 
						||
    See the file COPYING.FPC, included in this distribution,
 | 
						||
    for details about the copyright.
 | 
						||
 | 
						||
    This program is distributed in the hope that it will be useful,
 | 
						||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | 
						||
 **********************************************************************}
 | 
						||
 | 
						||
{$mode objfpc}
 | 
						||
 | 
						||
unit cwstring;
 | 
						||
 | 
						||
interface
 | 
						||
 | 
						||
procedure SetCWidestringManager;
 | 
						||
 | 
						||
implementation
 | 
						||
 | 
						||
{$linklib c}
 | 
						||
 | 
						||
{$if not defined(linux) and not defined(solaris)}  // Linux (and maybe glibc platforms in general), have iconv in glibc.
 | 
						||
 {$linklib iconv}
 | 
						||
 {$define useiconv}
 | 
						||
{$endif linux}
 | 
						||
 | 
						||
Uses
 | 
						||
  BaseUnix,
 | 
						||
  ctypes,
 | 
						||
  unix,
 | 
						||
  unixtype,
 | 
						||
  initc;
 | 
						||
 | 
						||
Const
 | 
						||
{$ifndef useiconv}
 | 
						||
    libiconvname='c';  // is in libc under Linux.
 | 
						||
{$else}
 | 
						||
    libiconvname='iconv';
 | 
						||
{$endif}
 | 
						||
 | 
						||
{ helper functions from libc }
 | 
						||
function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
 | 
						||
function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
 | 
						||
function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
 | 
						||
function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
 | 
						||
function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
 | 
						||
 | 
						||
const
 | 
						||
{$ifdef linux}
 | 
						||
  __LC_CTYPE = 0;
 | 
						||
  LC_ALL = 6;
 | 
						||
  _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
 | 
						||
  _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
 | 
						||
  CODESET = _NL_CTYPE_CODESET_NAME;
 | 
						||
{$else linux}
 | 
						||
{$ifdef darwin}
 | 
						||
  CODESET = 0;
 | 
						||
  LC_ALL = 0;
 | 
						||
{$else darwin}
 | 
						||
{$ifdef FreeBSD} // actually FreeBSD5. internationalisation is afaik not default on 4.
 | 
						||
  __LC_CTYPE = 0;
 | 
						||
  LC_ALL = 0;
 | 
						||
  _NL_CTYPE_CLASS = (__LC_CTYPE shl 16);
 | 
						||
  _NL_CTYPE_CODESET_NAME = (_NL_CTYPE_CLASS)+14;
 | 
						||
  CODESET = 0; // _NL_CTYPE_CODESET_NAME;
 | 
						||
{$else freebsd}
 | 
						||
{$ifdef solaris}
 | 
						||
  CODESET=49;
 | 
						||
  LC_ALL = 6;
 | 
						||
{$else}
 | 
						||
{$error lookup the value of CODESET in /usr/include/langinfo.h, and the value of LC_ALL in /usr/include/locale.h for your OS }
 | 
						||
// and while doing it, check if iconv is in libc, and if the symbols are prefixed with iconv_ or libiconv_
 | 
						||
{$endif solaris}
 | 
						||
{$endif FreeBSD}
 | 
						||
{$endif darwin}
 | 
						||
{$endif linux}
 | 
						||
 | 
						||
{ unicode encoding name }
 | 
						||
{$ifdef FPC_LITTLE_ENDIAN}
 | 
						||
  unicode_encoding2 = 'UTF-16LE';
 | 
						||
  unicode_encoding4 = 'UCS-4LE'; 
 | 
						||
{$else  FPC_LITTLE_ENDIAN}
 | 
						||
  unicode_encoding2 = 'UTF-16BE';
 | 
						||
  unicode_encoding4 = 'UCS-4BE';
 | 
						||
{$endif  FPC_LITTLE_ENDIAN}
 | 
						||
 | 
						||
type
 | 
						||
  piconv_t = ^iconv_t;
 | 
						||
  iconv_t = pointer;
 | 
						||
  nl_item = cint;
 | 
						||
 | 
						||
function nl_langinfo(__item:nl_item):pchar;cdecl;external libiconvname name 'nl_langinfo';
 | 
						||
{$ifndef bsd}
 | 
						||
function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'iconv_open';
 | 
						||
function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'iconv';
 | 
						||
function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'iconv_close';
 | 
						||
{$else}
 | 
						||
function iconv_open(__tocode:pchar; __fromcode:pchar):iconv_t;cdecl;external libiconvname name 'libiconv_open';
 | 
						||
function iconv(__cd:iconv_t; __inbuf:ppchar; __inbytesleft:psize_t; __outbuf:ppchar; __outbytesleft:psize_t):size_t;cdecl;external libiconvname name 'libiconv';
 | 
						||
function iconv_close(__cd:iconv_t):cint;cdecl;external libiconvname name 'libiconv_close';
 | 
						||
{$endif}
 | 
						||
 | 
						||
threadvar
 | 
						||
  iconv_ansi2ucs4,
 | 
						||
  iconv_ucs42ansi,
 | 
						||
  iconv_ansi2wide,
 | 
						||
  iconv_wide2ansi : iconv_t;
 | 
						||
 
 | 
						||
procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
 | 
						||
  var
 | 
						||
    outlength,
 | 
						||
    outoffset,
 | 
						||
    srclen,
 | 
						||
    outleft : size_t;
 | 
						||
    srcpos : pwidechar;
 | 
						||
    destpos: pchar;
 | 
						||
    mynil : pchar;
 | 
						||
    my0 : size_t;
 | 
						||
  begin
 | 
						||
    mynil:=nil;
 | 
						||
    my0:=0;
 | 
						||
    { rought estimation }
 | 
						||
    setlength(dest,len*3);
 | 
						||
    outlength:=len*3;
 | 
						||
    srclen:=len*2;
 | 
						||
    srcpos:=source;
 | 
						||
    destpos:=pchar(dest);
 | 
						||
    outleft:=outlength;
 | 
						||
    while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
 | 
						||
      begin
 | 
						||
        case fpgetCerrno of
 | 
						||
          ESysEILSEQ:
 | 
						||
            begin
 | 
						||
              { skip and set to '?' }
 | 
						||
              inc(srcpos);
 | 
						||
              dec(srclen,2);
 | 
						||
              destpos^:='?';
 | 
						||
              inc(destpos);
 | 
						||
              dec(outleft);
 | 
						||
              { reset }
 | 
						||
              iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
 | 
						||
            end;
 | 
						||
          ESysE2BIG:
 | 
						||
            begin
 | 
						||
              outoffset:=destpos-pchar(dest);
 | 
						||
              { extend }
 | 
						||
              setlength(dest,outlength+len*3);
 | 
						||
              inc(outleft,len*3);
 | 
						||
              inc(outlength,len*3);
 | 
						||
              { string could have been moved }
 | 
						||
              destpos:=pchar(dest)+outoffset;
 | 
						||
            end;
 | 
						||
          else
 | 
						||
            runerror(231);
 | 
						||
        end;
 | 
						||
      end;
 | 
						||
    // truncate string
 | 
						||
    setlength(dest,length(dest)-outleft);
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
 | 
						||
  var
 | 
						||
    outlength,
 | 
						||
    outoffset,
 | 
						||
    outleft : size_t;
 | 
						||
    srcpos,
 | 
						||
    destpos: pchar;
 | 
						||
    mynil : pchar;
 | 
						||
    my0 : size_t;
 | 
						||
  begin
 | 
						||
    mynil:=nil;
 | 
						||
    my0:=0;
 | 
						||
    // extra space
 | 
						||
    outlength:=len+1;
 | 
						||
    setlength(dest,outlength);
 | 
						||
    outlength:=len+1;
 | 
						||
    srcpos:=source;
 | 
						||
    destpos:=pchar(dest);
 | 
						||
    outleft:=outlength*2;
 | 
						||
    while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
 | 
						||
      begin
 | 
						||
        case fpgetCerrno of
 | 
						||
         ESysEILSEQ:
 | 
						||
            begin
 | 
						||
              { skip and set to '?' }
 | 
						||
              inc(srcpos);
 | 
						||
              dec(len);
 | 
						||
              pwidechar(destpos)^:='?';
 | 
						||
              inc(destpos,2);
 | 
						||
              dec(outleft,2);
 | 
						||
              { reset }
 | 
						||
              iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
 | 
						||
            end;
 | 
						||
          ESysE2BIG:
 | 
						||
            begin
 | 
						||
              outoffset:=destpos-pchar(dest);
 | 
						||
              { extend }
 | 
						||
              setlength(dest,outlength+len);
 | 
						||
              inc(outleft,len*2);
 | 
						||
              inc(outlength,len);
 | 
						||
              { string could have been moved }
 | 
						||
              destpos:=pchar(dest)+outoffset;
 | 
						||
            end;
 | 
						||
          else
 | 
						||
            runerror(231);
 | 
						||
        end;
 | 
						||
      end;
 | 
						||
    // truncate string
 | 
						||
    setlength(dest,length(dest)-outleft div 2);
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
function LowerWideString(const s : WideString) : WideString;
 | 
						||
  var
 | 
						||
    i : SizeInt;
 | 
						||
  begin
 | 
						||
    SetLength(result,length(s));
 | 
						||
    for i:=1 to length(s) do
 | 
						||
      result[i]:=WideChar(towlower(wint_t(s[i])));
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
function UpperWideString(const s : WideString) : WideString;
 | 
						||
  var
 | 
						||
    i : SizeInt;
 | 
						||
  begin
 | 
						||
    SetLength(result,length(s));
 | 
						||
    for i:=1 to length(s) do
 | 
						||
      result[i]:=WideChar(towupper(wint_t(s[i])));
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
 | 
						||
  var
 | 
						||
    outlength,
 | 
						||
    outoffset,
 | 
						||
    outleft : size_t;
 | 
						||
    srcpos,
 | 
						||
    destpos: pchar;
 | 
						||
    mynil : pchar;
 | 
						||
    my0 : size_t;
 | 
						||
  begin
 | 
						||
    mynil:=nil;
 | 
						||
    my0:=0;
 | 
						||
    // extra space
 | 
						||
    outlength:=len+1;
 | 
						||
    setlength(dest,outlength);
 | 
						||
    outlength:=len+1;
 | 
						||
    srcpos:=source;
 | 
						||
    destpos:=pchar(dest);
 | 
						||
    outleft:=outlength*4;
 | 
						||
    while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
 | 
						||
      begin
 | 
						||
        case fpgetCerrno of
 | 
						||
          ESysE2BIG:
 | 
						||
            begin
 | 
						||
              outoffset:=destpos-pchar(dest);
 | 
						||
              { extend }
 | 
						||
              setlength(dest,outlength+len);
 | 
						||
              inc(outleft,len*4);
 | 
						||
              inc(outlength,len);
 | 
						||
              { string could have been moved }
 | 
						||
              destpos:=pchar(dest)+outoffset;
 | 
						||
            end;
 | 
						||
          else
 | 
						||
            runerror(231);
 | 
						||
        end;
 | 
						||
      end;
 | 
						||
    // truncate string
 | 
						||
    setlength(dest,length(dest)-outleft div 4);
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
function CompareWideString(const s1, s2 : WideString) : PtrInt;
 | 
						||
  var
 | 
						||
    hs1,hs2 : UCS4String;
 | 
						||
  begin
 | 
						||
    hs1:=WideStringToUCS4String(s1);
 | 
						||
    hs2:=WideStringToUCS4String(s2);
 | 
						||
    result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
function CompareTextWideString(const s1, s2 : WideString): PtrInt;
 | 
						||
  begin
 | 
						||
    result:=CompareWideString(UpperWideString(s1),UpperWideString(s2));
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
function StrCompAnsi(s1,s2 : PChar): PtrInt;
 | 
						||
  begin
 | 
						||
    result:=strcoll(s1,s2);
 | 
						||
  end;
 | 
						||
 | 
						||
 | 
						||
procedure InitThread;
 | 
						||
begin
 | 
						||
  iconv_wide2ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding2);
 | 
						||
  iconv_ansi2wide:=iconv_open(unicode_encoding2,nl_langinfo(CODESET));
 | 
						||
  iconv_ucs42ansi:=iconv_open(nl_langinfo(CODESET),unicode_encoding4);
 | 
						||
  iconv_ansi2ucs4:=iconv_open(unicode_encoding4,nl_langinfo(CODESET));
 | 
						||
end;
 | 
						||
 | 
						||
 | 
						||
procedure FiniThread;
 | 
						||
begin
 | 
						||
  if (iconv_wide2ansi <> iconv_t(-1)) then
 | 
						||
    iconv_close(iconv_wide2ansi);
 | 
						||
  if (iconv_ansi2wide <> iconv_t(-1)) then
 | 
						||
    iconv_close(iconv_ansi2wide);
 | 
						||
  if (iconv_ucs42ansi <> iconv_t(-1)) then
 | 
						||
    iconv_close(iconv_ucs42ansi);
 | 
						||
  if (iconv_ansi2ucs4 <> iconv_t(-1)) then
 | 
						||
    iconv_close(iconv_ansi2ucs4);
 | 
						||
end;
 | 
						||
 | 
						||
 | 
						||
Procedure SetCWideStringManager;
 | 
						||
Var
 | 
						||
  CWideStringManager : TWideStringManager;
 | 
						||
begin
 | 
						||
  CWideStringManager:=widestringmanager;
 | 
						||
  With CWideStringManager do
 | 
						||
    begin
 | 
						||
      Wide2AnsiMoveProc:=@Wide2AnsiMove;
 | 
						||
      Ansi2WideMoveProc:=@Ansi2WideMove;
 | 
						||
 | 
						||
      UpperWideStringProc:=@UpperWideString;
 | 
						||
      LowerWideStringProc:=@LowerWideString;
 | 
						||
 | 
						||
      CompareWideStringProc:=@CompareWideString;
 | 
						||
      CompareTextWideStringProc:=@CompareTextWideString;
 | 
						||
      {
 | 
						||
      CharLengthPCharProc
 | 
						||
 | 
						||
      UpperAnsiStringProc
 | 
						||
      LowerAnsiStringProc
 | 
						||
      CompareStrAnsiStringProc
 | 
						||
      CompareTextAnsiStringProc
 | 
						||
      }
 | 
						||
      StrCompAnsiStringProc:=@StrCompAnsi;
 | 
						||
      {
 | 
						||
      StrICompAnsiStringProc
 | 
						||
      StrLCompAnsiStringProc
 | 
						||
      StrLICompAnsiStringProc
 | 
						||
      StrLowerAnsiStringProc
 | 
						||
      StrUpperAnsiStringProc
 | 
						||
      }
 | 
						||
      ThreadInitProc:=@InitThread;
 | 
						||
      ThreadFiniProc:=@FiniThread;
 | 
						||
    end;
 | 
						||
  SetWideStringManager(CWideStringManager);
 | 
						||
end;
 | 
						||
 | 
						||
 | 
						||
initialization
 | 
						||
  SetCWideStringManager;
 | 
						||
 | 
						||
  { you have to call setlocale(LC_ALL,'') to initialise the langinfo stuff  }
 | 
						||
  { with the information from the environment variables according to POSIX  }
 | 
						||
  { (some OSes do this automatically, but e.g. Darwin and Solaris don't)    }
 | 
						||
  setlocale(LC_ALL,'');
 | 
						||
 | 
						||
  { init conversion tables for main program }
 | 
						||
  InitThread;
 | 
						||
finalization
 | 
						||
  { fini conversion tables for main program }
 | 
						||
  FiniThread;
 | 
						||
end.
 |