+ AIX support for cwstring

o AIX-specific codepage names
   o AIX/ppc32 uses a 16 bit wchar_t
   o special support for handling cp866 on AIX: AIX only supports converting
     cp866 from/to ISO8859-5 -> always use that code page as an intermediate
     one on AIX (it's lossy, but better than completely failing)
  * in case the default system code page cannot be detected, fall back to
    UTF-8

git-svn-id: trunk@20839 -
This commit is contained in:
Jonas Maebe 2012-04-11 18:06:22 +00:00
parent 2c1f98320f
commit 1e9b92ee50
4 changed files with 869 additions and 422 deletions

1
.gitattributes vendored
View File

@ -8350,6 +8350,7 @@ rtl/unix/convert.inc svneol=native#text/plain
rtl/unix/crt.pp svneol=native#text/plain rtl/unix/crt.pp svneol=native#text/plain
rtl/unix/cthreads.pp svneol=native#text/plain rtl/unix/cthreads.pp svneol=native#text/plain
rtl/unix/ctypes.inc svneol=native#text/plain rtl/unix/ctypes.inc svneol=native#text/plain
rtl/unix/cwstraix.inc svneol=native#text/plain
rtl/unix/cwstring.pp svneol=native#text/plain rtl/unix/cwstring.pp svneol=native#text/plain
rtl/unix/dl.pp svneol=native#text/plain rtl/unix/dl.pp svneol=native#text/plain
rtl/unix/dos.pp svneol=native#text/plain rtl/unix/dos.pp svneol=native#text/plain

106
rtl/unix/cwstraix.inc Normal file
View File

@ -0,0 +1,106 @@
{
This file is part of the Free Pascal run time library.
Copyright (c) 2012 by Jonas Maebe
Helper routines for cwstring AIX
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
function Ansi2AnsiMove(source:pchar; fromcp:TSystemCodePage; const tocp: pchar; out dest:rawbytestring; len:SizeInt): boolean;
var
outlength,
outoffset,
outleft : size_t;
use_iconv: iconv_t;
srcpos,
destpos: pchar;
mynil : pchar;
my0 : size_t;
err: cint;
begin
use_iconv:=open_iconv_for_cps(fromcp,tocp,true);
{ unsupported encoding -> default move }
if use_iconv=iconv_t(-1) then
exit(false);
mynil:=nil;
my0:=0;
// extra space
outlength:=len;
setlength(dest,outlength);
srcpos:=source;
destpos:=pchar(dest);
outleft:=outlength;
while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
begin
err:=fpgetCerrno;
case err of
ESysEINVAL,
ESysEILSEQ:
begin
{ skip and set to '?' }
inc(srcpos);
dec(len);
pchar(destpos)^:='?';
inc(destpos,2);
dec(outleft,2);
{ reset }
iconv(use_iconv,@mynil,@my0,@mynil,@my0);
if err=ESysEINVAL then
break;
end;
ESysE2BIG:
begin
outoffset:=destpos-pchar(dest);
{ extend }
setlength(dest,outlength+len);
inc(outleft,len);
inc(outlength,len);
{ string could have been moved }
destpos:=pchar(dest)+outoffset;
end;
else
runerror(231);
end;
end;
// truncate string
setlength(dest,length(dest)-outleft);
iconv_close(use_iconv);
result:=true;
end;
function handle_aix_intermediate(source: pchar; sourcecp: TSystemCodePage; out newcp: TSystemCodePage; out str: rawbytestring; len: SizeInt): boolean;
begin
result:=false;
{ for some reason, IBM's iconv only supports converting cp866 to/from
ISO8859-5. This conversion is lossy, but it's better than completely
failing. At least it keeps the cyrillic characters intact }
case sourcecp of
866:
begin
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'ISO8859-5',str, len);
if handle_aix_intermediate then
begin
newcp:=28595;
setcodepage(str,newcp,false);
end;
end;
28595:
begin
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'IBM-866',str, len);
if handle_aix_intermediate then
begin
newcp:=866;
setcodepage(str,newcp,false);
end;
end;
end;
end;

View File

@ -129,8 +129,12 @@ const
unicode_encoding2 = 'UTF-16LE'; unicode_encoding2 = 'UTF-16LE';
unicode_encoding4 = 'UCS-4LE'; unicode_encoding4 = 'UCS-4LE';
{$else FPC_LITTLE_ENDIAN} {$else FPC_LITTLE_ENDIAN}
{$ifdef AIX}
unicode_encoding2 = 'UTF-16';
{$else AIX}
unicode_encoding2 = 'UTF-16BE'; unicode_encoding2 = 'UTF-16BE';
unicode_encoding4 = 'UCS-4BE'; unicode_encoding4 = 'UCS-4BE';
{$endif AIX}
{$endif FPC_LITTLE_ENDIAN} {$endif FPC_LITTLE_ENDIAN}
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 } { en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
@ -193,19 +197,24 @@ threadvar
current_DefaultSystemCodePage: TSystemCodePage; current_DefaultSystemCodePage: TSystemCodePage;
function win2iconv(cp: word): rawbytestring; forward; {$i winiconv.inc}
procedure InitThread; procedure InitThread;
var var
transliterate: cint; transliterate: cint;
iconvindex: longint;
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)} {$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
iconvname: rawbytestring; iconvname: rawbytestring;
{$endif} {$endif}
begin begin
current_DefaultSystemCodePage:=DefaultSystemCodePage; current_DefaultSystemCodePage:=DefaultSystemCodePage;
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)} {$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
iconvname:=win2iconv(DefaultSystemCodePage); iconvindex:=win2iconv(DefaultSystemCodePage);
if iconvindex<>-1 then
iconvname:=win2iconv_arr[iconvindex].name
else
{ default to UTF-8 on Unix platforms }
iconvname:='UTF-8';
iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2); iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname)); iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
{$else} {$else}
@ -231,9 +240,6 @@ begin
end; end;
{$i winiconv.inc}
{$if defined(beos) and not defined(haiku)} {$if defined(beos) and not defined(haiku)}
function nl_langinfo(__item:nl_item):pchar; function nl_langinfo(__item:nl_item):pchar;
begin begin
@ -250,6 +256,37 @@ begin
end; end;
{$endif} {$endif}
function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
var
iconvindex: longint;
begin
{ TODO: add caching (then we also don't need separate code for
the default system page and other ones)
-- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no
problem }
open_iconv_for_cps:=iconv_t(-1);
iconvindex:=win2iconv(cp);
if iconvindex=-1 then
exit;
repeat
if cp_is_from then
open_iconv_for_cps:=iconv_open(otherencoding,pchar(win2iconv_arr[iconvindex].name))
else
open_iconv_for_cps:=iconv_open(pchar(win2iconv_arr[iconvindex].name),otherencoding);
inc(iconvindex);
until (open_iconv_for_cps<>iconv_t(-1)) or
(iconvindex>high(win2iconv_arr)) or
(win2iconv_arr[iconvindex].cp<>cp);
end;
{$ifdef aix}
{$i cwstraix.inc}
{$endif aix}
procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt); procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
var var
outlength, outlength,
@ -261,10 +298,23 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
destpos: pchar; destpos: pchar;
mynil : pchar; mynil : pchar;
my0 : size_t; my0 : size_t;
err, err : longint;
transliterate: cint; transliterate: cint;
free_iconv: boolean; free_iconv: boolean;
{$ifdef aix}
intermediate: rawbytestring;
{$endif aix}
begin begin
{$ifdef aix}
{ AIX libiconv does not support converting cp866 to anything else except
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
if cp=866 then
begin
Wide2AnsiMove(source,intermediate,28595,len);
if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
exit;
end;
{$endif aix}
if (cp=DefaultSystemCodePage) then if (cp=DefaultSystemCodePage) then
begin begin
{ update iconv converter in case the DefaultSystemCodePage has been { update iconv converter in case the DefaultSystemCodePage has been
@ -279,14 +329,9 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
end end
else else
begin begin
{ TODO: add caching (then we also don't need separate code for use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
the default system page and other ones) if (use_iconv<>iconv_t(-1)) and
assigned(iconvctl) then
-- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no
problem }
use_iconv:=iconv_open(pchar(win2iconv(cp)),unicode_encoding2);
if assigned(iconvctl) then
begin begin
transliterate:=1; transliterate:=1;
iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate); iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
@ -361,8 +406,21 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
mynil : pchar; mynil : pchar;
my0 : size_t; my0 : size_t;
err: cint; err: cint;
iconvindex: longint;
free_iconv: boolean; free_iconv: boolean;
{$ifdef aix}
intermediate: rawbytestring;
{$endif aix}
begin begin
{$ifdef aix}
{ AIX libiconv does not support converting cp866 to anything else except
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
if cp=866 then
begin
if handle_aix_intermediate(source,cp,cp,intermediate,len) then
source:=pchar(intermediate);
end;
{$endif aix}
if (cp=DefaultSystemCodePage) then if (cp=DefaultSystemCodePage) then
begin begin
{ update iconv converter in case the DefaultSystemCodePage has been { update iconv converter in case the DefaultSystemCodePage has been
@ -383,7 +441,7 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
-- typecasting an ansistring function result to pchar is -- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no unsafe normally, but these are constant strings -> no
problem } problem }
use_iconv:=iconv_open(unicode_encoding2,pchar(win2iconv(cp))); use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
free_iconv:=true; free_iconv:=true;
end; end;
{ unsupported encoding -> default move } { unsupported encoding -> default move }
@ -682,6 +740,7 @@ function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
function CompareWideString(const s1, s2 : WideString) : PtrInt; function CompareWideString(const s1, s2 : WideString) : PtrInt;
{$if not(defined (aix) and defined(cpupowerpc32))}
var var
hs1,hs2 : UCS4String; hs1,hs2 : UCS4String;
begin begin
@ -690,6 +749,32 @@ function CompareWideString(const s1, s2 : WideString) : PtrInt;
hs2:=WideStringToUCS4StringNoNulls(s2); hs2:=WideStringToUCS4StringNoNulls(s2);
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2)); result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
end; end;
{$else}
{ AIX/PPC32 has a 16 bit wchar_t }
var
i, len: longint;
hs1, hs2: array of widechar;
begin
len:=length(s1);
setlength(hs1,len+1);
for i:=1 to len do
if s1[i]<>#0 then
hs1[i-1]:=s1[i]
else
hs1[i-1]:=#32;
hs1[len]:=#0;
len:=length(s2);
setlength(hs2,len+1);
for i:=1 to len do
if s2[i]<>#0 then
hs2[i-1]:=s2[i]
else
hs2[i-1]:=#32;
hs2[len]:=#0;
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
end;
{$endif}
function CompareTextWideString(const s1, s2 : WideString): PtrInt; function CompareTextWideString(const s1, s2 : WideString): PtrInt;
@ -728,7 +813,6 @@ function CharLengthPChar(const Str: PChar): PtrInt;
function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt; function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
var var
nextlen: ptrint;
{$ifndef beos} {$ifndef beos}
mbstate: mbstate_t; mbstate: mbstate_t;
{$endif not beos} {$endif not beos}

File diff suppressed because it is too large Load Diff