+ AIX support for cwstring

o AIX-specific codepage names
   o AIX/ppc32 uses a 16 bit wchar_t
   o special support for handling cp866 on AIX: AIX only supports converting
     cp866 from/to ISO8859-5 -> always use that code page as an intermediate
     one on AIX (it's lossy, but better than completely failing)
  * in case the default system code page cannot be detected, fall back to
    UTF-8

git-svn-id: trunk@20839 -
This commit is contained in:
Jonas Maebe 2012-04-11 18:06:22 +00:00
parent 2c1f98320f
commit 1e9b92ee50
4 changed files with 869 additions and 422 deletions

1
.gitattributes vendored
View File

@ -8350,6 +8350,7 @@ rtl/unix/convert.inc svneol=native#text/plain
rtl/unix/crt.pp svneol=native#text/plain
rtl/unix/cthreads.pp svneol=native#text/plain
rtl/unix/ctypes.inc svneol=native#text/plain
rtl/unix/cwstraix.inc svneol=native#text/plain
rtl/unix/cwstring.pp svneol=native#text/plain
rtl/unix/dl.pp svneol=native#text/plain
rtl/unix/dos.pp svneol=native#text/plain

106
rtl/unix/cwstraix.inc Normal file
View File

@ -0,0 +1,106 @@
{
This file is part of the Free Pascal run time library.
Copyright (c) 2012 by Jonas Maebe
Helper routines for cwstring AIX
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
function Ansi2AnsiMove(source:pchar; fromcp:TSystemCodePage; const tocp: pchar; out dest:rawbytestring; len:SizeInt): boolean;
var
outlength,
outoffset,
outleft : size_t;
use_iconv: iconv_t;
srcpos,
destpos: pchar;
mynil : pchar;
my0 : size_t;
err: cint;
begin
use_iconv:=open_iconv_for_cps(fromcp,tocp,true);
{ unsupported encoding -> default move }
if use_iconv=iconv_t(-1) then
exit(false);
mynil:=nil;
my0:=0;
// extra space
outlength:=len;
setlength(dest,outlength);
srcpos:=source;
destpos:=pchar(dest);
outleft:=outlength;
while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
begin
err:=fpgetCerrno;
case err of
ESysEINVAL,
ESysEILSEQ:
begin
{ skip and set to '?' }
inc(srcpos);
dec(len);
pchar(destpos)^:='?';
inc(destpos,2);
dec(outleft,2);
{ reset }
iconv(use_iconv,@mynil,@my0,@mynil,@my0);
if err=ESysEINVAL then
break;
end;
ESysE2BIG:
begin
outoffset:=destpos-pchar(dest);
{ extend }
setlength(dest,outlength+len);
inc(outleft,len);
inc(outlength,len);
{ string could have been moved }
destpos:=pchar(dest)+outoffset;
end;
else
runerror(231);
end;
end;
// truncate string
setlength(dest,length(dest)-outleft);
iconv_close(use_iconv);
result:=true;
end;
function handle_aix_intermediate(source: pchar; sourcecp: TSystemCodePage; out newcp: TSystemCodePage; out str: rawbytestring; len: SizeInt): boolean;
begin
result:=false;
{ for some reason, IBM's iconv only supports converting cp866 to/from
ISO8859-5. This conversion is lossy, but it's better than completely
failing. At least it keeps the cyrillic characters intact }
case sourcecp of
866:
begin
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'ISO8859-5',str, len);
if handle_aix_intermediate then
begin
newcp:=28595;
setcodepage(str,newcp,false);
end;
end;
28595:
begin
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'IBM-866',str, len);
if handle_aix_intermediate then
begin
newcp:=866;
setcodepage(str,newcp,false);
end;
end;
end;
end;

View File

@ -129,8 +129,12 @@ const
unicode_encoding2 = 'UTF-16LE';
unicode_encoding4 = 'UCS-4LE';
{$else FPC_LITTLE_ENDIAN}
{$ifdef AIX}
unicode_encoding2 = 'UTF-16';
{$else AIX}
unicode_encoding2 = 'UTF-16BE';
unicode_encoding4 = 'UCS-4BE';
{$endif AIX}
{$endif FPC_LITTLE_ENDIAN}
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
@ -193,19 +197,24 @@ threadvar
current_DefaultSystemCodePage: TSystemCodePage;
function win2iconv(cp: word): rawbytestring; forward;
{$i winiconv.inc}
procedure InitThread;
var
transliterate: cint;
iconvindex: longint;
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
iconvname: rawbytestring;
{$endif}
begin
current_DefaultSystemCodePage:=DefaultSystemCodePage;
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
iconvname:=win2iconv(DefaultSystemCodePage);
iconvindex:=win2iconv(DefaultSystemCodePage);
if iconvindex<>-1 then
iconvname:=win2iconv_arr[iconvindex].name
else
{ default to UTF-8 on Unix platforms }
iconvname:='UTF-8';
iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
{$else}
@ -231,9 +240,6 @@ begin
end;
{$i winiconv.inc}
{$if defined(beos) and not defined(haiku)}
function nl_langinfo(__item:nl_item):pchar;
begin
@ -250,6 +256,37 @@ begin
end;
{$endif}
function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
var
iconvindex: longint;
begin
{ TODO: add caching (then we also don't need separate code for
the default system page and other ones)
-- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no
problem }
open_iconv_for_cps:=iconv_t(-1);
iconvindex:=win2iconv(cp);
if iconvindex=-1 then
exit;
repeat
if cp_is_from then
open_iconv_for_cps:=iconv_open(otherencoding,pchar(win2iconv_arr[iconvindex].name))
else
open_iconv_for_cps:=iconv_open(pchar(win2iconv_arr[iconvindex].name),otherencoding);
inc(iconvindex);
until (open_iconv_for_cps<>iconv_t(-1)) or
(iconvindex>high(win2iconv_arr)) or
(win2iconv_arr[iconvindex].cp<>cp);
end;
{$ifdef aix}
{$i cwstraix.inc}
{$endif aix}
procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
var
outlength,
@ -261,10 +298,23 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
destpos: pchar;
mynil : pchar;
my0 : size_t;
err,
err : longint;
transliterate: cint;
free_iconv: boolean;
{$ifdef aix}
intermediate: rawbytestring;
{$endif aix}
begin
{$ifdef aix}
{ AIX libiconv does not support converting cp866 to anything else except
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
if cp=866 then
begin
Wide2AnsiMove(source,intermediate,28595,len);
if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
exit;
end;
{$endif aix}
if (cp=DefaultSystemCodePage) then
begin
{ update iconv converter in case the DefaultSystemCodePage has been
@ -279,14 +329,9 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
end
else
begin
{ TODO: add caching (then we also don't need separate code for
the default system page and other ones)
-- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no
problem }
use_iconv:=iconv_open(pchar(win2iconv(cp)),unicode_encoding2);
if assigned(iconvctl) then
use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
if (use_iconv<>iconv_t(-1)) and
assigned(iconvctl) then
begin
transliterate:=1;
iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
@ -361,8 +406,21 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
mynil : pchar;
my0 : size_t;
err: cint;
iconvindex: longint;
free_iconv: boolean;
{$ifdef aix}
intermediate: rawbytestring;
{$endif aix}
begin
{$ifdef aix}
{ AIX libiconv does not support converting cp866 to anything else except
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
if cp=866 then
begin
if handle_aix_intermediate(source,cp,cp,intermediate,len) then
source:=pchar(intermediate);
end;
{$endif aix}
if (cp=DefaultSystemCodePage) then
begin
{ update iconv converter in case the DefaultSystemCodePage has been
@ -383,7 +441,7 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
-- typecasting an ansistring function result to pchar is
unsafe normally, but these are constant strings -> no
problem }
use_iconv:=iconv_open(unicode_encoding2,pchar(win2iconv(cp)));
use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
free_iconv:=true;
end;
{ unsupported encoding -> default move }
@ -682,6 +740,7 @@ function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
function CompareWideString(const s1, s2 : WideString) : PtrInt;
{$if not(defined (aix) and defined(cpupowerpc32))}
var
hs1,hs2 : UCS4String;
begin
@ -690,6 +749,32 @@ function CompareWideString(const s1, s2 : WideString) : PtrInt;
hs2:=WideStringToUCS4StringNoNulls(s2);
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
end;
{$else}
{ AIX/PPC32 has a 16 bit wchar_t }
var
i, len: longint;
hs1, hs2: array of widechar;
begin
len:=length(s1);
setlength(hs1,len+1);
for i:=1 to len do
if s1[i]<>#0 then
hs1[i-1]:=s1[i]
else
hs1[i-1]:=#32;
hs1[len]:=#0;
len:=length(s2);
setlength(hs2,len+1);
for i:=1 to len do
if s2[i]<>#0 then
hs2[i-1]:=s2[i]
else
hs2[i-1]:=#32;
hs2[len]:=#0;
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
end;
{$endif}
function CompareTextWideString(const s1, s2 : WideString): PtrInt;
@ -728,7 +813,6 @@ function CharLengthPChar(const Str: PChar): PtrInt;
function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
var
nextlen: ptrint;
{$ifndef beos}
mbstate: mbstate_t;
{$endif not beos}

File diff suppressed because it is too large Load Diff