mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-12-06 11:07:30 +01:00
+ AIX support for cwstring
o AIX-specific codepage names
o AIX/ppc32 uses a 16 bit wchar_t
o special support for handling cp866 on AIX: AIX only supports converting
cp866 from/to ISO8859-5 -> always use that code page as an intermediate
one on AIX (it's lossy, but better than completely failing)
* in case the default system code page cannot be detected, fall back to
UTF-8
git-svn-id: trunk@20839 -
This commit is contained in:
parent
2c1f98320f
commit
1e9b92ee50
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -8350,6 +8350,7 @@ rtl/unix/convert.inc svneol=native#text/plain
|
|||||||
rtl/unix/crt.pp svneol=native#text/plain
|
rtl/unix/crt.pp svneol=native#text/plain
|
||||||
rtl/unix/cthreads.pp svneol=native#text/plain
|
rtl/unix/cthreads.pp svneol=native#text/plain
|
||||||
rtl/unix/ctypes.inc svneol=native#text/plain
|
rtl/unix/ctypes.inc svneol=native#text/plain
|
||||||
|
rtl/unix/cwstraix.inc svneol=native#text/plain
|
||||||
rtl/unix/cwstring.pp svneol=native#text/plain
|
rtl/unix/cwstring.pp svneol=native#text/plain
|
||||||
rtl/unix/dl.pp svneol=native#text/plain
|
rtl/unix/dl.pp svneol=native#text/plain
|
||||||
rtl/unix/dos.pp svneol=native#text/plain
|
rtl/unix/dos.pp svneol=native#text/plain
|
||||||
|
|||||||
106
rtl/unix/cwstraix.inc
Normal file
106
rtl/unix/cwstraix.inc
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
{
|
||||||
|
This file is part of the Free Pascal run time library.
|
||||||
|
Copyright (c) 2012 by Jonas Maebe
|
||||||
|
|
||||||
|
Helper routines for cwstring AIX
|
||||||
|
|
||||||
|
See the file COPYING.FPC, included in this distribution,
|
||||||
|
for details about the copyright.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
**********************************************************************}
|
||||||
|
|
||||||
|
function Ansi2AnsiMove(source:pchar; fromcp:TSystemCodePage; const tocp: pchar; out dest:rawbytestring; len:SizeInt): boolean;
|
||||||
|
var
|
||||||
|
outlength,
|
||||||
|
outoffset,
|
||||||
|
outleft : size_t;
|
||||||
|
use_iconv: iconv_t;
|
||||||
|
srcpos,
|
||||||
|
destpos: pchar;
|
||||||
|
mynil : pchar;
|
||||||
|
my0 : size_t;
|
||||||
|
err: cint;
|
||||||
|
begin
|
||||||
|
use_iconv:=open_iconv_for_cps(fromcp,tocp,true);
|
||||||
|
{ unsupported encoding -> default move }
|
||||||
|
if use_iconv=iconv_t(-1) then
|
||||||
|
exit(false);
|
||||||
|
mynil:=nil;
|
||||||
|
my0:=0;
|
||||||
|
// extra space
|
||||||
|
outlength:=len;
|
||||||
|
setlength(dest,outlength);
|
||||||
|
srcpos:=source;
|
||||||
|
destpos:=pchar(dest);
|
||||||
|
outleft:=outlength;
|
||||||
|
while iconv(use_iconv,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
||||||
|
begin
|
||||||
|
err:=fpgetCerrno;
|
||||||
|
case err of
|
||||||
|
ESysEINVAL,
|
||||||
|
ESysEILSEQ:
|
||||||
|
begin
|
||||||
|
{ skip and set to '?' }
|
||||||
|
inc(srcpos);
|
||||||
|
dec(len);
|
||||||
|
pchar(destpos)^:='?';
|
||||||
|
inc(destpos,2);
|
||||||
|
dec(outleft,2);
|
||||||
|
{ reset }
|
||||||
|
iconv(use_iconv,@mynil,@my0,@mynil,@my0);
|
||||||
|
if err=ESysEINVAL then
|
||||||
|
break;
|
||||||
|
end;
|
||||||
|
ESysE2BIG:
|
||||||
|
begin
|
||||||
|
outoffset:=destpos-pchar(dest);
|
||||||
|
{ extend }
|
||||||
|
setlength(dest,outlength+len);
|
||||||
|
inc(outleft,len);
|
||||||
|
inc(outlength,len);
|
||||||
|
{ string could have been moved }
|
||||||
|
destpos:=pchar(dest)+outoffset;
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
runerror(231);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
// truncate string
|
||||||
|
setlength(dest,length(dest)-outleft);
|
||||||
|
iconv_close(use_iconv);
|
||||||
|
result:=true;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function handle_aix_intermediate(source: pchar; sourcecp: TSystemCodePage; out newcp: TSystemCodePage; out str: rawbytestring; len: SizeInt): boolean;
|
||||||
|
begin
|
||||||
|
result:=false;
|
||||||
|
{ for some reason, IBM's iconv only supports converting cp866 to/from
|
||||||
|
ISO8859-5. This conversion is lossy, but it's better than completely
|
||||||
|
failing. At least it keeps the cyrillic characters intact }
|
||||||
|
case sourcecp of
|
||||||
|
866:
|
||||||
|
begin
|
||||||
|
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'ISO8859-5',str, len);
|
||||||
|
if handle_aix_intermediate then
|
||||||
|
begin
|
||||||
|
newcp:=28595;
|
||||||
|
setcodepage(str,newcp,false);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
28595:
|
||||||
|
begin
|
||||||
|
handle_aix_intermediate:=Ansi2AnsiMove(source,sourcecp,'IBM-866',str, len);
|
||||||
|
if handle_aix_intermediate then
|
||||||
|
begin
|
||||||
|
newcp:=866;
|
||||||
|
setcodepage(str,newcp,false);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
@ -129,8 +129,12 @@ const
|
|||||||
unicode_encoding2 = 'UTF-16LE';
|
unicode_encoding2 = 'UTF-16LE';
|
||||||
unicode_encoding4 = 'UCS-4LE';
|
unicode_encoding4 = 'UCS-4LE';
|
||||||
{$else FPC_LITTLE_ENDIAN}
|
{$else FPC_LITTLE_ENDIAN}
|
||||||
|
{$ifdef AIX}
|
||||||
|
unicode_encoding2 = 'UTF-16';
|
||||||
|
{$else AIX}
|
||||||
unicode_encoding2 = 'UTF-16BE';
|
unicode_encoding2 = 'UTF-16BE';
|
||||||
unicode_encoding4 = 'UCS-4BE';
|
unicode_encoding4 = 'UCS-4BE';
|
||||||
|
{$endif AIX}
|
||||||
{$endif FPC_LITTLE_ENDIAN}
|
{$endif FPC_LITTLE_ENDIAN}
|
||||||
|
|
||||||
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
||||||
@ -193,19 +197,24 @@ threadvar
|
|||||||
current_DefaultSystemCodePage: TSystemCodePage;
|
current_DefaultSystemCodePage: TSystemCodePage;
|
||||||
|
|
||||||
|
|
||||||
function win2iconv(cp: word): rawbytestring; forward;
|
{$i winiconv.inc}
|
||||||
|
|
||||||
|
|
||||||
procedure InitThread;
|
procedure InitThread;
|
||||||
var
|
var
|
||||||
transliterate: cint;
|
transliterate: cint;
|
||||||
|
iconvindex: longint;
|
||||||
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
|
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
|
||||||
iconvname: rawbytestring;
|
iconvname: rawbytestring;
|
||||||
{$endif}
|
{$endif}
|
||||||
begin
|
begin
|
||||||
current_DefaultSystemCodePage:=DefaultSystemCodePage;
|
current_DefaultSystemCodePage:=DefaultSystemCodePage;
|
||||||
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
|
{$if not(defined(darwin) and defined(cpuarm)) and not defined(iphonesim)}
|
||||||
iconvname:=win2iconv(DefaultSystemCodePage);
|
iconvindex:=win2iconv(DefaultSystemCodePage);
|
||||||
|
if iconvindex<>-1 then
|
||||||
|
iconvname:=win2iconv_arr[iconvindex].name
|
||||||
|
else
|
||||||
|
{ default to UTF-8 on Unix platforms }
|
||||||
|
iconvname:='UTF-8';
|
||||||
iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
|
iconv_wide2ansi:=iconv_open(pchar(iconvname),unicode_encoding2);
|
||||||
iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
|
iconv_ansi2wide:=iconv_open(unicode_encoding2,pchar(iconvname));
|
||||||
{$else}
|
{$else}
|
||||||
@ -231,9 +240,6 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
{$i winiconv.inc}
|
|
||||||
|
|
||||||
|
|
||||||
{$if defined(beos) and not defined(haiku)}
|
{$if defined(beos) and not defined(haiku)}
|
||||||
function nl_langinfo(__item:nl_item):pchar;
|
function nl_langinfo(__item:nl_item):pchar;
|
||||||
begin
|
begin
|
||||||
@ -250,6 +256,37 @@ begin
|
|||||||
end;
|
end;
|
||||||
{$endif}
|
{$endif}
|
||||||
|
|
||||||
|
|
||||||
|
function open_iconv_for_cps(cp: TSystemCodePage; const otherencoding: pchar; cp_is_from: boolean): iconv_t;
|
||||||
|
var
|
||||||
|
iconvindex: longint;
|
||||||
|
begin
|
||||||
|
{ TODO: add caching (then we also don't need separate code for
|
||||||
|
the default system page and other ones)
|
||||||
|
|
||||||
|
-- typecasting an ansistring function result to pchar is
|
||||||
|
unsafe normally, but these are constant strings -> no
|
||||||
|
problem }
|
||||||
|
open_iconv_for_cps:=iconv_t(-1);
|
||||||
|
iconvindex:=win2iconv(cp);
|
||||||
|
if iconvindex=-1 then
|
||||||
|
exit;
|
||||||
|
repeat
|
||||||
|
if cp_is_from then
|
||||||
|
open_iconv_for_cps:=iconv_open(otherencoding,pchar(win2iconv_arr[iconvindex].name))
|
||||||
|
else
|
||||||
|
open_iconv_for_cps:=iconv_open(pchar(win2iconv_arr[iconvindex].name),otherencoding);
|
||||||
|
inc(iconvindex);
|
||||||
|
until (open_iconv_for_cps<>iconv_t(-1)) or
|
||||||
|
(iconvindex>high(win2iconv_arr)) or
|
||||||
|
(win2iconv_arr[iconvindex].cp<>cp);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{$ifdef aix}
|
||||||
|
{$i cwstraix.inc}
|
||||||
|
{$endif aix}
|
||||||
|
|
||||||
procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
|
procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCodePage; len:SizeInt);
|
||||||
var
|
var
|
||||||
outlength,
|
outlength,
|
||||||
@ -261,10 +298,23 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
|
|||||||
destpos: pchar;
|
destpos: pchar;
|
||||||
mynil : pchar;
|
mynil : pchar;
|
||||||
my0 : size_t;
|
my0 : size_t;
|
||||||
err,
|
err : longint;
|
||||||
transliterate: cint;
|
transliterate: cint;
|
||||||
free_iconv: boolean;
|
free_iconv: boolean;
|
||||||
|
{$ifdef aix}
|
||||||
|
intermediate: rawbytestring;
|
||||||
|
{$endif aix}
|
||||||
begin
|
begin
|
||||||
|
{$ifdef aix}
|
||||||
|
{ AIX libiconv does not support converting cp866 to anything else except
|
||||||
|
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
|
||||||
|
if cp=866 then
|
||||||
|
begin
|
||||||
|
Wide2AnsiMove(source,intermediate,28595,len);
|
||||||
|
if handle_aix_intermediate(pchar(intermediate),28595,cp,dest,len) then
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
{$endif aix}
|
||||||
if (cp=DefaultSystemCodePage) then
|
if (cp=DefaultSystemCodePage) then
|
||||||
begin
|
begin
|
||||||
{ update iconv converter in case the DefaultSystemCodePage has been
|
{ update iconv converter in case the DefaultSystemCodePage has been
|
||||||
@ -279,14 +329,9 @@ procedure Wide2AnsiMove(source:pwidechar; var dest:RawByteString; cp:TSystemCode
|
|||||||
end
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
{ TODO: add caching (then we also don't need separate code for
|
use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,false);
|
||||||
the default system page and other ones)
|
if (use_iconv<>iconv_t(-1)) and
|
||||||
|
assigned(iconvctl) then
|
||||||
-- typecasting an ansistring function result to pchar is
|
|
||||||
unsafe normally, but these are constant strings -> no
|
|
||||||
problem }
|
|
||||||
use_iconv:=iconv_open(pchar(win2iconv(cp)),unicode_encoding2);
|
|
||||||
if assigned(iconvctl) then
|
|
||||||
begin
|
begin
|
||||||
transliterate:=1;
|
transliterate:=1;
|
||||||
iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
|
iconvctl(use_iconv,ICONV_SET_TRANSLITERATE,@transliterate);
|
||||||
@ -361,8 +406,21 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
|
|||||||
mynil : pchar;
|
mynil : pchar;
|
||||||
my0 : size_t;
|
my0 : size_t;
|
||||||
err: cint;
|
err: cint;
|
||||||
|
iconvindex: longint;
|
||||||
free_iconv: boolean;
|
free_iconv: boolean;
|
||||||
|
{$ifdef aix}
|
||||||
|
intermediate: rawbytestring;
|
||||||
|
{$endif aix}
|
||||||
begin
|
begin
|
||||||
|
{$ifdef aix}
|
||||||
|
{ AIX libiconv does not support converting cp866 to anything else except
|
||||||
|
for iso-8859-5 -> always first convert to iso-8859-5, then to UTF-16 }
|
||||||
|
if cp=866 then
|
||||||
|
begin
|
||||||
|
if handle_aix_intermediate(source,cp,cp,intermediate,len) then
|
||||||
|
source:=pchar(intermediate);
|
||||||
|
end;
|
||||||
|
{$endif aix}
|
||||||
if (cp=DefaultSystemCodePage) then
|
if (cp=DefaultSystemCodePage) then
|
||||||
begin
|
begin
|
||||||
{ update iconv converter in case the DefaultSystemCodePage has been
|
{ update iconv converter in case the DefaultSystemCodePage has been
|
||||||
@ -383,7 +441,7 @@ procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; l
|
|||||||
-- typecasting an ansistring function result to pchar is
|
-- typecasting an ansistring function result to pchar is
|
||||||
unsafe normally, but these are constant strings -> no
|
unsafe normally, but these are constant strings -> no
|
||||||
problem }
|
problem }
|
||||||
use_iconv:=iconv_open(unicode_encoding2,pchar(win2iconv(cp)));
|
use_iconv:=open_iconv_for_cps(cp,unicode_encoding2,true);
|
||||||
free_iconv:=true;
|
free_iconv:=true;
|
||||||
end;
|
end;
|
||||||
{ unsupported encoding -> default move }
|
{ unsupported encoding -> default move }
|
||||||
@ -682,6 +740,7 @@ function WideStringToUCS4StringNoNulls(const s : WideString) : UCS4String;
|
|||||||
|
|
||||||
|
|
||||||
function CompareWideString(const s1, s2 : WideString) : PtrInt;
|
function CompareWideString(const s1, s2 : WideString) : PtrInt;
|
||||||
|
{$if not(defined (aix) and defined(cpupowerpc32))}
|
||||||
var
|
var
|
||||||
hs1,hs2 : UCS4String;
|
hs1,hs2 : UCS4String;
|
||||||
begin
|
begin
|
||||||
@ -690,6 +749,32 @@ function CompareWideString(const s1, s2 : WideString) : PtrInt;
|
|||||||
hs2:=WideStringToUCS4StringNoNulls(s2);
|
hs2:=WideStringToUCS4StringNoNulls(s2);
|
||||||
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
|
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
|
||||||
end;
|
end;
|
||||||
|
{$else}
|
||||||
|
{ AIX/PPC32 has a 16 bit wchar_t }
|
||||||
|
var
|
||||||
|
i, len: longint;
|
||||||
|
hs1, hs2: array of widechar;
|
||||||
|
begin
|
||||||
|
len:=length(s1);
|
||||||
|
setlength(hs1,len+1);
|
||||||
|
for i:=1 to len do
|
||||||
|
if s1[i]<>#0 then
|
||||||
|
hs1[i-1]:=s1[i]
|
||||||
|
else
|
||||||
|
hs1[i-1]:=#32;
|
||||||
|
hs1[len]:=#0;
|
||||||
|
|
||||||
|
len:=length(s2);
|
||||||
|
setlength(hs2,len+1);
|
||||||
|
for i:=1 to len do
|
||||||
|
if s2[i]<>#0 then
|
||||||
|
hs2[i-1]:=s2[i]
|
||||||
|
else
|
||||||
|
hs2[i-1]:=#32;
|
||||||
|
hs2[len]:=#0;
|
||||||
|
result:=wcscoll(pwchar_t(hs1),pwchar_t(hs2));
|
||||||
|
end;
|
||||||
|
{$endif}
|
||||||
|
|
||||||
|
|
||||||
function CompareTextWideString(const s1, s2 : WideString): PtrInt;
|
function CompareTextWideString(const s1, s2 : WideString): PtrInt;
|
||||||
@ -728,7 +813,6 @@ function CharLengthPChar(const Str: PChar): PtrInt;
|
|||||||
|
|
||||||
function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
|
function CodePointLength(const Str: PChar; maxlookahead: ptrint): PtrInt;
|
||||||
var
|
var
|
||||||
nextlen: ptrint;
|
|
||||||
{$ifndef beos}
|
{$ifndef beos}
|
||||||
mbstate: mbstate_t;
|
mbstate: mbstate_t;
|
||||||
{$endif not beos}
|
{$endif not beos}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user